ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pcgesvd.f
Go to the documentation of this file.
1 
2  SUBROUTINE pcgesvd(JOBU,JOBVT,M,N,A,IA,JA,DESCA,S,U,IU,JU,DESCU,
3  + VT,IVT,JVT,DESCVT,WORK,LWORK,RWORK,INFO)
4 *
5 * -- ScaLAPACK routine (version 1.7) --
6 * Univ. of Tennessee, Oak Ridge National Laboratory
7 * and Univ. of California Berkeley.
8 * Jan 2006
9 
10 *
11 * .. Scalar Arguments ..
12  CHARACTER JOBU,JOBVT
13  INTEGER IA,INFO,IU,IVT,JA,JU,JVT,LWORK,M,N
14 * ..
15 * .. Array Arguments ..
16  INTEGER DESCA(*),DESCU(*),DESCVT(*)
17  COMPLEX A(*),U(*),VT(*),WORK(*)
18  REAL S(*)
19  REAL RWORK(*)
20 * ..
21 *
22 * Purpose
23 * =======
24 *
25 * PCGESVD computes the singular value decomposition (SVD) of an
26 * M-by-N matrix A, optionally computing the left and/or right
27 * singular vectors. The SVD is written as
28 *
29 * A = U * SIGMA * transpose(V)
30 *
31 * where SIGMA is an M-by-N matrix which is zero except for its
32 * min(M,N) diagonal elements, U is an M-by-M orthogonal matrix, and
33 * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
34 * are the singular values of A and the columns of U and V are the
35 * corresponding right and left singular vectors, respectively. The
36 * singular values are returned in array S in decreasing order and
37 * only the first min(M,N) columns of U and rows of VT = V**T are
38 * computed.
39 *
40 * Notes
41 * =====
42 * Each global data object is described by an associated description
43 * vector. This vector stores the information required to establish
44 * the mapping between an object element and its corresponding process
45 * and memory location.
46 *
47 * Let A be a generic term for any 2D block cyclicly distributed array.
48 * Such a global array has an associated description vector DESCA.
49 * In the following comments, the character _ should be read as
50 * "of the global array".
51 *
52 * NOTATION STORED IN EXPLANATION
53 * --------------- -------------- --------------------------------------
54 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
55 * DTYPE_A = 1.
56 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
57 * the BLACS process grid A is distribu-
58 * ted over. The context itself is glo-
59 * bal, but the handle (the integer
60 * value) may vary.
61 * M_A (global) DESCA( M_ ) The number of rows in the global
62 * array A.
63 * N_A (global) DESCA( N_ ) The number of columns in the global
64 * array A.
65 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
66 * the rows of the array.
67 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
68 * the columns of the array.
69 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
70 * row of the array A is distributed.
71 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
72 * first column of the array A is
73 * distributed.
74 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
75 * array. LLD_A >= MAX(1,LOCr(M_A)).
76 *
77 * Let K be the number of rows or columns of a distributed matrix, and
78 * assume that its process grid has dimension r x c. LOCr( K ) denotes
79 * the number of elements of K that a process would receive if K were
80 * distributed over the r processes of its process column. Similarly,
81 * LOCc( K ) denotes the number of elements of K that a process would
82 * receive if K were distributed over the c processes of its process
83 * row. The values of LOCr() and LOCc() may be determined via a call
84 * to the ScaLAPACK tool function, NUMROC:
85 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
86 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
87 * An upper bound for these quantities may be computed by:
88 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
89 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
90 *
91 * Arguments
92 * =========
93 *
94 * MP = number of local rows in A and U
95 * NQ = number of local columns in A and VT
96 * SIZE = min( M, N )
97 * SIZEQ = number of local columns in U
98 * SIZEP = number of local rows in VT
99 *
100 * JOBU (global input) CHARACTER*1
101 * Specifies options for computing U:
102 * = 'V': the first SIZE columns of U (the left singular
103 * vectors) are returned in the array U;
104 * = 'N': no columns of U (no left singular vectors) are
105 * computed.
106 *
107 * JOBVT (global input) CHARACTER*1
108 * Specifies options for computing V**T:
109 * = 'V': the first SIZE rows of V**T (the right singular
110 * vectors) are returned in the array VT;
111 * = 'N': no rows of V**T (no right singular vectors) are
112 * computed.
113 *
114 * M (global input) INTEGER
115 * The number of rows of the input matrix A. M >= 0.
116 *
117 * N (global input) INTEGER
118 * The number of columns of the input matrix A. N >= 0.
119 *
120 * A (local input/workspace) block cyclic COMPLEX
121 * array,
122 * global dimension (M, N), local dimension (MP, NQ)
123 * On exit, the contents of A are destroyed.
124 *
125 * IA (global input) INTEGER
126 * The row index in the global array A indicating the first
127 * row of sub( A ).
128 *
129 * JA (global input) INTEGER
130 * The column index in the global array A indicating the
131 * first column of sub( A ).
132 *
133 * DESCA (global input) INTEGER array of dimension DLEN_
134 * The array descriptor for the distributed matrix A.
135 *
136 * S (global output) REAL array, dimension SIZE
137 * The singular values of A, sorted so that S(i) >= S(i+1).
138 *
139 * U (local output) COMPLEX array, local dimension
140 * (MP, SIZEQ), global dimension (M, SIZE)
141 * if JOBU = 'V', U contains the first min(m,n) columns of U
142 * if JOBU = 'N', U is not referenced.
143 *
144 * IU (global input) INTEGER
145 * The row index in the global array U indicating the first
146 * row of sub( U ).
147 *
148 * JU (global input) INTEGER
149 * The column index in the global array U indicating the
150 * first column of sub( U ).
151 *
152 * DESCU (global input) INTEGER array of dimension DLEN_
153 * The array descriptor for the distributed matrix U.
154 *
155 * VT (local output) COMPLEX array, local dimension
156 * (SIZEP, NQ), global dimension (SIZE, N).
157 * If JOBVT = 'V', VT contains the first SIZE rows of
158 * V**T. If JOBVT = 'N', VT is not referenced.
159 *
160 * IVT (global input) INTEGER
161 * The row index in the global array VT indicating the first
162 * row of sub( VT ).
163 *
164 * JVT (global input) INTEGER
165 * The column index in the global array VT indicating the
166 * first column of sub( VT ).
167 *
168 * DESCVT (global input) INTEGER array of dimension DLEN_
169 * The array descriptor for the distributed matrix VT.
170 *
171 * WORK (local workspace/output) COMPLEX array, dimension
172 * (LWORK)
173 * On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
174 *
175 * LWORK (local input) INTEGER
176 * The dimension of the array WORK.
177 *
178 * LWORK >= 1 + 2*SIZEB + MAX(WATOBD, WBDTOSVD),
179 *
180 * where SIZEB = MAX(M,N), and WATOBD and WBDTOSVD refer,
181 * respectively, to the workspace required to bidiagonalize
182 * the matrix A and to go from the bidiagonal matrix to the
183 * singular value decomposition U*S*VT.
184 *
185 * For WATOBD, the following holds:
186 *
187 * WATOBD = MAX(MAX(WPCLANGE,WPCGEBRD),
188 * MAX(WPCLARED2D,WP(pre)LARED1D)),
189 *
190 * where WPCLANGE, WPCLARED1D, WPCLARED2D, WPCGEBRD are the
191 * workspaces required respectively for the subprograms
192 * PCLANGE, PSLARED1D, PSLARED2D, PCGEBRD. Using the
193 * standard notation
194 *
195 * MP = NUMROC( M, MB, MYROW, DESCA( CTXT_ ), NPROW),
196 * NQ = NUMROC( N, NB, MYCOL, DESCA( LLD_ ), NPCOL),
197 *
198 * the workspaces required for the above subprograms are
199 *
200 * WPCLANGE = MP,
201 * WPSLARED1D = NQ0,
202 * WPSLARED2D = MP0,
203 * WPCGEBRD = NB*(MP + NQ + 1) + NQ,
204 *
205 * where NQ0 and MP0 refer, respectively, to the values obtained
206 * at MYCOL = 0 and MYROW = 0. In general, the upper limit for
207 * the workspace is given by a workspace required on
208 * processor (0,0):
209 *
210 * WATOBD <= NB*(MP0 + NQ0 + 1) + NQ0.
211 *
212 * In case of a homogeneous process grid this upper limit can
213 * be used as an estimate of the minimum workspace for every
214 * processor.
215 *
216 * For WBDTOSVD, the following holds:
217 *
218 * WBDTOSVD = SIZE*(WANTU*NRU + WANTVT*NCVT) +
219 * MAX(WCBDSQR,
220 * MAX(WANTU*WPCORMBRQLN, WANTVT*WPCORMBRPRT)),
221 *
222 * where
223 *
224 * 1, if left(right) singular vectors are wanted
225 * WANTU(WANTVT) =
226 * 0, otherwise
227 *
228 * and WCBDSQR, WPCORMBRQLN and WPCORMBRPRT refer respectively
229 * to the workspace required for the subprograms CBDSQR,
230 * PCUNMBR(QLN), and PCUNMBR(PRT), where QLN and PRT are the
231 * values of the arguments VECT, SIDE, and TRANS in the call
232 * to PCUNMBR. NRU is equal to the local number of rows of
233 * the matrix U when distributed 1-dimensional "column" of
234 * processes. Analogously, NCVT is equal to the local number
235 * of columns of the matrix VT when distributed across
236 * 1-dimensional "row" of processes. Calling the LAPACK
237 * procedure CBDSQR requires
238 *
239 * WCBDSQR = MAX(1, 4*SIZE )
240 *
241 * on every processor. Finally,
242 *
243 * WPCORMBRQLN = MAX( (NB*(NB-1))/2, (SIZEQ+MP)*NB)+NB*NB,
244 * WPCORMBRPRT = MAX( (MB*(MB-1))/2, (SIZEP+NQ)*MB )+MB*MB,
245 *
246 * If LWORK = -1, then LWORK is global input and a workspace
247 * query is assumed; the routine only calculates the minimum
248 * size for the work array. The required workspace is returned
249 * as the first element of WORK and no error message is issued
250 * by PXERBLA.
251 *
252 * RWORK (workspace) REAL array, dimension (1+4*SIZEB)
253 * On exit, if INFO = 0, RWORK(1) returns the necessary size
254 * for RWORK.
255 *
256 * INFO (output) INTEGER
257 * = 0: successful exit
258 * < 0: if INFO = -i, the i-th argument had an illegal value
259 
260 * > 0: if CBDSQR did not converge
261 * If INFO = MIN(M,N) + 1, then PCGESVD has detected
262 * heterogeneity by finding that eigenvalues were not
263 * identical across the process grid. In this case, the
264 * accuracy of the results from PCGESVD cannot be
265 * guaranteed.
266 *
267 * =====================================================================
268 *
269 * The results of PCGEBRD, and therefore PCGESVD, may vary slightly
270 * from run to run with the same input data. If repeatability is an
271 * issue, call BLACS_SET with the appropriate option after defining
272 * the process grid.
273 *
274 * Alignment requirements
275 * ======================
276 *
277 * The routine PCGESVD inherits the same alignement requirement as
278 * the routine PCGEBRD, namely:
279 *
280 * The distributed submatrix sub( A ) must verify some alignment proper-
281 * ties, namely the following expressions should be true:
282 * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA )
283 * where NB = MB_A = NB_A,
284 * IROFFA = MOD( IA-1, NB ), ICOFFA = MOD( JA-1, NB ),
285 *
286 * =====================================================================
287 *
288 *
289 * .. Parameters ..
290  INTEGER BLOCK_CYCLIC_2D,DLEN_,DTYPE_,CTXT_,M_,N_,MB_,NB_,RSRC_,
291  + csrc_,lld_,ithval
292  parameter(block_cyclic_2d=1,dlen_=9,dtype_=1,ctxt_=2,m_=3,n_=4,
293  + mb_=5,nb_=6,rsrc_=7,csrc_=8,lld_=9,ithval=10)
294  COMPLEX ZERO,ONE
295  parameter(zero= ((0.0e+0,0.0e+0)),one= ((1.0e+0,0.0e+0)))
296  REAL DZERO,DONE
297  parameter(dzero=0.0d+0,done=1.0d+0)
298 * ..
299 * .. Local Scalars ..
300  CHARACTER UPLO
301  INTEGER CONTEXTC,CONTEXTR,I,INDD,INDD2,INDE,INDE2,INDTAUP,INDTAUQ,
302  + indu,indv,indwork,ioffd,ioffe,iscale,j,k,ldu,ldvt,llwork,
303  + lwmin,maxim,mb,mp,mypcol,mypcolc,mypcolr,myprow,myprowc,
304  + myprowr,nb,ncvt,npcol,npcolc,npcolr,nprocs,nprow,nprowc,
305  + nprowr,nq,nru,SIZE,sizeb,sizep,sizepos,sizeq,wantu,wantvt,
306  + watobd,wbdtosvd,wcbdsqr,wpcgebrd,wpclange,wpcormbrprt,
307  + wpcormbrqln
308  REAL ANRM,BIGNUM,EPS,RMAX,RMIN,SAFMIN,SIGMA,SMLNUM
309 * ..
310 * .. Local Arrays ..
311  INTEGER DESCTU(DLEN_),DESCTVT(DLEN_),IDUM1(3),IDUM2(3)
312  REAL C(1,1)
313 * ..
314 * .. External Functions ..
315  LOGICAL LSAME
316  INTEGER NUMROC
317  REAL PSLAMCH,PCLANGE
318  EXTERNAL lsame,numroc,pdlamch,pzlange
319 * ..
320 * .. External Subroutines ..
321  EXTERNAL blacs_get,blacs_gridexit,blacs_gridinfo,blacs_gridinit,
322  + chk1mat,cbdsqr,descinit,sgamn2d,sgamx2d,sscal,igamx2d,
323  + igebr2d,igebs2d,pchk1mat,pcgebrd,pcgemr2d,pslared1d,
325 * ..
326 * .. Intrinsic Functions ..
327  INTRINSIC max,min,sqrt,real
328  INTRINSIC cmplx
329 * ..
330 * .. Executable Statements ..
331 * This is just to keep ftnchek happy
332  IF (block_cyclic_2d*dtype_*lld_*mb_*m_*nb_*n_.LT.0) RETURN
333 *
334  CALL blacs_gridinfo(desca(ctxt_),nprow,npcol,myprow,mypcol)
335  iscale = 0
336  info = 0
337 *
338  IF (nprow.EQ.-1) THEN
339  info = - (800+ctxt_)
340  ELSE
341 *
342  SIZE = min(m,n)
343  sizeb = max(m,n)
344  nprocs = nprow*npcol
345  IF (m.GE.n) THEN
346  ioffd = ja - 1
347  ioffe = ia - 1
348  sizepos = 1
349  ELSE
350  ioffd = ia - 1
351  ioffe = ja - 1
352  sizepos = 3
353  END IF
354 *
355  IF (lsame(jobu,'V')) THEN
356  wantu = 1
357  ELSE
358  wantu = 0
359  END IF
360  IF (lsame(jobvt,'V')) THEN
361  wantvt = 1
362  ELSE
363  wantvt = 0
364  END IF
365 *
366  CALL chk1mat(m,3,n,4,ia,ja,desca,8,info)
367  IF (wantu.EQ.1) THEN
368  CALL chk1mat(m,3,SIZE,sizepos,iu,ju,descu,13,info)
369  END IF
370  IF (wantvt.EQ.1) THEN
371  CALL chk1mat(SIZE,sizepos,n,4,ivt,jvt,descvt,17,info)
372  END IF
373  CALL igamx2d(desca(ctxt_),'A',' ',1,1,info,1,1,1,-1,-1,0)
374 *
375  IF (info.EQ.0) THEN
376 *
377 * Set up pointers into the WORK array.
378 *
379  indd = 2
380  inde = indd + sizeb + ioffd
381  indd2 = inde + sizeb + ioffe
382  inde2 = indd2 + sizeb + ioffd
383 *
384  indtauq = 2
385  indtaup = indtauq + sizeb + ja - 1
386  indwork = indtaup + sizeb + ia - 1
387  llwork = lwork - indwork + 1
388 *
389 * Initialize contexts for "column" and "row" process matrices.
390 *
391  CALL blacs_get(desca(ctxt_),10,contextc)
392  CALL blacs_gridinit(contextc,'R',nprocs,1)
393  CALL blacs_gridinfo(contextc,nprowc,npcolc,myprowc,
394  + mypcolc)
395  CALL blacs_get(desca(ctxt_),10,contextr)
396  CALL blacs_gridinit(contextr,'R',1,nprocs)
397  CALL blacs_gridinfo(contextr,nprowr,npcolr,myprowr,
398  + mypcolr)
399 *
400 * Set local dimensions of matrices (this is for MB=NB=1).
401 *
402  nru = numroc(m,1,myprowc,0,nprocs)
403  ncvt = numroc(n,1,mypcolr,0,nprocs)
404  nb = desca(nb_)
405  mb = desca(mb_)
406  mp = numroc(m,mb,myprow,desca(rsrc_),nprow)
407  nq = numroc(n,nb,mypcol,desca(csrc_),npcol)
408  IF (wantvt.EQ.1) THEN
409  sizep = numroc(SIZE,descvt(mb_),myprow,descvt(rsrc_),
410  + nprow)
411  ELSE
412  sizep = 0
413  END IF
414  IF (wantu.EQ.1) THEN
415  sizeq = numroc(SIZE,descu(nb_),mypcol,descu(csrc_),
416  + npcol)
417  ELSE
418  sizeq = 0
419  END IF
420 *
421 * Transmit MAX(NQ0, MP0).
422 *
423  IF (myprow.EQ.0 .AND. mypcol.EQ.0) THEN
424  maxim = max(nq,mp)
425  CALL igebs2d(desca(ctxt_),'All',' ',1,1,maxim,1)
426  ELSE
427  CALL igebr2d(desca(ctxt_),'All',' ',1,1,maxim,1,0,0)
428  END IF
429 *
430  wpclange = mp
431  wpcgebrd = nb* (mp+nq+1) + nq
432  watobd = max(max(wpclange,wpcgebrd),maxim)
433 *
434  wcbdsqr = max(1,4*size)
435  wpcormbrqln = max((nb* (nb-1))/2, (sizeq+mp)*nb) + nb*nb
436  wpcormbrprt = max((mb* (mb-1))/2, (sizep+nq)*mb) + mb*mb
437  wbdtosvd = size* (wantu*nru+wantvt*ncvt) +
438  + max(wcbdsqr,max(wantu*wpcormbrqln,
439  + wantvt*wpcormbrprt))
440 *
441 * Finally, calculate required workspace.
442 *
443  lwmin = 1 + 2*sizeb + max(watobd,wbdtosvd)
444  work(1) = cmplx(lwmin,0d+00)
445  rwork(1) = real(1+4*sizeb)
446 *
447  IF (wantu.NE.1 .AND. .NOT. (lsame(jobu,'N'))) THEN
448  info = -1
449  ELSE IF (wantvt.NE.1 .AND. .NOT. (lsame(jobvt,'N'))) THEN
450  info = -2
451  ELSE IF (lwork.LT.lwmin .AND. lwork.NE.-1) THEN
452  info = -19
453  END IF
454 *
455  END IF
456 *
457  idum1(1) = wantu
458  idum1(2) = wantvt
459  IF (lwork.EQ.-1) THEN
460  idum1(3) = -1
461  ELSE
462  idum1(3) = 1
463  END IF
464  idum2(1) = 1
465  idum2(2) = 2
466  idum2(3) = 19
467  CALL pchk1mat(m,3,n,4,ia,ja,desca,8,3,idum1,idum2,info)
468  IF (info.EQ.0) THEN
469  IF (wantu.EQ.1) THEN
470  CALL pchk1mat(m,3,SIZE,4,iu,ju,descu,13,0,idum1,idum2,
471  + info)
472  END IF
473  IF (wantvt.EQ.1) THEN
474  CALL pchk1mat(SIZE,3,n,4,ivt,jvt,descvt,17,0,idum1,
475  + idum2,info)
476  END IF
477  END IF
478 *
479  END IF
480 *
481  IF (info.NE.0) THEN
482  CALL pxerbla(desca(ctxt_),'PCGESVD',-info)
483  RETURN
484  ELSE IF (lwork.EQ.-1) THEN
485  GO TO 40
486  END IF
487 *
488 * Quick return if possible.
489 *
490  IF (m.LE.0 .OR. n.LE.0) GO TO 40
491 *
492 * Get machine constants.
493 *
494  safmin = pslamch(desca(ctxt_),'Safe minimum')
495  eps = pslamch(desca(ctxt_),'Precision')
496  smlnum = safmin/eps
497  bignum = done/smlnum
498  rmin = sqrt(smlnum)
499  rmax = min(sqrt(bignum),done/sqrt(sqrt(safmin)))
500 *
501 * Scale matrix to allowable range, if necessary.
502 *
503  anrm = pclange('1',m,n,a,ia,ja,desca,work(indwork))
504  IF (anrm.GT.dzero .AND. anrm.LT.rmin) THEN
505  iscale = 1
506  sigma = rmin/anrm
507  ELSE IF (anrm.GT.rmax) THEN
508  iscale = 1
509  sigma = rmax/anrm
510  END IF
511 *
512  IF (iscale.EQ.1) THEN
513  CALL pclascl('G',done,sigma,m,n,a,ia,ja,desca,info)
514  END IF
515 *
516  CALL pcgebrd(m,n,a,ia,ja,desca,rwork(indd),rwork(inde),
517  + work(indtauq),work(indtaup),work(indwork),llwork,
518  + info)
519 *
520 * Copy D and E to all processes.
521 * Array D is in local array of dimension:
522 * LOCc(JA+MIN(M,N)-1) if M >= N; LOCr(IA+MIN(M,N)-1) otherwise.
523 * Array E is in local array of dimension
524 * LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise.
525 *
526  IF (m.GE.n) THEN
527 * Distribute D
528  CALL pslared1d(n+ioffd,ia,ja,desca,rwork(indd),rwork(indd2),
529  + work(indwork),llwork)
530 * Distribute E
531  CALL pslared2d(m+ioffe,ia,ja,desca,rwork(inde),rwork(inde2),
532  + work(indwork),llwork)
533  ELSE
534 * Distribute D
535  CALL pslared2d(m+ioffd,ia,ja,desca,rwork(indd),rwork(indd2),
536  + work(indwork),llwork)
537 * Distribute E
538  CALL pslared1d(n+ioffe,ia,ja,desca,rwork(inde),rwork(inde2),
539  + work(indwork),llwork)
540  END IF
541 *
542 * Prepare for calling PCBDSQR.
543 *
544  IF (m.GE.n) THEN
545  uplo = 'U'
546  ELSE
547  uplo = 'L'
548  END IF
549 *
550  indu = indwork
551  indv = indu + size*nru*wantu
552  indwork = indv + size*ncvt*wantvt
553 *
554  ldu = max(1,nru)
555  ldvt = max(1,size)
556 *
557  CALL descinit(desctu,m,SIZE,1,1,0,0,contextc,ldu,info)
558  CALL descinit(desctvt,SIZE,n,1,1,0,0,contextr,ldvt,info)
559 *
560  IF (wantu.EQ.1) THEN
561  CALL pclaset('Full',m,SIZE,zero,one,work(indu),1,1,desctu)
562  ELSE
563  nru = 0
564  END IF
565 *
566  IF (wantvt.EQ.1) THEN
567  CALL pclaset('Full',SIZE,n,zero,one,work(indv),1,1,desctvt)
568  ELSE
569  ncvt = 0
570  END IF
571 *
572  CALL cbdsqr(uplo,SIZE,ncvt,nru,0,rwork(indd2+ioffd),
573  + rwork(inde2+ioffe),work(indv),SIZE,work(indu),ldu,c,1,
574  + work(indwork),info)
575 *
576 * Redistribute elements of U and VT in the block-cyclic fashion.
577 *
578  IF (wantu.EQ.1) CALL pcgemr2d(m,SIZE,work(indu),1,1,desctu,u,iu,
579  + ju,descu,descu(ctxt_))
580 *
581  IF (wantvt.EQ.1) CALL pcgemr2d(SIZE,n,work(indv),1,1,desctvt,vt,
582  + ivt,jvt,descvt,descvt(ctxt_))
583 *
584 * Set to ZERO "non-square" elements of the larger matrices U, VT.
585 *
586  IF (m.GT.n .AND. wantu.EQ.1) THEN
587  CALL pclaset('Full',m-SIZE,SIZE,zero,zero,u,ia+SIZE,ju,descu)
588  ELSE IF (n.GT.m .AND. wantvt.EQ.1) THEN
589  CALL pclaset('Full',SIZE,n-SIZE,zero,zero,vt,ivt,jvt+SIZE,
590  + descvt)
591  END IF
592 *
593 * Multiply Householder rotations from bidiagonalized matrix.
594 *
595  IF (wantu.EQ.1) CALL pcunmbr('Q','L','N',m,SIZE,n,a,ia,ja,desca,
596  + work(indtauq),u,iu,ju,descu,
597  + work(indwork),llwork,info)
598 *
599  IF (wantvt.EQ.1) CALL pcunmbr('P','R','C',SIZE,n,m,a,ia,ja,desca,
600  + work(indtaup),vt,ivt,jvt,descvt,
601  + work(indwork),llwork,info)
602 *
603 * Copy singular values into output array S.
604 *
605  DO 10 i = 1,SIZE
606  s(i) = rwork(indd2+ioffd+i-1)
607  10 CONTINUE
608 *
609 * If matrix was scaled, then rescale singular values appropriately.
610 *
611  IF (iscale.EQ.1) THEN
612  CALL sscal(SIZE,one/sigma,s,1)
613  END IF
614 *
615 * Compare every ith eigenvalue, or all if there are only a few,
616 * across the process grid to check for heterogeneity.
617 *
618  IF (size.LE.ithval) THEN
619  j = SIZE
620  k = 1
621  ELSE
622  j = size/ithval
623  k = ithval
624  END IF
625 *
626  DO 20 i = 1,j
627  rwork(i+inde) = s((i-1)*k+1)
628  rwork(i+indd2) = s((i-1)*k+1)
629  20 CONTINUE
630 *
631  CALL sgamn2d(desca(ctxt_),'a',' ',j,1,rwork(1+inde),j,1,1,-1,-1,0)
632  CALL sgamx2d(desca(ctxt_),'a',' ',j,1,rwork(1+indd2),j,1,1,-1,-1,
633  + 0)
634 *
635  DO 30 i = 1,j
636  IF ((rwork(i+inde)-rwork(i+indd2)).NE.dzero) THEN
637  info = SIZE + 1
638  END IF
639  30 CONTINUE
640 *
641  40 CONTINUE
642 *
643  CALL blacs_gridexit(contextc)
644  CALL blacs_gridexit(contextr)
645 *
646 * End of PCGESVD
647 *
648  RETURN
649  END
cmplx
float cmplx[2]
Definition: pblas.h:132
pcgesvd
subroutine pcgesvd(JOBU, JOBVT, M, N, A, IA, JA, DESCA, S, U, IU, JU, DESCU, VT, IVT, JVT, DESCVT, WORK, LWORK, RWORK, INFO)
Definition: pcgesvd.f:4
max
#define max(A, B)
Definition: pcgemr.c:180
pzlange
double precision function pzlange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pzlange.f:3
pcgebrd
subroutine pcgebrd(M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, WORK, LWORK, INFO)
Definition: pcgebrd.f:3
pchk1mat
subroutine pchk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:3
pslared1d
subroutine pslared1d(N, IA, JA, DESC, BYCOL, BYALL, WORK, LWORK)
Definition: pslared1d.f:2
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
pclaset
subroutine pclaset(UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA)
Definition: pcblastst.f:7508
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
pslared2d
subroutine pslared2d(N, IA, JA, DESC, BYROW, BYALL, WORK, LWORK)
Definition: pslared2d.f:2
pdlamch
double precision function pdlamch(ICTXT, CMACH)
Definition: pdblastst.f:6769
pclascl
subroutine pclascl(TYPE, CFROM, CTO, M, N, A, IA, JA, DESCA, INFO)
Definition: pclascl.f:3
pcunmbr
subroutine pcunmbr(VECT, SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, C, IC, JC, DESCC, WORK, LWORK, INFO)
Definition: pcunmbr.f:3
min
#define min(A, B)
Definition: pcgemr.c:181