ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pslaqr4.f
Go to the documentation of this file.
1  SUBROUTINE pslaqr4( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI,
2  $ ILOZ, IHIZ, Z, DESCZ, T, LDT, V, LDV, WORK,
3  $ LWORK, INFO )
4 *
5 * Contribution from the Department of Computing Science and HPC2N,
6 * Umea University, Sweden
7 *
8 * -- ScaLAPACK auxiliary routine (version 2.0.2) --
9 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
10 * May 1 2012
11 *
12  IMPLICIT NONE
13 *
14 * .. Scalar Arguments ..
15  LOGICAL WANTT, WANTZ
16  INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDT, LDV, LWORK, N
17 * ..
18 * .. Array Arguments ..
19  INTEGER DESCA( * ), DESCZ( * )
20  REAL A( * ), T( LDT, * ), V( LDV, * ), WI( * ),
21  $ work( * ), wr( * ), z( * )
22 * ..
23 *
24 * Purpose
25 * =======
26 *
27 * PSLAQR4 is an auxiliary routine used to find the Schur decomposition
28 * and or eigenvalues of a matrix already in Hessenberg form from cols
29 * ILO to IHI. This routine requires that the active block is small
30 * enough, i.e. IHI-ILO+1 .LE. LDT, so that it can be solved by LAPACK.
31 * Normally, it is called by PSLAQR1. All the inputs are assumed to be
32 * valid without checking.
33 *
34 * Notes
35 * =====
36 *
37 * Each global data object is described by an associated description
38 * vector. This vector stores the information required to establish
39 * the mapping between an object element and its corresponding process
40 * and memory location.
41 *
42 * Let A be a generic term for any 2D block cyclicly distributed array.
43 * Such a global array has an associated description vector DESCA.
44 * In the following comments, the character _ should be read as
45 * "of the global array".
46 *
47 * NOTATION STORED IN EXPLANATION
48 * --------------- -------------- --------------------------------------
49 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
50 * DTYPE_A = 1.
51 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
52 * the BLACS process grid A is distribu-
53 * ted over. The context itself is glo-
54 * bal, but the handle (the integer
55 * value) may vary.
56 * M_A (global) DESCA( M_ ) The number of rows in the global
57 * array A.
58 * N_A (global) DESCA( N_ ) The number of columns in the global
59 * array A.
60 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
61 * the rows of the array.
62 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
63 * the columns of the array.
64 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
65 * row of the array A is distributed.
66 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
67 * first column of the array A is
68 * distributed.
69 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
70 * array. LLD_A >= MAX(1,LOCr(M_A)).
71 *
72 * Let K be the number of rows or columns of a distributed matrix,
73 * and assume that its process grid has dimension p x q.
74 * LOCr( K ) denotes the number of elements of K that a process
75 * would receive if K were distributed over the p processes of its
76 * process column.
77 * Similarly, LOCc( K ) denotes the number of elements of K that a
78 * process would receive if K were distributed over the q processes of
79 * its process row.
80 * The values of LOCr() and LOCc() may be determined via a call to the
81 * ScaLAPACK tool function, NUMROC:
82 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
83 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
84 * An upper bound for these quantities may be computed by:
85 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
86 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
87 *
88 * Arguments
89 * =========
90 *
91 * WANTT (global input) LOGICAL
92 * = .TRUE. : the full Schur form T is required;
93 * = .FALSE.: only eigenvalues are required.
94 *
95 * WANTZ (global input) LOGICAL
96 * = .TRUE. : the matrix of Schur vectors Z is required;
97 * = .FALSE.: Schur vectors are not required.
98 *
99 * N (global input) INTEGER
100 * The order of the Hessenberg matrix A (and Z if WANTZ).
101 * N >= 0.
102 *
103 * ILO (global input) INTEGER
104 * IHI (global input) INTEGER
105 * It is assumed that A is already upper quasi-triangular in
106 * rows and columns IHI+1:N, and that A(ILO,ILO-1) = 0 (unless
107 * ILO = 1). PSLAQR4 works primarily with the Hessenberg
108 * submatrix in rows and columns ILO to IHI, but applies
109 * transformations to all of H if WANTT is .TRUE..
110 * 1 <= ILO <= max(1,IHI); IHI <= N.
111 *
112 * A (global input/output) REAL array, dimension
113 * (DESCA(LLD_),*)
114 * On entry, the upper Hessenberg matrix A.
115 * On exit, if WANTT is .TRUE., A is upper quasi-triangular in
116 * rows and columns ILO:IHI, with any 2-by-2 or larger diagonal
117 * blocks not yet in standard form. If WANTT is .FALSE., the
118 * contents of A are unspecified on exit.
119 *
120 * DESCA (global and local input) INTEGER array of dimension DLEN_.
121 * The array descriptor for the distributed matrix A.
122 *
123 * WR (global replicated output) REAL array,
124 * dimension (N)
125 * WI (global replicated output) REAL array,
126 * dimension (N)
127 * The real and imaginary parts, respectively, of the computed
128 * eigenvalues ILO to IHI are stored in the corresponding
129 * elements of WR and WI. If two eigenvalues are computed as a
130 * complex conjugate pair, they are stored in consecutive
131 * elements of WR and WI, say the i-th and (i+1)th, with
132 * WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the
133 * eigenvalues are stored in the same order as on the diagonal
134 * of the Schur form returned in A. A may be returned with
135 * larger diagonal blocks until the next release.
136 *
137 * ILOZ (global input) INTEGER
138 * IHIZ (global input) INTEGER
139 * Specify the rows of Z to which transformations must be
140 * applied if WANTZ is .TRUE..
141 * 1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
142 *
143 * Z (global input/output) REAL array.
144 * If WANTZ is .TRUE., on entry Z must contain the current
145 * matrix Z of transformations accumulated by PDHSEQR, and on
146 * exit Z has been updated; transformations are applied only to
147 * the submatrix Z(ILOZ:IHIZ,ILO:IHI).
148 * If WANTZ is .FALSE., Z is not referenced.
149 *
150 * DESCZ (global and local input) INTEGER array of dimension DLEN_.
151 * The array descriptor for the distributed matrix Z.
152 *
153 * T (local workspace) REAL array, dimension LDT*NW.
154 *
155 * LDT (local input) INTEGER
156 * The leading dimension of the array T.
157 * LDT >= IHI-ILO+1.
158 *
159 * V (local workspace) REAL array, dimension LDV*NW.
160 *
161 * LDV (local input) INTEGER
162 * The leading dimension of the array V.
163 * LDV >= IHI-ILO+1.
164 *
165 * WORK (local workspace) REAL array, dimension LWORK.
166 *
167 * LWORK (local input) INTEGER
168 * The dimension of the work array WORK.
169 * LWORK >= IHI-ILO+1.
170 * WORK(LWORK) is a local array and LWORK is assumed big enough.
171 * Typically LWORK >= 4*LDS*LDS if this routine is called by
172 * PSLAQR1. (LDS = 385, see PSLAQR1)
173 *
174 * INFO (global output) INTEGER
175 * < 0: parameter number -INFO incorrect or inconsistent;
176 * = 0: successful exit;
177 * > 0: PSLAQR4 failed to compute all the eigenvalues ILO to IHI
178 * in a total of 30*(IHI-ILO+1) iterations; if INFO = i,
179 * elements i+1:ihi of WR and WI contain those eigenvalues
180 * which have been successfully computed.
181 *
182 * ================================================================
183 * Implemented by
184 * Meiyue Shao, Department of Computing Science and HPC2N,
185 * Umea University, Sweden
186 *
187 * ================================================================
188 * References:
189 * B. Kagstrom, D. Kressner, and M. Shao,
190 * On Aggressive Early Deflation in Parallel Variants of the QR
191 * Algorithm.
192 * Para 2010, to appear.
193 *
194 * ================================================================
195 * .. Parameters ..
196  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
197  $ LLD_, MB_, M_, NB_, N_, RSRC_
198  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
199  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
200  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
201  REAL ZERO, ONE
202  PARAMETER ( ZERO = 0.0, one = 1.0 )
203 * ..
204 * .. Local Scalars ..
205  INTEGER CONTXT, HBL, I, I1, I2, IAFIRST, ICOL, ICOL1,
206  $ ICOL2, II, IROW, IROW1, IROW2, ITMP1, ITMP2,
207  $ ierr, j, jafirst, jj, k, l, lda, ldz, lldtmp,
208  $ mycol, myrow, node, npcol, nprow, nh, nmin, nz,
209  $ hstep, vstep, kkrow, kkcol, kln, ltop, left,
210  $ right, up, down, d1, d2
211 * ..
212 * .. Local Arrays ..
213  INTEGER DESCT( 9 ), DESCV( 9 ), DESCWH( 9 ),
214  $ DESCWV( 9 )
215 * ..
216 * .. External Functions ..
217  INTEGER NUMROC, ILAENV
218  EXTERNAL NUMROC, ILAENV
219 * ..
220 * .. External Subroutines ..
221  EXTERNAL blacs_gridinfo, infog2l, slaset,
222  $ slahqr, slaqr4, descinit, psgemm, psgemr2d,
223  $ sgemm, slamov, sgesd2d, sgerv2d,
224  $ sgebs2d, sgebr2d, igebs2d, igebr2d
225 * ..
226 * .. Intrinsic Functions ..
227  INTRINSIC max, min, mod
228 * ..
229 * .. Executable Statements ..
230 *
231  info = 0
232 *
233  nh = ihi - ilo + 1
234  nz = ihiz - iloz + 1
235  IF( n.EQ.0 .OR. nh.EQ.0 )
236  $ RETURN
237 *
238 * NODE (IAFIRST,JAFIRST) OWNS A(1,1)
239 *
240  hbl = desca( mb_ )
241  contxt = desca( ctxt_ )
242  lda = desca( lld_ )
243  iafirst = desca( rsrc_ )
244  jafirst = desca( csrc_ )
245  ldz = descz( lld_ )
246  CALL blacs_gridinfo( contxt, nprow, npcol, myrow, mycol )
247  node = myrow*npcol + mycol
248  left = mod( mycol+npcol-1, npcol )
249  right = mod( mycol+1, npcol )
250  up = mod( myrow+nprow-1, nprow )
251  down = mod( myrow+1, nprow )
252 *
253 * I1 and I2 are the indices of the first row and last column of A
254 * to which transformations must be applied.
255 *
256  i = ihi
257  l = ilo
258  IF( wantt ) THEN
259  i1 = 1
260  i2 = n
261  ltop = 1
262  ELSE
263  i1 = l
264  i2 = i
265  ltop = l
266  END IF
267 *
268 * Copy the diagonal block to local and call LAPACK.
269 *
270  CALL infog2l( ilo, ilo, desca, nprow, npcol, myrow, mycol,
271  $ irow, icol, ii, jj )
272  IF ( myrow .EQ. ii ) THEN
273  CALL descinit( desct, nh, nh, nh, nh, ii, jj, contxt,
274  $ ldt, ierr )
275  CALL descinit( descv, nh, nh, nh, nh, ii, jj, contxt,
276  $ ldv, ierr )
277  ELSE
278  CALL descinit( desct, nh, nh, nh, nh, ii, jj, contxt,
279  $ 1, ierr )
280  CALL descinit( descv, nh, nh, nh, nh, ii, jj, contxt,
281  $ 1, ierr )
282  END IF
283  CALL psgemr2d( nh, nh, a, ilo, ilo, desca, t, 1, 1, desct,
284  $ contxt )
285  IF ( myrow .EQ. ii .AND. mycol .EQ. jj ) THEN
286  CALL slaset( 'All', nh, nh, zero, one, v, ldv )
287  nmin = ilaenv( 12, 'SLAQR3', 'SV', nh, 1, nh, lwork )
288  IF( nh .GT. nmin ) THEN
289  CALL slaqr4( .true., .true., nh, 1, nh, t, ldt, wr( ilo ),
290  $ wi( ilo ), 1, nh, v, ldv, work, lwork, info )
291 * Clean up the scratch used by SLAQR4.
292  CALL slaset( 'L', nh-2, nh-2, zero, zero, t( 3, 1 ), ldt )
293  ELSE
294  CALL slahqr( .true., .true., nh, 1, nh, t, ldt, wr( ilo ),
295  $ wi( ilo ), 1, nh, v, ldv, info )
296  END IF
297  CALL sgebs2d( contxt, 'All', ' ', nh, nh, v, ldv )
298  CALL igebs2d( contxt, 'All', ' ', 1, 1, info, 1 )
299  ELSE
300  CALL sgebr2d( contxt, 'All', ' ', nh, nh, v, ldv, ii, jj )
301  CALL igebr2d( contxt, 'All', ' ', 1, 1, info, 1, ii, jj )
302  END IF
303  IF( info .NE. 0 ) info = info+ilo-1
304 *
305 * Copy the local matrix back to the diagonal block.
306 *
307  CALL psgemr2d( nh, nh, t, 1, 1, desct, a, ilo, ilo, desca,
308  $ contxt )
309 *
310 * Update T and Z.
311 *
312  IF( mod( ilo-1, hbl )+nh .LE. hbl ) THEN
313 *
314 * Simplest case: the diagonal block is located on one processor.
315 * Call SGEMM directly to perform the update.
316 *
317  hstep = lwork / nh
318  vstep = hstep
319 *
320  IF( wantt ) THEN
321 *
322 * Update horizontal slab in A.
323 *
324  CALL infog2l( ilo, i+1, desca, nprow, npcol, myrow,
325  $ mycol, irow, icol, ii, jj )
326  IF( myrow .EQ. ii ) THEN
327  icol1 = numroc( n, hbl, mycol, jafirst, npcol )
328  DO 10 kkcol = icol, icol1, hstep
329  kln = min( hstep, icol1-kkcol+1 )
330  CALL sgemm( 'T', 'N', nh, kln, nh, one, v,
331  $ ldv, a( irow+(kkcol-1)*lda ), lda, zero, work,
332  $ nh )
333  CALL slamov( 'A', nh, kln, work, nh,
334  $ a( irow+(kkcol-1)*lda ), lda )
335  10 CONTINUE
336  END IF
337 *
338 * Update vertical slab in A.
339 *
340  CALL infog2l( ltop, ilo, desca, nprow, npcol, myrow,
341  $ mycol, irow, icol, ii, jj )
342  IF( mycol .EQ. jj ) THEN
343  CALL infog2l( ilo-1, ilo, desca, nprow, npcol,
344  $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
345  IF( myrow .NE. itmp1 ) irow1 = irow1-1
346  DO 20 kkrow = irow, irow1, vstep
347  kln = min( vstep, irow1-kkrow+1 )
348  CALL sgemm( 'N', 'N', kln, nh, nh, one,
349  $ a( kkrow+(icol-1)*lda ), lda, v, ldv, zero,
350  $ work, kln )
351  CALL slamov( 'A', kln, nh, work, kln,
352  $ a( kkrow+(icol-1)*lda ), lda )
353  20 CONTINUE
354  END IF
355  END IF
356 *
357 * Update vertical slab in Z.
358 *
359  IF( wantz ) THEN
360  CALL infog2l( iloz, ilo, descz, nprow, npcol, myrow,
361  $ mycol, irow, icol, ii, jj )
362  IF( mycol .EQ. jj ) THEN
363  CALL infog2l( ihiz, ilo, descz, nprow, npcol,
364  $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
365  IF( myrow .NE. itmp1 ) irow1 = irow1-1
366  DO 30 kkrow = irow, irow1, vstep
367  kln = min( vstep, irow1-kkrow+1 )
368  CALL sgemm( 'N', 'N', kln, nh, nh, one,
369  $ z( kkrow+(icol-1)*ldz ), ldz, v, ldv, zero,
370  $ work, kln )
371  CALL slamov( 'A', kln, nh, work, kln,
372  $ z( kkrow+(icol-1)*ldz ), ldz )
373  30 CONTINUE
374  END IF
375  END IF
376 *
377  ELSE IF( mod( ilo-1, hbl )+nh .LE. 2*hbl ) THEN
378 *
379 * More complicated case: the diagonal block lay on a 2x2
380 * processor mesh.
381 * Call SGEMM locally and communicate by pair.
382 *
383  d1 = hbl - mod( ilo-1, hbl )
384  d2 = nh - d1
385  hstep = lwork / nh
386  vstep = hstep
387 *
388  IF( wantt ) THEN
389 *
390 * Update horizontal slab in A.
391 *
392  CALL infog2l( ilo, i+1, desca, nprow, npcol, myrow,
393  $ mycol, irow, icol, ii, jj )
394  IF( myrow .EQ. up ) THEN
395  IF( myrow .EQ. ii ) THEN
396  icol1 = numroc( n, hbl, mycol, jafirst, npcol )
397  DO 40 kkcol = icol, icol1, hstep
398  kln = min( hstep, icol1-kkcol+1 )
399  CALL sgemm( 'T', 'N', nh, kln, nh, one, v,
400  $ nh, a( irow+(kkcol-1)*lda ), lda, zero,
401  $ work, nh )
402  CALL slamov( 'A', nh, kln, work, nh,
403  $ a( irow+(kkcol-1)*lda ), lda )
404  40 CONTINUE
405  END IF
406  ELSE
407  IF( myrow .EQ. ii ) THEN
408  icol1 = numroc( n, hbl, mycol, jafirst, npcol )
409  DO 50 kkcol = icol, icol1, hstep
410  kln = min( hstep, icol1-kkcol+1 )
411  CALL sgemm( 'T', 'N', d2, kln, d1, one,
412  $ v( 1, d1+1 ), ldv, a( irow+(kkcol-1)*lda ),
413  $ lda, zero, work( d1+1 ), nh )
414  CALL sgesd2d( contxt, d2, kln, work( d1+1 ),
415  $ nh, down, mycol )
416  CALL sgerv2d( contxt, d1, kln, work, nh, down,
417  $ mycol )
418  CALL sgemm( 'T', 'N', d1, kln, d1, one,
419  $ v, ldv, a( irow+(kkcol-1)*lda ), lda, one,
420  $ work, nh )
421  CALL slamov( 'A', d1, kln, work, nh,
422  $ a( irow+(kkcol-1)*lda ), lda )
423  50 CONTINUE
424  ELSE IF( up .EQ. ii ) THEN
425  icol1 = numroc( n, hbl, mycol, jafirst, npcol )
426  DO 60 kkcol = icol, icol1, hstep
427  kln = min( hstep, icol1-kkcol+1 )
428  CALL sgemm( 'T', 'N', d1, kln, d2, one,
429  $ v( d1+1, 1 ), ldv, a( irow+(kkcol-1)*lda ),
430  $ lda, zero, work, nh )
431  CALL sgesd2d( contxt, d1, kln, work, nh, up,
432  $ mycol )
433  CALL sgerv2d( contxt, d2, kln, work( d1+1 ),
434  $ nh, up, mycol )
435  CALL sgemm( 'T', 'N', d2, kln, d2, one,
436  $ v( d1+1, d1+1 ), ldv,
437  $ a( irow+(kkcol-1)*lda ), lda, one,
438  $ work( d1+1 ), nh )
439  CALL slamov( 'A', d2, kln, work( d1+1 ), nh,
440  $ a( irow+(kkcol-1)*lda ), lda )
441  60 CONTINUE
442  END IF
443  END IF
444 *
445 * Update vertical slab in A.
446 *
447  CALL infog2l( ltop, ilo, desca, nprow, npcol, myrow,
448  $ mycol, irow, icol, ii, jj )
449  IF( mycol .EQ. left ) THEN
450  IF( mycol .EQ. jj ) THEN
451  CALL infog2l( ilo-1, ilo, desca, nprow, npcol,
452  $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
453  IF( myrow .NE. itmp1 ) irow1 = irow1-1
454  DO 70 kkrow = irow, irow1, vstep
455  kln = min( vstep, irow1-kkrow+1 )
456  CALL sgemm( 'N', 'N', kln, nh, nh, one,
457  $ a( kkrow+(icol-1)*lda ), lda, v, ldv,
458  $ zero, work, kln )
459  CALL slamov( 'A', kln, nh, work, kln,
460  $ a( kkrow+(icol-1)*lda ), lda )
461  70 CONTINUE
462  END IF
463  ELSE
464  IF( mycol .EQ. jj ) THEN
465  CALL infog2l( ilo-1, ilo, desca, nprow, npcol,
466  $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
467  IF( myrow .NE. itmp1 ) irow1 = irow1-1
468  DO 80 kkrow = irow, irow1, vstep
469  kln = min( vstep, irow1-kkrow+1 )
470  CALL sgemm( 'N', 'N', kln, d2, d1, one,
471  $ a( kkrow+(icol-1)*lda ), lda, v( 1, d1+1 ),
472  $ ldv, zero, work( 1+d1*kln ), kln )
473  CALL sgesd2d( contxt, kln, d2, work( 1+d1*kln ),
474  $ kln, myrow, right )
475  CALL sgerv2d( contxt, kln, d1, work, kln, myrow,
476  $ right )
477  CALL sgemm( 'N', 'N', kln, d1, d1, one,
478  $ a( kkrow+(icol-1)*lda ), lda, v, ldv, one,
479  $ work, kln )
480  CALL slamov( 'A', kln, d1, work, kln,
481  $ a( kkrow+(icol-1)*lda ), lda )
482  80 CONTINUE
483  ELSE IF ( left .EQ. jj ) THEN
484  CALL infog2l( ilo-1, ilo, desca, nprow, npcol,
485  $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
486  IF( myrow .NE. itmp1 ) irow1 = irow1-1
487  DO 90 kkrow = irow, irow1, vstep
488  kln = min( vstep, irow1-kkrow+1 )
489  CALL sgemm( 'N', 'N', kln, d1, d2, one,
490  $ a( kkrow+(icol-1)*lda ), lda, v( d1+1, 1 ),
491  $ ldv, zero, work, kln )
492  CALL sgesd2d( contxt, kln, d1, work, kln, myrow,
493  $ left )
494  CALL sgerv2d( contxt, kln, d2, work( 1+d1*kln ),
495  $ kln, myrow, left )
496  CALL sgemm( 'N', 'N', kln, d2, d2, one,
497  $ a( kkrow+(icol-1)*lda ), lda, v( d1+1, d1+1 ),
498  $ ldv, one, work( 1+d1*kln ), kln )
499  CALL slamov( 'A', kln, d2, work( 1+d1*kln ), kln,
500  $ a( kkrow+(icol-1)*lda ), lda )
501  90 CONTINUE
502  END IF
503  END IF
504  END IF
505 *
506 * Update vertical slab in Z.
507 *
508  IF( wantz ) THEN
509  CALL infog2l( iloz, ilo, descz, nprow, npcol, myrow,
510  $ mycol, irow, icol, ii, jj )
511  IF( mycol .EQ. left ) THEN
512  IF( mycol .EQ. jj ) THEN
513  CALL infog2l( ihiz, ilo, descz, nprow, npcol,
514  $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
515  IF( myrow .NE. itmp1 ) irow1 = irow1-1
516  DO 100 kkrow = irow, irow1, vstep
517  kln = min( vstep, irow1-kkrow+1 )
518  CALL sgemm( 'N', 'N', kln, nh, nh, one,
519  $ z( kkrow+(icol-1)*ldz ), ldz, v, ldv, zero,
520  $ work, kln )
521  CALL slamov( 'A', kln, nh, work, kln,
522  $ z( kkrow+(icol-1)*ldz ), ldz )
523  100 CONTINUE
524  END IF
525  ELSE
526  IF( mycol .EQ. jj ) THEN
527  CALL infog2l( ihiz, ilo, descz, nprow, npcol,
528  $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
529  IF( myrow .NE. itmp1 ) irow1 = irow1-1
530  DO 110 kkrow = irow, irow1, vstep
531  kln = min( vstep, irow1-kkrow+1 )
532  CALL sgemm( 'N', 'N', kln, d2, d1, one,
533  $ z( kkrow+(icol-1)*ldz ), ldz, v( 1, d1+1 ),
534  $ ldv, zero, work( 1+d1*kln ), kln )
535  CALL sgesd2d( contxt, kln, d2, work( 1+d1*kln ),
536  $ kln, myrow, right )
537  CALL sgerv2d( contxt, kln, d1, work, kln, myrow,
538  $ right )
539  CALL sgemm( 'N', 'N', kln, d1, d1, one,
540  $ z( kkrow+(icol-1)*ldz ), ldz, v, ldv, one,
541  $ work, kln )
542  CALL slamov( 'A', kln, d1, work, kln,
543  $ z( kkrow+(icol-1)*ldz ), ldz )
544  110 CONTINUE
545  ELSE IF( left .EQ. jj ) THEN
546  CALL infog2l( ihiz, ilo, descz, nprow, npcol,
547  $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
548  IF( myrow .NE. itmp1 ) irow1 = irow1-1
549  DO 120 kkrow = irow, irow1, vstep
550  kln = min( vstep, irow1-kkrow+1 )
551  CALL sgemm( 'N', 'N', kln, d1, d2, one,
552  $ z( kkrow+(icol-1)*ldz ), ldz, v( d1+1, 1 ),
553  $ ldv, zero, work, kln )
554  CALL sgesd2d( contxt, kln, d1, work, kln, myrow,
555  $ left )
556  CALL sgerv2d( contxt, kln, d2, work( 1+d1*kln ),
557  $ kln, myrow, left )
558  CALL sgemm( 'N', 'N', kln, d2, d2, one,
559  $ z( kkrow+(icol-1)*ldz ), ldz,
560  $ v( d1+1, d1+1 ), ldv, one, work( 1+d1*kln ),
561  $ kln )
562  CALL slamov( 'A', kln, d2, work( 1+d1*kln ),
563  $ kln, z( kkrow+(icol-1)*ldz ), ldz )
564  120 CONTINUE
565  END IF
566  END IF
567  END IF
568 *
569  ELSE
570 *
571 * Most complicated case: the diagonal block lay across the border
572 * of the processor mesh.
573 * Treat V as a distributed matrix and call PSGEMM.
574 *
575  hstep = lwork / nh * npcol
576  vstep = lwork / nh * nprow
577  lldtmp = numroc( nh, nh, myrow, 0, nprow )
578  lldtmp = max( 1, lldtmp )
579  CALL descinit( descv, nh, nh, nh, nh, 0, 0, contxt,
580  $ lldtmp, ierr )
581  CALL descinit( descwh, nh, hstep, nh, lwork / nh, 0, 0,
582  $ contxt, lldtmp, ierr )
583 *
584  IF( wantt ) THEN
585 *
586 * Update horizontal slab in A.
587 *
588  DO 130 kkcol = i+1, n, hstep
589  kln = min( hstep, n-kkcol+1 )
590  CALL psgemm( 'T', 'N', nh, kln, nh, one, v, 1, 1,
591  $ descv, a, ilo, kkcol, desca, zero, work, 1, 1,
592  $ descwh )
593  CALL psgemr2d( nh, kln, work, 1, 1, descwh, a,
594  $ ilo, kkcol, desca, contxt )
595  130 CONTINUE
596 *
597 * Update vertical slab in A.
598 *
599  DO 140 kkrow = ltop, ilo-1, vstep
600  kln = min( vstep, ilo-kkrow )
601  lldtmp = numroc( kln, lwork / nh, myrow, 0, nprow )
602  lldtmp = max( 1, lldtmp )
603  CALL descinit( descwv, kln, nh, lwork / nh, nh, 0, 0,
604  $ contxt, lldtmp, ierr )
605  CALL psgemm( 'N', 'N', kln, nh, nh, one, a, kkrow,
606  $ ilo, desca, v, 1, 1, descv, zero, work, 1, 1,
607  $ descwv )
608  CALL psgemr2d( kln, nh, work, 1, 1, descwv, a, kkrow,
609  $ ilo, desca, contxt )
610  140 CONTINUE
611  END IF
612 *
613 * Update vertical slab in Z.
614 *
615  IF( wantz ) THEN
616  DO 150 kkrow = iloz, ihiz, vstep
617  kln = min( vstep, ihiz-kkrow+1 )
618  lldtmp = numroc( kln, lwork / nh, myrow, 0, nprow )
619  lldtmp = max( 1, lldtmp )
620  CALL descinit( descwv, kln, nh, lwork / nh, nh, 0, 0,
621  $ contxt, lldtmp, ierr )
622  CALL psgemm( 'N', 'N', kln, nh, nh, one, z, kkrow,
623  $ ilo, descz, v, 1, 1, descv, zero, work, 1, 1,
624  $ descwv )
625  CALL psgemr2d( kln, nh, work, 1, 1, descwv, z,
626  $ kkrow, ilo, descz, contxt )
627  150 CONTINUE
628  END IF
629  END IF
630 *
631 * END OF PSLAQR4
632 *
633  END
max
#define max(A, B)
Definition: pcgemr.c:180
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pslaqr4
subroutine pslaqr4(WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, ILOZ, IHIZ, Z, DESCZ, T, LDT, V, LDV, WORK, LWORK, INFO)
Definition: pslaqr4.f:4
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
min
#define min(A, B)
Definition: pcgemr.c:181