SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pslaqr4.f
Go to the documentation of this file.
1 SUBROUTINE pslaqr4( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI,
2 $ ILOZ, IHIZ, Z, DESCZ, T, LDT, V, LDV, WORK,
3 $ LWORK, INFO )
4*
5* Contribution from the Department of Computing Science and HPC2N,
6* Umea University, Sweden
7*
8* -- ScaLAPACK auxiliary routine (version 2.0.2) --
9* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
10* May 1 2012
11*
12 IMPLICIT NONE
13*
14* .. Scalar Arguments ..
15 LOGICAL WANTT, WANTZ
16 INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDT, LDV, LWORK, N
17* ..
18* .. Array Arguments ..
19 INTEGER DESCA( * ), DESCZ( * )
20 REAL A( * ), T( LDT, * ), V( LDV, * ), WI( * ),
21 $ work( * ), wr( * ), z( * )
22* ..
23*
24* Purpose
25* =======
26*
27* PSLAQR4 is an auxiliary routine used to find the Schur decomposition
28* and or eigenvalues of a matrix already in Hessenberg form from cols
29* ILO to IHI. This routine requires that the active block is small
30* enough, i.e. IHI-ILO+1 .LE. LDT, so that it can be solved by LAPACK.
31* Normally, it is called by PSLAQR1. All the inputs are assumed to be
32* valid without checking.
33*
34* Notes
35* =====
36*
37* Each global data object is described by an associated description
38* vector. This vector stores the information required to establish
39* the mapping between an object element and its corresponding process
40* and memory location.
41*
42* Let A be a generic term for any 2D block cyclicly distributed array.
43* Such a global array has an associated description vector DESCA.
44* In the following comments, the character _ should be read as
45* "of the global array".
46*
47* NOTATION STORED IN EXPLANATION
48* --------------- -------------- --------------------------------------
49* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
50* DTYPE_A = 1.
51* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
52* the BLACS process grid A is distribu-
53* ted over. The context itself is glo-
54* bal, but the handle (the integer
55* value) may vary.
56* M_A (global) DESCA( M_ ) The number of rows in the global
57* array A.
58* N_A (global) DESCA( N_ ) The number of columns in the global
59* array A.
60* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
61* the rows of the array.
62* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
63* the columns of the array.
64* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
65* row of the array A is distributed.
66* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
67* first column of the array A is
68* distributed.
69* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
70* array. LLD_A >= MAX(1,LOCr(M_A)).
71*
72* Let K be the number of rows or columns of a distributed matrix,
73* and assume that its process grid has dimension p x q.
74* LOCr( K ) denotes the number of elements of K that a process
75* would receive if K were distributed over the p processes of its
76* process column.
77* Similarly, LOCc( K ) denotes the number of elements of K that a
78* process would receive if K were distributed over the q processes of
79* its process row.
80* The values of LOCr() and LOCc() may be determined via a call to the
81* ScaLAPACK tool function, NUMROC:
82* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
83* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
84* An upper bound for these quantities may be computed by:
85* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
86* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
87*
88* Arguments
89* =========
90*
91* WANTT (global input) LOGICAL
92* = .TRUE. : the full Schur form T is required;
93* = .FALSE.: only eigenvalues are required.
94*
95* WANTZ (global input) LOGICAL
96* = .TRUE. : the matrix of Schur vectors Z is required;
97* = .FALSE.: Schur vectors are not required.
98*
99* N (global input) INTEGER
100* The order of the Hessenberg matrix A (and Z if WANTZ).
101* N >= 0.
102*
103* ILO (global input) INTEGER
104* IHI (global input) INTEGER
105* It is assumed that A is already upper quasi-triangular in
106* rows and columns IHI+1:N, and that A(ILO,ILO-1) = 0 (unless
107* ILO = 1). PSLAQR4 works primarily with the Hessenberg
108* submatrix in rows and columns ILO to IHI, but applies
109* transformations to all of H if WANTT is .TRUE..
110* 1 <= ILO <= max(1,IHI); IHI <= N.
111*
112* A (global input/output) REAL array, dimension
113* (DESCA(LLD_),*)
114* On entry, the upper Hessenberg matrix A.
115* On exit, if WANTT is .TRUE., A is upper quasi-triangular in
116* rows and columns ILO:IHI, with any 2-by-2 or larger diagonal
117* blocks not yet in standard form. If WANTT is .FALSE., the
118* contents of A are unspecified on exit.
119*
120* DESCA (global and local input) INTEGER array of dimension DLEN_.
121* The array descriptor for the distributed matrix A.
122*
123* WR (global replicated output) REAL array,
124* dimension (N)
125* WI (global replicated output) REAL array,
126* dimension (N)
127* The real and imaginary parts, respectively, of the computed
128* eigenvalues ILO to IHI are stored in the corresponding
129* elements of WR and WI. If two eigenvalues are computed as a
130* complex conjugate pair, they are stored in consecutive
131* elements of WR and WI, say the i-th and (i+1)th, with
132* WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the
133* eigenvalues are stored in the same order as on the diagonal
134* of the Schur form returned in A. A may be returned with
135* larger diagonal blocks until the next release.
136*
137* ILOZ (global input) INTEGER
138* IHIZ (global input) INTEGER
139* Specify the rows of Z to which transformations must be
140* applied if WANTZ is .TRUE..
141* 1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
142*
143* Z (global input/output) REAL array.
144* If WANTZ is .TRUE., on entry Z must contain the current
145* matrix Z of transformations accumulated by PDHSEQR, and on
146* exit Z has been updated; transformations are applied only to
147* the submatrix Z(ILOZ:IHIZ,ILO:IHI).
148* If WANTZ is .FALSE., Z is not referenced.
149*
150* DESCZ (global and local input) INTEGER array of dimension DLEN_.
151* The array descriptor for the distributed matrix Z.
152*
153* T (local workspace) REAL array, dimension LDT*NW.
154*
155* LDT (local input) INTEGER
156* The leading dimension of the array T.
157* LDT >= IHI-ILO+1.
158*
159* V (local workspace) REAL array, dimension LDV*NW.
160*
161* LDV (local input) INTEGER
162* The leading dimension of the array V.
163* LDV >= IHI-ILO+1.
164*
165* WORK (local workspace) REAL array, dimension LWORK.
166*
167* LWORK (local input) INTEGER
168* The dimension of the work array WORK.
169* LWORK >= IHI-ILO+1.
170* WORK(LWORK) is a local array and LWORK is assumed big enough.
171* Typically LWORK >= 4*LDS*LDS if this routine is called by
172* PSLAQR1. (LDS = 385, see PSLAQR1)
173*
174* INFO (global output) INTEGER
175* < 0: parameter number -INFO incorrect or inconsistent;
176* = 0: successful exit;
177* > 0: PSLAQR4 failed to compute all the eigenvalues ILO to IHI
178* in a total of 30*(IHI-ILO+1) iterations; if INFO = i,
179* elements i+1:ihi of WR and WI contain those eigenvalues
180* which have been successfully computed.
181*
182* ================================================================
183* Implemented by
184* Meiyue Shao, Department of Computing Science and HPC2N,
185* Umea University, Sweden
186*
187* ================================================================
188* References:
189* B. Kagstrom, D. Kressner, and M. Shao,
190* On Aggressive Early Deflation in Parallel Variants of the QR
191* Algorithm.
192* Para 2010, to appear.
193*
194* ================================================================
195* .. Parameters ..
196 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
197 $ LLD_, MB_, M_, NB_, N_, RSRC_
198 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
199 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
200 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
201 REAL ZERO, ONE
202 PARAMETER ( ZERO = 0.0, one = 1.0 )
203* ..
204* .. Local Scalars ..
205 INTEGER CONTXT, HBL, I, I1, I2, IAFIRST, ICOL, ICOL1,
206 $ ICOL2, II, IROW, IROW1, IROW2, ITMP1, ITMP2,
207 $ ierr, j, jafirst, jj, k, l, lda, ldz, lldtmp,
208 $ mycol, myrow, node, npcol, nprow, nh, nmin, nz,
209 $ hstep, vstep, kkrow, kkcol, kln, ltop, left,
210 $ right, up, down, d1, d2
211* ..
212* .. Local Arrays ..
213 INTEGER DESCT( 9 ), DESCV( 9 ), DESCWH( 9 ),
214 $ DESCWV( 9 )
215* ..
216* .. External Functions ..
217 INTEGER NUMROC, ILAENV
218 EXTERNAL NUMROC, ILAENV
219* ..
220* .. External Subroutines ..
221 EXTERNAL blacs_gridinfo, infog2l, slaset,
222 $ slahqr, slaqr4, descinit, psgemm, psgemr2d,
223 $ sgemm, slamov, sgesd2d, sgerv2d,
224 $ sgebs2d, sgebr2d, igebs2d, igebr2d
225* ..
226* .. Intrinsic Functions ..
227 INTRINSIC max, min, mod
228* ..
229* .. Executable Statements ..
230*
231 info = 0
232*
233 nh = ihi - ilo + 1
234 nz = ihiz - iloz + 1
235 IF( n.EQ.0 .OR. nh.EQ.0 )
236 $ RETURN
237*
238* NODE (IAFIRST,JAFIRST) OWNS A(1,1)
239*
240 hbl = desca( mb_ )
241 contxt = desca( ctxt_ )
242 lda = desca( lld_ )
243 iafirst = desca( rsrc_ )
244 jafirst = desca( csrc_ )
245 ldz = descz( lld_ )
246 CALL blacs_gridinfo( contxt, nprow, npcol, myrow, mycol )
247 node = myrow*npcol + mycol
248 left = mod( mycol+npcol-1, npcol )
249 right = mod( mycol+1, npcol )
250 up = mod( myrow+nprow-1, nprow )
251 down = mod( myrow+1, nprow )
252*
253* I1 and I2 are the indices of the first row and last column of A
254* to which transformations must be applied.
255*
256 i = ihi
257 l = ilo
258 IF( wantt ) THEN
259 i1 = 1
260 i2 = n
261 ltop = 1
262 ELSE
263 i1 = l
264 i2 = i
265 ltop = l
266 END IF
267*
268* Copy the diagonal block to local and call LAPACK.
269*
270 CALL infog2l( ilo, ilo, desca, nprow, npcol, myrow, mycol,
271 $ irow, icol, ii, jj )
272 IF ( myrow .EQ. ii ) THEN
273 CALL descinit( desct, nh, nh, nh, nh, ii, jj, contxt,
274 $ ldt, ierr )
275 CALL descinit( descv, nh, nh, nh, nh, ii, jj, contxt,
276 $ ldv, ierr )
277 ELSE
278 CALL descinit( desct, nh, nh, nh, nh, ii, jj, contxt,
279 $ 1, ierr )
280 CALL descinit( descv, nh, nh, nh, nh, ii, jj, contxt,
281 $ 1, ierr )
282 END IF
283 CALL psgemr2d( nh, nh, a, ilo, ilo, desca, t, 1, 1, desct,
284 $ contxt )
285 IF ( myrow .EQ. ii .AND. mycol .EQ. jj ) THEN
286 CALL slaset( 'All', nh, nh, zero, one, v, ldv )
287 nmin = ilaenv( 12, 'SLAQR3', 'SV', nh, 1, nh, lwork )
288 IF( nh .GT. nmin ) THEN
289 CALL slaqr4( .true., .true., nh, 1, nh, t, ldt, wr( ilo ),
290 $ wi( ilo ), 1, nh, v, ldv, work, lwork, info )
291* Clean up the scratch used by SLAQR4.
292 CALL slaset( 'L', nh-2, nh-2, zero, zero, t( 3, 1 ), ldt )
293 ELSE
294 CALL slahqr( .true., .true., nh, 1, nh, t, ldt, wr( ilo ),
295 $ wi( ilo ), 1, nh, v, ldv, info )
296 END IF
297 CALL sgebs2d( contxt, 'All', ' ', nh, nh, v, ldv )
298 CALL igebs2d( contxt, 'All', ' ', 1, 1, info, 1 )
299 ELSE
300 CALL sgebr2d( contxt, 'All', ' ', nh, nh, v, ldv, ii, jj )
301 CALL igebr2d( contxt, 'All', ' ', 1, 1, info, 1, ii, jj )
302 END IF
303 IF( info .NE. 0 ) info = info+ilo-1
304*
305* Copy the local matrix back to the diagonal block.
306*
307 CALL psgemr2d( nh, nh, t, 1, 1, desct, a, ilo, ilo, desca,
308 $ contxt )
309*
310* Update T and Z.
311*
312 IF( mod( ilo-1, hbl )+nh .LE. hbl ) THEN
313*
314* Simplest case: the diagonal block is located on one processor.
315* Call SGEMM directly to perform the update.
316*
317 hstep = lwork / nh
318 vstep = hstep
319*
320 IF( wantt ) THEN
321*
322* Update horizontal slab in A.
323*
324 CALL infog2l( ilo, i+1, desca, nprow, npcol, myrow,
325 $ mycol, irow, icol, ii, jj )
326 IF( myrow .EQ. ii ) THEN
327 icol1 = numroc( n, hbl, mycol, jafirst, npcol )
328 DO 10 kkcol = icol, icol1, hstep
329 kln = min( hstep, icol1-kkcol+1 )
330 CALL sgemm( 'T', 'N', nh, kln, nh, one, v,
331 $ ldv, a( irow+(kkcol-1)*lda ), lda, zero, work,
332 $ nh )
333 CALL slamov( 'A', nh, kln, work, nh,
334 $ a( irow+(kkcol-1)*lda ), lda )
335 10 CONTINUE
336 END IF
337*
338* Update vertical slab in A.
339*
340 CALL infog2l( ltop, ilo, desca, nprow, npcol, myrow,
341 $ mycol, irow, icol, ii, jj )
342 IF( mycol .EQ. jj ) THEN
343 CALL infog2l( ilo-1, ilo, desca, nprow, npcol,
344 $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
345 IF( myrow .NE. itmp1 ) irow1 = irow1-1
346 DO 20 kkrow = irow, irow1, vstep
347 kln = min( vstep, irow1-kkrow+1 )
348 CALL sgemm( 'N', 'N', kln, nh, nh, one,
349 $ a( kkrow+(icol-1)*lda ), lda, v, ldv, zero,
350 $ work, kln )
351 CALL slamov( 'A', kln, nh, work, kln,
352 $ a( kkrow+(icol-1)*lda ), lda )
353 20 CONTINUE
354 END IF
355 END IF
356*
357* Update vertical slab in Z.
358*
359 IF( wantz ) THEN
360 CALL infog2l( iloz, ilo, descz, nprow, npcol, myrow,
361 $ mycol, irow, icol, ii, jj )
362 IF( mycol .EQ. jj ) THEN
363 CALL infog2l( ihiz, ilo, descz, nprow, npcol,
364 $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
365 IF( myrow .NE. itmp1 ) irow1 = irow1-1
366 DO 30 kkrow = irow, irow1, vstep
367 kln = min( vstep, irow1-kkrow+1 )
368 CALL sgemm( 'N', 'N', kln, nh, nh, one,
369 $ z( kkrow+(icol-1)*ldz ), ldz, v, ldv, zero,
370 $ work, kln )
371 CALL slamov( 'A', kln, nh, work, kln,
372 $ z( kkrow+(icol-1)*ldz ), ldz )
373 30 CONTINUE
374 END IF
375 END IF
376*
377 ELSE IF( mod( ilo-1, hbl )+nh .LE. 2*hbl ) THEN
378*
379* More complicated case: the diagonal block lay on a 2x2
380* processor mesh.
381* Call SGEMM locally and communicate by pair.
382*
383 d1 = hbl - mod( ilo-1, hbl )
384 d2 = nh - d1
385 hstep = lwork / nh
386 vstep = hstep
387*
388 IF( wantt ) THEN
389*
390* Update horizontal slab in A.
391*
392 CALL infog2l( ilo, i+1, desca, nprow, npcol, myrow,
393 $ mycol, irow, icol, ii, jj )
394 IF( myrow .EQ. up ) THEN
395 IF( myrow .EQ. ii ) THEN
396 icol1 = numroc( n, hbl, mycol, jafirst, npcol )
397 DO 40 kkcol = icol, icol1, hstep
398 kln = min( hstep, icol1-kkcol+1 )
399 CALL sgemm( 'T', 'N', nh, kln, nh, one, v,
400 $ nh, a( irow+(kkcol-1)*lda ), lda, zero,
401 $ work, nh )
402 CALL slamov( 'A', nh, kln, work, nh,
403 $ a( irow+(kkcol-1)*lda ), lda )
404 40 CONTINUE
405 END IF
406 ELSE
407 IF( myrow .EQ. ii ) THEN
408 icol1 = numroc( n, hbl, mycol, jafirst, npcol )
409 DO 50 kkcol = icol, icol1, hstep
410 kln = min( hstep, icol1-kkcol+1 )
411 CALL sgemm( 'T', 'N', d2, kln, d1, one,
412 $ v( 1, d1+1 ), ldv, a( irow+(kkcol-1)*lda ),
413 $ lda, zero, work( d1+1 ), nh )
414 CALL sgesd2d( contxt, d2, kln, work( d1+1 ),
415 $ nh, down, mycol )
416 CALL sgerv2d( contxt, d1, kln, work, nh, down,
417 $ mycol )
418 CALL sgemm( 'T', 'N', d1, kln, d1, one,
419 $ v, ldv, a( irow+(kkcol-1)*lda ), lda, one,
420 $ work, nh )
421 CALL slamov( 'A', d1, kln, work, nh,
422 $ a( irow+(kkcol-1)*lda ), lda )
423 50 CONTINUE
424 ELSE IF( up .EQ. ii ) THEN
425 icol1 = numroc( n, hbl, mycol, jafirst, npcol )
426 DO 60 kkcol = icol, icol1, hstep
427 kln = min( hstep, icol1-kkcol+1 )
428 CALL sgemm( 'T', 'N', d1, kln, d2, one,
429 $ v( d1+1, 1 ), ldv, a( irow+(kkcol-1)*lda ),
430 $ lda, zero, work, nh )
431 CALL sgesd2d( contxt, d1, kln, work, nh, up,
432 $ mycol )
433 CALL sgerv2d( contxt, d2, kln, work( d1+1 ),
434 $ nh, up, mycol )
435 CALL sgemm( 'T', 'N', d2, kln, d2, one,
436 $ v( d1+1, d1+1 ), ldv,
437 $ a( irow+(kkcol-1)*lda ), lda, one,
438 $ work( d1+1 ), nh )
439 CALL slamov( 'A', d2, kln, work( d1+1 ), nh,
440 $ a( irow+(kkcol-1)*lda ), lda )
441 60 CONTINUE
442 END IF
443 END IF
444*
445* Update vertical slab in A.
446*
447 CALL infog2l( ltop, ilo, desca, nprow, npcol, myrow,
448 $ mycol, irow, icol, ii, jj )
449 IF( mycol .EQ. left ) THEN
450 IF( mycol .EQ. jj ) THEN
451 CALL infog2l( ilo-1, ilo, desca, nprow, npcol,
452 $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
453 IF( myrow .NE. itmp1 ) irow1 = irow1-1
454 DO 70 kkrow = irow, irow1, vstep
455 kln = min( vstep, irow1-kkrow+1 )
456 CALL sgemm( 'N', 'N', kln, nh, nh, one,
457 $ a( kkrow+(icol-1)*lda ), lda, v, ldv,
458 $ zero, work, kln )
459 CALL slamov( 'A', kln, nh, work, kln,
460 $ a( kkrow+(icol-1)*lda ), lda )
461 70 CONTINUE
462 END IF
463 ELSE
464 IF( mycol .EQ. jj ) THEN
465 CALL infog2l( ilo-1, ilo, desca, nprow, npcol,
466 $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
467 IF( myrow .NE. itmp1 ) irow1 = irow1-1
468 DO 80 kkrow = irow, irow1, vstep
469 kln = min( vstep, irow1-kkrow+1 )
470 CALL sgemm( 'N', 'N', kln, d2, d1, one,
471 $ a( kkrow+(icol-1)*lda ), lda, v( 1, d1+1 ),
472 $ ldv, zero, work( 1+d1*kln ), kln )
473 CALL sgesd2d( contxt, kln, d2, work( 1+d1*kln ),
474 $ kln, myrow, right )
475 CALL sgerv2d( contxt, kln, d1, work, kln, myrow,
476 $ right )
477 CALL sgemm( 'N', 'N', kln, d1, d1, one,
478 $ a( kkrow+(icol-1)*lda ), lda, v, ldv, one,
479 $ work, kln )
480 CALL slamov( 'A', kln, d1, work, kln,
481 $ a( kkrow+(icol-1)*lda ), lda )
482 80 CONTINUE
483 ELSE IF ( left .EQ. jj ) THEN
484 CALL infog2l( ilo-1, ilo, desca, nprow, npcol,
485 $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
486 IF( myrow .NE. itmp1 ) irow1 = irow1-1
487 DO 90 kkrow = irow, irow1, vstep
488 kln = min( vstep, irow1-kkrow+1 )
489 CALL sgemm( 'N', 'N', kln, d1, d2, one,
490 $ a( kkrow+(icol-1)*lda ), lda, v( d1+1, 1 ),
491 $ ldv, zero, work, kln )
492 CALL sgesd2d( contxt, kln, d1, work, kln, myrow,
493 $ left )
494 CALL sgerv2d( contxt, kln, d2, work( 1+d1*kln ),
495 $ kln, myrow, left )
496 CALL sgemm( 'N', 'N', kln, d2, d2, one,
497 $ a( kkrow+(icol-1)*lda ), lda, v( d1+1, d1+1 ),
498 $ ldv, one, work( 1+d1*kln ), kln )
499 CALL slamov( 'A', kln, d2, work( 1+d1*kln ), kln,
500 $ a( kkrow+(icol-1)*lda ), lda )
501 90 CONTINUE
502 END IF
503 END IF
504 END IF
505*
506* Update vertical slab in Z.
507*
508 IF( wantz ) THEN
509 CALL infog2l( iloz, ilo, descz, nprow, npcol, myrow,
510 $ mycol, irow, icol, ii, jj )
511 IF( mycol .EQ. left ) THEN
512 IF( mycol .EQ. jj ) THEN
513 CALL infog2l( ihiz, ilo, descz, nprow, npcol,
514 $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
515 IF( myrow .NE. itmp1 ) irow1 = irow1-1
516 DO 100 kkrow = irow, irow1, vstep
517 kln = min( vstep, irow1-kkrow+1 )
518 CALL sgemm( 'N', 'N', kln, nh, nh, one,
519 $ z( kkrow+(icol-1)*ldz ), ldz, v, ldv, zero,
520 $ work, kln )
521 CALL slamov( 'A', kln, nh, work, kln,
522 $ z( kkrow+(icol-1)*ldz ), ldz )
523 100 CONTINUE
524 END IF
525 ELSE
526 IF( mycol .EQ. jj ) THEN
527 CALL infog2l( ihiz, ilo, descz, nprow, npcol,
528 $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
529 IF( myrow .NE. itmp1 ) irow1 = irow1-1
530 DO 110 kkrow = irow, irow1, vstep
531 kln = min( vstep, irow1-kkrow+1 )
532 CALL sgemm( 'N', 'N', kln, d2, d1, one,
533 $ z( kkrow+(icol-1)*ldz ), ldz, v( 1, d1+1 ),
534 $ ldv, zero, work( 1+d1*kln ), kln )
535 CALL sgesd2d( contxt, kln, d2, work( 1+d1*kln ),
536 $ kln, myrow, right )
537 CALL sgerv2d( contxt, kln, d1, work, kln, myrow,
538 $ right )
539 CALL sgemm( 'N', 'N', kln, d1, d1, one,
540 $ z( kkrow+(icol-1)*ldz ), ldz, v, ldv, one,
541 $ work, kln )
542 CALL slamov( 'A', kln, d1, work, kln,
543 $ z( kkrow+(icol-1)*ldz ), ldz )
544 110 CONTINUE
545 ELSE IF( left .EQ. jj ) THEN
546 CALL infog2l( ihiz, ilo, descz, nprow, npcol,
547 $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
548 IF( myrow .NE. itmp1 ) irow1 = irow1-1
549 DO 120 kkrow = irow, irow1, vstep
550 kln = min( vstep, irow1-kkrow+1 )
551 CALL sgemm( 'N', 'N', kln, d1, d2, one,
552 $ z( kkrow+(icol-1)*ldz ), ldz, v( d1+1, 1 ),
553 $ ldv, zero, work, kln )
554 CALL sgesd2d( contxt, kln, d1, work, kln, myrow,
555 $ left )
556 CALL sgerv2d( contxt, kln, d2, work( 1+d1*kln ),
557 $ kln, myrow, left )
558 CALL sgemm( 'N', 'N', kln, d2, d2, one,
559 $ z( kkrow+(icol-1)*ldz ), ldz,
560 $ v( d1+1, d1+1 ), ldv, one, work( 1+d1*kln ),
561 $ kln )
562 CALL slamov( 'A', kln, d2, work( 1+d1*kln ),
563 $ kln, z( kkrow+(icol-1)*ldz ), ldz )
564 120 CONTINUE
565 END IF
566 END IF
567 END IF
568*
569 ELSE
570*
571* Most complicated case: the diagonal block lay across the border
572* of the processor mesh.
573* Treat V as a distributed matrix and call PSGEMM.
574*
575 hstep = lwork / nh * npcol
576 vstep = lwork / nh * nprow
577 lldtmp = numroc( nh, nh, myrow, 0, nprow )
578 lldtmp = max( 1, lldtmp )
579 CALL descinit( descv, nh, nh, nh, nh, 0, 0, contxt,
580 $ lldtmp, ierr )
581 CALL descinit( descwh, nh, hstep, nh, lwork / nh, 0, 0,
582 $ contxt, lldtmp, ierr )
583*
584 IF( wantt ) THEN
585*
586* Update horizontal slab in A.
587*
588 DO 130 kkcol = i+1, n, hstep
589 kln = min( hstep, n-kkcol+1 )
590 CALL psgemm( 'T', 'N', nh, kln, nh, one, v, 1, 1,
591 $ descv, a, ilo, kkcol, desca, zero, work, 1, 1,
592 $ descwh )
593 CALL psgemr2d( nh, kln, work, 1, 1, descwh, a,
594 $ ilo, kkcol, desca, contxt )
595 130 CONTINUE
596*
597* Update vertical slab in A.
598*
599 DO 140 kkrow = ltop, ilo-1, vstep
600 kln = min( vstep, ilo-kkrow )
601 lldtmp = numroc( kln, lwork / nh, myrow, 0, nprow )
602 lldtmp = max( 1, lldtmp )
603 CALL descinit( descwv, kln, nh, lwork / nh, nh, 0, 0,
604 $ contxt, lldtmp, ierr )
605 CALL psgemm( 'N', 'N', kln, nh, nh, one, a, kkrow,
606 $ ilo, desca, v, 1, 1, descv, zero, work, 1, 1,
607 $ descwv )
608 CALL psgemr2d( kln, nh, work, 1, 1, descwv, a, kkrow,
609 $ ilo, desca, contxt )
610 140 CONTINUE
611 END IF
612*
613* Update vertical slab in Z.
614*
615 IF( wantz ) THEN
616 DO 150 kkrow = iloz, ihiz, vstep
617 kln = min( vstep, ihiz-kkrow+1 )
618 lldtmp = numroc( kln, lwork / nh, myrow, 0, nprow )
619 lldtmp = max( 1, lldtmp )
620 CALL descinit( descwv, kln, nh, lwork / nh, nh, 0, 0,
621 $ contxt, lldtmp, ierr )
622 CALL psgemm( 'N', 'N', kln, nh, nh, one, z, kkrow,
623 $ ilo, descz, v, 1, 1, descv, zero, work, 1, 1,
624 $ descwv )
625 CALL psgemr2d( kln, nh, work, 1, 1, descwv, z,
626 $ kkrow, ilo, descz, contxt )
627 150 CONTINUE
628 END IF
629 END IF
630*
631* END OF PSLAQR4
632*
633 END
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pslaqr4(wantt, wantz, n, ilo, ihi, a, desca, wr, wi, iloz, ihiz, z, descz, t, ldt, v, ldv, work, lwork, info)
Definition pslaqr4.f:4