SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pdlaqr2.f
Go to the documentation of this file.
1 SUBROUTINE pdlaqr2( WANTT, WANTZ, N, KTOP, KBOT, NW, A, DESCA,
2 $ ILOZ, IHIZ, Z, DESCZ, NS, ND, SR, SI, T, LDT,
3 $ V, LDV, WR, WI, WORK, LWORK )
4*
5* Contribution from the Department of Computing Science and HPC2N,
6* Umea University, Sweden
7*
8* -- ScaLAPACK routine (version 2.0.2) --
9* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
10* May 1 2012
11*
12 IMPLICIT NONE
13*
14* .. Scalar Arguments ..
15 INTEGER IHIZ, ILOZ, KBOT, KTOP, LDT, LDV, LWORK, N, ND,
16 $ NS, NW
17 LOGICAL WANTT, WANTZ
18* ..
19* .. Array Arguments ..
20 INTEGER DESCA( * ), DESCZ( * )
21 DOUBLE PRECISION A( * ), SI( KBOT ), SR( KBOT ), T( LDT, * ),
22 $ v( ldv, * ), work( * ), wi( * ), wr( * ),
23 $ z( * )
24* ..
25*
26* Purpose
27* =======
28*
29* Aggressive early deflation:
30*
31* PDLAQR2 accepts as input an upper Hessenberg matrix A and performs an
32* orthogonal similarity transformation designed to detect and deflate
33* fully converged eigenvalues from a trailing principal submatrix. On
34* output A has been overwritten by a new Hessenberg matrix that is a
35* perturbation of an orthogonal similarity transformation of A. It is
36* to be hoped that the final version of H has many zero subdiagonal
37* entries.
38*
39* This routine handles small deflation windows which is affordable by
40* one processor. Normally, it is called by PDLAQR1. All the inputs are
41* assumed to be valid without checking.
42*
43* Notes
44* =====
45*
46* Each global data object is described by an associated description
47* vector. This vector stores the information required to establish
48* the mapping between an object element and its corresponding process
49* and memory location.
50*
51* Let A be a generic term for any 2D block cyclicly distributed array.
52* Such a global array has an associated description vector DESCA.
53* In the following comments, the character _ should be read as
54* "of the global array".
55*
56* NOTATION STORED IN EXPLANATION
57* --------------- -------------- --------------------------------------
58* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
59* DTYPE_A = 1.
60* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
61* the BLACS process grid A is distribu-
62* ted over. The context itself is glo-
63* bal, but the handle (the integer
64* value) may vary.
65* M_A (global) DESCA( M_ ) The number of rows in the global
66* array A.
67* N_A (global) DESCA( N_ ) The number of columns in the global
68* array A.
69* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
70* the rows of the array.
71* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
72* the columns of the array.
73* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
74* row of the array A is distributed.
75* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
76* first column of the array A is
77* distributed.
78* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
79* array. LLD_A >= MAX(1,LOCr(M_A)).
80*
81* Let K be the number of rows or columns of a distributed matrix,
82* and assume that its process grid has dimension p x q.
83* LOCr( K ) denotes the number of elements of K that a process
84* would receive if K were distributed over the p processes of its
85* process column.
86* Similarly, LOCc( K ) denotes the number of elements of K that a
87* process would receive if K were distributed over the q processes of
88* its process row.
89* The values of LOCr() and LOCc() may be determined via a call to the
90* ScaLAPACK tool function, NUMROC:
91* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
92* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
93* An upper bound for these quantities may be computed by:
94* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
95* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
96*
97* Arguments
98* =========
99*
100* WANTT (global input) LOGICAL
101* If .TRUE., then the Hessenberg matrix H is fully updated
102* so that the quasi-triangular Schur factor may be
103* computed (in cooperation with the calling subroutine).
104* If .FALSE., then only enough of H is updated to preserve
105* the eigenvalues.
106*
107* WANTZ (global input) LOGICAL
108* If .TRUE., then the orthogonal matrix Z is updated so
109* so that the orthogonal Schur factor may be computed
110* (in cooperation with the calling subroutine).
111* If .FALSE., then Z is not referenced.
112*
113* N (global input) INTEGER
114* The order of the matrix H and (if WANTZ is .TRUE.) the
115* order of the orthogonal matrix Z.
116*
117* KTOP (global input) INTEGER
118* KBOT (global input) INTEGER
119* It is assumed without a check that either
120* KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together
121* determine an isolated block along the diagonal of the
122* Hessenberg matrix. However, H(KTOP,KTOP-1)=0 is not
123* essentially necessary if WANTT is .TRUE. .
124*
125* NW (global input) INTEGER
126* Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1).
127* Normally NW .GE. 3 if PDLAQR2 is called by PDLAQR1.
128*
129* A (local input/output) DOUBLE PRECISION array, dimension
130* (DESCH(LLD_),*)
131* On input the initial N-by-N section of A stores the
132* Hessenberg matrix undergoing aggressive early deflation.
133* On output A has been transformed by an orthogonal
134* similarity transformation, perturbed, and the returned
135* to Hessenberg form that (it is to be hoped) has some
136* zero subdiagonal entries.
137*
138* DESCA (global and local input) INTEGER array of dimension DLEN_.
139* The array descriptor for the distributed matrix A.
140*
141* ILOZ (global input) INTEGER
142* IHIZ (global input) INTEGER
143* Specify the rows of Z to which transformations must be
144* applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
145*
146* Z (input/output) DOUBLE PRECISION array, dimension
147* (DESCH(LLD_),*)
148* IF WANTZ is .TRUE., then on output, the orthogonal
149* similarity transformation mentioned above has been
150* accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
151* If WANTZ is .FALSE., then Z is unreferenced.
152*
153* DESCZ (global and local input) INTEGER array of dimension DLEN_.
154* The array descriptor for the distributed matrix Z.
155*
156* NS (global output) INTEGER
157* The number of unconverged (ie approximate) eigenvalues
158* returned in SR and SI that may be used as shifts by the
159* calling subroutine.
160*
161* ND (global output) INTEGER
162* The number of converged eigenvalues uncovered by this
163* subroutine.
164*
165* SR (global output) DOUBLE PRECISION array, dimension KBOT
166* SI (global output) DOUBLE PRECISION array, dimension KBOT
167* On output, the real and imaginary parts of approximate
168* eigenvalues that may be used for shifts are stored in
169* SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
170* SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
171* On proc #0, the real and imaginary parts of converged
172* eigenvalues are stored in SR(KBOT-ND+1) through SR(KBOT) and
173* SI(KBOT-ND+1) through SI(KBOT), respectively. On other
174* processors, these entries are set to zero.
175*
176* T (local workspace) DOUBLE PRECISION array, dimension LDT*NW.
177*
178* LDT (local input) INTEGER
179* The leading dimension of the array T.
180* LDT >= NW.
181*
182* V (local workspace) DOUBLE PRECISION array, dimension LDV*NW.
183*
184* LDV (local input) INTEGER
185* The leading dimension of the array V.
186* LDV >= NW.
187*
188* WR (local workspace) DOUBLE PRECISION array, dimension KBOT.
189* WI (local workspace) DOUBLE PRECISION array, dimension KBOT.
190*
191* WORK (local workspace) DOUBLE PRECISION array, dimension LWORK.
192*
193* LWORK (local input) INTEGER
194* WORK(LWORK) is a local array and LWORK is assumed big enough
195* so that LWORK >= NW*NW.
196*
197* ================================================================
198* Implemented by
199* Meiyue Shao, Department of Computing Science and HPC2N,
200* Umea University, Sweden
201*
202* ================================================================
203* References:
204* B. Kagstrom, D. Kressner, and M. Shao,
205* On Aggressive Early Deflation in Parallel Variants of the QR
206* Algorithm.
207* Para 2010, to appear.
208*
209* ================================================================
210* .. Parameters ..
211 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
212 $ LLD_, MB_, M_, NB_, N_, RSRC_
213 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
214 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
215 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
216 DOUBLE PRECISION ZERO, ONE
217 PARAMETER ( ZERO = 0.0d+0, one = 1.0d+0 )
218* ..
219* .. Local Scalars ..
220 INTEGER CONTXT, HBL, I, I1, I2, IAFIRST, ICOL, ICOL1,
221 $ ICOL2, INFO, II, IROW, IROW1, IROW2, ITMP1,
222 $ itmp2, j, jafirst, jj, k, l, lda, ldz, lldtmp,
223 $ mycol, myrow, node, npcol, nprow, dblk,
224 $ hstep, vstep, kkrow, kkcol, kln, ltop, left,
225 $ right, up, down, d1, d2
226* ..
227* .. Local Arrays ..
228 INTEGER DESCT( 9 ), DESCV( 9 ), DESCWH( 9 ),
229 $ DESCWV( 9 )
230* ..
231* .. External Functions ..
232 INTEGER NUMROC
233 EXTERNAL NUMROC
234* ..
235* .. External Subroutines ..
236 EXTERNAL blacs_gridinfo, infog2l, dlaset,
237 $ dlaqr3, descinit, pdgemm, pdgemr2d, dgemm,
238 $ dlamov, dgesd2d, dgerv2d, dgebs2d, dgebr2d,
239 $ igebs2d, igebr2d
240* ..
241* .. Intrinsic Functions ..
242 INTRINSIC max, min, mod
243* ..
244* .. Executable Statements ..
245*
246 info = 0
247*
248 IF( n.EQ.0 )
249 $ RETURN
250*
251* NODE (IAFIRST,JAFIRST) OWNS A(1,1)
252*
253 hbl = desca( mb_ )
254 contxt = desca( ctxt_ )
255 lda = desca( lld_ )
256 iafirst = desca( rsrc_ )
257 jafirst = desca( csrc_ )
258 ldz = descz( lld_ )
259 CALL blacs_gridinfo( contxt, nprow, npcol, myrow, mycol )
260 node = myrow*npcol + mycol
261 left = mod( mycol+npcol-1, npcol )
262 right = mod( mycol+1, npcol )
263 up = mod( myrow+nprow-1, nprow )
264 down = mod( myrow+1, nprow )
265*
266* I1 and I2 are the indices of the first row and last column of A
267* to which transformations must be applied.
268*
269 i = kbot
270 l = ktop
271 IF( wantt ) THEN
272 i1 = 1
273 i2 = n
274 ltop = 1
275 ELSE
276 i1 = l
277 i2 = i
278 ltop = l
279 END IF
280*
281* Begin Aggressive Early Deflation.
282*
283 dblk = nw
284 CALL infog2l( i-dblk+1, i-dblk+1, desca, nprow, npcol, myrow,
285 $ mycol, irow, icol, ii, jj )
286 IF ( myrow .EQ. ii ) THEN
287 CALL descinit( desct, dblk, dblk, dblk, dblk, ii, jj, contxt,
288 $ ldt, info )
289 CALL descinit( descv, dblk, dblk, dblk, dblk, ii, jj, contxt,
290 $ ldv, info )
291 ELSE
292 CALL descinit( desct, dblk, dblk, dblk, dblk, ii, jj, contxt,
293 $ 1, info )
294 CALL descinit( descv, dblk, dblk, dblk, dblk, ii, jj, contxt,
295 $ 1, info )
296 END IF
297 CALL pdgemr2d( dblk, dblk, a, i-dblk+1, i-dblk+1, desca, t, 1, 1,
298 $ desct, contxt )
299 IF ( myrow .EQ. ii .AND. mycol .EQ. jj ) THEN
300 CALL dlaset( 'All', dblk, dblk, zero, one, v, ldv )
301 CALL dlaqr3( .true., .true., dblk, 1, dblk, dblk-1, t, ldt, 1,
302 $ dblk, v, ldv, ns, nd, wr, wi, work, dblk, dblk,
303 $ work( dblk*dblk+1 ), dblk, dblk, work( 2*dblk*dblk+1 ),
304 $ dblk, work( 3*dblk*dblk+1 ), lwork-3*dblk*dblk )
305 CALL dgebs2d( contxt, 'All', ' ', dblk, dblk, v, ldv )
306 CALL igebs2d( contxt, 'All', ' ', 1, 1, nd, 1 )
307 ELSE
308 CALL dgebr2d( contxt, 'All', ' ', dblk, dblk, v, ldv, ii, jj )
309 CALL igebr2d( contxt, 'All', ' ', 1, 1, nd, 1, ii, jj )
310 END IF
311*
312 IF( nd .GT. 0 ) THEN
313*
314* Copy the local matrix back to the diagonal block.
315*
316 CALL pdgemr2d( dblk, dblk, t, 1, 1, desct, a, i-dblk+1,
317 $ i-dblk+1, desca, contxt )
318*
319* Update T and Z.
320*
321 IF( mod( i-dblk, hbl )+dblk .LE. hbl ) THEN
322*
323* Simplest case: the deflation window is located on one
324* processor.
325* Call DGEMM directly to perform the update.
326*
327 hstep = lwork / dblk
328 vstep = hstep
329*
330* Update horizontal slab in A.
331*
332 IF( wantt ) THEN
333 CALL infog2l( i-dblk+1, i+1, desca, nprow, npcol, myrow,
334 $ mycol, irow, icol, ii, jj )
335 IF( myrow .EQ. ii ) THEN
336 icol1 = numroc( n, hbl, mycol, jafirst, npcol )
337 DO 10 kkcol = icol, icol1, hstep
338 kln = min( hstep, icol1-kkcol+1 )
339 CALL dgemm( 'T', 'N', dblk, kln, dblk, one, v,
340 $ ldv, a( irow+(kkcol-1)*lda ), lda, zero, work,
341 $ dblk )
342 CALL dlamov( 'A', dblk, kln, work, dblk,
343 $ a( irow+(kkcol-1)*lda ), lda )
344 10 CONTINUE
345 END IF
346 END IF
347*
348* Update vertical slab in A.
349*
350 CALL infog2l( ltop, i-dblk+1, desca, nprow, npcol, myrow,
351 $ mycol, irow, icol, ii, jj )
352 IF( mycol .EQ. jj ) THEN
353 CALL infog2l( i-dblk, i-dblk+1, desca, nprow, npcol,
354 $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
355 IF( myrow .NE. itmp1 ) irow1 = irow1-1
356 DO 20 kkrow = irow, irow1, vstep
357 kln = min( vstep, irow1-kkrow+1 )
358 CALL dgemm( 'N', 'N', kln, dblk, dblk, one,
359 $ a( kkrow+(icol-1)*lda ), lda, v, ldv, zero, work,
360 $ kln )
361 CALL dlamov( 'A', kln, dblk, work, kln,
362 $ a( kkrow+(icol-1)*lda ), lda )
363 20 CONTINUE
364 END IF
365*
366* Update vertical slab in Z.
367*
368 IF( wantz ) THEN
369 CALL infog2l( iloz, i-dblk+1, descz, nprow, npcol, myrow,
370 $ mycol, irow, icol, ii, jj )
371 IF( mycol .EQ. jj ) THEN
372 CALL infog2l( ihiz, i-dblk+1, descz, nprow, npcol,
373 $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
374 IF( myrow .NE. itmp1 ) irow1 = irow1-1
375 DO 30 kkrow = irow, irow1, vstep
376 kln = min( vstep, irow1-kkrow+1 )
377 CALL dgemm( 'N', 'N', kln, dblk, dblk, one,
378 $ z( kkrow+(icol-1)*ldz ), ldz, v, ldv, zero,
379 $ work, kln )
380 CALL dlamov( 'A', kln, dblk, work, kln,
381 $ z( kkrow+(icol-1)*ldz ), ldz )
382 30 CONTINUE
383 END IF
384 END IF
385*
386 ELSE IF( mod( i-dblk, hbl )+dblk .LE. 2*hbl ) THEN
387*
388* More complicated case: the deflation window lay on a 2x2
389* processor mesh.
390* Call DGEMM locally and communicate by pair.
391*
392 d1 = hbl - mod( i-dblk, hbl )
393 d2 = dblk - d1
394 hstep = lwork / dblk
395 vstep = hstep
396*
397* Update horizontal slab in A.
398*
399 IF( wantt ) THEN
400 CALL infog2l( i-dblk+1, i+1, desca, nprow, npcol, myrow,
401 $ mycol, irow, icol, ii, jj )
402 IF( myrow .EQ. up ) THEN
403 IF( myrow .EQ. ii ) THEN
404 icol1 = numroc( n, hbl, mycol, jafirst, npcol )
405 DO 40 kkcol = icol, icol1, hstep
406 kln = min( hstep, icol1-kkcol+1 )
407 CALL dgemm( 'T', 'N', dblk, kln, dblk, one, v,
408 $ dblk, a( irow+(kkcol-1)*lda ), lda, zero,
409 $ work, dblk )
410 CALL dlamov( 'A', dblk, kln, work, dblk,
411 $ a( irow+(kkcol-1)*lda ), lda )
412 40 CONTINUE
413 END IF
414 ELSE
415 IF( myrow .EQ. ii ) THEN
416 icol1 = numroc( n, hbl, mycol, jafirst, npcol )
417 DO 50 kkcol = icol, icol1, hstep
418 kln = min( hstep, icol1-kkcol+1 )
419 CALL dgemm( 'T', 'N', d2, kln, d1, one,
420 $ v( 1, d1+1 ), ldv, a( irow+(kkcol-1)*lda ),
421 $ lda, zero, work( d1+1 ), dblk )
422 CALL dgesd2d( contxt, d2, kln, work( d1+1 ),
423 $ dblk, down, mycol )
424 CALL dgerv2d( contxt, d1, kln, work, dblk, down,
425 $ mycol )
426 CALL dgemm( 'T', 'N', d1, kln, d1, one,
427 $ v, ldv, a( irow+(kkcol-1)*lda ), lda, one,
428 $ work, dblk )
429 CALL dlamov( 'A', d1, kln, work, dblk,
430 $ a( irow+(kkcol-1)*lda ), lda )
431 50 CONTINUE
432 ELSE IF( up .EQ. ii ) THEN
433 icol1 = numroc( n, hbl, mycol, jafirst, npcol )
434 DO 60 kkcol = icol, icol1, hstep
435 kln = min( hstep, icol1-kkcol+1 )
436 CALL dgemm( 'T', 'N', d1, kln, d2, one,
437 $ v( d1+1, 1 ), ldv, a( irow+(kkcol-1)*lda ),
438 $ lda, zero, work, dblk )
439 CALL dgesd2d( contxt, d1, kln, work, dblk, up,
440 $ mycol )
441 CALL dgerv2d( contxt, d2, kln, work( d1+1 ),
442 $ dblk, up, mycol )
443 CALL dgemm( 'T', 'N', d2, kln, d2, one,
444 $ v( d1+1, d1+1 ), ldv,
445 $ a( irow+(kkcol-1)*lda ), lda, one,
446 $ work( d1+1 ), dblk )
447 CALL dlamov( 'A', d2, kln, work( d1+1 ), dblk,
448 $ a( irow+(kkcol-1)*lda ), lda )
449 60 CONTINUE
450 END IF
451 END IF
452 END IF
453*
454* Update vertical slab in A.
455*
456 CALL infog2l( ltop, i-dblk+1, desca, nprow, npcol, myrow,
457 $ mycol, irow, icol, ii, jj )
458 IF( mycol .EQ. left ) THEN
459 IF( mycol .EQ. jj ) THEN
460 CALL infog2l( i-dblk, i-dblk+1, desca, nprow, npcol,
461 $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
462 IF( myrow .NE. itmp1 ) irow1 = irow1-1
463 DO 70 kkrow = irow, irow1, vstep
464 kln = min( vstep, irow1-kkrow+1 )
465 CALL dgemm( 'N', 'N', kln, dblk, dblk, one,
466 $ a( kkrow+(icol-1)*lda ), lda, v, ldv, zero,
467 $ work, kln )
468 CALL dlamov( 'A', kln, dblk, work, kln,
469 $ a( kkrow+(icol-1)*lda ), lda )
470 70 CONTINUE
471 END IF
472 ELSE
473 IF( mycol .EQ. jj ) THEN
474 CALL infog2l( i-dblk, i-dblk+1, desca, nprow, npcol,
475 $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
476 IF( myrow .NE. itmp1 ) irow1 = irow1-1
477 DO 80 kkrow = irow, irow1, vstep
478 kln = min( vstep, irow1-kkrow+1 )
479 CALL dgemm( 'N', 'N', kln, d2, d1, one,
480 $ a( kkrow+(icol-1)*lda ), lda,
481 $ v( 1, d1+1 ), ldv, zero, work( 1+d1*kln ),
482 $ kln )
483 CALL dgesd2d( contxt, kln, d2, work( 1+d1*kln ),
484 $ kln, myrow, right )
485 CALL dgerv2d( contxt, kln, d1, work, kln, myrow,
486 $ right )
487 CALL dgemm( 'N', 'N', kln, d1, d1, one,
488 $ a( kkrow+(icol-1)*lda ), lda, v, ldv, one,
489 $ work, kln )
490 CALL dlamov( 'A', kln, d1, work, kln,
491 $ a( kkrow+(icol-1)*lda ), lda )
492 80 CONTINUE
493 ELSE IF ( left .EQ. jj ) THEN
494 CALL infog2l( i-dblk, i-dblk+1, desca, nprow, npcol,
495 $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
496 IF( myrow .NE. itmp1 ) irow1 = irow1-1
497 DO 90 kkrow = irow, irow1, vstep
498 kln = min( vstep, irow1-kkrow+1 )
499 CALL dgemm( 'N', 'N', kln, d1, d2, one,
500 $ a( kkrow+(icol-1)*lda ), lda, v( d1+1, 1 ),
501 $ ldv, zero, work, kln )
502 CALL dgesd2d( contxt, kln, d1, work, kln, myrow,
503 $ left )
504 CALL dgerv2d( contxt, kln, d2, work( 1+d1*kln ),
505 $ kln, myrow, left )
506 CALL dgemm( 'N', 'N', kln, d2, d2, one,
507 $ a( kkrow+(icol-1)*lda ), lda, v( d1+1, d1+1 ),
508 $ ldv, one, work( 1+d1*kln ), kln )
509 CALL dlamov( 'A', kln, d2, work( 1+d1*kln ), kln,
510 $ a( kkrow+(icol-1)*lda ), lda )
511 90 CONTINUE
512 END IF
513 END IF
514*
515* Update vertical slab in Z.
516*
517 IF( wantz ) THEN
518 CALL infog2l( iloz, i-dblk+1, descz, nprow, npcol, myrow,
519 $ mycol, irow, icol, ii, jj )
520 IF( mycol .EQ. left ) THEN
521 IF( mycol .EQ. jj ) THEN
522 CALL infog2l( ihiz, i-dblk+1, descz, nprow, npcol,
523 $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
524 IF( myrow .NE. itmp1 ) irow1 = irow1-1
525 DO 100 kkrow = irow, irow1, vstep
526 kln = min( vstep, irow1-kkrow+1 )
527 CALL dgemm( 'N', 'N', kln, dblk, dblk, one,
528 $ z( kkrow+(icol-1)*ldz ), ldz, v, ldv, zero,
529 $ work, kln )
530 CALL dlamov( 'A', kln, dblk, work, kln,
531 $ z( kkrow+(icol-1)*ldz ), ldz )
532 100 CONTINUE
533 END IF
534 ELSE
535 IF( mycol .EQ. jj ) THEN
536 CALL infog2l( ihiz, i-dblk+1, descz, nprow, npcol,
537 $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
538 IF( myrow .NE. itmp1 ) irow1 = irow1-1
539 DO 110 kkrow = irow, irow1, vstep
540 kln = min( vstep, irow1-kkrow+1 )
541 CALL dgemm( 'N', 'N', kln, d2, d1, one,
542 $ z( kkrow+(icol-1)*ldz ), ldz,
543 $ v( 1, d1+1 ), ldv, zero, work( 1+d1*kln ),
544 $ kln )
545 CALL dgesd2d( contxt, kln, d2, work( 1+d1*kln ),
546 $ kln, myrow, right )
547 CALL dgerv2d( contxt, kln, d1, work, kln, myrow,
548 $ right )
549 CALL dgemm( 'N', 'N', kln, d1, d1, one,
550 $ z( kkrow+(icol-1)*ldz ), ldz, v, ldv, one,
551 $ work, kln )
552 CALL dlamov( 'A', kln, d1, work, kln,
553 $ z( kkrow+(icol-1)*ldz ), ldz )
554 110 CONTINUE
555 ELSE IF( left .EQ. jj ) THEN
556 CALL infog2l( ihiz, i-dblk+1, descz, nprow, npcol,
557 $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
558 IF( myrow .NE. itmp1 ) irow1 = irow1-1
559 DO 120 kkrow = irow, irow1, vstep
560 kln = min( vstep, irow1-kkrow+1 )
561 CALL dgemm( 'N', 'N', kln, d1, d2, one,
562 $ z( kkrow+(icol-1)*ldz ), ldz,
563 $ v( d1+1, 1 ), ldv, zero, work, kln )
564 CALL dgesd2d( contxt, kln, d1, work, kln, myrow,
565 $ left )
566 CALL dgerv2d( contxt, kln, d2, work( 1+d1*kln ),
567 $ kln, myrow, left )
568 CALL dgemm( 'N', 'N', kln, d2, d2, one,
569 $ z( kkrow+(icol-1)*ldz ), ldz,
570 $ v( d1+1, d1+1 ), ldv, one,
571 $ work( 1+d1*kln ), kln )
572 CALL dlamov( 'A', kln, d2, work( 1+d1*kln ),
573 $ kln, z( kkrow+(icol-1)*ldz ), ldz )
574 120 CONTINUE
575 END IF
576 END IF
577 END IF
578*
579 ELSE
580*
581* Most complicated case: the deflation window lay across the
582* border of the processor mesh.
583* Treat V as a distributed matrix and call PDGEMM.
584*
585 hstep = lwork / dblk * npcol
586 vstep = lwork / dblk * nprow
587 lldtmp = numroc( dblk, dblk, myrow, 0, nprow )
588 lldtmp = max( 1, lldtmp )
589 CALL descinit( descv, dblk, dblk, dblk, dblk, 0, 0, contxt,
590 $ lldtmp, info )
591 CALL descinit( descwh, dblk, hstep, dblk, lwork / dblk, 0,
592 $ 0, contxt, lldtmp, info )
593*
594* Update horizontal slab in A.
595*
596 IF( wantt ) THEN
597 DO 130 kkcol = i+1, n, hstep
598 kln = min( hstep, n-kkcol+1 )
599 CALL pdgemm( 'T', 'N', dblk, kln, dblk, one, v, 1, 1,
600 $ descv, a, i-dblk+1, kkcol, desca, zero, work, 1,
601 $ 1, descwh )
602 CALL pdgemr2d( dblk, kln, work, 1, 1, descwh, a,
603 $ i-dblk+1, kkcol, desca, contxt )
604 130 CONTINUE
605 END IF
606*
607* Update vertical slab in A.
608*
609 DO 140 kkrow = ltop, i-dblk, vstep
610 kln = min( vstep, i-dblk-kkrow+1 )
611 lldtmp = numroc( kln, lwork / dblk, myrow, 0, nprow )
612 lldtmp = max( 1, lldtmp )
613 CALL descinit( descwv, kln, dblk, lwork / dblk, dblk, 0,
614 $ 0, contxt, lldtmp, info )
615 CALL pdgemm( 'N', 'N', kln, dblk, dblk, one, a, kkrow,
616 $ i-dblk+1, desca, v, 1, 1, descv, zero, work, 1, 1,
617 $ descwv )
618 CALL pdgemr2d( kln, dblk, work, 1, 1, descwv, a, kkrow,
619 $ i-dblk+1, desca, contxt )
620 140 CONTINUE
621*
622* Update vertical slab in Z.
623*
624 IF( wantz ) THEN
625 DO 150 kkrow = iloz, ihiz, vstep
626 kln = min( vstep, ihiz-kkrow+1 )
627 lldtmp = numroc( kln, lwork / dblk, myrow, 0, nprow )
628 lldtmp = max( 1, lldtmp )
629 CALL descinit( descwv, kln, dblk, lwork / dblk, dblk,
630 $ 0, 0, contxt, lldtmp, info )
631 CALL pdgemm( 'N', 'N', kln, dblk, dblk, one, z, kkrow,
632 $ i-dblk+1, descz, v, 1, 1, descv, zero, work, 1,
633 $ 1, descwv )
634 CALL pdgemr2d( kln, dblk, work, 1, 1, descwv, z,
635 $ kkrow, i-dblk+1, descz, contxt )
636 150 CONTINUE
637 END IF
638 END IF
639*
640* Extract converged eigenvalues.
641*
642 ii = 0
643 160 CONTINUE
644 IF( ii .EQ. nd-1 .OR. wi( dblk-ii ) .EQ. zero ) THEN
645 IF( node .EQ. 0 ) THEN
646 sr( i-ii ) = wr( dblk-ii )
647 ELSE
648 sr( i-ii ) = zero
649 END IF
650 si( i-ii ) = zero
651 ii = ii + 1
652 ELSE
653 IF( node .EQ. 0 ) THEN
654 sr( i-ii-1 ) = wr( dblk-ii-1 )
655 sr( i-ii ) = wr( dblk-ii )
656 si( i-ii-1 ) = wi( dblk-ii-1 )
657 si( i-ii ) = wi( dblk-ii )
658 ELSE
659 sr( i-ii-1 ) = zero
660 sr( i-ii ) = zero
661 si( i-ii-1 ) = zero
662 si( i-ii ) = zero
663 END IF
664 ii = ii + 2
665 END IF
666 IF( ii .LT. nd ) GOTO 160
667 END IF
668*
669* END OF PDLAQR2
670*
671 END
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pdlaqr2(wantt, wantz, n, ktop, kbot, nw, a, desca, iloz, ihiz, z, descz, ns, nd, sr, si, t, ldt, v, ldv, wr, wi, work, lwork)
Definition pdlaqr2.f:4