SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pdlaqr1.f
Go to the documentation of this file.
1 RECURSIVE SUBROUTINE pdlaqr1( WANTT, WANTZ, N, ILO, IHI, A,
2 $ DESCA, WR, WI, ILOZ, IHIZ, Z,
3 $ DESCZ, WORK, LWORK, IWORK,
4 $ ILWORK, INFO )
5*
6* Contribution from the Department of Computing Science and HPC2N,
7* Umea University, Sweden
8*
9* -- ScaLAPACK auxiliary routine (version 2.0.1) --
10* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
11* Univ. of Colorado Denver and University of California, Berkeley.
12* January, 2012
13*
14 IMPLICIT NONE
15*
16* .. Scalar Arguments ..
17 LOGICAL wantt, wantz
18 INTEGER ihi, ihiz, ilo, iloz, ilwork, info, lwork, n
19* ..
20* .. Array Arguments ..
21 INTEGER desca( * ), descz( * ), iwork( * )
22 DOUBLE PRECISION a( * ), wi( * ), work( * ), wr( * ), z( * )
23* ..
24*
25* Purpose
26* =======
27*
28* PDLAQR1 is an auxiliary routine used to find the Schur decomposition
29* and or eigenvalues of a matrix already in Hessenberg form from
30* cols ILO to IHI.
31*
32* This is a modified version of PDLAHQR from ScaLAPACK version 1.7.3.
33* The following modifications were made:
34* o Recently removed workspace query functionality was added.
35* o Aggressive early deflation is implemented.
36* o Aggressive deflation (looking for two consecutive small
37* subdiagonal elements by PDLACONSB) is abandoned.
38* o The returned Schur form is now in canonical form, i.e., the
39* returned 2-by-2 blocks really correspond to complex conjugate
40* pairs of eigenvalues.
41* o For some reason, the original version of PDLAHQR sometimes did
42* not read out the converged eigenvalues correclty. This is now
43* fixed.
44*
45* Notes
46* =====
47*
48* Each global data object is described by an associated description
49* vector. This vector stores the information required to establish
50* the mapping between an object element and its corresponding process
51* and memory location.
52*
53* Let A be a generic term for any 2D block cyclicly distributed array.
54* Such a global array has an associated description vector DESCA.
55* In the following comments, the character _ should be read as
56* "of the global array".
57*
58* NOTATION STORED IN EXPLANATION
59* --------------- -------------- --------------------------------------
60* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
61* DTYPE_A = 1.
62* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
63* the BLACS process grid A is distribu-
64* ted over. The context itself is glo-
65* bal, but the handle (the integer
66* value) may vary.
67* M_A (global) DESCA( M_ ) The number of rows in the global
68* array A.
69* N_A (global) DESCA( N_ ) The number of columns in the global
70* array A.
71* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
72* the rows of the array.
73* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
74* the columns of the array.
75* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
76* row of the array A is distributed.
77* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
78* first column of the array A is
79* distributed.
80* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
81* array. LLD_A >= MAX(1,LOCr(M_A)).
82*
83* Let K be the number of rows or columns of a distributed matrix,
84* and assume that its process grid has dimension p x q.
85* LOCr( K ) denotes the number of elements of K that a process
86* would receive if K were distributed over the p processes of its
87* process column.
88* Similarly, LOCc( K ) denotes the number of elements of K that a
89* process would receive if K were distributed over the q processes of
90* its process row.
91* The values of LOCr() and LOCc() may be determined via a call to the
92* ScaLAPACK tool function, NUMROC:
93* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
94* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
95* An upper bound for these quantities may be computed by:
96* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
97* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
98*
99* Arguments
100* =========
101*
102* WANTT (global input) LOGICAL
103* = .TRUE. : the full Schur form T is required;
104* = .FALSE.: only eigenvalues are required.
105*
106* WANTZ (global input) LOGICAL
107* = .TRUE. : the matrix of Schur vectors Z is required;
108* = .FALSE.: Schur vectors are not required.
109*
110* N (global input) INTEGER
111* The order of the Hessenberg matrix A (and Z if WANTZ).
112* N >= 0.
113*
114* ILO (global input) INTEGER
115* IHI (global input) INTEGER
116* It is assumed that A is already upper quasi-triangular in
117* rows and columns IHI+1:N, and that A(ILO,ILO-1) = 0 (unless
118* ILO = 1). PDLAQR1 works primarily with the Hessenberg
119* submatrix in rows and columns ILO to IHI, but applies
120* transformations to all of H if WANTT is .TRUE..
121* 1 <= ILO <= max(1,IHI); IHI <= N.
122*
123* A (global input/output) DOUBLE PRECISION array, dimension
124* (DESCA(LLD_),*)
125* On entry, the upper Hessenberg matrix A.
126* On exit, if WANTT is .TRUE., A is upper quasi-triangular in
127* rows and columns ILO:IHI, with any 2-by-2 or larger diagonal
128* blocks not yet in standard form. If WANTT is .FALSE., the
129* contents of A are unspecified on exit.
130*
131* DESCA (global and local input) INTEGER array of dimension DLEN_.
132* The array descriptor for the distributed matrix A.
133*
134* WR (global replicated output) DOUBLE PRECISION array,
135* dimension (N)
136* WI (global replicated output) DOUBLE PRECISION array,
137* dimension (N)
138* The real and imaginary parts, respectively, of the computed
139* eigenvalues ILO to IHI are stored in the corresponding
140* elements of WR and WI. If two eigenvalues are computed as a
141* complex conjugate pair, they are stored in consecutive
142* elements of WR and WI, say the i-th and (i+1)th, with
143* WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the
144* eigenvalues are stored in the same order as on the diagonal
145* of the Schur form returned in A. A may be returned with
146* larger diagonal blocks until the next release.
147*
148* ILOZ (global input) INTEGER
149* IHIZ (global input) INTEGER
150* Specify the rows of Z to which transformations must be
151* applied if WANTZ is .TRUE..
152* 1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
153*
154* Z (global input/output) DOUBLE PRECISION array.
155* If WANTZ is .TRUE., on entry Z must contain the current
156* matrix Z of transformations accumulated by PDHSEQR, and on
157* exit Z has been updated; transformations are applied only to
158* the submatrix Z(ILOZ:IHIZ,ILO:IHI).
159* If WANTZ is .FALSE., Z is not referenced.
160*
161* DESCZ (global and local input) INTEGER array of dimension DLEN_.
162* The array descriptor for the distributed matrix Z.
163*
164* WORK (local output) DOUBLE PRECISION array of size LWORK
165*
166* LWORK (local input) INTEGER
167* WORK(LWORK) is a local array and LWORK is assumed big enough
168* so that LWORK >= 6*N + 6*385*385 +
169* MAX( 2*MAX(DESCZ(LLD_),DESCA(LLD_)) + 2*LOCc(N),
170* 7*Ceil(N/HBL)/LCM(NPROW,NPCOL)) )
171*
172* IWORK (global and local input) INTEGER array of size ILWORK
173*
174* ILWORK (local input) INTEGER
175* This holds the some of the IBLK integer arrays. This is held
176* as a place holder for the next release.
177*
178* INFO (global output) INTEGER
179* < 0: parameter number -INFO incorrect or inconsistent
180* = 0: successful exit
181* > 0: PDLAQR1 failed to compute all the eigenvalues ILO to IHI
182* in a total of 30*(IHI-ILO+1) iterations; if INFO = i,
183* elements i+1:ihi of WR and WI contain those eigenvalues
184* which have been successfully computed.
185*
186* Logic:
187* This algorithm is very similar to _LAHQR. Unlike _LAHQR,
188* instead of sending one double shift through the largest
189* unreduced submatrix, this algorithm sends multiple double shifts
190* and spaces them apart so that there can be parallelism across
191* several processor row/columns. Another critical difference is
192* that this algorithm aggregrates multiple transforms together in
193* order to apply them in a block fashion.
194*
195* Important Local Variables:
196* IBLK = The maximum number of bulges that can be computed.
197* Currently fixed. Future releases this won't be fixed.
198* HBL = The square block size (HBL=DESCA(MB_)=DESCA(NB_))
199* ROTN = The number of transforms to block together
200* NBULGE = The number of bulges that will be attempted on the
201* current submatrix.
202* IBULGE = The current number of bulges started.
203* K1(*),K2(*) = The current bulge loops from K1(*) to K2(*).
204*
205* Subroutines:
206* This routine calls:
207* PDLAWIL -> Given the shift, get the transformation
208* DLASORTE -> Pair up eigenvalues so that reals are paired.
209* PDLACP3 -> Parallel array to local replicated array copy &
210* back.
211* DLAREF -> Row/column reflector applier. Core routine here.
212* PDLASMSUB -> Finds negligible subdiagonal elements.
213*
214* Current Notes and/or Restrictions:
215* 1.) This code requires the distributed block size to be square
216* and at least six (6); unlike simpler codes like LU, this
217* algorithm is extremely sensitive to block size. Unwise
218* choices of too small a block size can lead to bad
219* performance.
220* 2.) This code requires A and Z to be distributed identically
221* and have identical contxts.
222* 3.) This release currently does not have a routine for
223* resolving the Schur blocks into regular 2x2 form after
224* this code is completed. Because of this, a significant
225* performance impact is required while the deflation is done
226* by sometimes a single column of processors.
227* 4.) This code does not currently block the initial transforms
228* so that none of the rows or columns for any bulge are
229* completed until all are started. To offset pipeline
230* start-up it is recommended that at least 2*LCM(NPROW,NPCOL)
231* bulges are used (if possible)
232* 5.) The maximum number of bulges currently supported is fixed at
233* 32. In future versions this will be limited only by the
234* incoming WORK array.
235* 6.) The matrix A must be in upper Hessenberg form. If elements
236* below the subdiagonal are nonzero, the resulting transforms
237* may be nonsimilar. This is also true with the LAPACK
238* routine.
239* 7.) For this release, it is assumed RSRC_=CSRC_=0
240* 8.) Currently, all the eigenvalues are distributed to all the
241* nodes. Future releases will probably distribute the
242* eigenvalues by the column partitioning.
243* 9.) The internals of this routine are subject to change.
244*
245* Implemented by: G. Henry, November 17, 1996
246*
247* Modified by Robert Granat and Meiyue Shao, Department of Computing
248* Science and HPC2N, Umea University, Sweden
249*
250* =====================================================================
251*
252* .. Parameters ..
253 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
254 $ lld_, mb_, m_, nb_, n_, rsrc_
255 PARAMETER ( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
256 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
257 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
258 DOUBLE PRECISION zero, one, half
259 PARAMETER ( zero = 0.0d+0, one = 1.0d+0, half = 0.5d+0 )
260 DOUBLE PRECISION const
261 parameter( const = 1.50d+0 )
262 INTEGER iblk, lds
263 parameter( iblk = 32, lds = 12*iblk+1 )
264* ..
265* .. Local Scalars ..
266 INTEGER contxt, down, hbl, i, i1, i2, iafirst, ibulge,
267 $ icbuf, icol, icol1, icol2, ierr, ii,
268 $ irbuf, irow, irow1, irow2, ispec, istart,
269 $ istartcol, istartrow, istop, isub,
270 $ itermax, itmp1, itmp2, itn, its, j, jafirst,
271 $ jblk, jj, k, ki, l, lcmrc, lda, ldz, left,
272 $ lihih, lihiz, liloh, liloz, locali1, locali2,
273 $ localk, localm, m, modkm1, mycol, myrow,
274 $ nbulge, nh, node, npcol, nprow, nr, num, nz,
275 $ right, rotn, up, vecsidx, totit, totns, totsw,
276 $ dblk, nibble, nd, ns, ltop, lwkopt, s1, s2, s3
277 DOUBLE PRECISION ave, disc, h00, h10, h11, h12, h21, h22, h33,
278 $ h43h34, h44, ovfl, s, smlnum, sum, t1, t1copy,
279 $ t2, t3, ulp, unfl, v1save, v2, v2save, v3,
280 $ v3save, sn, cs, swap
281 LOGICAL aed
282* ..
283* .. Local Arrays ..
284 INTEGER icurcol( iblk ), icurrow( iblk ), k1( iblk ),
285 $ k2( iblk ), kcol( iblk ), kp2col( iblk ),
286 $ kp2row( iblk ), krow( iblk ), localk2( iblk )
287 DOUBLE PRECISION smalla( 6, 6, iblk ), vcopy( 3 )
288* ..
289* .. External Functions ..
290 INTEGER ilcm, numroc, ilaenv
291 DOUBLE PRECISION pdlamch
292 EXTERNAL ilcm, numroc, ilaenv, pdlamch
293* ..
294* .. External Subroutines ..
295 EXTERNAL blacs_gridinfo, dcopy, dgebr2d, dgebs2d,
296 $ dgerv2d, dgesd2d, dgsum2d, dlahqr, dlaref,
297 $ dlarfg, dlasorte, igamn2d, infog1l, infog2l,
299 $ pdlawil, pxerbla, dlanv2, pdlaqr2, pdlaqr4
300* ..
301* .. Intrinsic Functions ..
302 INTRINSIC abs, dble, max, min, mod, sign, sqrt
303* ..
304* .. Executable Statements ..
305*
306 info = 0
307*
308 itermax = 30*( ihi-ilo+1 )
309 IF( n.EQ.0 )
310 $ RETURN
311*
312* NODE (IAFIRST,JAFIRST) OWNS A(1,1)
313*
314 hbl = desca( mb_ )
315 contxt = desca( ctxt_ )
316 lda = desca( lld_ )
317 iafirst = desca( rsrc_ )
318 jafirst = desca( csrc_ )
319 ldz = descz( lld_ )
320 CALL blacs_gridinfo( contxt, nprow, npcol, myrow, mycol )
321 node = myrow*npcol + mycol
322 num = nprow*npcol
323 left = mod( mycol+npcol-1, npcol )
324 right = mod( mycol+1, npcol )
325 up = mod( myrow+nprow-1, nprow )
326 down = mod( myrow+1, nprow )
327 lcmrc = ilcm( nprow, npcol )
328 totit = 0
329 totns = 0
330 totsw = 0
331*
332* Determine the number of columns we have so we can check workspace
333*
334 localk = numroc( n, hbl, mycol, jafirst, npcol )
335 jj = n / hbl
336 IF( jj*hbl.LT.n )
337 $ jj = jj + 1
338 jj = 7*jj / lcmrc
339 lwkopt = int( 6*n+max( 3*max( lda, ldz )+2*localk, jj )
340 $ +6*lds*lds )
341 IF( lwork.EQ.-1 .OR. ilwork.EQ.-1 ) THEN
342 work( 1 ) = dble( lwkopt )
343 iwork( 1 ) = 3
344 RETURN
345 ELSEIF( lwork.LT.lwkopt ) THEN
346 info = -15
347 END IF
348 IF( descz( ctxt_ ).NE.desca( ctxt_ ) ) THEN
349 info = -( 1300+ctxt_ )
350 END IF
351 IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
352 info = -( 700+nb_ )
353 END IF
354 IF( descz( mb_ ).NE.descz( nb_ ) ) THEN
355 info = -( 1300+nb_ )
356 END IF
357 IF( desca( mb_ ).NE.descz( mb_ ) ) THEN
358 info = -( 1300+mb_ )
359 END IF
360 IF( ( ilo.GT.n ) .OR. ( ilo.LT.1 ) ) THEN
361 info = -4
362 END IF
363 IF( ( ihi.GT.n ) .OR. ( ihi.LT.1 ) ) THEN
364 info = -5
365 END IF
366 IF( hbl.LT.5 ) THEN
367 info = -( 700+mb_ )
368 END IF
369 CALL igamn2d( contxt, 'ALL', ' ', 1, 1, info, 1, itmp1, itmp2, -1,
370 $ -1, -1 )
371 IF( info.LT.0 ) THEN
372 CALL pxerbla( contxt, 'PDLAQR1', -info )
373 work( 1 ) = dble( lwkopt )
374 RETURN
375 END IF
376*
377* Set work array indices
378*
379 s1 = 0
380 s2 = s1+lds*lds
381 s3 = s2+lds*lds
382 vecsidx = s3+4*lds*lds
383 isub = vecsidx+3*n
384 irbuf = isub+n
385 icbuf = irbuf+n
386*
387* Find a value for ROTN
388*
389 rotn = hbl / 3
390 rotn = max( rotn, hbl-2 )
391 rotn = min( rotn, 1 )
392*
393 IF( ilo.EQ.ihi ) THEN
394 CALL infog2l( ilo, ilo, desca, nprow, npcol, myrow, mycol,
395 $ irow, icol, ii, jj )
396 IF( ( myrow.EQ.ii ) .AND. ( mycol.EQ.jj ) ) THEN
397 wr( ilo ) = a( ( icol-1 )*lda+irow )
398 ELSE
399 wr( ilo ) = zero
400 END IF
401 wi( ilo ) = zero
402 work( 1 ) = dble( lwkopt )
403 RETURN
404 END IF
405*
406 nh = ihi - ilo + 1
407 nz = ihiz - iloz + 1
408*
409* If the diagonal block is small enough, copy it to local memory and
410* call DLAHQR directly.
411*
412 IF( nh .LE. lds ) THEN
413 CALL pdlaqr4( wantt, wantz, n, ilo, ihi, a, desca, wr, wi,
414 $ iloz, ihiz, z, descz, work( s1+1 ), nh,
415 $ work( s2+1 ), nh, work( s3+1 ), 4*lds*lds,
416 $ info )
417 work( 1 ) = dble( lwkopt )
418 RETURN
419 END IF
420*
421 CALL infog1l( iloz, hbl, nprow, myrow, descz(rsrc_), liloz, lihiz)
422 lihiz = numroc( ihiz, hbl, myrow, descz(rsrc_), nprow )
423*
424* Set machine-dependent constants for the stopping criterion.
425* If NORM(H) <= SQRT(OVFL), overflow should not occur.
426*
427 unfl = pdlamch( contxt, 'SAFE MINIMUM' )
428 ovfl = one / unfl
429 CALL pdlabad( contxt, unfl, ovfl )
430 ulp = pdlamch( contxt, 'PRECISION' )
431 smlnum = unfl*( nh / ulp )
432*
433* I1 and I2 are the indices of the first row and last column of H
434* to which transformations must be applied. If eigenvalues only are
435* being computed, I1 and I2 are set inside the main loop.
436*
437 IF( wantt ) THEN
438 i1 = 1
439 i2 = n
440 END IF
441*
442* ITN is the total number of QR iterations allowed.
443*
444 itn = itermax
445*
446* The main loop begins here. I is the loop index and decreases from
447* IHI to ILO in steps of our schur block size (<=2*IBLK). Each
448* iteration of the loop works with the active submatrix in rows
449* and columns L to I. Eigenvalues I+1 to IHI have already
450* converged. Either L = ILO or the global A(L,L-1) is negligible
451* so that the matrix splits.
452*
453 i = ihi
454 10 CONTINUE
455 l = ilo
456 IF( i.LT.ilo )
457 $ GO TO 450
458*
459* Perform QR iterations on rows and columns ILO to I until a
460* submatrix of order 1 or 2 splits off at the bottom because a
461* subdiagonal element has become negligible.
462*
463 DO 420 its = 0, itn
464 totit = totit + 1
465*
466* Look for a single small subdiagonal element.
467*
468 CALL pdlasmsub( a, desca, i, l, k, smlnum, work( irbuf+1 ),
469 $ lwork-irbuf )
470 l = k
471*
472 IF( l.GT.ilo ) THEN
473*
474* H(L,L-1) is negligible
475*
476 CALL infog2l( l, l-1, desca, nprow, npcol, myrow, mycol,
477 $ irow, icol, itmp1, itmp2 )
478 IF( ( myrow.EQ.itmp1 ) .AND. ( mycol.EQ.itmp2 ) ) THEN
479 a( ( icol-1 )*lda+irow ) = zero
480 END IF
481 work( isub+l-1 ) = zero
482 END IF
483*
484* Exit from loop if a small submatrix has split off.
485*
486 m = l - 10
487 IF ( l .GT. i - lds )
488 $ GO TO 430
489*
490* Now the active submatrix is in rows and columns L to I. If
491* eigenvalues only are being computed, only the active submatrix
492* need be transformed.
493*
494 IF( .NOT.wantt ) THEN
495 i1 = l
496 i2 = i
497 END IF
498*
499* Copy submatrix of size 2*JBLK and prepare to do generalized
500* Wilkinson shift or an exceptional shift
501*
502 nh = i-l+1
503 aed = .true.
504 jblk = min( iblk, ( nh / 2 )-1 )
505 IF( jblk.GT.lcmrc ) THEN
506*
507* Make sure it's divisible by LCM (we want even workloads!)
508*
509 jblk = jblk - mod( jblk, lcmrc )
510 END IF
511 jblk = min( jblk, 2*lcmrc )
512 jblk = max( jblk, 1 )
513*
514 IF( its.EQ.20 .OR. its.EQ.40 ) THEN
515*
516* Exceptional shift.
517*
518 CALL pdlacp3( 2*jblk, i-2*jblk+1, a, desca, work( s1+1 ),
519 $ lds, -1, -1, 0 )
520 DO 20 ii = 2*jblk, 2, -1
521 work( s1+ii+(ii-1)*lds ) = const*(
522 $ abs( work( s1+ii+(ii-1)*lds ) )+
523 $ abs( work( s1+ii+(ii-2)*lds ) ) )
524 work( s1+ii+(ii-2)*lds ) = zero
525 work( s1+ii-1+(ii-1)*lds ) = zero
526 20 CONTINUE
527 work( s1+1 ) = const*abs( work( s1+1 ) )
528 ELSE
529*
530* Aggressive early deflation.
531*
532 IF( aed ) THEN
533 dblk = ilaenv( 13, 'DLAQR0', 'SV', n, l, i, 4*lds*lds )
534 dblk = max( 2*jblk, dblk ) + 1
535 dblk = min( nh, lds, dblk )
536 CALL pdlaqr2( wantt, wantz, n, l, i, dblk, a, desca,
537 $ iloz, ihiz, z, descz, ns, nd, wr, wi,
538 $ work( s1+1 ), lds, work( s2+1 ), dblk,
539 $ work( irbuf+1 ), work( icbuf+1 ),
540 $ work( s3+1 ), 4*lds*lds )
541*
542* Skip a QR sweep if enough eigenvalues are deflated.
543*
544 nibble = ilaenv( 14, 'DLAQR0', 'SV', n, l, i, 4*lds*lds )
545 nibble = max( 0, nibble )
546 i = i - nd
547 dblk = dblk - nd
548 IF( 100*nd .GT. nibble*nh .OR. dblk .LT. 2*jblk ) GOTO 10
549*
550* Use unconverged eigenvalues as shifts for the QR sweep.
551* (This option is turned off because of the quality of
552* these shifts are not so good.)
553*
554* IF( ND.GE.0 .AND. ND+DBLK.GE.64 ) THEN
555 IF( .false. ) THEN
556 CALL dlaset( 'L', dblk-1, dblk-1, zero, zero,
557 $ work( s1+2 ), lds )
558 work( irbuf+1 ) = work( s1+1 )
559 work( icbuf+1 ) = zero
560*
561* Shuffle shifts into pairs of real shifts and pairs of
562* complex conjugate shifts assuming complex conjugate
563* shifts are already adjacent to one another.
564*
565 DO 21 ii = dblk, 3, -2
566 IF( work( icbuf+ii ).NE.-work( icbuf+ii-1 ) ) THEN
567 swap = work( irbuf+ii )
568 work( irbuf+ii ) = work( irbuf+ii-1 )
569 work( irbuf+ii-1 ) = work( irbuf+ii-2 )
570 work( irbuf+ii-2 ) = swap
571 swap = work( icbuf+ii )
572 work( icbuf+ii ) = work( icbuf+ii-1 )
573 work( icbuf+ii-1 ) = work( icbuf+ii-2 )
574 work( icbuf+ii-2 ) = swap
575 END IF
576 21 CONTINUE
577*
578* Copy undeflatable eigenvalues to the diagonal of S1.
579*
580 ii = 2
581 22 CONTINUE
582 IF( work( icbuf+ii ) .EQ. zero ) THEN
583 work( s1+ii+(ii-1)*lds ) = work( irbuf+ii )
584 work( s1+ii+(ii-2)*lds ) = zero
585 ii = ii + 1
586 ELSE
587 work( s1+ii+(ii-1)*lds ) = work( irbuf+ii )
588 work( s1+ii+1+ii*lds ) = work( irbuf+ii )
589 work( s1+ii+1+(ii-1)*lds ) = work( icbuf+ii )
590 work( s1+ii+ii*lds ) = -work( icbuf+ii )
591 ii = ii + 2
592 END IF
593 IF( ii .LE. dblk ) GOTO 22
594 ELSE
595 CALL dlahqr( .false., .false., dblk, 1, dblk,
596 $ work( s1+1 ), lds, work( irbuf+1 ),
597 $ work( icbuf+1 ), 1, dblk, z, ldz, ierr )
598 END IF
599 ELSE
600 dblk = 2*jblk
601 CALL pdlacp3( dblk, i-dblk+1, a, desca, work( s1+1 ),
602 $ lds, -1, -1, 0 )
603 CALL dlahqr( .false., .false., dblk, 1, dblk,
604 $ work( s1+1 ), lds, work( irbuf+1 ),
605 $ work( icbuf+1 ), 1, dblk, z, ldz, ierr )
606 END IF
607 totsw = totsw + 1
608*
609* Prepare to use Wilkinson's double shift
610*
611 h44 = work( s1+dblk+(dblk-1)*lds )
612 h33 = work( s1+dblk-1+(dblk-2)*lds )
613 h43h34 = work( s1+dblk-1+(dblk-1)*lds )*
614 $ work( s1+dblk+(dblk-2)*lds )
615 IF( ( jblk.GT.1 ) .AND. ( its.GT.30 ) ) THEN
616 s = work( s1+dblk-1+(dblk-3)*lds )
617 disc = ( h33-h44 )*half
618 disc = disc*disc + h43h34
619 IF( disc.GT.zero ) THEN
620*
621* Real roots: Use Wilkinson's shift twice
622*
623 disc = sqrt( disc )
624 ave = half*( h33+h44 )
625 IF( abs( h33 )-abs( h44 ).GT.zero ) THEN
626 h33 = h33*h44 - h43h34
627 h44 = h33 / ( sign( disc, ave )+ave )
628 ELSE
629 h44 = sign( disc, ave ) + ave
630 END IF
631 h33 = h44
632 h43h34 = zero
633 END IF
634 END IF
635 END IF
636*
637* Look for two consecutive small subdiagonal elements:
638* PDLACONSB is the routine that does this.
639*
640* CALL PDLACONSB( A, DESCA, I, L, M, H44, H33, H43H34,
641* $ WORK( IRBUF+1 ), LWORK-IRBUF )
642*
643* Skip small submatrices
644*
645* IF ( M .GE. I - 5 )
646* $ GO TO 80
647*
648* In principle PDLACONSB needs to check all shifts to decide
649* whether two consecutive small subdiagonal entries are suitable
650* as the starting position of the bulge chasing phase. It can be
651* dangerous to check the first pair of shifts only. Moreover it
652* is quite rare to obtain an M which is much larger than L. This
653* process is a bit expensive compared with the benefit.
654* Therefore it is sensible to abandon this routine. Total amount
655* of communications is saved in average.
656*
657 m = l
658*
659* Double-shift QR step
660*
661* NBULGE is the number of bulges that will be attempted
662*
663 istop = min( m+rotn-mod( m, rotn ), i-2 )
664 istop = min( istop, m+hbl-3-mod( m-1, hbl ) )
665 istop = min( istop, i2-2 )
666 istop = max( istop, m )
667 nbulge = ( i-1-istop ) / hbl
668*
669* Do not exceed maximum determined.
670*
671 nbulge = min( nbulge, jblk )
672 IF( nbulge.GT.lcmrc ) THEN
673*
674* Make sure it's divisible by LCM (we want even workloads!)
675*
676 nbulge = nbulge - mod( nbulge, lcmrc )
677 END IF
678 nbulge = max( nbulge, 1 )
679*
680 totns = totns + nbulge*2
681*
682 IF( ( its.NE.20 ) .AND. ( its.NE.40 ) .AND. ( nbulge.GT.1 ) )
683 $ THEN
684*
685* sort the eigenpairs so that they are in twos for double
686* shifts. only call if several need sorting
687*
688* CALL DLASORTE( S1( 2*( JBLK-NBULGE )+1,
689* $ 2*( JBLK-NBULGE )+1 ), 3*IBLK, 2*NBULGE,
690* $ WORK( IRBUF+1 ), IERR )
691 CALL dlasorte( work(s1+dblk-2*nbulge+1+(dblk-2*nbulge)*lds),
692 $ lds, 2*nbulge, work( irbuf+1 ), ierr )
693 END IF
694*
695* IBULGE is the number of bulges going so far
696*
697 ibulge = 1
698*
699* "A" row defs : main row transforms from LOCALK to LOCALI2
700*
701 CALL infog1l( m, hbl, npcol, mycol, desca(csrc_),itmp1,localk )
702 localk = numroc( n, hbl, mycol, desca(csrc_), npcol )
703 CALL infog1l( 1, hbl, npcol, mycol,desca(csrc_),icol1,locali2 )
704 locali2 = numroc( i2, hbl, mycol, desca(csrc_), npcol )
705*
706* "A" col defs : main col transforms from LOCALI1 to LOCALM
707*
708 CALL infog1l( i1, hbl, nprow,myrow,desca(rsrc_),locali1,icol1 )
709 icol1 = numroc( n, hbl, myrow, desca(rsrc_), nprow )
710 CALL infog1l( 1, hbl, nprow, myrow, desca(rsrc_),localm,icol1 )
711 icol1 = numroc( min( m+3, i ), hbl, myrow, desca(rsrc_),nprow )
712*
713* Which row & column will start the bulges
714*
715 istartrow = mod( ( m+1 ) / hbl + iafirst, nprow )
716 istartcol = mod( ( m+1 ) / hbl + jafirst, npcol )
717*
718 CALL infog1l( m, hbl, nprow, myrow, desca(rsrc_), ii, itmp2 )
719 itmp2 = numroc( n, hbl, myrow, desca(rsrc_), nprow )
720 CALL infog1l( m, hbl, npcol, mycol, desca(csrc_), jj, itmp2 )
721 itmp2 = numroc( n, hbl, mycol, desca(csrc_), npcol )
722 CALL infog1l(1,hbl,nprow,myrow,desca(rsrc_),istop,kp2row( 1 ) )
723 kp2row( 1 ) = numroc( m+2, hbl, myrow, desca(rsrc_), nprow )
724 CALL infog1l(1,hbl,npcol,mycol,desca(csrc_),istop,kp2col( 1 ) )
725 kp2col( 1 ) = numroc( m+2, hbl, mycol, desca(csrc_), npcol )
726*
727* Set all values for bulges. All bulges are stored in
728* intermediate steps as loops over KI. Their current "task"
729* over the global M to I-1 values is always K1(KI) to K2(KI).
730* However, because there are many bulges, K1(KI) & K2(KI) might
731* go past that range while later bulges (KI+1,KI+2,etc..) are
732* finishing up.
733*
734* Rules:
735* If MOD(K1(KI)-1,HBL) < HBL-2 then MOD(K2(KI)-1,HBL)<HBL-2
736* If MOD(K1(KI)-1,HBL) = HBL-2 then MOD(K2(KI)-1,HBL)=HBL-2
737* If MOD(K1(KI)-1,HBL) = HBL-1 then MOD(K2(KI)-1,HBL)=HBL-1
738* K2(KI)-K1(KI) <= ROTN
739*
740* We first hit a border when MOD(K1(KI)-1,HBL)=HBL-2 and we hit
741* it again when MOD(K1(KI)-1,HBL)=HBL-1.
742*
743 DO 30 ki = 1, nbulge
744 k1( ki ) = m
745 istop = min( m+rotn-mod( m, rotn ), i-2 )
746 istop = min( istop, m+hbl-3-mod( m-1, hbl ) )
747 istop = min( istop, i2-2 )
748 istop = max( istop, m )
749 k2( ki ) = istop
750 icurrow( ki ) = istartrow
751 icurcol( ki ) = istartcol
752 localk2( ki ) = itmp1
753 krow( ki ) = ii
754 kcol( ki ) = jj
755 IF( ki.GT.1 )
756 $ kp2row( ki ) = kp2row( 1 )
757 IF( ki.GT.1 )
758 $ kp2col( ki ) = kp2col( 1 )
759 30 CONTINUE
760*
761* Get first transform on node who owns M+2,M+2
762*
763 DO 31 itmp1 = 1, 3
764 vcopy(itmp1) = zero
765 31 CONTINUE
766 itmp1 = istartrow
767 itmp2 = istartcol
768 CALL pdlawil( itmp1, itmp2, m, a, desca, h44, h33, h43h34,
769 $ vcopy )
770 v1save = vcopy( 1 )
771 v2save = vcopy( 2 )
772 v3save = vcopy( 3 )
773 IF( k2( ibulge ).LE.i-1 ) THEN
774 40 CONTINUE
775 IF( ( k1( ibulge ).GE.m+5 ) .AND. ( ibulge.LT.nbulge ) )
776 $ THEN
777 IF( ( mod( k2( ibulge )+2, hbl ).EQ.mod( k2( ibulge+1 )+
778 $ 2, hbl ) ) .AND. ( k1( 1 ).LE.i-1 ) ) THEN
779 h44 = work( s1+dblk-2*ibulge+(dblk-2*ibulge-1)*lds )
780 h33 = work( s1+dblk-2*ibulge-1+(dblk-2*ibulge-2)*lds )
781 h43h34 = work( s1+dblk-2*ibulge-1+
782 $ (dblk-2*ibulge-1)*lds )
783 $ *work(s1+dblk-2*ibulge+(dblk-2*ibulge-2)*lds)
784 itmp1 = istartrow
785 itmp2 = istartcol
786 CALL pdlawil( itmp1, itmp2, m, a, desca, h44, h33,
787 $ h43h34, vcopy )
788 v1save = vcopy( 1 )
789 v2save = vcopy( 2 )
790 v3save = vcopy( 3 )
791 ibulge = ibulge + 1
792 END IF
793 END IF
794*
795* When we hit a border, there are row and column transforms that
796* overlap over several processors and the code gets very
797* "congested." As a remedy, when we first hit a border, a 6x6
798* *local* matrix is generated on one node (called SMALLA) and
799* work is done on that. At the end of the border, the data is
800* passed back and everything stays a lot simpler.
801*
802 DO 80 ki = 1, ibulge
803*
804 istart = max( k1( ki ), m )
805 istop = min( k2( ki ), i-1 )
806 k = istart
807 modkm1 = mod( k-1, hbl )
808 IF( ( modkm1.GE.hbl-2 ) .AND. ( k.LE.i-1 ) ) THEN
809 DO 81 itmp1 = 1, 6
810 DO 82 itmp2 = 1, 6
811 smalla(itmp1, itmp2, ki) = zero
812 82 CONTINUE
813 81 CONTINUE
814 IF( ( modkm1.EQ.hbl-2 ) .AND. ( k.LT.i-1 ) ) THEN
815*
816* Copy 6 elements from global A(K-1:K+4,K-1:K+4)
817*
818 CALL infog2l( k+2, k+2, desca, nprow, npcol, myrow,
819 $ mycol, irow1, icol1, itmp1, itmp2 )
820 CALL pdlacp3( min( 6, n-k+2 ), k-1, a, desca,
821 $ smalla( 1, 1, ki ), 6, itmp1, itmp2,
822 $ 0 )
823 END IF
824 IF( modkm1.EQ.hbl-1 ) THEN
825*
826* Copy 6 elements from global A(K-2:K+3,K-2:K+3)
827*
828 CALL infog2l( k+1, k+1, desca, nprow, npcol, myrow,
829 $ mycol, irow1, icol1, itmp1, itmp2 )
830 CALL pdlacp3( min( 6, n-k+3 ), k-2, a, desca,
831 $ smalla( 1, 1, ki ), 6, itmp1, itmp2,
832 $ 0 )
833 END IF
834 END IF
835*
836* DLAHQR used to have a single row application and a single
837* column application to H. Here we do something a little
838* more clever. We break each transformation down into 3
839* parts:
840* 1.) The minimum amount of work it takes to determine
841* a group of ROTN transformations (this is on
842* the critical path.) (Loops 130-180)
843* 2.) The small work it takes so that each of the rows
844* and columns is at the same place. For example,
845* all ROTN row transforms are all complete
846* through some column TMP. (Loops within 190)
847* 3.) The majority of the row and column transforms
848* are then applied in a block fashion.
849* (Loops 290 on.)
850*
851* Each of these three parts are further subdivided into 3
852* parts:
853* A.) Work at the start of a border when
854* MOD(ISTART-1,HBL) = HBL-2
855* B.) Work at the end of a border when
856* MOD(ISTART-1,HBL) = HBL-1
857* C.) Work in the middle of the block when
858* MOD(ISTART-1,HBL) < HBL-2
859*
860 IF( ( myrow.EQ.icurrow( ki ) ) .AND.
861 $ ( mycol.EQ.icurcol( ki ) ) .AND.
862 $ ( modkm1.EQ.hbl-2 ) .AND.
863 $ ( istart.LT.min( i-1, istop+1 ) ) ) THEN
864 k = istart
865 nr = min( 3, i-k+1 )
866 IF( k.GT.m ) THEN
867 CALL dcopy( nr, smalla( 2, 1, ki ), 1, vcopy, 1 )
868 ELSE
869 vcopy( 1 ) = v1save
870 vcopy( 2 ) = v2save
871 vcopy( 3 ) = v3save
872 END IF
873 CALL dlarfg( nr, vcopy( 1 ), vcopy( 2 ), 1, t1copy )
874 IF( k.GT.m ) THEN
875 smalla( 2, 1, ki ) = vcopy( 1 )
876 smalla( 3, 1, ki ) = zero
877 IF( k.LT.i-1 )
878 $ smalla( 4, 1, ki ) = zero
879 ELSE IF( m.GT.l ) THEN
880 smalla( 2, 1, ki ) = -smalla( 2, 1, ki )
881 END IF
882 v2 = vcopy( 2 )
883 t2 = t1copy*v2
884 work( vecsidx+( k-1 )*3+1 ) = vcopy( 2 )
885 work( vecsidx+( k-1 )*3+2 ) = vcopy( 3 )
886 work( vecsidx+( k-1 )*3+3 ) = t1copy
887 END IF
888*
889 IF( ( mod( istop-1, hbl ).EQ.hbl-1 ) .AND.
890 $ ( myrow.EQ.icurrow( ki ) ) .AND.
891 $ ( mycol.EQ.icurcol( ki ) ) .AND.
892 $ ( istart.LE.min( i, istop ) ) ) THEN
893 k = istart
894 nr = min( 3, i-k+1 )
895 IF( k.GT.m ) THEN
896 CALL dcopy( nr, smalla( 3, 2, ki ), 1, vcopy, 1 )
897 ELSE
898 vcopy( 1 ) = v1save
899 vcopy( 2 ) = v2save
900 vcopy( 3 ) = v3save
901 END IF
902 CALL dlarfg( nr, vcopy( 1 ), vcopy( 2 ), 1, t1copy )
903 IF( k.GT.m ) THEN
904 smalla( 3, 2, ki ) = vcopy( 1 )
905 smalla( 4, 2, ki ) = zero
906 IF( k.LT.i-1 )
907 $ smalla( 5, 2, ki ) = zero
908*
909* Set a subdiagonal to zero now if it's possible
910*
911* H11 = SMALLA(1,1,KI)
912* H10 = SMALLA(2,1,KI)
913* H22 = SMALLA(2,2,KI)
914* IF ( ABS(H10) .LE. MAX(ULP*(ABS(H11)+ABS(H22)),
915* $ SMLNUM) ) THEN
916* SMALLA(2,1,KI) = ZERO
917* WORK(ISUB+K-2) = ZERO
918* END IF
919 ELSE IF( m.GT.l ) THEN
920 smalla( 3, 2, ki ) = -smalla( 3, 2, ki )
921 END IF
922 v2 = vcopy( 2 )
923 t2 = t1copy*v2
924 work( vecsidx+( k-1 )*3+1 ) = vcopy( 2 )
925 work( vecsidx+( k-1 )*3+2 ) = vcopy( 3 )
926 work( vecsidx+( k-1 )*3+3 ) = t1copy
927 END IF
928*
929 IF( ( modkm1.EQ.0 ) .AND. ( istart.LE.i-1 ) .AND.
930 $ ( myrow.EQ.icurrow( ki ) ) .AND.
931 $ ( right.EQ.icurcol( ki ) ) ) THEN
932*
933* (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)
934*
935 irow1 = krow( ki )
936 icol1 = localk2( ki )
937 IF( istart.GT.m ) THEN
938 vcopy( 1 ) = smalla( 4, 3, ki )
939 vcopy( 2 ) = smalla( 5, 3, ki )
940 vcopy( 3 ) = smalla( 6, 3, ki )
941 nr = min( 3, i-istart+1 )
942 CALL dlarfg( nr, vcopy( 1 ), vcopy( 2 ), 1,
943 $ t1copy )
944 a( ( icol1-2 )*lda+irow1 ) = vcopy( 1 )
945 a( ( icol1-2 )*lda+irow1+1 ) = zero
946 IF( istart.LT.i-1 ) THEN
947 a( ( icol1-2 )*lda+irow1+2 ) = zero
948 END IF
949 ELSE
950 IF( m.GT.l ) THEN
951 a( ( icol1-2 )*lda+irow1 ) = -a( ( icol1-2 )*
952 $ lda+irow1 )
953 END IF
954 END IF
955 END IF
956*
957 IF( ( myrow.EQ.icurrow( ki ) ) .AND.
958 $ ( mycol.EQ.icurcol( ki ) ) .AND.
959 $ ( ( ( modkm1.EQ.hbl-2 ) .AND. ( istart.EQ.i-
960 $ 1 ) ) .OR. ( ( modkm1.LT.hbl-2 ) .AND. ( istart.LE.i-
961 $ 1 ) ) ) ) THEN
962*
963* (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)
964*
965 irow1 = krow( ki )
966 icol1 = localk2( ki )
967 DO 70 k = istart, istop
968*
969* Create and do these transforms
970*
971 nr = min( 3, i-k+1 )
972 IF( k.GT.m ) THEN
973 IF( mod( k-1, hbl ).EQ.0 ) THEN
974 vcopy( 1 ) = smalla( 4, 3, ki )
975 vcopy( 2 ) = smalla( 5, 3, ki )
976 vcopy( 3 ) = smalla( 6, 3, ki )
977 ELSE
978 vcopy( 1 ) = a( ( icol1-2 )*lda+irow1 )
979 vcopy( 2 ) = a( ( icol1-2 )*lda+irow1+1 )
980 IF( nr.EQ.3 ) THEN
981 vcopy( 3 ) = a( ( icol1-2 )*lda+irow1+2 )
982 END IF
983 END IF
984 ELSE
985 vcopy( 1 ) = v1save
986 vcopy( 2 ) = v2save
987 vcopy( 3 ) = v3save
988 END IF
989 CALL dlarfg( nr, vcopy( 1 ), vcopy( 2 ), 1,
990 $ t1copy )
991 IF( k.GT.m ) THEN
992 IF( mod( k-1, hbl ).GT.0 ) THEN
993 a( ( icol1-2 )*lda+irow1 ) = vcopy( 1 )
994 a( ( icol1-2 )*lda+irow1+1 ) = zero
995 IF( k.LT.i-1 ) THEN
996 a( ( icol1-2 )*lda+irow1+2 ) = zero
997 END IF
998*
999* Set a subdiagonal to zero now if it's possible
1000*
1001* IF ( (IROW1.GT.2) .AND. (ICOL1.GT.2) .AND.
1002* $ (MOD(K-1,HBL) .GT. 1) ) THEN
1003* H11 = A((ICOL1-3)*LDA+IROW1-2)
1004* H10 = A((ICOL1-3)*LDA+IROW1-1)
1005* H22 = A((ICOL1-2)*LDA+IROW1-1)
1006* IF ( ABS(H10).LE.MAX(ULP*(ABS(H11)+ABS(H22)),
1007* $ SMLNUM) ) THEN
1008* A((ICOL1-3)*LDA+IROW1-1) = ZERO
1009* END IF
1010* END IF
1011 END IF
1012 ELSE IF( m.GT.l ) THEN
1013 IF( mod( k-1, hbl ).GT.0 ) THEN
1014 a( ( icol1-2 )*lda+irow1 ) = -a( ( icol1-2 )*
1015 $ lda+irow1 )
1016 END IF
1017 END IF
1018 v2 = vcopy( 2 )
1019 t2 = t1copy*v2
1020 work( vecsidx+( k-1 )*3+1 ) = vcopy( 2 )
1021 work( vecsidx+( k-1 )*3+2 ) = vcopy( 3 )
1022 work( vecsidx+( k-1 )*3+3 ) = t1copy
1023 t1 = t1copy
1024 IF( k.LT.istop ) THEN
1025*
1026* Do some work so next step is ready...
1027*
1028 v3 = vcopy( 3 )
1029 t3 = t1*v3
1030 DO 50 j = icol1, min( k2( ki )+1, i-1 ) +
1031 $ icol1 - k
1032 sum = a( ( j-1 )*lda+irow1 ) +
1033 $ v2*a( ( j-1 )*lda+irow1+1 ) +
1034 $ v3*a( ( j-1 )*lda+irow1+2 )
1035 a( ( j-1 )*lda+irow1 ) = a( ( j-1 )*lda+
1036 $ irow1 ) - sum*t1
1037 a( ( j-1 )*lda+irow1+1 ) = a( ( j-1 )*lda+
1038 $ irow1+1 ) - sum*t2
1039 a( ( j-1 )*lda+irow1+2 ) = a( ( j-1 )*lda+
1040 $ irow1+2 ) - sum*t3
1041 50 CONTINUE
1042 itmp1 = localk2( ki )
1043 DO 60 j = irow1 + 1, irow1 + 3
1044 sum = a( ( icol1-1 )*lda+j ) +
1045 $ v2*a( icol1*lda+j ) +
1046 $ v3*a( ( icol1+1 )*lda+j )
1047 a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*lda+
1048 $ j ) - sum*t1
1049 a( icol1*lda+j ) = a( icol1*lda+j ) - sum*t2
1050 a( ( icol1+1 )*lda+j ) = a( ( icol1+1 )*lda+
1051 $ j ) - sum*t3
1052 60 CONTINUE
1053 END IF
1054 irow1 = irow1 + 1
1055 icol1 = icol1 + 1
1056 70 CONTINUE
1057 END IF
1058*
1059 IF( modkm1.EQ.hbl-2 ) THEN
1060 IF( ( down.EQ.icurrow( ki ) ) .AND.
1061 $ ( right.EQ.icurcol( ki ) ) .AND. ( num.GT.1 ) )
1062 $ THEN
1063 CALL dgerv2d( contxt, 3, 1,
1064 $ work( vecsidx+( istart-1 )*3+1 ), 3,
1065 $ down, right )
1066 END IF
1067 IF( ( myrow.EQ.icurrow( ki ) ) .AND.
1068 $ ( mycol.EQ.icurcol( ki ) ) .AND. ( num.GT.1 ) )
1069 $ THEN
1070 CALL dgesd2d( contxt, 3, 1,
1071 $ work( vecsidx+( istart-1 )*3+1 ), 3,
1072 $ up, left )
1073 END IF
1074 IF( ( down.EQ.icurrow( ki ) ) .AND.
1075 $ ( npcol.GT.1 ) .AND. ( istart.LE.istop ) ) THEN
1076 jj = mod( icurcol( ki )+npcol-1, npcol )
1077 IF( mycol.NE.jj ) THEN
1078 CALL dgebr2d( contxt, 'ROW', ' ',
1079 $ 3*( istop-istart+1 ), 1,
1080 $ work( vecsidx+( istart-1 )*3+1 ),
1081 $ 3*( istop-istart+1 ), myrow, jj )
1082 ELSE
1083 CALL dgebs2d( contxt, 'ROW', ' ',
1084 $ 3*( istop-istart+1 ), 1,
1085 $ work( vecsidx+( istart-1 )*3+1 ),
1086 $ 3*( istop-istart+1 ) )
1087 END IF
1088 END IF
1089 END IF
1090*
1091* Broadcast Householder information from the block
1092*
1093 IF( ( myrow.EQ.icurrow( ki ) ) .AND. ( npcol.GT.1 ) .AND.
1094 $ ( istart.LE.istop ) ) THEN
1095 IF( mycol.NE.icurcol( ki ) ) THEN
1096 CALL dgebr2d( contxt, 'ROW', ' ',
1097 $ 3*( istop-istart+1 ), 1,
1098 $ work( vecsidx+( istart-1 )*3+1 ),
1099 $ 3*( istop-istart+1 ), myrow,
1100 $ icurcol( ki ) )
1101 ELSE
1102 CALL dgebs2d( contxt, 'ROW', ' ',
1103 $ 3*( istop-istart+1 ), 1,
1104 $ work( vecsidx+( istart-1 )*3+1 ),
1105 $ 3*( istop-istart+1 ) )
1106 END IF
1107 END IF
1108 80 CONTINUE
1109*
1110* Now do column transforms and finish work
1111*
1112 DO 90 ki = 1, ibulge
1113*
1114 istart = max( k1( ki ), m )
1115 istop = min( k2( ki ), i-1 )
1116*
1117 IF( mod( istart-1, hbl ).EQ.hbl-2 ) THEN
1118 IF( ( right.EQ.icurcol( ki ) ) .AND.
1119 $ ( nprow.GT.1 ) .AND. ( istart.LE.istop ) ) THEN
1120 jj = mod( icurrow( ki )+nprow-1, nprow )
1121 IF( myrow.NE.jj ) THEN
1122 CALL dgebr2d( contxt, 'COL', ' ',
1123 $ 3*( istop-istart+1 ), 1,
1124 $ work( vecsidx+( istart-1 )*3+1 ),
1125 $ 3*( istop-istart+1 ), jj, mycol )
1126 ELSE
1127 CALL dgebs2d( contxt, 'COL', ' ',
1128 $ 3*( istop-istart+1 ), 1,
1129 $ work( vecsidx+( istart-1 )*3+1 ),
1130 $ 3*( istop-istart+1 ) )
1131 END IF
1132 END IF
1133 END IF
1134*
1135 IF( ( mycol.EQ.icurcol( ki ) ) .AND. ( nprow.GT.1 ) .AND.
1136 $ ( istart.LE.istop ) ) THEN
1137 IF( myrow.NE.icurrow( ki ) ) THEN
1138 CALL dgebr2d( contxt, 'COL', ' ',
1139 $ 3*( istop-istart+1 ), 1,
1140 $ work( vecsidx+( istart-1 )*3+1 ),
1141 $ 3*( istop-istart+1 ), icurrow( ki ),
1142 $ mycol )
1143 ELSE
1144 CALL dgebs2d( contxt, 'COL', ' ',
1145 $ 3*( istop-istart+1 ), 1,
1146 $ work( vecsidx+( istart-1 )*3+1 ),
1147 $ 3*( istop-istart+1 ) )
1148 END IF
1149 END IF
1150 90 CONTINUE
1151*
1152* Now do make up work to have things in block fashion
1153*
1154 DO 150 ki = 1, ibulge
1155 istart = max( k1( ki ), m )
1156 istop = min( k2( ki ), i-1 )
1157*
1158 modkm1 = mod( istart-1, hbl )
1159 IF( ( myrow.EQ.icurrow( ki ) ) .AND.
1160 $ ( mycol.EQ.icurcol( ki ) ) .AND.
1161 $ ( modkm1.EQ.hbl-2 ) .AND. ( istart.LT.i-1 ) ) THEN
1162 k = istart
1163*
1164* Catch up on column & border work
1165*
1166 nr = min( 3, i-k+1 )
1167 v2 = work( vecsidx+( k-1 )*3+1 )
1168 v3 = work( vecsidx+( k-1 )*3+2 )
1169 t1 = work( vecsidx+( k-1 )*3+3 )
1170 IF( nr.EQ.3 ) THEN
1171*
1172* Do some work so next step is ready...
1173*
1174* V3 = VCOPY( 3 )
1175 t2 = t1*v2
1176 t3 = t1*v3
1177 itmp1 = min( 6, i2+2-k )
1178 itmp2 = max( i1-k+2, 1 )
1179 DO 100 j = 2, itmp1
1180 sum = smalla( 2, j, ki ) +
1181 $ v2*smalla( 3, j, ki ) +
1182 $ v3*smalla( 4, j, ki )
1183 smalla( 2, j, ki ) = smalla( 2, j, ki ) - sum*t1
1184 smalla( 3, j, ki ) = smalla( 3, j, ki ) - sum*t2
1185 smalla( 4, j, ki ) = smalla( 4, j, ki ) - sum*t3
1186 100 CONTINUE
1187 DO 110 j = itmp2, 5
1188 sum = smalla( j, 2, ki ) +
1189 $ v2*smalla( j, 3, ki ) +
1190 $ v3*smalla( j, 4, ki )
1191 smalla( j, 2, ki ) = smalla( j, 2, ki ) - sum*t1
1192 smalla( j, 3, ki ) = smalla( j, 3, ki ) - sum*t2
1193 smalla( j, 4, ki ) = smalla( j, 4, ki ) - sum*t3
1194 110 CONTINUE
1195 END IF
1196 END IF
1197*
1198 IF( ( mod( istart-1, hbl ).EQ.hbl-1 ) .AND.
1199 $ ( istart.LE.istop ) .AND.
1200 $ ( myrow.EQ.icurrow( ki ) ) .AND.
1201 $ ( mycol.EQ.icurcol( ki ) ) ) THEN
1202 k = istop
1203*
1204* Catch up on column & border work
1205*
1206 nr = min( 3, i-k+1 )
1207 v2 = work( vecsidx+( k-1 )*3+1 )
1208 v3 = work( vecsidx+( k-1 )*3+2 )
1209 t1 = work( vecsidx+( k-1 )*3+3 )
1210 IF( nr.EQ.3 ) THEN
1211*
1212* Do some work so next step is ready...
1213*
1214* V3 = VCOPY( 3 )
1215 t2 = t1*v2
1216 t3 = t1*v3
1217 itmp1 = min( 6, i2-k+3 )
1218 itmp2 = max( i1-k+3, 1 )
1219 DO 120 j = 3, itmp1
1220 sum = smalla( 3, j, ki ) +
1221 $ v2*smalla( 4, j, ki ) +
1222 $ v3*smalla( 5, j, ki )
1223 smalla( 3, j, ki ) = smalla( 3, j, ki ) - sum*t1
1224 smalla( 4, j, ki ) = smalla( 4, j, ki ) - sum*t2
1225 smalla( 5, j, ki ) = smalla( 5, j, ki ) - sum*t3
1226 120 CONTINUE
1227 DO 130 j = itmp2, 6
1228 sum = smalla( j, 3, ki ) +
1229 $ v2*smalla( j, 4, ki ) +
1230 $ v3*smalla( j, 5, ki )
1231 smalla( j, 3, ki ) = smalla( j, 3, ki ) - sum*t1
1232 smalla( j, 4, ki ) = smalla( j, 4, ki ) - sum*t2
1233 smalla( j, 5, ki ) = smalla( j, 5, ki ) - sum*t3
1234 130 CONTINUE
1235 END IF
1236 END IF
1237*
1238 modkm1 = mod( istart-1, hbl )
1239 IF( ( myrow.EQ.icurrow( ki ) ) .AND.
1240 $ ( mycol.EQ.icurcol( ki ) ) .AND.
1241 $ ( ( ( modkm1.EQ.hbl-2 ) .AND. ( istart.EQ.i-
1242 $ 1 ) ) .OR. ( ( modkm1.LT.hbl-2 ) .AND. ( istart.LE.i-
1243 $ 1 ) ) ) ) THEN
1244*
1245* (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)
1246*
1247 irow1 = krow( ki )
1248 icol1 = localk2( ki )
1249 DO 140 k = istart, istop
1250*
1251* Catch up on column & border work
1252*
1253 nr = min( 3, i-k+1 )
1254 v2 = work( vecsidx+( k-1 )*3+1 )
1255 v3 = work( vecsidx+( k-1 )*3+2 )
1256 t1 = work( vecsidx+( k-1 )*3+3 )
1257 IF( k.LT.istop ) THEN
1258*
1259* Do some work so next step is ready...
1260*
1261 t2 = t1*v2
1262 t3 = t1*v3
1263 CALL dlaref( 'Col', a, lda, .false., z, ldz,
1264 $ .false., icol1, icol1, istart,
1265 $ istop, min( istart+1, i )-k+irow1,
1266 $ irow1, liloz, lihiz,
1267 $ work( vecsidx+1 ), v2, v3, t1, t2,
1268 $ t3 )
1269 irow1 = irow1 + 1
1270 icol1 = icol1 + 1
1271 ELSE
1272 IF( ( nr.EQ.3 ) .AND. ( mod( k-1,
1273 $ hbl ).LT.hbl-2 ) ) THEN
1274 t2 = t1*v2
1275 t3 = t1*v3
1276 CALL dlaref( 'Row', a, lda, .false., z, ldz,
1277 $ .false., irow1, irow1, istart,
1278 $ istop, icol1, min( min( k2( ki )
1279 $ +1, i-1 ), i2 )-k+icol1, liloz,
1280 $ lihiz, work( vecsidx+1 ), v2,
1281 $ v3, t1, t2, t3 )
1282 END IF
1283 END IF
1284 140 CONTINUE
1285 END IF
1286*
1287* Send SMALLA back again.
1288*
1289 k = istart
1290 modkm1 = mod( k-1, hbl )
1291 IF( ( modkm1.GE.hbl-2 ) .AND. ( k.LE.i-1 ) ) THEN
1292 IF( ( modkm1.EQ.hbl-2 ) .AND. ( k.LT.i-1 ) ) THEN
1293*
1294* Copy 6 elements to global A(K-1:K+4,K-1:K+4)
1295*
1296 CALL infog2l( k+2, k+2, desca, nprow, npcol, myrow,
1297 $ mycol, irow1, icol1, itmp1, itmp2 )
1298 CALL pdlacp3( min( 6, n-k+2 ), k-1, a, desca,
1299 $ smalla( 1, 1, ki ), 6, itmp1, itmp2,
1300 $ 1 )
1301*
1302 END IF
1303 IF( modkm1.EQ.hbl-1 ) THEN
1304*
1305* Copy 6 elements to global A(K-2:K+3,K-2:K+3)
1306*
1307 CALL infog2l( k+1, k+1, desca, nprow, npcol, myrow,
1308 $ mycol, irow1, icol1, itmp1, itmp2 )
1309 CALL pdlacp3( min( 6, n-k+3 ), k-2, a, desca,
1310 $ smalla( 1, 1, ki ), 6, itmp1, itmp2,
1311 $ 1 )
1312 END IF
1313 END IF
1314*
1315 150 CONTINUE
1316*
1317* Now start major set of block ROW reflections
1318*
1319 DO 160 ki = 1, ibulge
1320 IF( ( myrow.NE.icurrow( ki ) ) .AND.
1321 $ ( down.NE.icurrow( ki ) ) )GO TO 160
1322 istart = max( k1( ki ), m )
1323 istop = min( k2( ki ), i-1 )
1324*
1325 IF( ( istop.GT.istart ) .AND.
1326 $ ( mod( istart-1, hbl ).LT.hbl-2 ) .AND.
1327 $ ( icurrow( ki ).EQ.myrow ) ) THEN
1328 irow1 = min( k2( ki )+1, i-1 ) + 1
1329 CALL infog1l( irow1, hbl, npcol, mycol, desca(csrc_),
1330 $ itmp1, itmp2 )
1331 itmp2 = numroc( i2, hbl, mycol, desca(csrc_), npcol )
1332 ii = krow( ki )
1333 CALL dlaref( 'Row', a, lda, wantz, z, ldz, .true., ii,
1334 $ ii, istart, istop, itmp1, itmp2, liloz,
1335 $ lihiz, work( vecsidx+1 ), v2, v3, t1, t2,
1336 $ t3 )
1337 END IF
1338 160 CONTINUE
1339*
1340 DO 180 ki = 1, ibulge
1341 IF( krow( ki ).GT.kp2row( ki ) )
1342 $ GO TO 180
1343 IF( ( myrow.NE.icurrow( ki ) ) .AND.
1344 $ ( down.NE.icurrow( ki ) ) )GO TO 180
1345 istart = max( k1( ki ), m )
1346 istop = min( k2( ki ), i-1 )
1347 IF( ( istart.EQ.istop ) .OR.
1348 $ ( mod( istart-1, hbl ).GE.hbl-2 ) .OR.
1349 $ ( icurrow( ki ).NE.myrow ) ) THEN
1350 DO 170 k = istart, istop
1351 v2 = work( vecsidx+( k-1 )*3+1 )
1352 v3 = work( vecsidx+( k-1 )*3+2 )
1353 t1 = work( vecsidx+( k-1 )*3+3 )
1354 nr = min( 3, i-k+1 )
1355 IF( ( nr.EQ.3 ) .AND. ( krow( ki ).LE.
1356 $ kp2row( ki ) ) ) THEN
1357 IF( ( k.LT.istop ) .AND.
1358 $ ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
1359 itmp1 = min( k2( ki )+1, i-1 ) + 1
1360 ELSE
1361 IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1362 itmp1 = min( k2( ki )+1, i-1 ) + 1
1363 END IF
1364 IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1365 itmp1 = min( k+4, i2 ) + 1
1366 END IF
1367 IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1368 itmp1 = min( k+3, i2 ) + 1
1369 END IF
1370 END IF
1371*
1372* Find local coor of rows K through K+2
1373*
1374 irow1 = krow( ki )
1375 irow2 = kp2row( ki )
1376 CALL infog1l( itmp1, hbl, npcol, mycol,
1377 $ desca(csrc_), icol1, icol2 )
1378 icol2 = numroc(i2,hbl,mycol,desca(csrc_),npcol )
1379 IF( ( mod( k-1, hbl ).LT.hbl-2 ) .OR.
1380 $ ( nprow.EQ.1 ) ) THEN
1381 t2 = t1*v2
1382 t3 = t1*v3
1383 CALL dlaref( 'Row', a, lda, wantz, z, ldz,
1384 $ .false., irow1, irow1, istart,
1385 $ istop, icol1, icol2, liloz,
1386 $ lihiz, work( vecsidx+1 ), v2,
1387 $ v3, t1, t2, t3 )
1388 END IF
1389 IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
1390 $ ( nprow.GT.1 ) ) THEN
1391 IF( irow1.EQ.irow2 ) THEN
1392 CALL dgesd2d( contxt, 1, icol2-icol1+1,
1393 $ a( ( icol1-1 )*lda+irow2 ),
1394 $ lda, up, mycol )
1395 END IF
1396 END IF
1397 IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
1398 $ ( nprow.GT.1 ) ) THEN
1399 IF( irow1.EQ.irow2 ) THEN
1400 CALL dgesd2d( contxt, 1, icol2-icol1+1,
1401 $ a( ( icol1-1 )*lda+irow1 ),
1402 $ lda, down, mycol )
1403 END IF
1404 END IF
1405 END IF
1406 170 CONTINUE
1407 END IF
1408 180 CONTINUE
1409*
1410 DO 220 ki = 1, ibulge
1411 IF( krow( ki ).GT.kp2row( ki ) )
1412 $ GO TO 220
1413 IF( ( myrow.NE.icurrow( ki ) ) .AND.
1414 $ ( down.NE.icurrow( ki ) ) )GO TO 220
1415 istart = max( k1( ki ), m )
1416 istop = min( k2( ki ), i-1 )
1417 IF( ( istart.EQ.istop ) .OR.
1418 $ ( mod( istart-1, hbl ).GE.hbl-2 ) .OR.
1419 $ ( icurrow( ki ).NE.myrow ) ) THEN
1420 DO 210 k = istart, istop
1421 v2 = work( vecsidx+( k-1 )*3+1 )
1422 v3 = work( vecsidx+( k-1 )*3+2 )
1423 t1 = work( vecsidx+( k-1 )*3+3 )
1424 nr = min( 3, i-k+1 )
1425 IF( ( nr.EQ.3 ) .AND. ( krow( ki ).LE.
1426 $ kp2row( ki ) ) ) THEN
1427 IF( ( k.LT.istop ) .AND.
1428 $ ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
1429 itmp1 = min( k2( ki )+1, i-1 ) + 1
1430 ELSE
1431 IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1432 itmp1 = min( k2( ki )+1, i-1 ) + 1
1433 END IF
1434 IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1435 itmp1 = min( k+4, i2 ) + 1
1436 END IF
1437 IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1438 itmp1 = min( k+3, i2 ) + 1
1439 END IF
1440 END IF
1441*
1442 irow1 = krow( ki ) + k - istart
1443 irow2 = kp2row( ki ) + k - istart
1444 CALL infog1l( itmp1, hbl, npcol, mycol,
1445 $ desca(csrc_),icol1, icol2 )
1446 icol2 = numroc(i2,hbl,mycol,desca(csrc_),npcol )
1447 IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
1448 $ ( nprow.GT.1 ) ) THEN
1449 IF( irow1.NE.irow2 ) THEN
1450 CALL dgerv2d( contxt, 1, icol2-icol1+1,
1451 $ work( irbuf+1 ), 1, down,
1452 $ mycol )
1453 t2 = t1*v2
1454 t3 = t1*v3
1455 DO 190 j = icol1, icol2
1456 sum = a( ( j-1 )*lda+irow1 ) +
1457 $ v2*a( ( j-1 )*lda+irow1+1 ) +
1458 $ v3*work( irbuf+j-icol1+1 )
1459 a( ( j-1 )*lda+irow1 ) = a( ( j-1 )*
1460 $ lda+irow1 ) - sum*t1
1461 a( ( j-1 )*lda+irow1+1 ) = a( ( j-1 )*
1462 $ lda+irow1+1 ) - sum*t2
1463 work( irbuf+j-icol1+1 ) = work( irbuf+
1464 $ j-icol1+1 ) - sum*t3
1465 190 CONTINUE
1466 CALL dgesd2d( contxt, 1, icol2-icol1+1,
1467 $ work( irbuf+1 ), 1, down,
1468 $ mycol )
1469 END IF
1470 END IF
1471 IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
1472 $ ( nprow.GT.1 ) ) THEN
1473 IF( irow1.NE.irow2 ) THEN
1474 CALL dgerv2d( contxt, 1, icol2-icol1+1,
1475 $ work( irbuf+1 ), 1, up,
1476 $ mycol )
1477 t2 = t1*v2
1478 t3 = t1*v3
1479 DO 200 j = icol1, icol2
1480 sum = work( irbuf+j-icol1+1 ) +
1481 $ v2*a( ( j-1 )*lda+irow1 ) +
1482 $ v3*a( ( j-1 )*lda+irow1+1 )
1483 work( irbuf+j-icol1+1 ) = work( irbuf+
1484 $ j-icol1+1 ) - sum*t1
1485 a( ( j-1 )*lda+irow1 ) = a( ( j-1 )*
1486 $ lda+irow1 ) - sum*t2
1487 a( ( j-1 )*lda+irow1+1 ) = a( ( j-1 )*
1488 $ lda+irow1+1 ) - sum*t3
1489 200 CONTINUE
1490 CALL dgesd2d( contxt, 1, icol2-icol1+1,
1491 $ work( irbuf+1 ), 1, up,
1492 $ mycol )
1493 END IF
1494 END IF
1495 END IF
1496 210 CONTINUE
1497 END IF
1498 220 CONTINUE
1499*
1500 DO 240 ki = 1, ibulge
1501 IF( krow( ki ).GT.kp2row( ki ) )
1502 $ GO TO 240
1503 IF( ( myrow.NE.icurrow( ki ) ) .AND.
1504 $ ( down.NE.icurrow( ki ) ) )GO TO 240
1505 istart = max( k1( ki ), m )
1506 istop = min( k2( ki ), i-1 )
1507 IF( ( istart.EQ.istop ) .OR.
1508 $ ( mod( istart-1, hbl ).GE.hbl-2 ) .OR.
1509 $ ( icurrow( ki ).NE.myrow ) ) THEN
1510 DO 230 k = istart, istop
1511 v2 = work( vecsidx+( k-1 )*3+1 )
1512 v3 = work( vecsidx+( k-1 )*3+2 )
1513 t1 = work( vecsidx+( k-1 )*3+3 )
1514 nr = min( 3, i-k+1 )
1515 IF( ( nr.EQ.3 ) .AND. ( krow( ki ).LE.
1516 $ kp2row( ki ) ) ) THEN
1517 IF( ( k.LT.istop ) .AND.
1518 $ ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
1519 itmp1 = min( k2( ki )+1, i-1 ) + 1
1520 ELSE
1521 IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1522 itmp1 = min( k2( ki )+1, i-1 ) + 1
1523 END IF
1524 IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1525 itmp1 = min( k+4, i2 ) + 1
1526 END IF
1527 IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1528 itmp1 = min( k+3, i2 ) + 1
1529 END IF
1530 END IF
1531*
1532 irow1 = krow( ki ) + k - istart
1533 irow2 = kp2row( ki ) + k - istart
1534 CALL infog1l( itmp1, hbl, npcol, mycol,
1535 $ desca(csrc_), icol1, icol2 )
1536 icol2 = numroc(i2,hbl,mycol,desca(csrc_),npcol )
1537 IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
1538 $ ( nprow.GT.1 ) ) THEN
1539 IF( irow1.EQ.irow2 ) THEN
1540 CALL dgerv2d( contxt, 1, icol2-icol1+1,
1541 $ a( ( icol1-1 )*lda+irow2 ),
1542 $ lda, up, mycol )
1543 END IF
1544 END IF
1545 IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
1546 $ ( nprow.GT.1 ) ) THEN
1547 IF( irow1.EQ.irow2 ) THEN
1548 CALL dgerv2d( contxt, 1, icol2-icol1+1,
1549 $ a( ( icol1-1 )*lda+irow1 ),
1550 $ lda, down, mycol )
1551 END IF
1552 END IF
1553 END IF
1554 230 CONTINUE
1555 END IF
1556 240 CONTINUE
1557 250 CONTINUE
1558*
1559* Now start major set of block COL reflections
1560*
1561 DO 260 ki = 1, ibulge
1562 IF( ( mycol.NE.icurcol( ki ) ) .AND.
1563 $ ( right.NE.icurcol( ki ) ) )GO TO 260
1564 istart = max( k1( ki ), m )
1565 istop = min( k2( ki ), i-1 )
1566*
1567 IF( ( ( mod( istart-1, hbl ).LT.hbl-2 ) .OR. ( npcol.EQ.
1568 $ 1 ) ) .AND. ( icurcol( ki ).EQ.mycol ) .AND.
1569 $ ( i-istop+1.GE.3 ) ) THEN
1570 k = istart
1571 IF( ( k.LT.istop ) .AND. ( mod( k-1,
1572 $ hbl ).LT.hbl-2 ) ) THEN
1573 itmp1 = min( istart+1, i ) - 1
1574 ELSE
1575 IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1576 itmp1 = min( k+3, i )
1577 END IF
1578 IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1579 itmp1 = max( i1, k-1 ) - 1
1580 END IF
1581 IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1582 itmp1 = max( i1, k-2 ) - 1
1583 END IF
1584 END IF
1585*
1586 icol1 = kcol( ki )
1587 CALL infog1l( i1, hbl, nprow, myrow, desca(rsrc_),
1588 $ irow1, irow2 )
1589 irow2 = numroc( itmp1, hbl, myrow,desca(rsrc_),nprow )
1590 IF( irow1.LE.irow2 ) THEN
1591 itmp2 = irow2
1592 ELSE
1593 itmp2 = -1
1594 END IF
1595 CALL dlaref( 'Col', a, lda, wantz, z, ldz, .true.,
1596 $ icol1, icol1, istart, istop, irow1,
1597 $ irow2, liloz, lihiz, work( vecsidx+1 ),
1598 $ v2, v3, t1, t2, t3 )
1599 k = istop
1600 IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1601*
1602* Do from ITMP1+1 to MIN(K+3,I)
1603*
1604 IF( mod( k-1, hbl ).LT.hbl-3 ) THEN
1605 irow1 = itmp2 + 1
1606 IF( mod( ( itmp1 / hbl ), nprow ).EQ.myrow )
1607 $ THEN
1608 IF( itmp2.GT.0 ) THEN
1609 irow2 = itmp2 + min( k+3, i ) - itmp1
1610 ELSE
1611 irow2 = irow1 - 1
1612 END IF
1613 ELSE
1614 irow2 = irow1 - 1
1615 END IF
1616 ELSE
1617 CALL infog1l( itmp1+1, hbl, nprow, myrow,
1618 $ desca(rsrc_),irow1, irow2 )
1619 irow2 = numroc( min( k+3, i ), hbl, myrow,
1620 $ desca(rsrc_), nprow )
1621 END IF
1622 v2 = work( vecsidx+( k-1 )*3+1 )
1623 v3 = work( vecsidx+( k-1 )*3+2 )
1624 t1 = work( vecsidx+( k-1 )*3+3 )
1625 t2 = t1*v2
1626 t3 = t1*v3
1627 icol1 = kcol( ki ) + istop - istart
1628 CALL dlaref( 'Col', a, lda, .false., z, ldz,
1629 $ .false., icol1, icol1, istart, istop,
1630 $ irow1, irow2, liloz, lihiz,
1631 $ work( vecsidx+1 ), v2, v3, t1, t2,
1632 $ t3 )
1633 END IF
1634 END IF
1635 260 CONTINUE
1636*
1637 DO 320 ki = 1, ibulge
1638 IF( kcol( ki ).GT.kp2col( ki ) )
1639 $ GO TO 320
1640 IF( ( mycol.NE.icurcol( ki ) ) .AND.
1641 $ ( right.NE.icurcol( ki ) ) )GO TO 320
1642 istart = max( k1( ki ), m )
1643 istop = min( k2( ki ), i-1 )
1644 IF( mod( istart-1, hbl ).GE.hbl-2 ) THEN
1645*
1646* INFO is found in a buffer
1647*
1648 ispec = 1
1649 ELSE
1650*
1651* All INFO is local
1652*
1653 ispec = 0
1654 END IF
1655*
1656 DO 310 k = istart, istop
1657*
1658 v2 = work( vecsidx+( k-1 )*3+1 )
1659 v3 = work( vecsidx+( k-1 )*3+2 )
1660 t1 = work( vecsidx+( k-1 )*3+3 )
1661 nr = min( 3, i-k+1 )
1662 IF( ( nr.EQ.3 ) .AND. ( kcol( ki ).LE.kp2col( ki ) ) )
1663 $ THEN
1664*
1665 IF( ( k.LT.istop ) .AND.
1666 $ ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
1667 itmp1 = min( istart+1, i ) - 1
1668 ELSE
1669 IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1670 itmp1 = min( k+3, i )
1671 END IF
1672 IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1673 itmp1 = max( i1, k-1 ) - 1
1674 END IF
1675 IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1676 itmp1 = max( i1, k-2 ) - 1
1677 END IF
1678 END IF
1679 icol1 = kcol( ki ) + k - istart
1680 icol2 = kp2col( ki ) + k - istart
1681 CALL infog1l( i1, hbl, nprow, myrow, desca(rsrc_),
1682 $ irow1, irow2 )
1683 irow2 = numroc( itmp1, hbl, myrow, desca(rsrc_),
1684 $ nprow )
1685 IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
1686 $ ( npcol.GT.1 ) ) THEN
1687 IF( icol1.EQ.icol2 ) THEN
1688 CALL dgesd2d( contxt, irow2-irow1+1, 1,
1689 $ a( ( icol1-1 )*lda+irow1 ),
1690 $ lda, myrow, left )
1691 CALL dgerv2d( contxt, irow2-irow1+1, 1,
1692 $ a( ( icol1-1 )*lda+irow1 ),
1693 $ lda, myrow, left )
1694 ELSE
1695 CALL dgerv2d( contxt, irow2-irow1+1, 1,
1696 $ work( icbuf+1 ), irow2-irow1+1,
1697 $ myrow, right )
1698 t2 = t1*v2
1699 t3 = t1*v3
1700 DO 270 j = irow1, irow2
1701 sum = a( ( icol1-1 )*lda+j ) +
1702 $ v2*a( icol1*lda+j ) +
1703 $ v3*work( icbuf+j-irow1+1 )
1704 a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*
1705 $ lda+j ) - sum*t1
1706 a( icol1*lda+j ) = a( icol1*lda+j ) -
1707 $ sum*t2
1708 work( icbuf+j-irow1+1 ) = work( icbuf+j-
1709 $ irow1+1 ) - sum*t3
1710 270 CONTINUE
1711 CALL dgesd2d( contxt, irow2-irow1+1, 1,
1712 $ work( icbuf+1 ), irow2-irow1+1,
1713 $ myrow, right )
1714 END IF
1715 END IF
1716 IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
1717 $ ( npcol.GT.1 ) ) THEN
1718 IF( icol1.EQ.icol2 ) THEN
1719 CALL dgesd2d( contxt, irow2-irow1+1, 1,
1720 $ a( ( icol1-1 )*lda+irow1 ),
1721 $ lda, myrow, right )
1722 CALL dgerv2d( contxt, irow2-irow1+1, 1,
1723 $ a( ( icol1-1 )*lda+irow1 ),
1724 $ lda, myrow, right )
1725 ELSE
1726 CALL dgerv2d( contxt, irow2-irow1+1, 1,
1727 $ work( icbuf+1 ), irow2-irow1+1,
1728 $ myrow, left )
1729 t2 = t1*v2
1730 t3 = t1*v3
1731 DO 280 j = irow1, irow2
1732 sum = work( icbuf+j-irow1+1 ) +
1733 $ v2*a( ( icol1-1 )*lda+j ) +
1734 $ v3*a( icol1*lda+j )
1735 work( icbuf+j-irow1+1 ) = work( icbuf+j-
1736 $ irow1+1 ) - sum*t1
1737 a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*
1738 $ lda+j ) - sum*t2
1739 a( icol1*lda+j ) = a( icol1*lda+j ) -
1740 $ sum*t3
1741 280 CONTINUE
1742 CALL dgesd2d( contxt, irow2-irow1+1, 1,
1743 $ work( icbuf+1 ), irow2-irow1+1,
1744 $ myrow, left )
1745 END IF
1746 END IF
1747*
1748* If we want Z and we haven't already done any Z
1749 IF( ( wantz ) .AND. ( mod( k-1,
1750 $ hbl ).GE.hbl-2 ) .AND. ( npcol.GT.1 ) ) THEN
1751*
1752* Accumulate transformations in the matrix Z
1753*
1754 irow1 = liloz
1755 irow2 = lihiz
1756 IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1757 IF( icol1.EQ.icol2 ) THEN
1758 CALL dgesd2d( contxt, irow2-irow1+1, 1,
1759 $ z( ( icol1-1 )*ldz+irow1 ),
1760 $ ldz, myrow, left )
1761 CALL dgerv2d( contxt, irow2-irow1+1, 1,
1762 $ z( ( icol1-1 )*ldz+irow1 ),
1763 $ ldz, myrow, left )
1764 ELSE
1765 CALL dgerv2d( contxt, irow2-irow1+1, 1,
1766 $ work( icbuf+1 ),
1767 $ irow2-irow1+1, myrow,
1768 $ right )
1769 t2 = t1*v2
1770 t3 = t1*v3
1771 icol1 = ( icol1-1 )*ldz
1772 DO 290 j = irow1, irow2
1773 sum = z( icol1+j ) +
1774 $ v2*z( icol1+j+ldz ) +
1775 $ v3*work( icbuf+j-irow1+1 )
1776 z( j+icol1 ) = z( j+icol1 ) - sum*t1
1777 z( j+icol1+ldz ) = z( j+icol1+ldz ) -
1778 $ sum*t2
1779 work( icbuf+j-irow1+1 ) = work( icbuf+
1780 $ j-irow1+1 ) - sum*t3
1781 290 CONTINUE
1782 CALL dgesd2d( contxt, irow2-irow1+1, 1,
1783 $ work( icbuf+1 ),
1784 $ irow2-irow1+1, myrow,
1785 $ right )
1786 END IF
1787 END IF
1788 IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1789 IF( icol1.EQ.icol2 ) THEN
1790 CALL dgesd2d( contxt, irow2-irow1+1, 1,
1791 $ z( ( icol1-1 )*ldz+irow1 ),
1792 $ ldz, myrow, right )
1793 CALL dgerv2d( contxt, irow2-irow1+1, 1,
1794 $ z( ( icol1-1 )*ldz+irow1 ),
1795 $ ldz, myrow, right )
1796 ELSE
1797 CALL dgerv2d( contxt, irow2-irow1+1, 1,
1798 $ work( icbuf+1 ),
1799 $ irow2-irow1+1, myrow, left )
1800 t2 = t1*v2
1801 t3 = t1*v3
1802 icol1 = ( icol1-1 )*ldz
1803 DO 300 j = irow1, irow2
1804 sum = work( icbuf+j-irow1+1 ) +
1805 $ v2*z( j+icol1 ) +
1806 $ v3*z( j+icol1+ldz )
1807 work( icbuf+j-irow1+1 ) = work( icbuf+
1808 $ j-irow1+1 ) - sum*t1
1809 z( j+icol1 ) = z( j+icol1 ) - sum*t2
1810 z( j+icol1+ldz ) = z( j+icol1+ldz ) -
1811 $ sum*t3
1812 300 CONTINUE
1813 CALL dgesd2d( contxt, irow2-irow1+1, 1,
1814 $ work( icbuf+1 ),
1815 $ irow2-irow1+1, myrow, left )
1816 END IF
1817 END IF
1818 END IF
1819 IF( icurcol( ki ).EQ.mycol ) THEN
1820 IF( ( ispec.EQ.0 ) .OR. ( npcol.EQ.1 ) ) THEN
1821 localk2( ki ) = localk2( ki ) + 1
1822 END IF
1823 ELSE
1824 IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
1825 $ ( icurcol( ki ).EQ.right ) ) THEN
1826 IF( k.GT.m ) THEN
1827 localk2( ki ) = localk2( ki ) + 2
1828 ELSE
1829 localk2( ki ) = localk2( ki ) + 1
1830 END IF
1831 END IF
1832 IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
1833 $ ( i-k.EQ.2 ) .AND. ( icurcol( ki ).EQ.
1834 $ right ) ) THEN
1835 localk2( ki ) = localk2( ki ) + 2
1836 END IF
1837 END IF
1838 END IF
1839 310 CONTINUE
1840 320 CONTINUE
1841*
1842* Column work done
1843*
1844 330 CONTINUE
1845*
1846* Now do NR=2 work
1847*
1848 DO 410 ki = 1, ibulge
1849 istart = max( k1( ki ), m )
1850 istop = min( k2( ki ), i-1 )
1851 IF( mod( istart-1, hbl ).GE.hbl-2 ) THEN
1852*
1853* INFO is found in a buffer
1854*
1855 ispec = 1
1856 ELSE
1857*
1858* All INFO is local
1859*
1860 ispec = 0
1861 END IF
1862*
1863 DO 400 k = istart, istop
1864*
1865 v2 = work( vecsidx+( k-1 )*3+1 )
1866 v3 = work( vecsidx+( k-1 )*3+2 )
1867 t1 = work( vecsidx+( k-1 )*3+3 )
1868 nr = min( 3, i-k+1 )
1869 IF( nr.EQ.2 ) THEN
1870 IF ( icurrow( ki ).EQ.myrow ) THEN
1871 t2 = t1*v2
1872 END IF
1873 IF ( icurcol( ki ).EQ.mycol ) THEN
1874 t2 = t1*v2
1875 END IF
1876*
1877* Apply G from the left to transform the rows of the matrix
1878* in columns K to I2.
1879*
1880 CALL infog1l( k, hbl, npcol, mycol, desca(csrc_),
1881 $ liloh,lihih )
1882 lihih = numroc( i2, hbl, mycol, desca(csrc_),npcol)
1883 CALL infog1l( 1, hbl, nprow, myrow, desca(rsrc_),
1884 $ itmp2,itmp1 )
1885 itmp1 = numroc( k+1,hbl, myrow,desca(rsrc_),nprow )
1886 IF( icurrow( ki ).EQ.myrow ) THEN
1887 IF( ( ispec.EQ.0 ) .OR. ( nprow.EQ.1 ) .OR.
1888 $ ( mod( k-1, hbl ).EQ.hbl-2 ) ) THEN
1889 itmp1 = itmp1 - 1
1890 DO 340 j = ( liloh-1 )*lda,
1891 $ ( lihih-1 )*lda, lda
1892 sum = a( itmp1+j ) + v2*a( itmp1+1+j )
1893 a( itmp1+j ) = a( itmp1+j ) - sum*t1
1894 a( itmp1+1+j ) = a( itmp1+1+j ) - sum*t2
1895 340 CONTINUE
1896 ELSE
1897 IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1898 CALL dgerv2d( contxt, 1, lihih-liloh+1,
1899 $ work( irbuf+1 ), 1, up,
1900 $ mycol )
1901 DO 350 j = liloh, lihih
1902 sum = work( irbuf+j-liloh+1 ) +
1903 $ v2*a( ( j-1 )*lda+itmp1 )
1904 work( irbuf+j-liloh+1 ) = work( irbuf+
1905 $ j-liloh+1 ) - sum*t1
1906 a( ( j-1 )*lda+itmp1 ) = a( ( j-1 )*
1907 $ lda+itmp1 ) - sum*t2
1908 350 CONTINUE
1909 CALL dgesd2d( contxt, 1, lihih-liloh+1,
1910 $ work( irbuf+1 ), 1, up,
1911 $ mycol )
1912 END IF
1913 END IF
1914 ELSE
1915 IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
1916 $ ( icurrow( ki ).EQ.down ) ) THEN
1917 CALL dgesd2d( contxt, 1, lihih-liloh+1,
1918 $ a( ( liloh-1 )*lda+itmp1 ),
1919 $ lda, down, mycol )
1920 CALL dgerv2d( contxt, 1, lihih-liloh+1,
1921 $ a( ( liloh-1 )*lda+itmp1 ),
1922 $ lda, down, mycol )
1923 END IF
1924 END IF
1925*
1926* Apply G from the right to transform the columns of the
1927* matrix in rows I1 to MIN(K+3,I).
1928*
1929 CALL infog1l( i1, hbl, nprow, myrow, desca(rsrc_),
1930 $ liloh, lihih )
1931 lihih = numroc( i, hbl, myrow, desca(rsrc_),nprow )
1932*
1933 IF( icurcol( ki ).EQ.mycol ) THEN
1934* LOCAL A(LILOZ:LIHIZ,LOCALK2:LOCALK2+2)
1935 IF( ( ispec.EQ.0 ) .OR. ( npcol.EQ.1 ) .OR.
1936 $ ( mod( k-1, hbl ).EQ.hbl-2 ) ) THEN
1937 CALL infog1l( k, hbl, npcol, mycol,
1938 $ desca(csrc_), itmp1,itmp2 )
1939 itmp2 = numroc(k+1,hbl,mycol,desca(csrc_),
1940 $ npcol )
1941 DO 360 j = liloh, lihih
1942 sum = a( ( itmp1-1 )*lda+j ) +
1943 $ v2*a( itmp1*lda+j )
1944 a( ( itmp1-1 )*lda+j ) = a( ( itmp1-1 )*
1945 $ lda+j ) - sum*t1
1946 a( itmp1*lda+j ) = a( itmp1*lda+j ) -
1947 $ sum*t2
1948 360 CONTINUE
1949 ELSE
1950 itmp1 = localk2( ki )
1951 IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1952 CALL dgerv2d( contxt, lihih-liloh+1, 1,
1953 $ work( icbuf+1 ),
1954 $ lihih-liloh+1, myrow, left )
1955 DO 370 j = liloh, lihih
1956 sum = work( icbuf+j ) +
1957 $ v2*a( ( itmp1-1 )*lda+j )
1958 work( icbuf+j ) = work( icbuf+j ) -
1959 $ sum*t1
1960 a( ( itmp1-1 )*lda+j )
1961 $ = a( ( itmp1-1 )*lda+j ) - sum*t2
1962 370 CONTINUE
1963 CALL dgesd2d( contxt, lihih-liloh+1, 1,
1964 $ work( icbuf+1 ),
1965 $ lihih-liloh+1, myrow, left )
1966 END IF
1967 END IF
1968 ELSE
1969 IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
1970 $ ( icurcol( ki ).EQ.right ) ) THEN
1971 itmp1 = kcol( ki )
1972 CALL dgesd2d( contxt, lihih-liloh+1, 1,
1973 $ a( ( itmp1-1 )*lda+liloh ),
1974 $ lda, myrow, right )
1975 CALL infog1l( k, hbl, npcol, mycol,
1976 $ desca(csrc_), itmp1, itmp2 )
1977 itmp2 = numroc( k+1, hbl, mycol,
1978 $ desca(csrc_), npcol )
1979 CALL dgerv2d( contxt, lihih-liloh+1, 1,
1980 $ a( ( itmp1-1 )*lda+liloh ),
1981 $ lda, myrow, right )
1982 END IF
1983 END IF
1984*
1985 IF( wantz ) THEN
1986*
1987* Accumulate transformations in the matrix Z
1988*
1989 IF( icurcol( ki ).EQ.mycol ) THEN
1990* LOCAL Z(LILOZ:LIHIZ,LOCALK2:LOCALK2+2)
1991 IF( ( ispec.EQ.0 ) .OR. ( npcol.EQ.1 ) .OR.
1992 $ ( mod( k-1, hbl ).EQ.hbl-2 ) ) THEN
1993 itmp1 = kcol( ki ) + k - istart
1994 itmp1 = ( itmp1-1 )*ldz
1995 DO 380 j = liloz, lihiz
1996 sum = z( j+itmp1 ) +
1997 $ v2*z( j+itmp1+ldz )
1998 z( j+itmp1 ) = z( j+itmp1 ) - sum*t1
1999 z( j+itmp1+ldz ) = z( j+itmp1+ldz ) -
2000 $ sum*t2
2001 380 CONTINUE
2002 localk2( ki ) = localk2( ki ) + 1
2003 ELSE
2004 itmp1 = localk2( ki )
2005* IF WE ACTUALLY OWN COLUMN K
2006 IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
2007 CALL dgerv2d( contxt, lihiz-liloz+1, 1,
2008 $ work( icbuf+1 ), ldz,
2009 $ myrow, left )
2010 itmp1 = ( itmp1-1 )*ldz
2011 DO 390 j = liloz, lihiz
2012 sum = work( icbuf+j ) +
2013 $ v2*z( j+itmp1 )
2014 work( icbuf+j ) = work( icbuf+j ) -
2015 $ sum*t1
2016 z( j+itmp1 ) = z( j+itmp1 ) - sum*t2
2017 390 CONTINUE
2018 CALL dgesd2d( contxt, lihiz-liloz+1, 1,
2019 $ work( icbuf+1 ), ldz,
2020 $ myrow, left )
2021 localk2( ki ) = localk2( ki ) + 1
2022 END IF
2023 END IF
2024 ELSE
2025*
2026* NO WORK BUT NEED TO UPDATE ANYWAY????
2027*
2028 IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
2029 $ ( icurcol( ki ).EQ.right ) ) THEN
2030 itmp1 = kcol( ki )
2031 itmp1 = ( itmp1-1 )*ldz
2032 CALL dgesd2d( contxt, lihiz-liloz+1, 1,
2033 $ z( liloz+itmp1 ), ldz,
2034 $ myrow, right )
2035 CALL dgerv2d( contxt, lihiz-liloz+1, 1,
2036 $ z( liloz+itmp1 ), ldz,
2037 $ myrow, right )
2038 localk2( ki ) = localk2( ki ) + 1
2039 END IF
2040 END IF
2041 END IF
2042 END IF
2043 400 CONTINUE
2044*
2045* Adjust local information for this bulge
2046*
2047 IF( nprow.EQ.1 ) THEN
2048 krow( ki ) = krow( ki ) + k2( ki ) - k1( ki ) + 1
2049 kp2row( ki ) = kp2row( ki ) + k2( ki ) - k1( ki ) + 1
2050 END IF
2051 IF( ( mod( k1( ki )-1, hbl ).LT.hbl-2 ) .AND.
2052 $ ( icurrow( ki ).EQ.myrow ) .AND. ( nprow.GT.1 ) )
2053 $ THEN
2054 krow( ki ) = krow( ki ) + k2( ki ) - k1( ki ) + 1
2055 END IF
2056 IF( ( mod( k2( ki ), hbl ).LT.hbl-2 ) .AND.
2057 $ ( icurrow( ki ).EQ.myrow ) .AND. ( nprow.GT.1 ) )
2058 $ THEN
2059 kp2row( ki ) = kp2row( ki ) + k2( ki ) - k1( ki ) + 1
2060 END IF
2061 IF( ( mod( k1( ki )-1, hbl ).GE.hbl-2 ) .AND.
2062 $ ( ( myrow.EQ.icurrow( ki ) ) .OR. ( down.EQ.
2063 $ icurrow( ki ) ) ) .AND. ( nprow.GT.1 ) ) THEN
2064 CALL infog1l( k2( ki )+1, hbl, nprow, myrow,
2065 $ desca(rsrc_), krow( ki ), itmp2 )
2066 itmp2 = numroc( n, hbl, myrow, desca(rsrc_), nprow )
2067 END IF
2068 IF( ( mod( k2( ki ), hbl ).GE.hbl-2 ) .AND.
2069 $ ( ( myrow.EQ.icurrow( ki ) ) .OR. ( up.EQ.
2070 $ icurrow( ki ) ) ) .AND. ( nprow.GT.1 ) ) THEN
2071 CALL infog1l( 1, hbl, nprow, myrow, desca(rsrc_),
2072 $ itmp2,kp2row( ki ) )
2073 kp2row( ki ) = numroc( k2( ki )+3, hbl, myrow,
2074 $ desca(rsrc_), nprow )
2075 END IF
2076 IF( npcol.EQ.1 ) THEN
2077 kcol( ki ) = kcol( ki ) + k2( ki ) - k1( ki ) + 1
2078 kp2col( ki ) = kp2col( ki ) + k2( ki ) - k1( ki ) + 1
2079 END IF
2080 IF( ( mod( k1( ki )-1, hbl ).LT.hbl-2 ) .AND.
2081 $ ( icurcol( ki ).EQ.mycol ) .AND. ( npcol.GT.1 ) )
2082 $ THEN
2083 kcol( ki ) = kcol( ki ) + k2( ki ) - k1( ki ) + 1
2084 END IF
2085 IF( ( mod( k2( ki ), hbl ).LT.hbl-2 ) .AND.
2086 $ ( icurcol( ki ).EQ.mycol ) .AND. ( npcol.GT.1 ) )
2087 $ THEN
2088 kp2col( ki ) = kp2col( ki ) + k2( ki ) - k1( ki ) + 1
2089 END IF
2090 IF( ( mod( k1( ki )-1, hbl ).GE.hbl-2 ) .AND.
2091 $ ( ( mycol.EQ.icurcol( ki ) ) .OR. ( right.EQ.
2092 $ icurcol( ki ) ) ) .AND. ( npcol.GT.1 ) ) THEN
2093 CALL infog1l( k2( ki )+1, hbl, npcol, mycol,
2094 $ desca(csrc_), kcol( ki ), itmp2 )
2095 itmp2 = numroc( n, hbl, mycol, desca(csrc_), npcol )
2096 END IF
2097 IF( ( mod( k2( ki ), hbl ).GE.hbl-2 ) .AND.
2098 $ ( ( mycol.EQ.icurcol( ki ) ) .OR. ( left.EQ.
2099 $ icurcol( ki ) ) ) .AND. ( npcol.GT.1 ) ) THEN
2100 CALL infog1l( 1, hbl, npcol, mycol,desca(csrc_),itmp2,
2101 $ kp2col( ki ) )
2102 kp2col( ki ) = numroc( k2( ki )+3, hbl, mycol,
2103 $ desca(csrc_), npcol )
2104 END IF
2105 k1( ki ) = k2( ki ) + 1
2106 istop = min( k1( ki )+rotn-mod( k1( ki ), rotn ), i-2 )
2107 istop = min( istop, k1( ki )+hbl-3-
2108 $ mod( k1( ki )-1, hbl ) )
2109 istop = min( istop, i2-2 )
2110 istop = max( istop, k1( ki ) )
2111* ISTOP = MIN( ISTOP , I-1 )
2112 k2( ki ) = istop
2113 IF( k1( ki ).EQ.istop ) THEN
2114 IF( ( mod( istop-1, hbl ).EQ.hbl-2 ) .AND.
2115 $ ( i-istop.GT.1 ) ) THEN
2116*
2117* Next step switches rows & cols
2118*
2119 icurrow( ki ) = mod( icurrow( ki )+1, nprow )
2120 icurcol( ki ) = mod( icurcol( ki )+1, npcol )
2121 END IF
2122 END IF
2123 410 CONTINUE
2124 IF( k2( ibulge ).LE.i-1 )
2125 $ GO TO 40
2126 END IF
2127*
2128 420 CONTINUE
2129*
2130* Failure to converge in remaining number of iterations
2131*
2132 info = i
2133 work( 1 ) = dble( lwkopt )
2134 RETURN
2135*
2136 430 CONTINUE
2137*
2138 IF( l.EQ.i ) THEN
2139*
2140* H(I,I-1) is negligible: one eigenvalue has converged.
2141*
2142 CALL infog2l( i, i, desca, nprow, npcol, myrow, mycol, irow,
2143 $ icol, itmp1, itmp2 )
2144 IF( ( myrow.EQ.itmp1 ) .AND. ( mycol.EQ.itmp2 ) ) THEN
2145 wr( i ) = a( ( icol-1 )*lda+irow )
2146 ELSE
2147 wr( i ) = zero
2148 END IF
2149 wi( i ) = zero
2150 ELSE IF( l.EQ.i-1 ) THEN
2151*
2152* H(I-1,I-2) is negligible: a pair of eigenvalues have converged.
2153*
2154 CALL pdelget( 'All', ' ', h11, a, l, l, desca )
2155 CALL pdelget( 'All', ' ', h21, a, i, l, desca )
2156 CALL pdelget( 'All', ' ', h12, a, l, i, desca )
2157 CALL pdelget( 'All', ' ', h22, a, i, i, desca )
2158 CALL dlanv2( h11, h12, h21, h22, wr( l ), wi( l ), wr( i ),
2159 $ wi( i ), cs, sn )
2160 CALL pdelset( a, l, l, desca, h11 )
2161 CALL pdelset( a, i, l, desca, h21 )
2162 CALL pdelset( a, l, i, desca, h12 )
2163 CALL pdelset( a, i, i, desca, h22 )
2164*
2165* Transform H to the standard Schur form
2166*
2167 IF( wantt ) THEN
2168 IF(i .LT. n) CALL pdrot( n-i, a, l, i+1, desca, desca( m_ ),
2169 $ a, i, i+1, desca, desca( m_ ), cs,
2170 $ sn, work( vecsidx+1 ),
2171 $ lwork-vecsidx, ierr )
2172 ltop = 1
2173 ELSE
2174 ltop = i1
2175 END IF
2176 IF (l .GT. ltop) CALL pdrot( l-ltop, a, ltop, l, desca, 1, a,
2177 $ ltop, i, desca, 1, cs, sn,
2178 $ work( vecsidx+1 ), lwork-vecsidx,
2179 $ ierr )
2180 IF( wantz ) THEN
2181 CALL pdrot( ihiz-iloz+1, z, iloz, l, descz, 1, z, iloz, i,
2182 $ descz, 1, cs, sn, work( vecsidx+1 ),
2183 $ lwork-vecsidx, ierr )
2184 END IF
2185 IF( node .NE. 0 ) THEN
2186 wr( l ) = zero
2187 wr( i ) = zero
2188 wi( l ) = zero
2189 wi( i ) = zero
2190 ENDIF
2191 ELSE
2192*
2193* Find the eigenvalues in H(L:I,L:I), L < I-1
2194*
2195 nh = i - l + 1
2196 IF( nh .LE. lds ) THEN
2197 CALL pdlaqr4( wantt, wantz, n, l, i, a, desca, wr, wi,
2198 $ iloz, ihiz, z, descz, work( s1+1 ), nh,
2199 $ work( s2+1 ), nh, work( s3+1 ), 4*lds*lds,
2200 $ info )
2201 IF( info.NE.0 ) THEN
2202 work( 1 ) = dble( lwkopt )
2203 RETURN
2204 END IF
2205 IF( node.NE.0 ) THEN
2206*
2207* Erase the eigenvalues
2208*
2209 DO 440 k = l, i
2210 wr( k ) = zero
2211 wi( k ) = zero
2212 440 CONTINUE
2213 END IF
2214 END IF
2215 END IF
2216*
2217* Decrement number of remaining iterations, and return to start of
2218* the main loop with new value of I.
2219*
2220 itn = itn - its
2221 IF( m.EQ.l-10 ) THEN
2222 i = l - 1
2223 ELSE
2224 i = m
2225 END IF
2226* I = L - 1
2227 GO TO 10
2228*
2229 450 CONTINUE
2230*
2231 IF( num.GT.1 ) THEN
2232 CALL dgsum2d( contxt, 'All', ' ', ihi-ilo+1, 1, wr(ilo), n,
2233 $ -1, -1 )
2234 CALL dgsum2d( contxt, 'All', ' ', ihi-ilo+1, 1, wi(ilo), n,
2235 $ -1, -1 )
2236 END IF
2237*
2238 work( 1 ) = dble( lwkopt )
2239 iwork( 1 ) = totit
2240 iwork( 2 ) = totsw
2241 iwork( 3 ) = totns
2242 RETURN
2243*
2244* END OF PDLAQR1
2245*
2246 END
subroutine dlaref(type, a, lda, wantz, z, ldz, block, irow1, icol1, istart, istop, itmp1, itmp2, liloz, lihiz, vecs, v2, v3, t1, t2, t3)
Definition dlaref.f:4
subroutine dlasorte(s, lds, j, out, info)
Definition dlasorte.f:2
integer function ilcm(m, n)
Definition ilcm.f:2
subroutine infog1l(gindx, nb, nprocs, myroc, isrcproc, lindx, rocsrc)
Definition infog1l.f:3
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
double precision function pdlamch(ictxt, cmach)
Definition pdblastst.f:6769
subroutine pdelget(scope, top, alpha, a, ia, ja, desca)
Definition pdelget.f:2
subroutine pdelset(a, ia, ja, desca, alpha)
Definition pdelset.f:2
subroutine pdlabad(ictxt, small, large)
Definition pdlabad.f:2
subroutine pdlacp3(m, i, a, desca, b, ldb, ii, jj, rev)
Definition pdlacp3.f:2
recursive subroutine pdlaqr1(wantt, wantz, n, ilo, ihi, a, desca, wr, wi, iloz, ihiz, z, descz, work, lwork, iwork, ilwork, info)
Definition pdlaqr1.f:5
subroutine pdlaqr2(wantt, wantz, n, ktop, kbot, nw, a, desca, iloz, ihiz, z, descz, ns, nd, sr, si, t, ldt, v, ldv, wr, wi, work, lwork)
Definition pdlaqr2.f:4
subroutine pdlaqr4(wantt, wantz, n, ilo, ihi, a, desca, wr, wi, iloz, ihiz, z, descz, t, ldt, v, ldv, work, lwork, info)
Definition pdlaqr4.f:4
subroutine pdlasmsub(a, desca, i, l, k, smlnum, buf, lwork)
Definition pdlasmsub.f:2
subroutine pdlawil(ii, jj, m, a, desca, h44, h33, h43h34, v)
Definition pdlawil.f:2
subroutine pdrot(n, x, ix, jx, descx, incx, y, iy, jy, descy, incy, cs, sn, work, lwork, info)
Definition pdrot.f:3
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2