ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pclahqr.f
Go to the documentation of this file.
1  SUBROUTINE pclahqr( WANTT, WANTZ, N, ILO, IHI, A, DESCA, W, ILOZ,
2  $ IHIZ, Z, DESCZ, WORK, LWORK, IWORK, ILWORK,
3  $ INFO )
4 *
5 * -- ScaLAPACK routine (version 1.7.3) --
6 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
7 * and University of California, Berkeley.
8 * 1.7.3: March 22, 2006
9 * modification suggested by Mark Fahey and Greg Henry
10 * 1.7.0: July 31, 2001
11 *
12 * .. Scalar Arguments ..
13  LOGICAL WANTT, WANTZ
14  INTEGER IHI, IHIZ, ILO, ILOZ, ILWORK, INFO, LWORK, N
15 * ..
16 * .. Array Arguments ..
17  INTEGER DESCA( * ), DESCZ( * ), IWORK( * )
18  COMPLEX A( * ), W( * ), WORK( * ), Z( * )
19 * ..
20 *
21 * Purpose
22 * =======
23 *
24 * PCLAHQR is an auxiliary routine used to find the Schur decomposition
25 * and or eigenvalues of a matrix already in Hessenberg form from
26 * cols ILO to IHI.
27 * If Z = I, and WANTT=WANTZ=.TRUE., H gets replaced with Z'HZ,
28 * with Z'Z=I, and H in Schur form.
29 *
30 * Notes
31 * =====
32 *
33 * Each global data object is described by an associated description
34 * vector. This vector stores the information required to establish
35 * the mapping between an object element and its corresponding process
36 * and memory location.
37 *
38 * Let A be a generic term for any 2D block cyclicly distributed array.
39 * Such a global array has an associated description vector DESCA.
40 * In the following comments, the character _ should be read as
41 * "of the global array".
42 *
43 * NOTATION STORED IN EXPLANATION
44 * --------------- -------------- --------------------------------------
45 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
46 * DTYPE_A = 1.
47 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
48 * the BLACS process grid A is distribu-
49 * ted over. The context itself is glo-
50 * bal, but the handle (the integer
51 * value) may vary.
52 * M_A (global) DESCA( M_ ) The number of rows in the global
53 * array A.
54 * N_A (global) DESCA( N_ ) The number of columns in the global
55 * array A.
56 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
57 * the rows of the array.
58 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
59 * the columns of the array.
60 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
61 * row of the array A is distributed.
62 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
63 * first column of the array A is
64 * distributed.
65 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
66 * array. LLD_A >= MAX(1,LOCp(M_A)).
67 *
68 * Let K be the number of rows or columns of a distributed matrix,
69 * and assume that its process grid has dimension p x q.
70 * LOCp( K ) denotes the number of elements of K that a process
71 * would receive if K were distributed over the p processes of its
72 * process column.
73 * Similarly, LOCq( K ) denotes the number of elements of K that a
74 * process would receive if K were distributed over the q processes of
75 * its process row.
76 * The values of LOCp() and LOCq() may be determined via a call to the
77 * ScaLAPACK tool function, NUMROC:
78 * LOCp( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
79 * LOCq( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
80 * An upper bound for these quantities may be computed by:
81 * LOCp( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
82 * LOCq( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
83 *
84 * Arguments
85 * =========
86 *
87 * WANTT (global input) LOGICAL
88 * = .TRUE. : the full Schur form T is required;
89 * = .FALSE.: only eigenvalues are required.
90 *
91 * WANTZ (global input) LOGICAL
92 * = .TRUE. : the matrix of Schur vectors Z is required;
93 * = .FALSE.: Schur vectors are not required.
94 *
95 * N (global input) INTEGER
96 * The order of the Hessenberg matrix A (and Z if WANTZ).
97 * N >= 0.
98 *
99 * ILO (global input) INTEGER
100 * IHI (global input) INTEGER
101 * It is assumed that A is already upper quasi-triangular in
102 * rows and columns IHI+1:N, and that A(ILO,ILO-1) = 0 (unless
103 * ILO = 1). PCLAHQR works primarily with the Hessenberg
104 * submatrix in rows and columns ILO to IHI, but applies
105 * transformations to all of H if WANTT is .TRUE..
106 * 1 <= ILO <= max(1,IHI); IHI <= N.
107 *
108 * A (global input/output) COMPLEX array, dimension
109 * (DESCA(LLD_),*)
110 * On entry, the upper Hessenberg matrix A.
111 * On exit, if WANTT is .TRUE., A is upper triangular in rows
112 * and columns ILO:IHI. If WANTT is .FALSE., the contents of
113 * A are unspecified on exit.
114 *
115 * DESCA (global and local input) INTEGER array of dimension DLEN_.
116 * The array descriptor for the distributed matrix A.
117 *
118 * W (global replicated output) COMPLEX array, dimension (N)
119 * The computed eigenvalues ILO to IHI are stored in the
120 * corresponding elements of W. If WANTT is .TRUE., the
121 * eigenvalues are stored in the same order as on the diagonal
122 * of the Schur form returned in A. A may be returned with
123 * larger diagonal blocks until the next release.
124 *
125 * ILOZ (global input) INTEGER
126 * IHIZ (global input) INTEGER
127 * Specify the rows of Z to which transformations must be
128 * applied if WANTZ is .TRUE..
129 * 1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
130 *
131 * Z (global input/output) COMPLEX array.
132 * If WANTZ is .TRUE., on entry Z must contain the current
133 * matrix Z of transformations accumulated by PCHSEQR, and on
134 * exit Z has been updated; transformations are applied only to
135 * the submatrix Z(ILOZ:IHIZ,ILO:IHI).
136 * If WANTZ is .FALSE., Z is not referenced.
137 *
138 * DESCZ (global and local input) INTEGER array of dimension DLEN_.
139 * The array descriptor for the distributed matrix Z.
140 *
141 * WORK (local output) COMPLEX array of size LWORK
142 * (Unless LWORK=-1, in which case WORK must be at least size 1)
143 *
144 * LWORK (local input) INTEGER
145 * WORK(LWORK) is a local array and LWORK is assumed big enough
146 * so that LWORK >= 3*N +
147 * MAX( 2*MAX(DESCZ(LLD_),DESCA(LLD_)) + 2*LOCq(N),
148 * 7*Ceil(N/HBL)/LCM(NPROW,NPCOL)) +
149 * MAX( 2*N, (8*LCM(NPROW,NPCOL)+2)**2 )
150 * If LWORK=-1, then WORK(1) gets set to the above number and
151 * the code returns immediately.
152 *
153 * IWORK (global and local input) INTEGER array of size ILWORK
154 * This will hold some of the IBLK integer arrays.
155 * This is held as a place holder for a future release.
156 * Currently unreferenced.
157 *
158 * ILWORK (local input) INTEGER
159 * This will hold the size of the IWORK array.
160 * This is held as a place holder for a future release.
161 * Currently unreferenced.
162 *
163 * INFO (global output) INTEGER
164 * < 0: parameter number -INFO incorrect or inconsistent
165 * = 0: successful exit
166 * > 0: PCLAHQR failed to compute all the eigenvalues ILO to IHI
167 * in a total of 30*(IHI-ILO+1) iterations; if INFO = i,
168 * elements i+1:ihi of W contains those eigenvalues
169 * which have been successfully computed.
170 *
171 * Logic:
172 * This algorithm is very similar to SLAHQR. Unlike SLAHQR,
173 * instead of sending one double shift through the largest
174 * unreduced submatrix, this algorithm sends multiple double shifts
175 * and spaces them apart so that there can be parallelism across
176 * several processor row/columns. Another critical difference is
177 * that this algorithm aggregrates multiple transforms together in
178 * order to apply them in a block fashion.
179 *
180 * Important Local Variables:
181 * IBLK = The maximum number of bulges that can be computed.
182 * Currently fixed. Future releases this won't be fixed.
183 * HBL = The square block size (HBL=DESCA(MB_)=DESCA(NB_))
184 * ROTN = The number of transforms to block together
185 * NBULGE = The number of bulges that will be attempted on the
186 * current submatrix.
187 * IBULGE = The current number of bulges started.
188 * K1(*),K2(*) = The current bulge loops from K1(*) to K2(*).
189 *
190 * Subroutines:
191 * From LAPACK, this routine calls:
192 * CLAHQR -> Serial QR used to determine shifts and
193 * eigenvalues
194 * CLARFG -> Determine the Householder transforms
195 *
196 * This ScaLAPACK, this routine calls:
197 * PCLACONSB -> To determine where to start each iteration
198 * CLAMSH -> Sends multiple shifts through a small
199 * submatrix to see how the consecutive
200 * subdiagonals change (if PCLACONSB indicates
201 * we can start a run in the middle)
202 * PCLAWIL -> Given the shift, get the transformation
203 * PCLACP3 -> Parallel array to local replicated array copy
204 * & back.
205 * CLAREF -> Row/column reflector applier. Core routine
206 * here.
207 * PCLASMSUB -> Finds negligible subdiagonal elements.
208 *
209 * Current Notes and/or Restrictions:
210 * 1.) This code requires the distributed block size to be square
211 * and at least six (6); unlike simpler codes like LU, this
212 * algorithm is extremely sensitive to block size. Unwise
213 * choices of too small a block size can lead to bad
214 * performance.
215 * 2.) This code requires A and Z to be distributed identically
216 * and have identical contxts. A future version may allow Z to
217 * have a different contxt to 1D row map it to all nodes (so no
218 * communication on Z is necessary.)
219 * 3.) This code does not currently block the initial transforms
220 * so that none of the rows or columns for any bulge are
221 * completed until all are started. To offset pipeline
222 * start-up it is recommended that at least 2*LCM(NPROW,NPCOL)
223 * bulges are used (if possible)
224 * 4.) The maximum number of bulges currently supported is fixed at
225 * 32. In future versions this will be limited only by the
226 * incoming WORK and IWORK array.
227 * 5.) The matrix A must be in upper Hessenberg form. If elements
228 * below the subdiagonal are nonzero, the resulting transforms
229 * may be nonsimilar. This is also true with the LAPACK
230 * routine CLAHQR.
231 * 6.) For this release, this code has only been tested for
232 * RSRC_=CSRC_=0, but it has been written for the general case.
233 * 7.) Currently, all the eigenvalues are distributed to all the
234 * nodes. Future releases will probably distribute the
235 * eigenvalues by the column partitioning.
236 * 8.) The internals of this routine are subject to change.
237 * 9.) To optimize this for your architecture, try tuning CLAREF.
238 * 10.) This code has only been tested for WANTZ = .TRUE. and may
239 * behave unpredictably for WANTZ set to .FALSE.
240 *
241 * Further Details
242 * ===============
243 *
244 * Contributed by Mark Fahey, June, 2000.
245 *
246 * =====================================================================
247 *
248 * .. Parameters ..
249  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_,
250  $ LLD_, MB_, M_, NB_, N_, RSRC_
251  parameter( block_cyclic_2d = 1, dlen_ = 9, dt_ = 1,
252  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
253  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
254  REAL RONE
255  PARAMETER ( RONE = 1.0e+0 )
256  COMPLEX ZERO, ONE
257  parameter( zero = ( 0.0e+0, 0.0e+0 ),
258  $ one = ( 1.0e+0, 0.0e+0 ) )
259  REAL CONST
260  PARAMETER ( CONST = 1.50e+0 )
261  INTEGER IBLK
262  parameter( iblk = 32 )
263 * ..
264 * .. Local Scalars ..
265  LOGICAL SKIP
266  INTEGER CONTXT, DOWN, HBL, I, I1, I2, IAFIRST, IBULGE,
267  $ icbuf, icol, icol1, icol2, idia, ierr, ii,
268  $ irbuf, irow, irow1, irow2, ispec, istart,
269  $ istartcol, istartrow, istop, isub, isup,
270  $ itermax, itmp1, itmp2, itn, its, izbuf, j,
271  $ jafirst, jblk, jj, k, ki, l, lcmrc, lda, ldz,
272  $ left, lihih, lihiz, liloh, liloz, locali1,
273  $ locali2, localk, localm, m, modkm1, mycol,
274  $ myrow, nbulge, nh, node, npcol, nprow, nq, nr,
275  $ num, nz, right, rotn, up, vecsidx
276  REAL CS, OVFL, S, SMLNUM, ULP, UNFL
277  COMPLEX CDUM, H10, H11, H22, H33, H43H34, H44, SN, SUM,
278  $ t1, t1copy, t2, t3, v1save, v2, v2save, v3,
279  $ v3save
280 * ..
281 * .. Local Arrays ..
282  INTEGER ICURCOL( IBLK ), ICURROW( IBLK ), K1( IBLK ),
283  $ K2( IBLK ), KCOL( IBLK ), KP2COL( IBLK ),
284  $ kp2row( iblk ), krow( iblk )
285  COMPLEX S1( 2*IBLK, 2*IBLK ), SMALLA( 6, 6, IBLK ),
286  $ VCOPY( 3 )
287 * ..
288 * .. External Functions ..
289  INTEGER ILCM, NUMROC
290  REAL PSLAMCH
291  EXTERNAL ilcm, numroc, pslamch
292 * ..
293 * .. External Subroutines ..
294  EXTERNAL blacs_gridinfo, igamn2d, igebr2d, igebs2d,
296  $ pclacp3, pclasmsub, pclawil, pcrot, ccopy,
297  $ cgebr2d, cgebs2d, cgerv2d, cgesd2d, cgsum2d,
298  $ clahqr2, clamsh, clanv2, claref, clarfg
299 * ..
300 * .. Intrinsic Functions ..
301 *
302  INTRINSIC abs, real, conjg, aimag, max, min, mod
303 * ..
304 * .. Statement Functions ..
305  REAL CABS1
306 * ..
307 * .. Statement Function definitions ..
308  cabs1( cdum ) = abs( real( cdum ) ) + abs( aimag( cdum ) )
309 * ..
310 * .. Executable Statements ..
311 *
312  info = 0
313 *
314  itermax = 30*( ihi-ilo+1 )
315  IF( n.EQ.0 )
316  $ RETURN
317 *
318 * NODE (IAFIRST,JAFIRST) OWNS A(1,1)
319 *
320  hbl = desca( mb_ )
321  contxt = desca( ctxt_ )
322  lda = desca( lld_ )
323  iafirst = desca( rsrc_ )
324  jafirst = desca( csrc_ )
325  ldz = descz( lld_ )
326  CALL blacs_gridinfo( contxt, nprow, npcol, myrow, mycol )
327  node = myrow*npcol + mycol
328  num = nprow*npcol
329  left = mod( mycol+npcol-1, npcol )
330  right = mod( mycol+1, npcol )
331  up = mod( myrow+nprow-1, nprow )
332  down = mod( myrow+1, nprow )
333  lcmrc = ilcm( nprow, npcol )
334  IF( ( nprow.LE.3 ) .OR. ( npcol.LE.3 ) ) THEN
335  skip = .true.
336  ELSE
337  skip = .false.
338  END IF
339 *
340 * Determine the number of columns we have so we can check workspace
341 *
342  nq = numroc( n, hbl, mycol, jafirst, npcol )
343  jj = n / hbl
344  IF( jj*hbl.LT.n )
345  $ jj = jj + 1
346  jj = 7*jj / lcmrc
347  jj = 3*n + max( 2*max( lda, ldz )+2*nq, jj )
348  jj = jj + max( 2*n, ( 8*lcmrc+2 )**2 )
349  IF( lwork.EQ.-1 ) THEN
350  work( 1 ) = jj
351  RETURN
352  END IF
353  IF( lwork.LT.jj ) THEN
354  info = -14
355  END IF
356  IF( descz( ctxt_ ).NE.desca( ctxt_ ) ) THEN
357  info = -( 1300+ctxt_ )
358  END IF
359  IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
360  info = -( 700+nb_ )
361  END IF
362  IF( descz( mb_ ).NE.descz( nb_ ) ) THEN
363  info = -( 1300+nb_ )
364  END IF
365  IF( desca( mb_ ).NE.descz( mb_ ) ) THEN
366  info = -( 1300+mb_ )
367  END IF
368  IF( ( desca( rsrc_ ).NE.0 ) .OR. ( desca( csrc_ ).NE.0 ) ) THEN
369  info = -( 700+rsrc_ )
370  END IF
371  IF( ( descz( rsrc_ ).NE.0 ) .OR. ( descz( csrc_ ).NE.0 ) ) THEN
372  info = -( 1300+rsrc_ )
373  END IF
374  IF( ( ilo.GT.n ) .OR. ( ilo.LT.1 ) ) THEN
375  info = -4
376  END IF
377  IF( ( ihi.GT.n ) .OR. ( ihi.LT.1 ) ) THEN
378  info = -5
379  END IF
380  IF( hbl.LT.5 ) THEN
381  info = -( 700+mb_ )
382  END IF
383  CALL igamn2d( contxt, 'ALL', ' ', 1, 1, info, 1, itmp1, itmp2, -1,
384  $ -1, -1 )
385  IF( info.LT.0 ) THEN
386  CALL pxerbla( contxt, 'PCLAHQR', -info )
387  RETURN
388  END IF
389 *
390 * Set work array indices
391 *
392  vecsidx = 0
393  idia = 3*n
394  isub = 3*n
395  isup = 3*n
396  irbuf = 3*n
397  icbuf = 3*n
398  izbuf = 5*n
399 *
400 * Find a value for ROTN
401 *
402  rotn = hbl / 3
403  rotn = min( rotn, hbl-2 )
404  rotn = max( rotn, 1 )
405 *
406  IF( ilo.EQ.ihi ) THEN
407  CALL infog2l( ilo, ilo, desca, nprow, npcol, myrow, mycol,
408  $ irow, icol, ii, jj )
409  IF( ( myrow.EQ.ii ) .AND. ( mycol.EQ.jj ) ) THEN
410  w( ilo ) = a( ( icol-1 )*lda+irow )
411  ELSE
412  w( ilo ) = zero
413  END IF
414  RETURN
415  END IF
416 *
417  nh = ihi - ilo + 1
418  nz = ihiz - iloz + 1
419 *
420  CALL infog1l( iloz, hbl, nprow, myrow, iafirst, liloz, lihiz )
421  lihiz = numroc( ihiz, hbl, myrow, iafirst, nprow )
422 *
423 * Set machine-dependent constants for the stopping criterion.
424 * If NORM(H) <= SQRT(OVFL), overflow should not occur.
425 *
426  unfl = pslamch( contxt, 'SAFE MINIMUM' )
427  ovfl = rone / unfl
428  CALL pslabad( contxt, unfl, ovfl )
429  ulp = pslamch( contxt, 'PRECISION' )
430  smlnum = unfl*( nh / ulp )
431 *
432 * I1 and I2 are the indices of the first row and last column of H
433 * to which transformations must be applied. If eigenvalues only are
434 * being computed, I1 and I2 are set inside the main loop.
435 *
436  IF( wantt ) THEN
437  i1 = 1
438  i2 = n
439  END IF
440 *
441 * ITN is the total number of QR iterations allowed.
442 *
443  itn = itermax
444 *
445 * The main loop begins here. I is the loop index and decreases from
446 * IHI to ILO in steps of our schur block size (<=2*IBLK). Each
447 * iteration of the loop works with the active submatrix in rows
448 * and columns L to I. Eigenvalues I+1 to IHI have already
449 * converged. Either L = ILO or the global A(L,L-1) is negligible
450 * so that the matrix splits.
451 *
452  i = ihi
453  10 CONTINUE
454  l = ilo
455  IF( i.LT.ilo )
456  $ GO TO 570
457 *
458 * Perform QR iterations on rows and columns ILO to I until a
459 * submatrix of order 1 or 2 splits off at the bottom because a
460 * subdiagonal element has become negligible.
461 *
462  DO 540 its = 0, itn
463 *
464 * Look for a single small subdiagonal element.
465 *
466  CALL pclasmsub( a, desca, i, l, k, smlnum, work( irbuf+1 ),
467  $ lwork-irbuf )
468  l = k
469 *
470  IF( l.GT.ilo ) THEN
471 *
472 * H(L,L-1) is negligible
473 *
474  CALL infog2l( l, l-1, desca, nprow, npcol, myrow, mycol,
475  $ irow, icol, itmp1, itmp2 )
476  IF( ( myrow.EQ.itmp1 ) .AND. ( mycol.EQ.itmp2 ) ) THEN
477  a( ( icol-1 )*lda+irow ) = zero
478  END IF
479  work( isub+l-1 ) = zero
480  END IF
481 *
482 * Exit from loop if a submatrix of order 1 or 2 has split off.
483 *
484  IF( wantt ) THEN
485 * For Schur form, use 2x2 blocks
486  IF( l.GE.i-1 ) THEN
487  GO TO 550
488  END IF
489  ELSE
490 * If we don't want the Schur form, use bigger blocks.
491  IF( l.GE.i-( 2*iblk-1 ) ) THEN
492  GO TO 550
493  END IF
494  END IF
495 *
496 * Now the active submatrix is in rows and columns L to I. If
497 * eigenvalues only are being computed, only the active submatrix
498 * need be transformed.
499 *
500  IF( .NOT.wantt ) THEN
501  i1 = l
502  i2 = i
503  END IF
504 *
505 * Copy submatrix of size 2*JBLK and prepare to do generalized
506 * Wilkinson shift or an exceptional shift
507 *
508  jblk = min( iblk, ( ( i-l+1 ) / 2 )-1 )
509  IF( jblk.GT.lcmrc ) THEN
510 *
511 * Make sure it's divisible by LCM (we want even workloads!)
512 *
513  jblk = jblk - mod( jblk, lcmrc )
514  END IF
515  jblk = min( jblk, 2*lcmrc )
516  jblk = max( jblk, 1 )
517 *
518  CALL pclacp3( 2*jblk, i-2*jblk+1, a, desca, s1, 2*iblk, -1, -1,
519  $ 0 )
520  IF( ( its.EQ.20 .OR. its.EQ.40 ) .AND. ( jblk.GT.1 ) ) THEN
521 *
522 * Exceptional shift.
523 *
524  DO 20 ii = 2*jblk, 2, -1
525  s1( ii, ii ) = const*( cabs1( s1( ii, ii ) )+
526  $ cabs1( s1( ii, ii-1 ) ) )
527  s1( ii, ii-1 ) = zero
528  s1( ii-1, ii ) = zero
529  20 CONTINUE
530  s1( 1, 1 ) = const*cabs1( s1( 1, 1 ) )
531  ELSE
532  CALL clahqr2( .false., .false., 2*jblk, 1, 2*jblk, s1,
533  $ 2*iblk, work( irbuf+1 ), 1, 2*jblk, z, ldz,
534  $ ierr )
535 *
536 * Prepare to use Wilkinson's double shift
537 *
538  h44 = s1( 2*jblk, 2*jblk )
539  h33 = s1( 2*jblk-1, 2*jblk-1 )
540  h43h34 = s1( 2*jblk-1, 2*jblk )*s1( 2*jblk, 2*jblk-1 )
541 *
542  END IF
543 *
544 * Look for two consecutive small subdiagonal elements:
545 * PCLACONSB is the routine that does this.
546 *
547  CALL pclaconsb( a, desca, i, l, m, h44, h33, h43h34,
548  $ work( irbuf+1 ), lwork-irbuf )
549 *
550 * Double-shift QR step
551 *
552 * NBULGE is the number of bulges that will be attempted
553 *
554  istop = min( m+rotn-1-mod( m-( m / hbl )*hbl-1, rotn ), i-2 )
555  istop = min( istop, m+hbl-3-mod( m-1, hbl ) )
556  istop = min( istop, i2-2 )
557  istop = max( istop, m )
558  nbulge = ( i-1-istop ) / hbl
559 *
560 * Do not exceed maximum determined.
561 *
562  nbulge = min( nbulge, jblk )
563  IF( nbulge.GT.lcmrc ) THEN
564 *
565 * Make sure it's divisible by LCM (we want even workloads!)
566 *
567  nbulge = nbulge - mod( nbulge, lcmrc )
568  END IF
569  nbulge = max( nbulge, 1 )
570 *
571 * If we are starting in the middle because of consecutive small
572 * subdiagonal elements, we need to see how many bulges we
573 * can send through without breaking the consecutive small
574 * subdiagonal property.
575 *
576  IF( ( nbulge.GT.1 ) .AND. ( m.GT.l ) ) THEN
577 *
578 * Copy a chunk of elements from global A(M-1:,M-1:)
579 *
580  CALL infog2l( m+2, m+2, desca, nprow, npcol, myrow, mycol,
581  $ irow1, icol1, itmp1, itmp2 )
582  ii = min( 4*nbulge+2, n-m+2 )
583  CALL pclacp3( ii, m-1, a, desca, work( irbuf+1 ), ii, itmp1,
584  $ itmp2, 0 )
585  IF( ( myrow.EQ.itmp1 ) .AND. ( mycol.EQ.itmp2 ) ) THEN
586 *
587 * Find a new NBULGE based on the bulges we have.
588 *
589  CALL clamsh( s1, 2*iblk, nbulge, jblk, work( irbuf+1 ),
590  $ ii, ii, ulp )
591  IF( num.GT.1 ) THEN
592  CALL igebs2d( contxt, 'ALL', ' ', 1, 1, nbulge, 1 )
593  END IF
594  ELSE
595 *
596 * Everyone needs to receive the new NBULGE
597 *
598  CALL igebr2d( contxt, 'ALL', ' ', 1, 1, nbulge, 1, itmp1,
599  $ itmp2 )
600  END IF
601  END IF
602 *
603 * IBULGE is the number of bulges going so far
604 *
605  ibulge = 1
606 *
607 * "A" row defs : main row transforms from LOCALK to LOCALI2
608 *
609  CALL infog1l( m, hbl, npcol, mycol, jafirst, itmp1, localk )
610  localk = nq
611  CALL infog1l( 1, hbl, npcol, mycol, jafirst, icol1, locali2 )
612  locali2 = numroc( i2, hbl, mycol, jafirst, npcol )
613 *
614 * "A" col defs : main col transforms from LOCALI1 to LOCALM
615 *
616  CALL infog1l( i1, hbl, nprow, myrow, iafirst, locali1, icol1 )
617  CALL infog1l( 1, hbl, nprow, myrow, iafirst, localm, icol1 )
618  icol1 = numroc( min( m+3, i ), hbl, myrow, iafirst, nprow )
619 *
620 * Which row & column will start the bulges
621 *
622  istartrow = mod( ( m+1 ) / hbl, nprow ) + iafirst
623  istartcol = mod( ( m+1 ) / hbl, npcol ) + jafirst
624 *
625  CALL infog1l( m, hbl, nprow, myrow, iafirst, ii, itmp2 )
626  CALL infog1l( m, hbl, npcol, mycol, jafirst, jj, itmp2 )
627  CALL infog1l( 1, hbl, nprow, myrow, iafirst, istop,
628  $ kp2row( 1 ) )
629  kp2row( 1 ) = numroc( m+2, hbl, myrow, iafirst, nprow )
630  CALL infog1l( 1, hbl, npcol, mycol, jafirst, istop,
631  $ kp2col( 1 ) )
632  kp2col( 1 ) = numroc( m+2, hbl, mycol, jafirst, npcol )
633 *
634 * Set all values for bulges. All bulges are stored in
635 * intermediate steps as loops over KI. Their current "task"
636 * over the global M to I-1 values is always K1(KI) to K2(KI).
637 * However, because there are many bulges, K1(KI) & K2(KI) might
638 * go past that range while later bulges (KI+1,KI+2,etc..) are
639 * finishing up. Even if ROTN=1, in order to minimize border
640 * communication sometimes K1(KI)=HBL-2 & K2(KI)=HBL-1 so both
641 * border messages can be handled at once.
642 *
643 * Rules:
644 * If MOD(K1(KI)-1,HBL) < HBL-2 then MOD(K2(KI)-1,HBL)<HBL-2
645 * If MOD(K1(KI)-1,HBL) = HBL-1 then MOD(K2(KI)-1,HBL)=HBL-1
646 * K2(KI)-K1(KI) <= ROTN
647 *
648 * We first hit a border when MOD(K1(KI)-1,HBL)=HBL-2 and we hit
649 * it again when MOD(K1(KI)-1,HBL)=HBL-1.
650 *
651  DO 30 ki = 1, nbulge
652  k1( ki ) = m
653  istop = min( m+rotn-1-mod( m-( m / hbl )*hbl-1, rotn ),
654  $ i-2 )
655  istop = min( istop, m+hbl-3-mod( m-1, hbl ) )
656  istop = min( istop, i2-2 )
657  istop = max( istop, m )
658  IF( ( mod( m-1, hbl ).EQ.hbl-2 ) .AND.
659  $ ( istop.LT.min( i-2, i2-2 ) ) ) THEN
660  istop = istop + 1
661  END IF
662  k2( ki ) = istop
663  icurrow( ki ) = istartrow
664  icurcol( ki ) = istartcol
665  krow( ki ) = ii
666  kcol( ki ) = jj
667  IF( ki.GT.1 )
668  $ kp2row( ki ) = kp2row( 1 )
669  IF( ki.GT.1 )
670  $ kp2col( ki ) = kp2col( 1 )
671  30 CONTINUE
672 *
673 * Get first transform on node who owns M+2,M+2
674 *
675  DO 31 itmp1 = 1, 3
676  vcopy(itmp1) = zero
677  31 CONTINUE
678  itmp1 = istartrow
679  itmp2 = istartcol
680  CALL pclawil( itmp1, itmp2, m, a, desca, h44, h33, h43h34,
681  $ vcopy )
682  v1save = vcopy( 1 )
683  v2save = vcopy( 2 )
684  v3save = vcopy( 3 )
685 *
686 * The main implicit shift Francis loops over the bulges starts
687 * here!
688 *
689  IF( k2( ibulge ).LE.i-1 ) THEN
690  40 CONTINUE
691  IF( ( k1( ibulge ).GE.m+5 ) .AND. ( ibulge.LT.nbulge ) )
692  $ THEN
693  IF( ( mod( k2( ibulge )+2, hbl ).EQ.mod( k2( ibulge+1 )+
694  $ 2, hbl ) ) .AND. ( k1( 1 ).LE.i-1 ) ) THEN
695  h44 = s1( 2*jblk-2*ibulge, 2*jblk-2*ibulge )
696  h33 = s1( 2*jblk-2*ibulge-1, 2*jblk-2*ibulge-1 )
697  h43h34 = s1( 2*jblk-2*ibulge-1, 2*jblk-2*ibulge )*
698  $ s1( 2*jblk-2*ibulge, 2*jblk-2*ibulge-1 )
699  itmp1 = istartrow
700  itmp2 = istartcol
701  CALL pclawil( itmp1, itmp2, m, a, desca, h44, h33,
702  $ h43h34, vcopy )
703  v1save = vcopy( 1 )
704  v2save = vcopy( 2 )
705  v3save = vcopy( 3 )
706  ibulge = ibulge + 1
707  END IF
708  END IF
709 *
710 * When we hit a border, there are row and column transforms that
711 * overlap over several processors and the code gets very
712 * "congested." As a remedy, when we first hit a border, a 6x6
713 * *local* matrix is generated on one node (called SMALLA) and
714 * work is done on that. At the end of the border, the data is
715 * passed back and everything stays a lot simpler.
716 *
717  DO 120 ki = 1, ibulge
718 *
719  istart = max( k1( ki ), m )
720  istop = min( k2( ki ), i-1 )
721  k = istart
722  modkm1 = mod( k-1, hbl )
723  IF( ( modkm1.GE.hbl-2 ) .AND. ( k.LE.i-1 ) ) THEN
724  DO 81 itmp1 = 1, 6
725  DO 82 itmp2 = 1, 6
726  smalla(itmp1, itmp2, ki) = zero
727  82 CONTINUE
728  81 CONTINUE
729  IF( ( modkm1.EQ.hbl-2 ) .AND. ( k.LT.i-1 ) ) THEN
730 *
731 * Copy 6 elements from global A(K-1:K+4,K-1:K+4)
732 *
733  itmp1 = icurrow( ki )
734  itmp2 = icurcol( ki )
735  CALL pclacp3( min( 6, n-k+2 ), k-1, a, desca,
736  $ smalla( 1, 1, ki ), 6, itmp1, itmp2,
737  $ 0 )
738  END IF
739  IF( modkm1.EQ.hbl-1 ) THEN
740 *
741 * Copy 6 elements from global A(K-2:K+3,K-2:K+3)
742 *
743  CALL infog2l( k+1, k+1, desca, nprow, npcol, myrow,
744  $ mycol, irow1, icol1, itmp1, itmp2 )
745  CALL pclacp3( min( 6, n-k+3 ), k-2, a, desca,
746  $ smalla( 1, 1, ki ), 6, itmp1, itmp2,
747  $ 0 )
748  END IF
749  END IF
750 *
751 *
752 * CLAHQR used to have a single row application and a single
753 * column application to H. Here we do something a little
754 * more clever. We break each transformation down into 3
755 * parts:
756 * 1.) The minimum amount of work it takes to determine
757 * a group of ROTN transformations (this is on
758 * the critical path.) (Loops 50-120)
759 * (the data is broadcast now: loops 180-240)
760 * 2.) The small work it takes so that each of the rows
761 * and columns is at the same place. For example,
762 * all ROTN row transforms are all complete
763 * through some column TMP. (Loops 250-260)
764 * 3.) The majority of the row and column transforms
765 * are then applied in a block fashion.
766 * (row transforms are in loops 280-380)
767 * (col transforms are in loops 400-540)
768 *
769 * Each of these three parts are further subdivided into 3
770 * parts:
771 * A.) Work at the start of a border when
772 * MOD(ISTART-1,HBL) = HBL-2
773 * B.) Work at the end of a border when
774 * MOD(ISTART-1,HBL) = HBL-1
775 * C.) Work in the middle of the block when
776 * MOD(ISTART-1,HBL) < HBL-2
777 *
778 * Further optimization is met with the boolean SKIP. A border
779 * communication can be broken into several parts for
780 * efficient parallelism:
781 * Loop over all the bulges, just sending the data out
782 * Loop over all the bulges, just doing the work
783 * Loop over all the bulges, just sending the data back.
784 *
785 *
786  IF( ( myrow.EQ.icurrow( ki ) ) .AND.
787  $ ( mycol.EQ.icurcol( ki ) ) .AND.
788  $ ( modkm1.EQ.hbl-2 ) .AND.
789  $ ( istart.LT.min( i-1, istop+1 ) ) ) THEN
790  k = istart
791  nr = min( 3, i-k+1 )
792  IF( k.GT.m ) THEN
793  CALL ccopy( nr, smalla( 2, 1, ki ), 1, vcopy, 1 )
794  ELSE
795  vcopy( 1 ) = v1save
796  vcopy( 2 ) = v2save
797  vcopy( 3 ) = v3save
798  END IF
799  CALL clarfg( nr, vcopy( 1 ), vcopy( 2 ), 1, t1copy )
800  IF( k.GT.m ) THEN
801  smalla( 2, 1, ki ) = vcopy( 1 )
802  smalla( 3, 1, ki ) = zero
803  IF( k.LT.i-1 )
804  $ smalla( 4, 1, ki ) = zero
805  ELSE IF( m.GT.l ) THEN
806 *
807 * Following differs in comparison to pslahqr.
808 *
809  smalla( 2, 1, ki ) = smalla( 2, 1, ki ) -
810  $ conjg( t1copy )*
811  $ smalla( 2, 1, ki )
812  END IF
813  v2 = vcopy( 2 )
814  t2 = t1copy*v2
815  work( vecsidx+( k-1 )*3+1 ) = vcopy( 2 )
816  work( vecsidx+( k-1 )*3+2 ) = vcopy( 3 )
817  work( vecsidx+( k-1 )*3+3 ) = t1copy
818  IF( nr.EQ.3 ) THEN
819 *
820 * Do some work so next step is ready...
821 *
822  t1 = t1copy
823  v3 = vcopy( 3 )
824  t3 = t1*v3
825  itmp1 = min( 6, i2+2-k )
826  itmp2 = max( i1-k+2, 1 )
827  DO 50 j = 2, itmp1
828  sum = conjg( t1 )*smalla( 2, j, ki ) +
829  $ conjg( t2 )*smalla( 3, j, ki ) +
830  $ conjg( t3 )*smalla( 4, j, ki )
831  smalla( 2, j, ki ) = smalla( 2, j, ki ) - sum
832  smalla( 3, j, ki ) = smalla( 3, j, ki ) - sum*v2
833  smalla( 4, j, ki ) = smalla( 4, j, ki ) - sum*v3
834  50 CONTINUE
835  DO 60 j = itmp2, 5
836  sum = t1*smalla( j, 2, ki ) +
837  $ t2*smalla( j, 3, ki ) +
838  $ t3*smalla( j, 4, ki )
839  smalla( j, 2, ki ) = smalla( j, 2, ki ) - sum
840  smalla( j, 3, ki ) = smalla( j, 3, ki ) -
841  $ sum*conjg( v2 )
842  smalla( j, 4, ki ) = smalla( j, 4, ki ) -
843  $ sum*conjg( v3 )
844  60 CONTINUE
845  END IF
846  END IF
847 *
848  IF( ( mod( istop-1, hbl ).EQ.hbl-1 ) .AND.
849  $ ( myrow.EQ.icurrow( ki ) ) .AND.
850  $ ( mycol.EQ.icurcol( ki ) ) .AND.
851  $ ( istart.LE.min( i, istop ) ) ) THEN
852  k = istop
853  nr = min( 3, i-k+1 )
854  IF( k.GT.m ) THEN
855  CALL ccopy( nr, smalla( 3, 2, ki ), 1, vcopy, 1 )
856  ELSE
857  vcopy( 1 ) = v1save
858  vcopy( 2 ) = v2save
859  vcopy( 3 ) = v3save
860  END IF
861  CALL clarfg( nr, vcopy( 1 ), vcopy( 2 ), 1, t1copy )
862  IF( k.GT.m ) THEN
863  smalla( 3, 2, ki ) = vcopy( 1 )
864  smalla( 4, 2, ki ) = zero
865  IF( k.LT.i-1 )
866  $ smalla( 5, 2, ki ) = zero
867 *
868 * Set a subdiagonal to zero now if it's possible
869 *
870  IF( ( k-2.GT.m ) .AND. ( mod( k-1, hbl ).GT.1 ) )
871  $ THEN
872  h11 = smalla( 1, 1, ki )
873  h10 = smalla( 2, 1, ki )
874  h22 = smalla( 2, 2, ki )
875  s = cabs1( h11 ) + cabs1( h22 )
876  IF( cabs1( h10 ).LE.max( ulp*s, smlnum ) ) THEN
877  smalla( 2, 1, ki ) = zero
878  END IF
879  END IF
880  ELSE IF( m.GT.l ) THEN
881 *
882 * Following differs in comparison to pslahqr.
883 *
884  smalla( 3, 2, ki ) = smalla( 3, 2, ki ) -
885  $ conjg( t1copy )*
886  $ smalla( 3, 2, ki )
887  END IF
888  v2 = vcopy( 2 )
889  t2 = t1copy*v2
890  work( vecsidx+( k-1 )*3+1 ) = vcopy( 2 )
891  work( vecsidx+( k-1 )*3+2 ) = vcopy( 3 )
892  work( vecsidx+( k-1 )*3+3 ) = t1copy
893  IF( nr.EQ.3 ) THEN
894 *
895 * Do some work so next step is ready...
896 *
897  t1 = t1copy
898  v3 = vcopy( 3 )
899  t3 = t1*v3
900  itmp1 = min( 6, i2-k+3 )
901  itmp2 = max( i1-k+3, 1 )
902  DO 70 j = 3, itmp1
903  sum = conjg( t1 )*smalla( 3, j, ki ) +
904  $ conjg( t2 )*smalla( 4, j, ki ) +
905  $ conjg( t3 )*smalla( 5, j, ki )
906  smalla( 3, j, ki ) = smalla( 3, j, ki ) - sum
907  smalla( 4, j, ki ) = smalla( 4, j, ki ) - sum*v2
908  smalla( 5, j, ki ) = smalla( 5, j, ki ) - sum*v3
909  70 CONTINUE
910  DO 80 j = itmp2, 6
911  sum = t1*smalla( j, 3, ki ) +
912  $ t2*smalla( j, 4, ki ) +
913  $ t3*smalla( j, 5, ki )
914  smalla( j, 3, ki ) = smalla( j, 3, ki ) - sum
915  smalla( j, 4, ki ) = smalla( j, 4, ki ) -
916  $ sum*conjg( v2 )
917  smalla( j, 5, ki ) = smalla( j, 5, ki ) -
918  $ sum*conjg( v3 )
919  80 CONTINUE
920  END IF
921  END IF
922 *
923  IF( ( modkm1.EQ.0 ) .AND. ( istart.LE.i-1 ) .AND.
924  $ ( myrow.EQ.icurrow( ki ) ) .AND.
925  $ ( right.EQ.icurcol( ki ) ) ) THEN
926 *
927 * (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)
928 *
929  irow1 = krow( ki )
930  icol1 = kcol( ki )
931 *
932 * The ELSE part of this IF needs updated VCOPY, this
933 * was not necessary in PSLAHQR.
934 *
935  IF( istart.GT.m ) THEN
936  vcopy( 1 ) = smalla( 4, 3, ki )
937  vcopy( 2 ) = smalla( 5, 3, ki )
938  vcopy( 3 ) = smalla( 6, 3, ki )
939  nr = min( 3, i-istart+1 )
940  CALL clarfg( nr, vcopy( 1 ), vcopy( 2 ), 1,
941  $ t1copy )
942  a( ( icol1-2 )*lda+irow1 ) = vcopy( 1 )
943  a( ( icol1-2 )*lda+irow1+1 ) = zero
944  IF( istart.LT.i-1 ) THEN
945  a( ( icol1-2 )*lda+irow1+2 ) = zero
946  END IF
947  ELSE
948 *
949 * If NPCOL.NE.1 THEN we need updated VCOPY.
950 *
951  nr = min( 3, i-istart+1 )
952  IF( npcol.EQ.1 ) THEN
953  vcopy( 1 ) = v1save
954  vcopy( 2 ) = v2save
955  vcopy( 3 ) = v3save
956  ELSE
957 *
958 * Get updated VCOPY from RIGHT
959 *
960  CALL cgerv2d( contxt, 3, 1, vcopy, 3, myrow,
961  $ right )
962  END IF
963  CALL clarfg( nr, vcopy( 1 ), vcopy( 2 ), 1,
964  $ t1copy )
965  IF( m.GT.l ) THEN
966 *
967 * Following differs in comparison to pslahqr.
968 *
969  a( ( icol1-2 )*lda+irow1 ) = a( ( icol1-2 )*lda+
970  $ irow1 )*conjg( one-t1copy )
971  END IF
972  END IF
973  END IF
974 *
975  IF( ( myrow.EQ.icurrow( ki ) ) .AND.
976  $ ( mycol.EQ.icurcol( ki ) ) .AND.
977  $ ( ( ( modkm1.EQ.hbl-2 ) .AND. ( istart.EQ.i-
978  $ 1 ) ) .OR. ( ( modkm1.LT.hbl-2 ) .AND. ( istart.LE.i-
979  $ 1 ) ) ) ) THEN
980 *
981 * (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)
982 *
983  irow1 = krow( ki )
984  icol1 = kcol( ki )
985  DO 110 k = istart, istop
986 *
987 * Create and do these transforms
988 *
989  nr = min( 3, i-k+1 )
990  IF( k.GT.m ) THEN
991  IF( mod( k-1, hbl ).EQ.0 ) THEN
992  vcopy( 1 ) = smalla( 4, 3, ki )
993  vcopy( 2 ) = smalla( 5, 3, ki )
994  vcopy( 3 ) = smalla( 6, 3, ki )
995  ELSE
996  vcopy( 1 ) = a( ( icol1-2 )*lda+irow1 )
997  vcopy( 2 ) = a( ( icol1-2 )*lda+irow1+1 )
998  IF( nr.EQ.3 ) THEN
999  vcopy( 3 ) = a( ( icol1-2 )*lda+irow1+2 )
1000  END IF
1001  END IF
1002  ELSE
1003  vcopy( 1 ) = v1save
1004  vcopy( 2 ) = v2save
1005  vcopy( 3 ) = v3save
1006  END IF
1007 *
1008 * Must send uptodate copy of VCOPY to left.
1009 *
1010  IF( npcol.GT.1 .AND. istart.LE.m .AND.
1011  $ mod( k-1, hbl ).EQ.0 ) THEN
1012  CALL cgesd2d( contxt, 3, 1, vcopy, 3, myrow,
1013  $ left )
1014  END IF
1015  CALL clarfg( nr, vcopy( 1 ), vcopy( 2 ), 1,
1016  $ t1copy )
1017  IF( k.GT.m ) THEN
1018  IF( mod( k-1, hbl ).GT.0 ) THEN
1019  a( ( icol1-2 )*lda+irow1 ) = vcopy( 1 )
1020  a( ( icol1-2 )*lda+irow1+1 ) = zero
1021  IF( k.LT.i-1 ) THEN
1022  a( ( icol1-2 )*lda+irow1+2 ) = zero
1023  END IF
1024 *
1025 * Set a subdiagonal to zero now if it's possible
1026 *
1027  IF( ( irow1.GT.2 ) .AND. ( icol1.GT.2 ) .AND.
1028  $ ( k-2.GT.m ) .AND. ( mod( k-1,
1029  $ hbl ).GT.1 ) ) THEN
1030  h11 = a( ( icol1-3 )*lda+irow1-2 )
1031  h10 = a( ( icol1-3 )*lda+irow1-1 )
1032  h22 = a( ( icol1-2 )*lda+irow1-1 )
1033  s = cabs1( h11 ) + cabs1( h22 )
1034  IF( cabs1( h10 ).LE.max( ulp*s, smlnum ) )
1035  $ THEN
1036  a( ( icol1-3 )*lda+irow1-1 ) = zero
1037  END IF
1038  END IF
1039  END IF
1040  ELSE IF( m.GT.l ) THEN
1041  IF( mod( k-1, hbl ).GT.0 ) THEN
1042 *
1043 * Following differs in comparison to pslahqr.
1044 *
1045  a( ( icol1-2 )*lda+irow1 ) = a( ( icol1-2 )*
1046  $ lda+irow1 )*conjg( one-t1copy )
1047  END IF
1048  END IF
1049  v2 = vcopy( 2 )
1050  t2 = t1copy*v2
1051  work( vecsidx+( k-1 )*3+1 ) = vcopy( 2 )
1052  work( vecsidx+( k-1 )*3+2 ) = vcopy( 3 )
1053  work( vecsidx+( k-1 )*3+3 ) = t1copy
1054  t1 = t1copy
1055  IF( k.LT.istop ) THEN
1056 *
1057 * Do some work so next step is ready...
1058 *
1059  v3 = vcopy( 3 )
1060  t3 = t1*v3
1061  DO 90 j = ( icol1-1 )*lda + irow1,
1062  $ ( min( k2( ki )+1, i-1 )+icol1-k-1 )*
1063  $ lda + irow1, lda
1064  sum = conjg( t1 )*a( j ) +
1065  $ conjg( t2 )*a( j+1 ) +
1066  $ conjg( t3 )*a( j+2 )
1067  a( j ) = a( j ) - sum
1068  a( j+1 ) = a( j+1 ) - sum*v2
1069  a( j+2 ) = a( j+2 ) - sum*v3
1070  90 CONTINUE
1071  DO 100 j = irow1 + 1, irow1 + 3
1072  sum = t1*a( ( icol1-1 )*lda+j ) +
1073  $ t2*a( icol1*lda+j ) +
1074  $ t3*a( ( icol1+1 )*lda+j )
1075  a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*lda+
1076  $ j ) - sum
1077  a( icol1*lda+j ) = a( icol1*lda+j ) -
1078  $ sum*conjg( v2 )
1079  a( ( icol1+1 )*lda+j ) = a( ( icol1+1 )*lda+
1080  $ j ) - sum*conjg( v3 )
1081  100 CONTINUE
1082  END IF
1083  irow1 = irow1 + 1
1084  icol1 = icol1 + 1
1085  110 CONTINUE
1086  END IF
1087  120 CONTINUE
1088 *
1089 * First part of applying the transforms is complete.
1090 * Broadcasts of the Householder data is done here.
1091 *
1092  DO 130 ki = 1, ibulge
1093 *
1094  istart = max( k1( ki ), m )
1095  istop = min( k2( ki ), i-1 )
1096 *
1097 * Broadcast Householder information from the block
1098 *
1099  IF( ( myrow.EQ.icurrow( ki ) ) .AND. ( npcol.GT.1 ) .AND.
1100  $ ( istart.LE.istop ) ) THEN
1101  IF( mycol.NE.icurcol( ki ) ) THEN
1102  CALL cgebr2d( contxt, 'ROW', ' ',
1103  $ 3*( istop-istart+1 ), 1,
1104  $ work( vecsidx+( istart-1 )*3+1 ),
1105  $ 3*( istop-istart+1 ), myrow,
1106  $ icurcol( ki ) )
1107  ELSE
1108  CALL cgebs2d( contxt, 'ROW', ' ',
1109  $ 3*( istop-istart+1 ), 1,
1110  $ work( vecsidx+( istart-1 )*3+1 ),
1111  $ 3*( istop-istart+1 ) )
1112  END IF
1113  END IF
1114  130 CONTINUE
1115 *
1116 * Now do column transforms and finish work
1117 *
1118  DO 140 ki = 1, ibulge
1119 *
1120  istart = max( k1( ki ), m )
1121  istop = min( k2( ki ), i-1 )
1122 *
1123  IF( ( mycol.EQ.icurcol( ki ) ) .AND. ( nprow.GT.1 ) .AND.
1124  $ ( istart.LE.istop ) ) THEN
1125  IF( myrow.NE.icurrow( ki ) ) THEN
1126  CALL cgebr2d( contxt, 'COL', ' ',
1127  $ 3*( istop-istart+1 ), 1,
1128  $ work( vecsidx+( istart-1 )*3+1 ),
1129  $ 3*( istop-istart+1 ), icurrow( ki ),
1130  $ mycol )
1131  ELSE
1132  CALL cgebs2d( contxt, 'COL', ' ',
1133  $ 3*( istop-istart+1 ), 1,
1134  $ work( vecsidx+( istart-1 )*3+1 ),
1135  $ 3*( istop-istart+1 ) )
1136  END IF
1137  END IF
1138  140 CONTINUE
1139 *
1140 *
1141 * Now do make up work to have things in block fashion
1142 *
1143  DO 160 ki = 1, ibulge
1144  istart = max( k1( ki ), m )
1145  istop = min( k2( ki ), i-1 )
1146 *
1147  modkm1 = mod( istart-1, hbl )
1148  IF( ( myrow.EQ.icurrow( ki ) ) .AND.
1149  $ ( mycol.EQ.icurcol( ki ) ) .AND.
1150  $ ( ( ( modkm1.EQ.hbl-2 ) .AND. ( istart.EQ.i-
1151  $ 1 ) ) .OR. ( ( modkm1.LT.hbl-2 ) .AND. ( istart.LE.i-
1152  $ 1 ) ) ) ) THEN
1153 *
1154 * (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)
1155 *
1156  irow1 = krow( ki )
1157  icol1 = kcol( ki )
1158  DO 150 k = istart, istop
1159 *
1160 * Catch up on column & border work
1161 *
1162  nr = min( 3, i-k+1 )
1163  v2 = work( vecsidx+( k-1 )*3+1 )
1164  v3 = work( vecsidx+( k-1 )*3+2 )
1165  t1 = work( vecsidx+( k-1 )*3+3 )
1166  t2 = t1*v2
1167  IF( k.LT.istop ) THEN
1168 *
1169 * Do some work so next step is ready...
1170 *
1171  t3 = t1*v3
1172  CALL claref( 'Col', a, lda, .false., z, ldz,
1173  $ .false., icol1, icol1, istart,
1174  $ istop, min( istart+1, i )-k+irow1,
1175  $ irow1, liloz, lihiz,
1176  $ work( vecsidx+1 ), v2, v3, t1, t2,
1177  $ t3 )
1178  irow1 = irow1 + 1
1179  icol1 = icol1 + 1
1180  ELSE
1181  IF( ( nr.EQ.3 ) .AND. ( mod( k-1,
1182  $ hbl ).LT.hbl-2 ) ) THEN
1183  t3 = t1*v3
1184  CALL claref( 'Row', a, lda, .false., z, ldz,
1185  $ .false., irow1, irow1, istart,
1186  $ istop, icol1, min( min( k2( ki )
1187  $ +1, i-1 ), i2 )-k+icol1, liloz,
1188  $ lihiz, work( vecsidx+1 ), v2,
1189  $ v3, t1, t2, t3 )
1190  END IF
1191  END IF
1192  150 CONTINUE
1193  END IF
1194 *
1195 * Send SMALLA back again.
1196 *
1197  k = istart
1198  modkm1 = mod( k-1, hbl )
1199  IF( ( modkm1.GE.hbl-2 ) .AND. ( k.LE.i-1 ) ) THEN
1200  IF( ( modkm1.EQ.hbl-2 ) .AND. ( k.LT.i-1 ) ) THEN
1201 *
1202 * Copy 6 elements from global A(K-1:K+4,K-1:K+4)
1203 *
1204  itmp1 = icurrow( ki )
1205  itmp2 = icurcol( ki )
1206  CALL pclacp3( min( 6, n-k+2 ), k-1, a, desca,
1207  $ smalla( 1, 1, ki ), 6, itmp1, itmp2,
1208  $ 1 )
1209 *
1210  END IF
1211  IF( modkm1.EQ.hbl-1 ) THEN
1212 *
1213 * Copy 6 elements from global A(K-2:K+3,K-2:K+3)
1214 *
1215  itmp1 = icurrow( ki )
1216  itmp2 = icurcol( ki )
1217  CALL pclacp3( min( 6, n-k+3 ), k-2, a, desca,
1218  $ smalla( 1, 1, ki ), 6, itmp1, itmp2,
1219  $ 1 )
1220  END IF
1221  END IF
1222 *
1223  160 CONTINUE
1224 *
1225  170 CONTINUE
1226 *
1227 * Now start major set of block ROW reflections
1228 *
1229  DO 180 ki = 1, ibulge
1230  IF( ( myrow.NE.icurrow( ki ) ) .AND.
1231  $ ( down.NE.icurrow( ki ) ) )GO TO 180
1232  istart = max( k1( ki ), m )
1233  istop = min( k2( ki ), i-1 )
1234 *
1235  IF( ( istop.GT.istart ) .AND.
1236  $ ( mod( istart-1, hbl ).LT.hbl-2 ) .AND.
1237  $ ( icurrow( ki ).EQ.myrow ) ) THEN
1238  irow1 = min( k2( ki )+1, i-1 ) + 1
1239  CALL infog1l( irow1, hbl, npcol, mycol, jafirst,
1240  $ itmp1, itmp2 )
1241  itmp2 = locali2
1242  ii = krow( ki )
1243  CALL claref( 'Row', a, lda, wantz, z, ldz, .true., ii,
1244  $ ii, istart, istop, itmp1, itmp2, liloz,
1245  $ lihiz, work( vecsidx+1 ), v2, v3, t1, t2,
1246  $ t3 )
1247  END IF
1248  180 CONTINUE
1249 *
1250  DO 220 ki = 1, ibulge
1251  IF( krow( ki ).GT.kp2row( ki ) )
1252  $ GO TO 220
1253  IF( ( myrow.NE.icurrow( ki ) ) .AND.
1254  $ ( down.NE.icurrow( ki ) ) )GO TO 220
1255  istart = max( k1( ki ), m )
1256  istop = min( k2( ki ), i-1 )
1257  IF( ( istart.EQ.istop ) .OR.
1258  $ ( mod( istart-1, hbl ).GE.hbl-2 ) .OR.
1259  $ ( icurrow( ki ).NE.myrow ) ) THEN
1260  DO 210 k = istart, istop
1261  v2 = work( vecsidx+( k-1 )*3+1 )
1262  v3 = work( vecsidx+( k-1 )*3+2 )
1263  t1 = work( vecsidx+( k-1 )*3+3 )
1264  nr = min( 3, i-k+1 )
1265  IF( ( nr.EQ.3 ) .AND. ( krow( ki ).LE.
1266  $ kp2row( ki ) ) ) THEN
1267  IF( ( k.LT.istop ) .AND.
1268  $ ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
1269  itmp1 = min( k2( ki )+1, i-1 ) + 1
1270  ELSE
1271  IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1272  itmp1 = min( k2( ki )+1, i-1 ) + 1
1273  END IF
1274  IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1275  itmp1 = min( k+4, i2 ) + 1
1276  END IF
1277  IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1278  itmp1 = min( k+3, i2 ) + 1
1279  END IF
1280  END IF
1281 *
1282 * Find local coor of rows K through K+2
1283 *
1284  irow1 = krow( ki )
1285  irow2 = kp2row( ki )
1286  IF( ( k.GT.istart ) .AND.
1287  $ ( mod( k-1, hbl ).GE.hbl-2 ) ) THEN
1288  IF( down.EQ.icurrow( ki ) ) THEN
1289  irow1 = irow1 + 1
1290  END IF
1291  IF( myrow.EQ.icurrow( ki ) ) THEN
1292  irow2 = irow2 + 1
1293  END IF
1294  END IF
1295  CALL infog1l( itmp1, hbl, npcol, mycol, jafirst,
1296  $ icol1, icol2 )
1297  icol2 = locali2
1298  IF( ( mod( k-1, hbl ).LT.hbl-2 ) .OR.
1299  $ ( nprow.EQ.1 ) ) THEN
1300  t2 = t1*v2
1301  t3 = t1*v3
1302  CALL claref( 'Row', a, lda, wantz, z, ldz,
1303  $ .false., irow1, irow1, istart,
1304  $ istop, icol1, icol2, liloz,
1305  $ lihiz, work( vecsidx+1 ), v2,
1306  $ v3, t1, t2, t3 )
1307  END IF
1308  IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
1309  $ ( nprow.GT.1 ) ) THEN
1310  IF( irow1.NE.irow2 ) THEN
1311  CALL cgesd2d( contxt, 2, icol2-icol1+1,
1312  $ a( ( icol1-1 )*lda+irow1 ),
1313  $ lda, down, mycol )
1314  IF( skip .AND. ( istart.EQ.istop ) ) THEN
1315  CALL cgerv2d( contxt, 2, icol2-icol1+1,
1316  $ a( ( icol1-1 )*lda+
1317  $ irow1 ), lda, down,
1318  $ mycol )
1319  END IF
1320  ELSE IF( skip ) THEN
1321  CALL cgerv2d( contxt, 2, icol2-icol1+1,
1322  $ work( irbuf+1 ), 2, up,
1323  $ mycol )
1324  t2 = t1*v2
1325  t3 = t1*v3
1326  DO 190 j = icol1, icol2
1327  sum = conjg( t1 )*
1328  $ work( irbuf+2*( j-icol1 )+1 ) +
1329  $ conjg( t2 )*work( irbuf+2*
1330  $ ( j-icol1 )+2 ) +
1331  $ conjg( t3 )*a( ( j-1 )*lda+
1332  $ irow1 )
1333  work( irbuf+2*( j-icol1 )+1 )
1334  $ = work( irbuf+2*( j-icol1 )+1 ) -
1335  $ sum
1336  work( irbuf+2*( j-icol1 )+2 )
1337  $ = work( irbuf+2*( j-icol1 )+2 ) -
1338  $ sum*v2
1339  a( ( j-1 )*lda+irow1 ) = a( ( j-1 )*
1340  $ lda+irow1 ) - sum*v3
1341  190 CONTINUE
1342  IF( istart.EQ.istop ) THEN
1343  CALL cgesd2d( contxt, 2, icol2-icol1+1,
1344  $ work( irbuf+1 ), 2, up,
1345  $ mycol )
1346  END IF
1347  END IF
1348  END IF
1349  IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
1350  $ ( nprow.GT.1 ) ) THEN
1351  IF( irow1.EQ.irow2 ) THEN
1352  IF( istart.EQ.istop ) THEN
1353  CALL cgesd2d( contxt, 2, icol2-icol1+1,
1354  $ a( ( icol1-1 )*lda+irow1-
1355  $ 1 ), lda, down, mycol )
1356  END IF
1357  IF( skip ) THEN
1358  CALL cgerv2d( contxt, 2, icol2-icol1+1,
1359  $ a( ( icol1-1 )*lda+irow1-
1360  $ 1 ), lda, down, mycol )
1361  END IF
1362  ELSE IF( skip ) THEN
1363  IF( istart.EQ.istop ) THEN
1364  CALL cgerv2d( contxt, 2, icol2-icol1+1,
1365  $ work( irbuf+1 ), 2, up,
1366  $ mycol )
1367  END IF
1368  t2 = t1*v2
1369  t3 = t1*v3
1370  DO 200 j = icol1, icol2
1371  sum = conjg( t1 )*
1372  $ work( irbuf+2*( j-icol1 )+2 ) +
1373  $ conjg( t2 )*a( ( j-1 )*lda+
1374  $ irow1 ) + conjg( t3 )*
1375  $ a( ( j-1 )*lda+irow1+1 )
1376  work( irbuf+2*( j-icol1 )+2 )
1377  $ = work( irbuf+2*( j-icol1 )+2 ) -
1378  $ sum
1379  a( ( j-1 )*lda+irow1 ) = a( ( j-1 )*
1380  $ lda+irow1 ) - sum*v2
1381  a( ( j-1 )*lda+irow1+1 ) = a( ( j-1 )*
1382  $ lda+irow1+1 ) - sum*v3
1383  200 CONTINUE
1384  CALL cgesd2d( contxt, 2, icol2-icol1+1,
1385  $ work( irbuf+1 ), 2, up,
1386  $ mycol )
1387 *
1388  END IF
1389  END IF
1390  END IF
1391  210 CONTINUE
1392  END IF
1393  220 CONTINUE
1394 *
1395  IF( skip )
1396  $ GO TO 290
1397 *
1398  DO 260 ki = 1, ibulge
1399  IF( krow( ki ).GT.kp2row( ki ) )
1400  $ GO TO 260
1401  IF( ( myrow.NE.icurrow( ki ) ) .AND.
1402  $ ( down.NE.icurrow( ki ) ) )GO TO 260
1403  istart = max( k1( ki ), m )
1404  istop = min( k2( ki ), i-1 )
1405  IF( ( istart.EQ.istop ) .OR.
1406  $ ( mod( istart-1, hbl ).GE.hbl-2 ) .OR.
1407  $ ( icurrow( ki ).NE.myrow ) ) THEN
1408  DO 250 k = istart, istop
1409  v2 = work( vecsidx+( k-1 )*3+1 )
1410  v3 = work( vecsidx+( k-1 )*3+2 )
1411  t1 = work( vecsidx+( k-1 )*3+3 )
1412  nr = min( 3, i-k+1 )
1413  IF( ( nr.EQ.3 ) .AND. ( krow( ki ).LE.
1414  $ kp2row( ki ) ) ) THEN
1415  IF( ( k.LT.istop ) .AND.
1416  $ ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
1417  itmp1 = min( k2( ki )+1, i-1 ) + 1
1418  ELSE
1419  IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1420  itmp1 = min( k2( ki )+1, i-1 ) + 1
1421  END IF
1422  IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1423  itmp1 = min( k+4, i2 ) + 1
1424  END IF
1425  IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1426  itmp1 = min( k+3, i2 ) + 1
1427  END IF
1428  END IF
1429 *
1430 * Find local coor of rows K through K+2
1431 *
1432  irow1 = krow( ki )
1433  irow2 = kp2row( ki )
1434  IF( ( k.GT.istart ) .AND.
1435  $ ( mod( k-1, hbl ).GE.hbl-2 ) ) THEN
1436  IF( down.EQ.icurrow( ki ) ) THEN
1437  irow1 = irow1 + 1
1438  END IF
1439  IF( myrow.EQ.icurrow( ki ) ) THEN
1440  irow2 = irow2 + 1
1441  END IF
1442  END IF
1443  CALL infog1l( itmp1, hbl, npcol, mycol, jafirst,
1444  $ icol1, icol2 )
1445  icol2 = locali2
1446  IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
1447  $ ( nprow.GT.1 ) ) THEN
1448  IF( irow1.EQ.irow2 ) THEN
1449  CALL cgerv2d( contxt, 2, icol2-icol1+1,
1450  $ work( irbuf+1 ), 2, up,
1451  $ mycol )
1452  t2 = t1*v2
1453  t3 = t1*v3
1454  DO 230 j = icol1, icol2
1455  sum = conjg( t1 )*
1456  $ work( irbuf+2*( j-icol1 )+1 ) +
1457  $ conjg( t2 )*work( irbuf+2*
1458  $ ( j-icol1 )+2 ) +
1459  $ conjg( t3 )*a( ( j-1 )*lda+
1460  $ irow1 )
1461  work( irbuf+2*( j-icol1 )+1 )
1462  $ = work( irbuf+2*( j-icol1 )+1 ) -
1463  $ sum
1464  work( irbuf+2*( j-icol1 )+2 )
1465  $ = work( irbuf+2*( j-icol1 )+2 ) -
1466  $ sum*v2
1467  a( ( j-1 )*lda+irow1 ) = a( ( j-1 )*
1468  $ lda+irow1 ) - sum*v3
1469  230 CONTINUE
1470  IF( istart.EQ.istop ) THEN
1471  CALL cgesd2d( contxt, 2, icol2-icol1+1,
1472  $ work( irbuf+1 ), 2, up,
1473  $ mycol )
1474  END IF
1475  END IF
1476  END IF
1477  IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
1478  $ ( nprow.GT.1 ) ) THEN
1479  IF( irow1.NE.irow2 ) THEN
1480  IF( istart.EQ.istop ) THEN
1481  CALL cgerv2d( contxt, 2, icol2-icol1+1,
1482  $ work( irbuf+1 ), 2, up,
1483  $ mycol )
1484  END IF
1485  t2 = t1*v2
1486  t3 = t1*v3
1487  DO 240 j = icol1, icol2
1488  sum = conjg( t1 )*
1489  $ work( irbuf+2*( j-icol1 )+2 ) +
1490  $ conjg( t2 )*a( ( j-1 )*lda+
1491  $ irow1 ) + conjg( t3 )*
1492  $ a( ( j-1 )*lda+irow1+1 )
1493  work( irbuf+2*( j-icol1 )+2 )
1494  $ = work( irbuf+2*( j-icol1 )+2 ) -
1495  $ sum
1496  a( ( j-1 )*lda+irow1 ) = a( ( j-1 )*
1497  $ lda+irow1 ) - sum*v2
1498  a( ( j-1 )*lda+irow1+1 ) = a( ( j-1 )*
1499  $ lda+irow1+1 ) - sum*v3
1500  240 CONTINUE
1501  CALL cgesd2d( contxt, 2, icol2-icol1+1,
1502  $ work( irbuf+1 ), 2, up,
1503  $ mycol )
1504  END IF
1505  END IF
1506  END IF
1507  250 CONTINUE
1508  END IF
1509  260 CONTINUE
1510 *
1511  DO 280 ki = 1, ibulge
1512  IF( krow( ki ).GT.kp2row( ki ) )
1513  $ GO TO 280
1514  IF( ( myrow.NE.icurrow( ki ) ) .AND.
1515  $ ( down.NE.icurrow( ki ) ) )GO TO 280
1516  istart = max( k1( ki ), m )
1517  istop = min( k2( ki ), i-1 )
1518  IF( ( istart.EQ.istop ) .OR.
1519  $ ( mod( istart-1, hbl ).GE.hbl-2 ) .OR.
1520  $ ( icurrow( ki ).NE.myrow ) ) THEN
1521  DO 270 k = istart, istop
1522  v2 = work( vecsidx+( k-1 )*3+1 )
1523  v3 = work( vecsidx+( k-1 )*3+2 )
1524  t1 = work( vecsidx+( k-1 )*3+3 )
1525  nr = min( 3, i-k+1 )
1526  IF( ( nr.EQ.3 ) .AND. ( krow( ki ).LE.
1527  $ kp2row( ki ) ) ) THEN
1528  IF( ( k.LT.istop ) .AND.
1529  $ ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
1530  itmp1 = min( k2( ki )+1, i-1 ) + 1
1531  ELSE
1532  IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1533  itmp1 = min( k2( ki )+1, i-1 ) + 1
1534  END IF
1535  IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1536  itmp1 = min( k+4, i2 ) + 1
1537  END IF
1538  IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1539  itmp1 = min( k+3, i2 ) + 1
1540  END IF
1541  END IF
1542 *
1543 * Find local coor of rows K through K+2
1544 *
1545  irow1 = krow( ki )
1546  irow2 = kp2row( ki )
1547  IF( ( k.GT.istart ) .AND.
1548  $ ( mod( k-1, hbl ).GE.hbl-2 ) ) THEN
1549  IF( down.EQ.icurrow( ki ) ) THEN
1550  irow1 = irow1 + 1
1551  END IF
1552  IF( myrow.EQ.icurrow( ki ) ) THEN
1553  irow2 = irow2 + 1
1554  END IF
1555  END IF
1556  CALL infog1l( itmp1, hbl, npcol, mycol, jafirst,
1557  $ icol1, icol2 )
1558  icol2 = locali2
1559  IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
1560  $ ( nprow.GT.1 ) ) THEN
1561  IF( irow1.NE.irow2 ) THEN
1562  IF( istart.EQ.istop ) THEN
1563  CALL cgerv2d( contxt, 2, icol2-icol1+1,
1564  $ a( ( icol1-1 )*lda+
1565  $ irow1 ), lda, down,
1566  $ mycol )
1567  END IF
1568  END IF
1569  END IF
1570  IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
1571  $ ( nprow.GT.1 ) ) THEN
1572  IF( irow1.EQ.irow2 ) THEN
1573  CALL cgerv2d( contxt, 2, icol2-icol1+1,
1574  $ a( ( icol1-1 )*lda+irow1-
1575  $ 1 ), lda, down, mycol )
1576  END IF
1577  END IF
1578  END IF
1579  270 CONTINUE
1580  END IF
1581  280 CONTINUE
1582 *
1583  290 CONTINUE
1584 *
1585 * Now start major set of block COL reflections
1586 *
1587  DO 300 ki = 1, ibulge
1588  IF( ( mycol.NE.icurcol( ki ) ) .AND.
1589  $ ( right.NE.icurcol( ki ) ) )GO TO 300
1590  istart = max( k1( ki ), m )
1591  istop = min( k2( ki ), i-1 )
1592 *
1593  IF( ( ( mod( istart-1, hbl ).LT.hbl-2 ) .OR. ( npcol.EQ.
1594  $ 1 ) ) .AND. ( icurcol( ki ).EQ.mycol ) .AND.
1595  $ ( i-istop+1.GE.3 ) ) THEN
1596  k = istart
1597  IF( ( k.LT.istop ) .AND. ( mod( k-1,
1598  $ hbl ).LT.hbl-2 ) ) THEN
1599  itmp1 = min( istart+1, i ) - 1
1600  ELSE
1601  IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1602  itmp1 = min( k+3, i )
1603  END IF
1604  IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1605  itmp1 = max( i1, k-1 ) - 1
1606  END IF
1607  IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1608  itmp1 = max( i1, k-2 ) - 1
1609  END IF
1610  END IF
1611 *
1612  icol1 = kcol( ki )
1613  CALL infog1l( i1, hbl, nprow, myrow, iafirst, irow1,
1614  $ irow2 )
1615  irow2 = numroc( itmp1, hbl, myrow, iafirst, nprow )
1616  IF( irow1.LE.irow2 ) THEN
1617  itmp2 = irow2
1618  ELSE
1619  itmp2 = -1
1620  END IF
1621  CALL claref( 'Col', a, lda, wantz, z, ldz, .true.,
1622  $ icol1, icol1, istart, istop, irow1,
1623  $ irow2, liloz, lihiz, work( vecsidx+1 ),
1624  $ v2, v3, t1, t2, t3 )
1625  k = istop
1626  IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1627 *
1628 * Do from ITMP1+1 to MIN(K+3,I)
1629 *
1630  IF( mod( k-1, hbl ).LT.hbl-3 ) THEN
1631  irow1 = itmp2 + 1
1632  IF( mod( ( itmp1 / hbl ), nprow ).EQ.myrow )
1633  $ THEN
1634  IF( itmp2.GT.0 ) THEN
1635  irow2 = itmp2 + min( k+3, i ) - itmp1
1636  ELSE
1637  irow2 = irow1 - 1
1638  END IF
1639  ELSE
1640  irow2 = irow1 - 1
1641  END IF
1642  ELSE
1643  CALL infog1l( itmp1+1, hbl, nprow, myrow,
1644  $ iafirst, irow1, irow2 )
1645  irow2 = numroc( min( k+3, i ), hbl, myrow,
1646  $ iafirst, nprow )
1647  END IF
1648  v2 = work( vecsidx+( k-1 )*3+1 )
1649  v3 = work( vecsidx+( k-1 )*3+2 )
1650  t1 = work( vecsidx+( k-1 )*3+3 )
1651  t2 = t1*v2
1652  t3 = t1*v3
1653  icol1 = kcol( ki ) + istop - istart
1654  CALL claref( 'Col', a, lda, .false., z, ldz,
1655  $ .false., icol1, icol1, istart, istop,
1656  $ irow1, irow2, liloz, lihiz,
1657  $ work( vecsidx+1 ), v2, v3, t1, t2,
1658  $ t3 )
1659  END IF
1660  END IF
1661  300 CONTINUE
1662 *
1663  DO 360 ki = 1, ibulge
1664  IF( kcol( ki ).GT.kp2col( ki ) )
1665  $ GO TO 360
1666  IF( ( mycol.NE.icurcol( ki ) ) .AND.
1667  $ ( right.NE.icurcol( ki ) ) )GO TO 360
1668  istart = max( k1( ki ), m )
1669  istop = min( k2( ki ), i-1 )
1670  IF( mod( istart-1, hbl ).GE.hbl-2 ) THEN
1671 *
1672 * INFO is found in a buffer
1673 *
1674  ispec = 1
1675  ELSE
1676 *
1677 * All INFO is local
1678 *
1679  ispec = 0
1680  END IF
1681  DO 350 k = istart, istop
1682 *
1683  v2 = work( vecsidx+( k-1 )*3+1 )
1684  v3 = work( vecsidx+( k-1 )*3+2 )
1685  t1 = work( vecsidx+( k-1 )*3+3 )
1686  nr = min( 3, i-k+1 )
1687  IF( ( nr.EQ.3 ) .AND. ( kcol( ki ).LE.kp2col( ki ) ) )
1688  $ THEN
1689 *
1690  IF( ( k.LT.istop ) .AND.
1691  $ ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
1692  itmp1 = min( istart+1, i ) - 1
1693  ELSE
1694  IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1695  itmp1 = min( k+3, i )
1696  END IF
1697  IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1698  itmp1 = max( i1, k-1 ) - 1
1699  END IF
1700  IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1701  itmp1 = max( i1, k-2 ) - 1
1702  END IF
1703  END IF
1704  IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1705  icol1 = kcol( ki ) + k - istart
1706  icol2 = kp2col( ki ) + k - istart
1707  ELSE
1708  icol1 = kcol( ki )
1709  icol2 = kp2col( ki )
1710  IF( k.GT.istart ) THEN
1711  IF( right.EQ.icurcol( ki ) ) THEN
1712  icol1 = icol1 + 1
1713  END IF
1714  IF( mycol.EQ.icurcol( ki ) ) THEN
1715  icol2 = icol2 + 1
1716  END IF
1717  END IF
1718  END IF
1719  CALL infog1l( i1, hbl, nprow, myrow, iafirst,
1720  $ irow1, irow2 )
1721  irow2 = numroc( itmp1, hbl, myrow, iafirst, nprow )
1722  IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
1723  $ ( npcol.GT.1 ) ) THEN
1724  IF( icol1.NE.icol2 ) THEN
1725  CALL cgesd2d( contxt, irow2-irow1+1, 2,
1726  $ a( ( icol1-1 )*lda+irow1 ),
1727  $ lda, myrow, right )
1728  IF( ( istart.EQ.istop ) .AND. skip ) THEN
1729  CALL cgerv2d( contxt, irow2-irow1+1, 2,
1730  $ a( ( icol1-1 )*lda+irow1 ),
1731  $ lda, myrow, right )
1732  END IF
1733  ELSE IF( skip ) THEN
1734  t2 = t1*v2
1735  t3 = t1*v3
1736  CALL cgerv2d( contxt, irow2-irow1+1, 2,
1737  $ work( icbuf+1 ), irow2-irow1+1,
1738  $ myrow, left )
1739  ii = icbuf - irow1 + 1
1740  jj = icbuf + irow2 - 2*irow1 + 2
1741  DO 310 j = irow1, irow2
1742  sum = t1*work( ii+j ) + t2*work( jj+j ) +
1743  $ t3*a( ( icol1-1 )*lda+j )
1744  work( ii+j ) = work( ii+j ) - sum
1745  work( jj+j ) = work( jj+j ) -
1746  $ sum*conjg( v2 )
1747  a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*
1748  $ lda+j ) - sum*conjg( v3 )
1749  310 CONTINUE
1750  IF( istart.EQ.istop ) THEN
1751  CALL cgesd2d( contxt, irow2-irow1+1, 2,
1752  $ work( icbuf+1 ),
1753  $ irow2-irow1+1, myrow, left )
1754  END IF
1755  END IF
1756  END IF
1757  IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
1758  $ ( npcol.GT.1 ) ) THEN
1759  IF( icol1.EQ.icol2 ) THEN
1760  IF( istart.EQ.istop ) THEN
1761  CALL cgesd2d( contxt, irow2-irow1+1, 2,
1762  $ a( ( icol1-2 )*lda+irow1 ),
1763  $ lda, myrow, right )
1764  END IF
1765  IF( skip ) THEN
1766  CALL cgerv2d( contxt, irow2-irow1+1, 2,
1767  $ a( ( icol1-2 )*lda+irow1 ),
1768  $ lda, myrow, right )
1769  END IF
1770  ELSE IF( skip ) THEN
1771  IF( istart.EQ.istop ) THEN
1772  CALL cgerv2d( contxt, irow2-irow1+1, 2,
1773  $ work( icbuf+1 ),
1774  $ irow2-irow1+1, myrow, left )
1775  END IF
1776  t2 = t1*v2
1777  t3 = t1*v3
1778  ii = icbuf + irow2 - 2*irow1 + 2
1779  DO 320 j = irow1, irow2
1780  sum = t1*work( j+ii ) +
1781  $ t2*a( ( icol1-1 )*lda+j ) +
1782  $ t3*a( icol1*lda+j )
1783  work( j+ii ) = work( j+ii ) - sum
1784  a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*
1785  $ lda+j ) - sum*conjg( v2 )
1786  a( icol1*lda+j ) = a( icol1*lda+j ) -
1787  $ sum*conjg( v3 )
1788  320 CONTINUE
1789  CALL cgesd2d( contxt, irow2-irow1+1, 2,
1790  $ work( icbuf+1 ), irow2-irow1+1,
1791  $ myrow, left )
1792  END IF
1793  END IF
1794 *
1795 * If we want Z and we haven't already done any Z
1796 *
1797  IF( ( wantz ) .AND. ( mod( k-1,
1798  $ hbl ).GE.hbl-2 ) .AND. ( npcol.GT.1 ) ) THEN
1799 *
1800 * Accumulate transformations in the matrix Z
1801 *
1802  irow1 = liloz
1803  irow2 = lihiz
1804  IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1805  IF( icol1.NE.icol2 ) THEN
1806  CALL cgesd2d( contxt, irow2-irow1+1, 2,
1807  $ z( ( icol1-1 )*ldz+irow1 ),
1808  $ ldz, myrow, right )
1809  IF( ( istart.EQ.istop ) .AND. skip ) THEN
1810  CALL cgerv2d( contxt, irow2-irow1+1, 2,
1811  $ z( ( icol1-1 )*ldz+
1812  $ irow1 ), ldz, myrow,
1813  $ right )
1814  END IF
1815  ELSE IF( skip ) THEN
1816  CALL cgerv2d( contxt, irow2-irow1+1, 2,
1817  $ work( izbuf+1 ),
1818  $ irow2-irow1+1, myrow, left )
1819  t2 = t1*v2
1820  t3 = t1*v3
1821  icol1 = ( icol1-1 )*ldz
1822  ii = izbuf - irow1 + 1
1823  jj = izbuf + irow2 - 2*irow1 + 2
1824  DO 330 j = irow1, irow2
1825  sum = t1*work( ii+j ) +
1826  $ t2*work( jj+j ) + t3*z( icol1+j )
1827  work( ii+j ) = work( ii+j ) - sum
1828  work( jj+j ) = work( jj+j ) -
1829  $ sum*conjg( v2 )
1830  z( icol1+j ) = z( icol1+j ) -
1831  $ sum*conjg( v3 )
1832  330 CONTINUE
1833  IF( istart.EQ.istop ) THEN
1834  CALL cgesd2d( contxt, irow2-irow1+1, 2,
1835  $ work( izbuf+1 ),
1836  $ irow2-irow1+1, myrow,
1837  $ left )
1838  END IF
1839  END IF
1840  END IF
1841  IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1842  IF( icol1.EQ.icol2 ) THEN
1843  IF( istart.EQ.istop ) THEN
1844  CALL cgesd2d( contxt, irow2-irow1+1, 2,
1845  $ z( ( icol1-2 )*ldz+
1846  $ irow1 ), ldz, myrow,
1847  $ right )
1848  END IF
1849  IF( skip ) THEN
1850  CALL cgerv2d( contxt, irow2-irow1+1, 2,
1851  $ z( ( icol1-2 )*ldz+
1852  $ irow1 ), ldz, myrow,
1853  $ right )
1854  END IF
1855  ELSE IF( skip ) THEN
1856  IF( istart.EQ.istop ) THEN
1857  CALL cgerv2d( contxt, irow2-irow1+1, 2,
1858  $ work( izbuf+1 ),
1859  $ irow2-irow1+1, myrow,
1860  $ left )
1861  END IF
1862  t2 = t1*v2
1863  t3 = t1*v3
1864  icol1 = ( icol1-1 )*ldz
1865  ii = izbuf + irow2 - 2*irow1 + 2
1866  DO 340 j = irow1, irow2
1867  sum = t1*work( ii+j ) +
1868  $ t2*z( j+icol1 ) +
1869  $ t3*z( j+icol1+ldz )
1870  work( ii+j ) = work( ii+j ) - sum
1871  z( j+icol1 ) = z( j+icol1 ) -
1872  $ sum*conjg( v2 )
1873  z( j+icol1+ldz ) = z( j+icol1+ldz ) -
1874  $ sum*conjg( v3 )
1875  340 CONTINUE
1876  CALL cgesd2d( contxt, irow2-irow1+1, 2,
1877  $ work( izbuf+1 ),
1878  $ irow2-irow1+1, myrow, left )
1879  END IF
1880  END IF
1881  END IF
1882  END IF
1883  350 CONTINUE
1884  360 CONTINUE
1885 *
1886  IF( skip )
1887  $ GO TO 450
1888 *
1889  DO 420 ki = 1, ibulge
1890  IF( kcol( ki ).GT.kp2col( ki ) )
1891  $ GO TO 420
1892  IF( ( mycol.NE.icurcol( ki ) ) .AND.
1893  $ ( right.NE.icurcol( ki ) ) )GO TO 420
1894  istart = max( k1( ki ), m )
1895  istop = min( k2( ki ), i-1 )
1896  IF( mod( istart-1, hbl ).GE.hbl-2 ) THEN
1897 *
1898 * INFO is found in a buffer
1899 *
1900  ispec = 1
1901  ELSE
1902 *
1903 * All INFO is local
1904 *
1905  ispec = 0
1906  END IF
1907  DO 410 k = istart, istop
1908 *
1909  v2 = work( vecsidx+( k-1 )*3+1 )
1910  v3 = work( vecsidx+( k-1 )*3+2 )
1911  t1 = work( vecsidx+( k-1 )*3+3 )
1912  nr = min( 3, i-k+1 )
1913  IF( ( nr.EQ.3 ) .AND. ( kcol( ki ).LE.kp2col( ki ) ) )
1914  $ THEN
1915 *
1916  IF( ( k.LT.istop ) .AND.
1917  $ ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
1918  itmp1 = min( istart+1, i ) - 1
1919  ELSE
1920  IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1921  itmp1 = min( k+3, i )
1922  END IF
1923  IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1924  itmp1 = max( i1, k-1 ) - 1
1925  END IF
1926  IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1927  itmp1 = max( i1, k-2 ) - 1
1928  END IF
1929  END IF
1930  IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1931  icol1 = kcol( ki ) + k - istart
1932  icol2 = kp2col( ki ) + k - istart
1933  ELSE
1934  icol1 = kcol( ki )
1935  icol2 = kp2col( ki )
1936  IF( k.GT.istart ) THEN
1937  IF( right.EQ.icurcol( ki ) ) THEN
1938  icol1 = icol1 + 1
1939  END IF
1940  IF( mycol.EQ.icurcol( ki ) ) THEN
1941  icol2 = icol2 + 1
1942  END IF
1943  END IF
1944  END IF
1945  CALL infog1l( i1, hbl, nprow, myrow, iafirst,
1946  $ irow1, irow2 )
1947  irow2 = numroc( itmp1, hbl, myrow, iafirst, nprow )
1948  IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
1949  $ ( npcol.GT.1 ) ) THEN
1950  IF( icol1.EQ.icol2 ) THEN
1951  CALL cgerv2d( contxt, irow2-irow1+1, 2,
1952  $ work( icbuf+1 ), irow2-irow1+1,
1953  $ myrow, left )
1954  t2 = t1*v2
1955  t3 = t1*v3
1956  ii = icbuf - irow1 + 1
1957  jj = icbuf + irow2 - 2*irow1 + 2
1958  DO 370 j = irow1, irow2
1959  sum = t1*work( ii+j ) + t2*work( jj+j ) +
1960  $ t3*a( ( icol1-1 )*lda+j )
1961  work( ii+j ) = work( ii+j ) - sum
1962  work( jj+j ) = work( jj+j ) -
1963  $ sum*conjg( v2 )
1964  a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*
1965  $ lda+j ) - sum*conjg( v3 )
1966  370 CONTINUE
1967  IF( istart.EQ.istop ) THEN
1968  CALL cgesd2d( contxt, irow2-irow1+1, 2,
1969  $ work( icbuf+1 ),
1970  $ irow2-irow1+1, myrow, left )
1971  END IF
1972  END IF
1973  END IF
1974  IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
1975  $ ( npcol.GT.1 ) ) THEN
1976  IF( icol1.NE.icol2 ) THEN
1977  IF( istart.EQ.istop ) THEN
1978  CALL cgerv2d( contxt, irow2-irow1+1, 2,
1979  $ work( icbuf+1 ),
1980  $ irow2-irow1+1, myrow, left )
1981  END IF
1982  t2 = t1*v2
1983  t3 = t1*v3
1984  ii = icbuf + irow2 - 2*irow1 + 2
1985  DO 380 j = irow1, irow2
1986  sum = t1*work( j+ii ) +
1987  $ t2*a( ( icol1-1 )*lda+j ) +
1988  $ t3*a( icol1*lda+j )
1989  work( j+ii ) = work( j+ii ) - sum
1990  a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*
1991  $ lda+j ) - sum*conjg( v2 )
1992  a( icol1*lda+j ) = a( icol1*lda+j ) -
1993  $ sum*conjg( v3 )
1994  380 CONTINUE
1995  CALL cgesd2d( contxt, irow2-irow1+1, 2,
1996  $ work( icbuf+1 ), irow2-irow1+1,
1997  $ myrow, left )
1998  END IF
1999  END IF
2000 *
2001 *
2002 * If we want Z and we haven't already done any Z
2003  IF( ( wantz ) .AND. ( mod( k-1,
2004  $ hbl ).GE.hbl-2 ) .AND. ( npcol.GT.1 ) ) THEN
2005 *
2006 * Accumulate transformations in the matrix Z
2007 *
2008  irow1 = liloz
2009  irow2 = lihiz
2010  IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
2011  IF( icol1.EQ.icol2 ) THEN
2012  CALL cgerv2d( contxt, irow2-irow1+1, 2,
2013  $ work( izbuf+1 ),
2014  $ irow2-irow1+1, myrow, left )
2015  t2 = t1*v2
2016  t3 = t1*v3
2017  icol1 = ( icol1-1 )*ldz
2018  ii = izbuf - irow1 + 1
2019  jj = izbuf + irow2 - 2*irow1 + 2
2020  DO 390 j = irow1, irow2
2021  sum = t1*work( ii+j ) +
2022  $ t2*work( jj+j ) + t3*z( icol1+j )
2023  work( ii+j ) = work( ii+j ) - sum
2024  work( jj+j ) = work( jj+j ) -
2025  $ sum*conjg( v2 )
2026  z( icol1+j ) = z( icol1+j ) -
2027  $ sum*conjg( v3 )
2028  390 CONTINUE
2029  IF( istart.EQ.istop ) THEN
2030  CALL cgesd2d( contxt, irow2-irow1+1, 2,
2031  $ work( izbuf+1 ),
2032  $ irow2-irow1+1, myrow,
2033  $ left )
2034  END IF
2035  END IF
2036  END IF
2037  IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
2038  IF( icol1.NE.icol2 ) THEN
2039  IF( istart.EQ.istop ) THEN
2040  CALL cgerv2d( contxt, irow2-irow1+1, 2,
2041  $ work( izbuf+1 ),
2042  $ irow2-irow1+1, myrow,
2043  $ left )
2044  END IF
2045  t2 = t1*v2
2046  t3 = t1*v3
2047  icol1 = ( icol1-1 )*ldz
2048  ii = izbuf + irow2 - 2*irow1 + 2
2049  DO 400 j = irow1, irow2
2050  sum = t1*work( ii+j ) +
2051  $ t2*z( j+icol1 ) +
2052  $ t3*z( j+icol1+ldz )
2053  work( ii+j ) = work( ii+j ) - sum
2054  z( j+icol1 ) = z( j+icol1 ) -
2055  $ sum*conjg( v2 )
2056  z( j+icol1+ldz ) = z( j+icol1+ldz ) -
2057  $ sum*conjg( v3 )
2058  400 CONTINUE
2059  CALL cgesd2d( contxt, irow2-irow1+1, 2,
2060  $ work( izbuf+1 ),
2061  $ irow2-irow1+1, myrow, left )
2062  END IF
2063  END IF
2064  END IF
2065  END IF
2066  410 CONTINUE
2067  420 CONTINUE
2068 *
2069  DO 440 ki = 1, ibulge
2070  IF( kcol( ki ).GT.kp2col( ki ) )
2071  $ GO TO 440
2072  IF( ( mycol.NE.icurcol( ki ) ) .AND.
2073  $ ( right.NE.icurcol( ki ) ) )GO TO 440
2074  istart = max( k1( ki ), m )
2075  istop = min( k2( ki ), i-1 )
2076  IF( mod( istart-1, hbl ).GE.hbl-2 ) THEN
2077 *
2078 * INFO is found in a buffer
2079 *
2080  ispec = 1
2081  ELSE
2082 *
2083 * All INFO is local
2084 *
2085  ispec = 0
2086  END IF
2087  DO 430 k = istart, istop
2088 *
2089  v2 = work( vecsidx+( k-1 )*3+1 )
2090  v3 = work( vecsidx+( k-1 )*3+2 )
2091  t1 = work( vecsidx+( k-1 )*3+3 )
2092  nr = min( 3, i-k+1 )
2093  IF( ( nr.EQ.3 ) .AND. ( kcol( ki ).LE.kp2col( ki ) ) )
2094  $ THEN
2095 *
2096  IF( ( k.LT.istop ) .AND.
2097  $ ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
2098  itmp1 = min( istart+1, i ) - 1
2099  ELSE
2100  IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
2101  itmp1 = min( k+3, i )
2102  END IF
2103  IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
2104  itmp1 = max( i1, k-1 ) - 1
2105  END IF
2106  IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
2107  itmp1 = max( i1, k-2 ) - 1
2108  END IF
2109  END IF
2110  IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
2111  icol1 = kcol( ki ) + k - istart
2112  icol2 = kp2col( ki ) + k - istart
2113  ELSE
2114  icol1 = kcol( ki )
2115  icol2 = kp2col( ki )
2116  IF( k.GT.istart ) THEN
2117  IF( right.EQ.icurcol( ki ) ) THEN
2118  icol1 = icol1 + 1
2119  END IF
2120  IF( mycol.EQ.icurcol( ki ) ) THEN
2121  icol2 = icol2 + 1
2122  END IF
2123  END IF
2124  END IF
2125  CALL infog1l( i1, hbl, nprow, myrow, iafirst,
2126  $ irow1, irow2 )
2127  irow2 = numroc( itmp1, hbl, myrow, iafirst, nprow )
2128  IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
2129  $ ( npcol.GT.1 ) ) THEN
2130  IF( icol1.NE.icol2 ) THEN
2131  IF( istart.EQ.istop ) THEN
2132  CALL cgerv2d( contxt, irow2-irow1+1, 2,
2133  $ a( ( icol1-1 )*lda+irow1 ),
2134  $ lda, myrow, right )
2135  END IF
2136  END IF
2137  END IF
2138  IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
2139  $ ( npcol.GT.1 ) ) THEN
2140  IF( icol1.EQ.icol2 ) THEN
2141  CALL cgerv2d( contxt, irow2-irow1+1, 2,
2142  $ a( ( icol1-2 )*lda+irow1 ),
2143  $ lda, myrow, right )
2144  END IF
2145  END IF
2146 *
2147 * If we want Z and we haven't already done any Z
2148 *
2149  IF( ( wantz ) .AND. ( mod( k-1,
2150  $ hbl ).GE.hbl-2 ) .AND. ( npcol.GT.1 ) ) THEN
2151 *
2152 * Accumulate transformations in the matrix Z
2153 *
2154  irow1 = liloz
2155  irow2 = lihiz
2156  IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
2157  IF( icol1.NE.icol2 ) THEN
2158  IF( istart.EQ.istop ) THEN
2159  CALL cgerv2d( contxt, irow2-irow1+1, 2,
2160  $ z( ( icol1-1 )*ldz+
2161  $ irow1 ), ldz, myrow,
2162  $ right )
2163  END IF
2164  END IF
2165  END IF
2166  IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
2167  IF( icol1.EQ.icol2 ) THEN
2168  CALL cgerv2d( contxt, irow2-irow1+1, 2,
2169  $ z( ( icol1-2 )*ldz+irow1 ),
2170  $ ldz, myrow, right )
2171  END IF
2172  END IF
2173  END IF
2174  END IF
2175  430 CONTINUE
2176  440 CONTINUE
2177 *
2178 * Column work done
2179 *
2180  450 CONTINUE
2181 *
2182 * Now do NR=2 work
2183 *
2184  DO 530 ki = 1, ibulge
2185  istart = max( k1( ki ), m )
2186  istop = min( k2( ki ), i-1 )
2187  IF( mod( istart-1, hbl ).GE.hbl-2 ) THEN
2188 *
2189 * INFO is found in a buffer
2190 *
2191  ispec = 1
2192  ELSE
2193 *
2194 * All INFO is local
2195 *
2196  ispec = 0
2197  END IF
2198 *
2199  DO 520 k = istart, istop
2200 *
2201  v2 = work( vecsidx+( k-1 )*3+1 )
2202  v3 = work( vecsidx+( k-1 )*3+2 )
2203  t1 = work( vecsidx+( k-1 )*3+3 )
2204  nr = min( 3, i-k+1 )
2205  IF( nr.EQ.2 ) THEN
2206  IF ( icurrow( ki ).EQ.myrow ) THEN
2207  t2 = t1*v2
2208  END IF
2209  IF ( icurcol( ki ).EQ.mycol ) THEN
2210  t2 = t1*v2
2211  END IF
2212 *
2213 * Apply G from the left to transform the rows of the matrix
2214 * in columns K to I2.
2215 *
2216  CALL infog1l( k, hbl, npcol, mycol, jafirst, liloh,
2217  $ lihih )
2218  lihih = locali2
2219  CALL infog1l( 1, hbl, nprow, myrow, iafirst, itmp2,
2220  $ itmp1 )
2221  itmp1 = numroc( k+1, hbl, myrow, iafirst, nprow )
2222  IF( icurrow( ki ).EQ.myrow ) THEN
2223  IF( ( ispec.EQ.0 ) .OR. ( nprow.EQ.1 ) .OR.
2224  $ ( mod( k-1, hbl ).EQ.hbl-2 ) ) THEN
2225  itmp1 = itmp1 - 1
2226  DO 460 j = ( liloh-1 )*lda,
2227  $ ( lihih-1 )*lda, lda
2228  sum = conjg( t1 )*a( itmp1+j ) +
2229  $ conjg( t2 )*a( itmp1+1+j )
2230  a( itmp1+j ) = a( itmp1+j ) - sum
2231  a( itmp1+1+j ) = a( itmp1+1+j ) - sum*v2
2232  460 CONTINUE
2233  ELSE
2234  IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
2235  CALL cgerv2d( contxt, 1, lihih-liloh+1,
2236  $ work( irbuf+1 ), 1, up,
2237  $ mycol )
2238  DO 470 j = liloh, lihih
2239  sum = conjg( t1 )*
2240  $ work( irbuf+j-liloh+1 ) +
2241  $ conjg( t2 )*a( ( j-1 )*lda+
2242  $ itmp1 )
2243  work( irbuf+j-liloh+1 ) = work( irbuf+
2244  $ j-liloh+1 ) - sum
2245  a( ( j-1 )*lda+itmp1 ) = a( ( j-1 )*
2246  $ lda+itmp1 ) - sum*v2
2247  470 CONTINUE
2248  CALL cgesd2d( contxt, 1, lihih-liloh+1,
2249  $ work( irbuf+1 ), 1, up,
2250  $ mycol )
2251  END IF
2252  END IF
2253  ELSE
2254  IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
2255  $ ( icurrow( ki ).EQ.down ) ) THEN
2256  CALL cgesd2d( contxt, 1, lihih-liloh+1,
2257  $ a( ( liloh-1 )*lda+itmp1 ),
2258  $ lda, down, mycol )
2259  CALL cgerv2d( contxt, 1, lihih-liloh+1,
2260  $ a( ( liloh-1 )*lda+itmp1 ),
2261  $ lda, down, mycol )
2262  END IF
2263  END IF
2264 *
2265 * Apply G from the right to transform the columns of the
2266 * matrix in rows I1 to MIN(K+3,I).
2267 *
2268  CALL infog1l( i1, hbl, nprow, myrow, iafirst,
2269  $ liloh, lihih )
2270  lihih = numroc( i, hbl, myrow, iafirst, nprow )
2271 *
2272  IF( icurcol( ki ).EQ.mycol ) THEN
2273 * LOCAL A(LILOZ:LIHIZ,KCOL:KCOL+2)
2274  IF( ( ispec.EQ.0 ) .OR. ( npcol.EQ.1 ) .OR.
2275  $ ( mod( k-1, hbl ).EQ.hbl-2 ) ) THEN
2276  CALL infog1l( k, hbl, npcol, mycol, jafirst,
2277  $ itmp1, itmp2 )
2278  itmp2 = numroc( k+1, hbl, mycol, jafirst,
2279  $ npcol )
2280  DO 480 j = liloh, lihih
2281  sum = t1*a( ( itmp1-1 )*lda+j ) +
2282  $ t2*a( itmp1*lda+j )
2283  a( ( itmp1-1 )*lda+j ) = a( ( itmp1-1 )*
2284  $ lda+j ) - sum
2285  a( itmp1*lda+j ) = a( itmp1*lda+j ) -
2286  $ sum*conjg( v2 )
2287  480 CONTINUE
2288  ELSE
2289  itmp1 = kcol( ki )
2290  IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
2291  CALL cgerv2d( contxt, lihih-liloh+1, 1,
2292  $ work( icbuf+1 ),
2293  $ lihih-liloh+1, myrow, left )
2294  DO 490 j = liloh, lihih
2295  sum = t1*work( icbuf+j ) +
2296  $ t2*a( ( itmp1-1 )*lda+j )
2297  work( icbuf+j ) = work( icbuf+j ) - sum
2298  a( ( itmp1-1 )*lda+j )
2299  $ = a( ( itmp1-1 )*lda+j ) -
2300  $ sum*conjg( v2 )
2301  490 CONTINUE
2302  CALL cgesd2d( contxt, lihih-liloh+1, 1,
2303  $ work( icbuf+1 ),
2304  $ lihih-liloh+1, myrow, left )
2305  END IF
2306  END IF
2307  ELSE
2308  IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
2309  $ ( icurcol( ki ).EQ.right ) ) THEN
2310  itmp1 = kcol( ki )
2311  CALL cgesd2d( contxt, lihih-liloh+1, 1,
2312  $ a( ( itmp1-1 )*lda+liloh ),
2313  $ lda, myrow, right )
2314  CALL infog1l( k, hbl, npcol, mycol, jafirst,
2315  $ itmp1, itmp2 )
2316  itmp2 = numroc( k+1, hbl, mycol, jafirst,
2317  $ npcol )
2318  CALL cgerv2d( contxt, lihih-liloh+1, 1,
2319  $ a( ( itmp1-1 )*lda+liloh ),
2320  $ lda, myrow, right )
2321  END IF
2322  END IF
2323 *
2324  IF( wantz ) THEN
2325 *
2326 * Accumulate transformations in the matrix Z
2327 *
2328  IF( icurcol( ki ).EQ.mycol ) THEN
2329 * LOCAL Z(LILOZ:LIHIZ,KCOL:KCOL+2)
2330  IF( ( ispec.EQ.0 ) .OR. ( npcol.EQ.1 ) .OR.
2331  $ ( mod( k-1, hbl ).EQ.hbl-2 ) ) THEN
2332  itmp1 = kcol( ki ) + k - istart
2333  itmp1 = ( itmp1-1 )*ldz
2334  DO 500 j = liloz, lihiz
2335  sum = t1*z( j+itmp1 ) +
2336  $ t2*z( j+itmp1+ldz )
2337  z( j+itmp1 ) = z( j+itmp1 ) - sum
2338  z( j+itmp1+ldz ) = z( j+itmp1+ldz ) -
2339  $ sum*conjg( v2 )
2340  500 CONTINUE
2341  ELSE
2342  itmp1 = kcol( ki )
2343 * IF WE ACTUALLY OWN COLUMN K
2344  IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
2345  CALL cgerv2d( contxt, lihiz-liloz+1, 1,
2346  $ work( izbuf+1 ), ldz,
2347  $ myrow, left )
2348  itmp1 = ( itmp1-1 )*ldz
2349  DO 510 j = liloz, lihiz
2350  sum = t1*work( izbuf+j ) +
2351  $ t2*z( j+itmp1 )
2352  work( izbuf+j ) = work( izbuf+j ) -
2353  $ sum
2354  z( j+itmp1 ) = z( j+itmp1 ) -
2355  $ sum*conjg( v2 )
2356  510 CONTINUE
2357  CALL cgesd2d( contxt, lihiz-liloz+1, 1,
2358  $ work( izbuf+1 ), ldz,
2359  $ myrow, left )
2360  END IF
2361  END IF
2362  ELSE
2363 *
2364 * NO WORK BUT NEED TO UPDATE ANYWAY????
2365 *
2366  IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
2367  $ ( icurcol( ki ).EQ.right ) ) THEN
2368  itmp1 = kcol( ki )
2369  itmp1 = ( itmp1-1 )*ldz
2370  CALL cgesd2d( contxt, lihiz-liloz+1, 1,
2371  $ z( liloz+itmp1 ), ldz,
2372  $ myrow, right )
2373  CALL cgerv2d( contxt, lihiz-liloz+1, 1,
2374  $ z( liloz+itmp1 ), ldz,
2375  $ myrow, right )
2376  END IF
2377  END IF
2378  END IF
2379  END IF
2380  520 CONTINUE
2381 *
2382 * Adjust local information for this bulge
2383 *
2384  IF( nprow.EQ.1 ) THEN
2385  krow( ki ) = krow( ki ) + k2( ki ) - k1( ki ) + 1
2386  kp2row( ki ) = kp2row( ki ) + k2( ki ) - k1( ki ) + 1
2387  END IF
2388  IF( ( mod( k1( ki )-1, hbl ).LT.hbl-2 ) .AND.
2389  $ ( icurrow( ki ).EQ.myrow ) .AND. ( nprow.GT.1 ) )
2390  $ THEN
2391  krow( ki ) = krow( ki ) + k2( ki ) - k1( ki ) + 1
2392  END IF
2393  IF( ( mod( k2( ki ), hbl ).LT.hbl-2 ) .AND.
2394  $ ( icurrow( ki ).EQ.myrow ) .AND. ( nprow.GT.1 ) )
2395  $ THEN
2396  kp2row( ki ) = kp2row( ki ) + k2( ki ) - k1( ki ) + 1
2397  END IF
2398  IF( ( mod( k1( ki )-1, hbl ).GE.hbl-2 ) .AND.
2399  $ ( ( myrow.EQ.icurrow( ki ) ) .OR. ( down.EQ.
2400  $ icurrow( ki ) ) ) .AND. ( nprow.GT.1 ) ) THEN
2401  CALL infog1l( k2( ki )+1, hbl, nprow, myrow, iafirst,
2402  $ krow( ki ), itmp2 )
2403  END IF
2404  IF( ( mod( k2( ki ), hbl ).GE.hbl-2 ) .AND.
2405  $ ( ( myrow.EQ.icurrow( ki ) ) .OR. ( up.EQ.
2406  $ icurrow( ki ) ) ) .AND. ( nprow.GT.1 ) ) THEN
2407  kp2row( ki ) = numroc( k2( ki )+3, hbl, myrow,
2408  $ iafirst, nprow )
2409  END IF
2410  IF( npcol.EQ.1 ) THEN
2411  kcol( ki ) = kcol( ki ) + k2( ki ) - k1( ki ) + 1
2412  kp2col( ki ) = kp2col( ki ) + k2( ki ) - k1( ki ) + 1
2413  END IF
2414  IF( ( mod( k1( ki )-1, hbl ).LT.hbl-2 ) .AND.
2415  $ ( icurcol( ki ).EQ.mycol ) .AND. ( npcol.GT.1 ) )
2416  $ THEN
2417  kcol( ki ) = kcol( ki ) + k2( ki ) - k1( ki ) + 1
2418  END IF
2419  IF( ( mod( k2( ki ), hbl ).LT.hbl-2 ) .AND.
2420  $ ( icurcol( ki ).EQ.mycol ) .AND. ( npcol.GT.1 ) )
2421  $ THEN
2422  kp2col( ki ) = kp2col( ki ) + k2( ki ) - k1( ki ) + 1
2423  END IF
2424  IF( ( mod( k1( ki )-1, hbl ).GE.hbl-2 ) .AND.
2425  $ ( ( mycol.EQ.icurcol( ki ) ) .OR. ( right.EQ.
2426  $ icurcol( ki ) ) ) .AND. ( npcol.GT.1 ) ) THEN
2427  CALL infog1l( k2( ki )+1, hbl, npcol, mycol, jafirst,
2428  $ kcol( ki ), itmp2 )
2429  END IF
2430  IF( ( mod( k2( ki ), hbl ).GE.hbl-2 ) .AND.
2431  $ ( ( mycol.EQ.icurcol( ki ) ) .OR. ( left.EQ.
2432  $ icurcol( ki ) ) ) .AND. ( npcol.GT.1 ) ) THEN
2433  kp2col( ki ) = numroc( k2( ki )+3, hbl, mycol,
2434  $ jafirst, npcol )
2435  END IF
2436  k1( ki ) = k2( ki ) + 1
2437  istop = min( k1( ki )+rotn-mod( k1( ki ), rotn ), i-2 )
2438  istop = min( istop, k1( ki )+hbl-3-
2439  $ mod( k1( ki )-1, hbl ) )
2440  istop = min( istop, i2-2 )
2441  istop = max( istop, k1( ki ) )
2442  IF( ( mod( k1( ki )-1, hbl ).EQ.hbl-2 ) .AND.
2443  $ ( istop.LT.min( i-2, i2-2 ) ) ) THEN
2444  istop = istop + 1
2445  END IF
2446  k2( ki ) = istop
2447  IF( k1( ki ).LE.istop ) THEN
2448  IF( ( mod( k1( ki )-1, hbl ).EQ.hbl-2 ) .AND.
2449  $ ( i-k1( ki ).GT.1 ) ) THEN
2450 *
2451 * Next step switches rows & cols
2452 *
2453  icurrow( ki ) = mod( icurrow( ki )+1, nprow )
2454  icurcol( ki ) = mod( icurcol( ki )+1, npcol )
2455  END IF
2456  END IF
2457  530 CONTINUE
2458 *
2459  IF( k2( ibulge ).LE.i-1 )
2460  $ GO TO 40
2461  END IF
2462 *
2463  540 CONTINUE
2464 *
2465 * Failure to converge in remaining number of iterations
2466 *
2467  info = i
2468  RETURN
2469 *
2470  550 CONTINUE
2471 *
2472  IF( l.EQ.i ) THEN
2473 *
2474 * H(I,I-1) is negligible: one eigenvalue has converged.
2475 *
2476  CALL infog2l( i, i, desca, nprow, npcol, myrow, mycol, irow,
2477  $ icol, itmp1, itmp2 )
2478  IF( ( myrow.EQ.itmp1 ) .AND. ( mycol.EQ.itmp2 ) ) THEN
2479  w( i ) = a( ( icol-1 )*lda+irow )
2480  ELSE
2481  w( i ) = zero
2482  END IF
2483  ELSE IF( l.EQ.i-1 ) THEN
2484 *
2485 * H(I-1,I-2) is negligible: a pair of eigenvalues have converged.
2486 *
2487  CALL pclacp3( 2, i-1, a, desca, s1, 2*iblk, -1, -1, 0 )
2488  CALL clanv2( s1( 1, 1 ), s1( 1, 2 ), s1( 2, 1 ), s1( 2, 2 ),
2489  $ w( i-1 ), w( i ), cs, sn )
2490  CALL pclacp3( 2, i-1, a, desca, s1, 2*iblk, 0, 0, 1 )
2491 *
2492  IF( node.NE.0 ) THEN
2493 * Erase the eigenvalues other eigenvalues
2494  w( i-1 ) = zero
2495  w( i ) = zero
2496  END IF
2497 *
2498  IF( wantt ) THEN
2499 *
2500 * Apply the transformation to A.
2501 *
2502  IF( i2.GT.i ) THEN
2503  CALL pcrot( i2-i, a, i-1, i+1, desca, n, a, i, i+1,
2504  $ desca, n, cs, sn )
2505  END IF
2506  CALL pcrot( i-i1-1, a, i1, i-1, desca, 1, a, i1, i, desca,
2507  $ 1, cs, conjg( sn ) )
2508  END IF
2509  IF( wantz ) THEN
2510 *
2511 * Apply the transformation to Z.
2512 *
2513  CALL pcrot( nz, z, iloz, i-1, descz, 1, z, iloz, i, descz,
2514  $ 1, cs, conjg( sn ) )
2515  END IF
2516 *
2517  ELSE
2518 *
2519 * Find the eigenvalues in H(L:I,L:I), L < I-1
2520 *
2521  jblk = i - l + 1
2522  IF( jblk.LE.2*iblk ) THEN
2523  CALL pclacp3( i-l+1, l, a, desca, s1, 2*iblk, 0, 0, 0 )
2524  CALL clahqr2( .false., .false., jblk, 1, jblk, s1, 2*iblk,
2525  $ w( l ), 1, jblk, z, ldz, ierr )
2526  IF( node.NE.0 ) THEN
2527 *
2528 * Erase the eigenvalues
2529 *
2530  DO 560 k = l, i
2531  w( k ) = zero
2532  560 CONTINUE
2533  END IF
2534  END IF
2535  END IF
2536 *
2537 * Decrement number of remaining iterations, and return to start of
2538 * the main loop with new value of I.
2539 *
2540  itn = itn - its
2541  i = l - 1
2542  GO TO 10
2543 *
2544  570 CONTINUE
2545  CALL cgsum2d( contxt, 'All', ' ', n, 1, w, n, -1, -1 )
2546  RETURN
2547 *
2548 * END OF PCLAHQR
2549 *
2550  END
pclaconsb
subroutine pclaconsb(A, DESCA, I, L, M, H44, H33, H43H34, BUF, LWORK)
Definition: pclaconsb.f:3
max
#define max(A, B)
Definition: pcgemr.c:180
infog1l
subroutine infog1l(GINDX, NB, NPROCS, MYROC, ISRCPROC, LINDX, ROCSRC)
Definition: infog1l.f:3
pslabad
subroutine pslabad(ICTXT, SMALL, LARGE)
Definition: pslabad.f:2
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pclacp3
subroutine pclacp3(M, I, A, DESCA, B, LDB, II, JJ, REV)
Definition: pclacp3.f:2
pclahqr
subroutine pclahqr(WANTT, WANTZ, N, ILO, IHI, A, DESCA, W, ILOZ, IHIZ, Z, DESCZ, WORK, LWORK, IWORK, ILWORK, INFO)
Definition: pclahqr.f:4
clanv2
subroutine clanv2(A, B, C, D, RT1, RT2, CS, SN)
Definition: clanv2.f:2
pclawil
subroutine pclawil(II, JJ, M, A, DESCA, H44, H33, H43H34, V)
Definition: pclawil.f:2
claref
subroutine claref(TYPE, A, LDA, WANTZ, Z, LDZ, BLOCK, IROW1, ICOL1, ISTART, ISTOP, ITMP1, ITMP2, LILOZ, LIHIZ, VECS, V2, V3, T1, T2, T3)
Definition: claref.f:4
pclasmsub
subroutine pclasmsub(A, DESCA, I, L, K, SMLNUM, BUF, LWORK)
Definition: pclasmsub.f:2
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
clamsh
subroutine clamsh(S, LDS, NBULGE, JBLK, H, LDH, N, ULP)
Definition: clamsh.f:2
clahqr2
subroutine clahqr2(WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, IHIZ, Z, LDZ, INFO)
Definition: clahqr2.f:3
min
#define min(A, B)
Definition: pcgemr.c:181