SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
All Classes Files Functions Variables Typedefs Macros
pclahqr.f
Go to the documentation of this file.
1 SUBROUTINE pclahqr( WANTT, WANTZ, N, ILO, IHI, A, DESCA, W, ILOZ,
2 $ IHIZ, Z, DESCZ, WORK, LWORK, IWORK, ILWORK,
3 $ INFO )
4*
5* -- ScaLAPACK routine (version 1.7.3) --
6* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
7* and University of California, Berkeley.
8* 1.7.3: March 22, 2006
9* modification suggested by Mark Fahey and Greg Henry
10* 1.7.0: July 31, 2001
11*
12* .. Scalar Arguments ..
13 LOGICAL WANTT, WANTZ
14 INTEGER IHI, IHIZ, ILO, ILOZ, ILWORK, INFO, LWORK, N
15* ..
16* .. Array Arguments ..
17 INTEGER DESCA( * ), DESCZ( * ), IWORK( * )
18 COMPLEX A( * ), W( * ), WORK( * ), Z( * )
19* ..
20*
21* Purpose
22* =======
23*
24* PCLAHQR is an auxiliary routine used to find the Schur decomposition
25* and or eigenvalues of a matrix already in Hessenberg form from
26* cols ILO to IHI.
27* If Z = I, and WANTT=WANTZ=.TRUE., H gets replaced with Z'HZ,
28* with Z'Z=I, and H in Schur form.
29*
30* Notes
31* =====
32*
33* Each global data object is described by an associated description
34* vector. This vector stores the information required to establish
35* the mapping between an object element and its corresponding process
36* and memory location.
37*
38* Let A be a generic term for any 2D block cyclicly distributed array.
39* Such a global array has an associated description vector DESCA.
40* In the following comments, the character _ should be read as
41* "of the global array".
42*
43* NOTATION STORED IN EXPLANATION
44* --------------- -------------- --------------------------------------
45* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
46* DTYPE_A = 1.
47* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
48* the BLACS process grid A is distribu-
49* ted over. The context itself is glo-
50* bal, but the handle (the integer
51* value) may vary.
52* M_A (global) DESCA( M_ ) The number of rows in the global
53* array A.
54* N_A (global) DESCA( N_ ) The number of columns in the global
55* array A.
56* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
57* the rows of the array.
58* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
59* the columns of the array.
60* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
61* row of the array A is distributed.
62* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
63* first column of the array A is
64* distributed.
65* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
66* array. LLD_A >= MAX(1,LOCp(M_A)).
67*
68* Let K be the number of rows or columns of a distributed matrix,
69* and assume that its process grid has dimension p x q.
70* LOCp( K ) denotes the number of elements of K that a process
71* would receive if K were distributed over the p processes of its
72* process column.
73* Similarly, LOCq( K ) denotes the number of elements of K that a
74* process would receive if K were distributed over the q processes of
75* its process row.
76* The values of LOCp() and LOCq() may be determined via a call to the
77* ScaLAPACK tool function, NUMROC:
78* LOCp( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
79* LOCq( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
80* An upper bound for these quantities may be computed by:
81* LOCp( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
82* LOCq( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
83*
84* Arguments
85* =========
86*
87* WANTT (global input) LOGICAL
88* = .TRUE. : the full Schur form T is required;
89* = .FALSE.: only eigenvalues are required.
90*
91* WANTZ (global input) LOGICAL
92* = .TRUE. : the matrix of Schur vectors Z is required;
93* = .FALSE.: Schur vectors are not required.
94*
95* N (global input) INTEGER
96* The order of the Hessenberg matrix A (and Z if WANTZ).
97* N >= 0.
98*
99* ILO (global input) INTEGER
100* IHI (global input) INTEGER
101* It is assumed that A is already upper quasi-triangular in
102* rows and columns IHI+1:N, and that A(ILO,ILO-1) = 0 (unless
103* ILO = 1). PCLAHQR works primarily with the Hessenberg
104* submatrix in rows and columns ILO to IHI, but applies
105* transformations to all of H if WANTT is .TRUE..
106* 1 <= ILO <= max(1,IHI); IHI <= N.
107*
108* A (global input/output) COMPLEX array, dimension
109* (DESCA(LLD_),*)
110* On entry, the upper Hessenberg matrix A.
111* On exit, if WANTT is .TRUE., A is upper triangular in rows
112* and columns ILO:IHI. If WANTT is .FALSE., the contents of
113* A are unspecified on exit.
114*
115* DESCA (global and local input) INTEGER array of dimension DLEN_.
116* The array descriptor for the distributed matrix A.
117*
118* W (global replicated output) COMPLEX array, dimension (N)
119* The computed eigenvalues ILO to IHI are stored in the
120* corresponding elements of W. If WANTT is .TRUE., the
121* eigenvalues are stored in the same order as on the diagonal
122* of the Schur form returned in A. A may be returned with
123* larger diagonal blocks until the next release.
124*
125* ILOZ (global input) INTEGER
126* IHIZ (global input) INTEGER
127* Specify the rows of Z to which transformations must be
128* applied if WANTZ is .TRUE..
129* 1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
130*
131* Z (global input/output) COMPLEX array.
132* If WANTZ is .TRUE., on entry Z must contain the current
133* matrix Z of transformations accumulated by PCHSEQR, and on
134* exit Z has been updated; transformations are applied only to
135* the submatrix Z(ILOZ:IHIZ,ILO:IHI).
136* If WANTZ is .FALSE., Z is not referenced.
137*
138* DESCZ (global and local input) INTEGER array of dimension DLEN_.
139* The array descriptor for the distributed matrix Z.
140*
141* WORK (local output) COMPLEX array of size LWORK
142* (Unless LWORK=-1, in which case WORK must be at least size 1)
143*
144* LWORK (local input) INTEGER
145* WORK(LWORK) is a local array and LWORK is assumed big enough
146* so that LWORK >= 3*N +
147* MAX( 2*MAX(DESCZ(LLD_),DESCA(LLD_)) + 2*LOCq(N),
148* 7*Ceil(N/HBL)/LCM(NPROW,NPCOL)) +
149* MAX( 2*N, (8*LCM(NPROW,NPCOL)+2)**2 )
150* If LWORK=-1, then WORK(1) gets set to the above number and
151* the code returns immediately.
152*
153* IWORK (global and local input) INTEGER array of size ILWORK
154* This will hold some of the IBLK integer arrays.
155* This is held as a place holder for a future release.
156* Currently unreferenced.
157*
158* ILWORK (local input) INTEGER
159* This will hold the size of the IWORK array.
160* This is held as a place holder for a future release.
161* Currently unreferenced.
162*
163* INFO (global output) INTEGER
164* < 0: parameter number -INFO incorrect or inconsistent
165* = 0: successful exit
166* > 0: PCLAHQR failed to compute all the eigenvalues ILO to IHI
167* in a total of 30*(IHI-ILO+1) iterations; if INFO = i,
168* elements i+1:ihi of W contains those eigenvalues
169* which have been successfully computed.
170*
171* Logic:
172* This algorithm is very similar to SLAHQR. Unlike SLAHQR,
173* instead of sending one double shift through the largest
174* unreduced submatrix, this algorithm sends multiple double shifts
175* and spaces them apart so that there can be parallelism across
176* several processor row/columns. Another critical difference is
177* that this algorithm aggregrates multiple transforms together in
178* order to apply them in a block fashion.
179*
180* Important Local Variables:
181* IBLK = The maximum number of bulges that can be computed.
182* Currently fixed. Future releases this won't be fixed.
183* HBL = The square block size (HBL=DESCA(MB_)=DESCA(NB_))
184* ROTN = The number of transforms to block together
185* NBULGE = The number of bulges that will be attempted on the
186* current submatrix.
187* IBULGE = The current number of bulges started.
188* K1(*),K2(*) = The current bulge loops from K1(*) to K2(*).
189*
190* Subroutines:
191* From LAPACK, this routine calls:
192* CLAHQR -> Serial QR used to determine shifts and
193* eigenvalues
194* CLARFG -> Determine the Householder transforms
195*
196* This ScaLAPACK, this routine calls:
197* PCLACONSB -> To determine where to start each iteration
198* CLAMSH -> Sends multiple shifts through a small
199* submatrix to see how the consecutive
200* subdiagonals change (if PCLACONSB indicates
201* we can start a run in the middle)
202* PCLAWIL -> Given the shift, get the transformation
203* PCLACP3 -> Parallel array to local replicated array copy
204* & back.
205* CLAREF -> Row/column reflector applier. Core routine
206* here.
207* PCLASMSUB -> Finds negligible subdiagonal elements.
208*
209* Current Notes and/or Restrictions:
210* 1.) This code requires the distributed block size to be square
211* and at least six (6); unlike simpler codes like LU, this
212* algorithm is extremely sensitive to block size. Unwise
213* choices of too small a block size can lead to bad
214* performance.
215* 2.) This code requires A and Z to be distributed identically
216* and have identical contxts. A future version may allow Z to
217* have a different contxt to 1D row map it to all nodes (so no
218* communication on Z is necessary.)
219* 3.) This code does not currently block the initial transforms
220* so that none of the rows or columns for any bulge are
221* completed until all are started. To offset pipeline
222* start-up it is recommended that at least 2*LCM(NPROW,NPCOL)
223* bulges are used (if possible)
224* 4.) The maximum number of bulges currently supported is fixed at
225* 32. In future versions this will be limited only by the
226* incoming WORK and IWORK array.
227* 5.) The matrix A must be in upper Hessenberg form. If elements
228* below the subdiagonal are nonzero, the resulting transforms
229* may be nonsimilar. This is also true with the LAPACK
230* routine CLAHQR.
231* 6.) For this release, this code has only been tested for
232* RSRC_=CSRC_=0, but it has been written for the general case.
233* 7.) Currently, all the eigenvalues are distributed to all the
234* nodes. Future releases will probably distribute the
235* eigenvalues by the column partitioning.
236* 8.) The internals of this routine are subject to change.
237* 9.) To optimize this for your architecture, try tuning CLAREF.
238* 10.) This code has only been tested for WANTZ = .TRUE. and may
239* behave unpredictably for WANTZ set to .FALSE.
240*
241* Further Details
242* ===============
243*
244* Contributed by Mark Fahey, June, 2000.
245*
246* =====================================================================
247*
248* .. Parameters ..
249 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_,
250 $ LLD_, MB_, M_, NB_, N_, RSRC_
251 parameter( block_cyclic_2d = 1, dlen_ = 9, dt_ = 1,
252 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
253 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
254 REAL RONE
255 PARAMETER ( RONE = 1.0e+0 )
256 COMPLEX ZERO, ONE
257 parameter( zero = ( 0.0e+0, 0.0e+0 ),
258 $ one = ( 1.0e+0, 0.0e+0 ) )
259 REAL CONST
260 PARAMETER ( CONST = 1.50e+0 )
261 INTEGER IBLK
262 parameter( iblk = 32 )
263* ..
264* .. Local Scalars ..
265 LOGICAL SKIP
266 INTEGER CONTXT, DOWN, HBL, I, I1, I2, IAFIRST, IBULGE,
267 $ icbuf, icol, icol1, icol2, idia, ierr, ii,
268 $ irbuf, irow, irow1, irow2, ispec, istart,
269 $ istartcol, istartrow, istop, isub, isup,
270 $ itermax, itmp1, itmp2, itn, its, izbuf, j,
271 $ jafirst, jblk, jj, k, ki, l, lcmrc, lda, ldz,
272 $ left, lihih, lihiz, liloh, liloz, locali1,
273 $ locali2, localk, localm, m, modkm1, mycol,
274 $ myrow, nbulge, nh, node, npcol, nprow, nq, nr,
275 $ num, nz, right, rotn, up, vecsidx
276 REAL CS, OVFL, S, SMLNUM, ULP, UNFL
277 COMPLEX CDUM, H10, H11, H22, H33, H43H34, H44, SN, SUM,
278 $ t1, t1copy, t2, t3, v1save, v2, v2save, v3,
279 $ v3save
280* ..
281* .. Local Arrays ..
282 INTEGER ICURCOL( IBLK ), ICURROW( IBLK ), K1( IBLK ),
283 $ K2( IBLK ), KCOL( IBLK ), KP2COL( IBLK ),
284 $ kp2row( iblk ), krow( iblk )
285 COMPLEX S1( 2*IBLK, 2*IBLK ), SMALLA( 6, 6, IBLK ),
286 $ VCOPY( 3 )
287* ..
288* .. External Functions ..
289 INTEGER ILCM, NUMROC
290 REAL PSLAMCH
291 EXTERNAL ilcm, numroc, pslamch
292* ..
293* .. External Subroutines ..
294 EXTERNAL blacs_gridinfo, igamn2d, igebr2d, igebs2d,
296 $ pclacp3, pclasmsub, pclawil, pcrot, ccopy,
297 $ cgebr2d, cgebs2d, cgerv2d, cgesd2d, cgsum2d,
298 $ clahqr2, clamsh, clanv2, claref, clarfg
299* ..
300* .. Intrinsic Functions ..
301*
302 INTRINSIC abs, real, conjg, aimag, max, min, mod
303* ..
304* .. Statement Functions ..
305 REAL CABS1
306* ..
307* .. Statement Function definitions ..
308 cabs1( cdum ) = abs( real( cdum ) ) + abs( aimag( cdum ) )
309* ..
310* .. Executable Statements ..
311*
312 info = 0
313*
314 itermax = 30*( ihi-ilo+1 )
315 IF( n.EQ.0 )
316 $ RETURN
317*
318* NODE (IAFIRST,JAFIRST) OWNS A(1,1)
319*
320 hbl = desca( mb_ )
321 contxt = desca( ctxt_ )
322 lda = desca( lld_ )
323 iafirst = desca( rsrc_ )
324 jafirst = desca( csrc_ )
325 ldz = descz( lld_ )
326 CALL blacs_gridinfo( contxt, nprow, npcol, myrow, mycol )
327 node = myrow*npcol + mycol
328 num = nprow*npcol
329 left = mod( mycol+npcol-1, npcol )
330 right = mod( mycol+1, npcol )
331 up = mod( myrow+nprow-1, nprow )
332 down = mod( myrow+1, nprow )
333 lcmrc = ilcm( nprow, npcol )
334 IF( ( nprow.LE.3 ) .OR. ( npcol.LE.3 ) ) THEN
335 skip = .true.
336 ELSE
337 skip = .false.
338 END IF
339*
340* Determine the number of columns we have so we can check workspace
341*
342 nq = numroc( n, hbl, mycol, jafirst, npcol )
343 jj = n / hbl
344 IF( jj*hbl.LT.n )
345 $ jj = jj + 1
346 jj = 7*jj / lcmrc
347 jj = 3*n + max( 2*max( lda, ldz )+2*nq, jj )
348 jj = jj + max( 2*n, ( 8*lcmrc+2 )**2 )
349 IF( lwork.EQ.-1 ) THEN
350 work( 1 ) = jj
351 RETURN
352 END IF
353 IF( lwork.LT.jj ) THEN
354 info = -14
355 END IF
356 IF( descz( ctxt_ ).NE.desca( ctxt_ ) ) THEN
357 info = -( 1300+ctxt_ )
358 END IF
359 IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
360 info = -( 700+nb_ )
361 END IF
362 IF( descz( mb_ ).NE.descz( nb_ ) ) THEN
363 info = -( 1300+nb_ )
364 END IF
365 IF( desca( mb_ ).NE.descz( mb_ ) ) THEN
366 info = -( 1300+mb_ )
367 END IF
368 IF( ( desca( rsrc_ ).NE.0 ) .OR. ( desca( csrc_ ).NE.0 ) ) THEN
369 info = -( 700+rsrc_ )
370 END IF
371 IF( ( descz( rsrc_ ).NE.0 ) .OR. ( descz( csrc_ ).NE.0 ) ) THEN
372 info = -( 1300+rsrc_ )
373 END IF
374 IF( ( ilo.GT.n ) .OR. ( ilo.LT.1 ) ) THEN
375 info = -4
376 END IF
377 IF( ( ihi.GT.n ) .OR. ( ihi.LT.1 ) ) THEN
378 info = -5
379 END IF
380 IF( hbl.LT.5 ) THEN
381 info = -( 700+mb_ )
382 END IF
383 CALL igamn2d( contxt, 'ALL', ' ', 1, 1, info, 1, itmp1, itmp2, -1,
384 $ -1, -1 )
385 IF( info.LT.0 ) THEN
386 CALL pxerbla( contxt, 'PCLAHQR', -info )
387 RETURN
388 END IF
389*
390* Set work array indices
391*
392 vecsidx = 0
393 idia = 3*n
394 isub = 3*n
395 isup = 3*n
396 irbuf = 3*n
397 icbuf = 3*n
398 izbuf = 5*n
399*
400* Find a value for ROTN
401*
402 rotn = hbl / 3
403 rotn = min( rotn, hbl-2 )
404 rotn = max( rotn, 1 )
405*
406 IF( ilo.EQ.ihi ) THEN
407 CALL infog2l( ilo, ilo, desca, nprow, npcol, myrow, mycol,
408 $ irow, icol, ii, jj )
409 IF( ( myrow.EQ.ii ) .AND. ( mycol.EQ.jj ) ) THEN
410 w( ilo ) = a( ( icol-1 )*lda+irow )
411 ELSE
412 w( ilo ) = zero
413 END IF
414 RETURN
415 END IF
416*
417 nh = ihi - ilo + 1
418 nz = ihiz - iloz + 1
419*
420 CALL infog1l( iloz, hbl, nprow, myrow, iafirst, liloz, lihiz )
421 lihiz = numroc( ihiz, hbl, myrow, iafirst, nprow )
422*
423* Set machine-dependent constants for the stopping criterion.
424* If NORM(H) <= SQRT(OVFL), overflow should not occur.
425*
426 unfl = pslamch( contxt, 'SAFE MINIMUM' )
427 ovfl = rone / unfl
428 CALL pslabad( contxt, unfl, ovfl )
429 ulp = pslamch( contxt, 'PRECISION' )
430 smlnum = unfl*( nh / ulp )
431*
432* I1 and I2 are the indices of the first row and last column of H
433* to which transformations must be applied. If eigenvalues only are
434* being computed, I1 and I2 are set inside the main loop.
435*
436 IF( wantt ) THEN
437 i1 = 1
438 i2 = n
439 END IF
440*
441* ITN is the total number of QR iterations allowed.
442*
443 itn = itermax
444*
445* The main loop begins here. I is the loop index and decreases from
446* IHI to ILO in steps of our schur block size (<=2*IBLK). Each
447* iteration of the loop works with the active submatrix in rows
448* and columns L to I. Eigenvalues I+1 to IHI have already
449* converged. Either L = ILO or the global A(L,L-1) is negligible
450* so that the matrix splits.
451*
452 i = ihi
453 10 CONTINUE
454 l = ilo
455 IF( i.LT.ilo )
456 $ GO TO 570
457*
458* Perform QR iterations on rows and columns ILO to I until a
459* submatrix of order 1 or 2 splits off at the bottom because a
460* subdiagonal element has become negligible.
461*
462 DO 540 its = 0, itn
463*
464* Look for a single small subdiagonal element.
465*
466 CALL pclasmsub( a, desca, i, l, k, smlnum, work( irbuf+1 ),
467 $ lwork-irbuf )
468 l = k
469*
470 IF( l.GT.ilo ) THEN
471*
472* H(L,L-1) is negligible
473*
474 CALL infog2l( l, l-1, desca, nprow, npcol, myrow, mycol,
475 $ irow, icol, itmp1, itmp2 )
476 IF( ( myrow.EQ.itmp1 ) .AND. ( mycol.EQ.itmp2 ) ) THEN
477 a( ( icol-1 )*lda+irow ) = zero
478 END IF
479 work( isub+l-1 ) = zero
480 END IF
481*
482* Exit from loop if a submatrix of order 1 or 2 has split off.
483*
484 IF( wantt ) THEN
485* For Schur form, use 2x2 blocks
486 IF( l.GE.i-1 ) THEN
487 GO TO 550
488 END IF
489 ELSE
490* If we don't want the Schur form, use bigger blocks.
491 IF( l.GE.i-( 2*iblk-1 ) ) THEN
492 GO TO 550
493 END IF
494 END IF
495*
496* Now the active submatrix is in rows and columns L to I. If
497* eigenvalues only are being computed, only the active submatrix
498* need be transformed.
499*
500 IF( .NOT.wantt ) THEN
501 i1 = l
502 i2 = i
503 END IF
504*
505* Copy submatrix of size 2*JBLK and prepare to do generalized
506* Wilkinson shift or an exceptional shift
507*
508 jblk = min( iblk, ( ( i-l+1 ) / 2 )-1 )
509 IF( jblk.GT.lcmrc ) THEN
510*
511* Make sure it's divisible by LCM (we want even workloads!)
512*
513 jblk = jblk - mod( jblk, lcmrc )
514 END IF
515 jblk = min( jblk, 2*lcmrc )
516 jblk = max( jblk, 1 )
517*
518 CALL pclacp3( 2*jblk, i-2*jblk+1, a, desca, s1, 2*iblk, -1, -1,
519 $ 0 )
520 IF( ( its.EQ.20 .OR. its.EQ.40 ) .AND. ( jblk.GT.1 ) ) THEN
521*
522* Exceptional shift.
523*
524 DO 20 ii = 2*jblk, 2, -1
525 s1( ii, ii ) = const*( cabs1( s1( ii, ii ) )+
526 $ cabs1( s1( ii, ii-1 ) ) )
527 s1( ii, ii-1 ) = zero
528 s1( ii-1, ii ) = zero
529 20 CONTINUE
530 s1( 1, 1 ) = const*cabs1( s1( 1, 1 ) )
531 ELSE
532 CALL clahqr2( .false., .false., 2*jblk, 1, 2*jblk, s1,
533 $ 2*iblk, work( irbuf+1 ), 1, 2*jblk, z, ldz,
534 $ ierr )
535*
536* Prepare to use Wilkinson's double shift
537*
538 h44 = s1( 2*jblk, 2*jblk )
539 h33 = s1( 2*jblk-1, 2*jblk-1 )
540 h43h34 = s1( 2*jblk-1, 2*jblk )*s1( 2*jblk, 2*jblk-1 )
541*
542 END IF
543*
544* Look for two consecutive small subdiagonal elements:
545* PCLACONSB is the routine that does this.
546*
547 CALL pclaconsb( a, desca, i, l, m, h44, h33, h43h34,
548 $ work( irbuf+1 ), lwork-irbuf )
549*
550* Double-shift QR step
551*
552* NBULGE is the number of bulges that will be attempted
553*
554 istop = min( m+rotn-1-mod( m-( m / hbl )*hbl-1, rotn ), i-2 )
555 istop = min( istop, m+hbl-3-mod( m-1, hbl ) )
556 istop = min( istop, i2-2 )
557 istop = max( istop, m )
558 nbulge = ( i-1-istop ) / hbl
559*
560* Do not exceed maximum determined.
561*
562 nbulge = min( nbulge, jblk )
563 IF( nbulge.GT.lcmrc ) THEN
564*
565* Make sure it's divisible by LCM (we want even workloads!)
566*
567 nbulge = nbulge - mod( nbulge, lcmrc )
568 END IF
569 nbulge = max( nbulge, 1 )
570*
571* If we are starting in the middle because of consecutive small
572* subdiagonal elements, we need to see how many bulges we
573* can send through without breaking the consecutive small
574* subdiagonal property.
575*
576 IF( ( nbulge.GT.1 ) .AND. ( m.GT.l ) ) THEN
577*
578* Copy a chunk of elements from global A(M-1:,M-1:)
579*
580 CALL infog2l( m+2, m+2, desca, nprow, npcol, myrow, mycol,
581 $ irow1, icol1, itmp1, itmp2 )
582 ii = min( 4*nbulge+2, n-m+2 )
583 CALL pclacp3( ii, m-1, a, desca, work( irbuf+1 ), ii, itmp1,
584 $ itmp2, 0 )
585 IF( ( myrow.EQ.itmp1 ) .AND. ( mycol.EQ.itmp2 ) ) THEN
586*
587* Find a new NBULGE based on the bulges we have.
588*
589 CALL clamsh( s1, 2*iblk, nbulge, jblk, work( irbuf+1 ),
590 $ ii, ii, ulp )
591 IF( num.GT.1 ) THEN
592 CALL igebs2d( contxt, 'ALL', ' ', 1, 1, nbulge, 1 )
593 END IF
594 ELSE
595*
596* Everyone needs to receive the new NBULGE
597*
598 CALL igebr2d( contxt, 'ALL', ' ', 1, 1, nbulge, 1, itmp1,
599 $ itmp2 )
600 END IF
601 END IF
602*
603* IBULGE is the number of bulges going so far
604*
605 ibulge = 1
606*
607* "A" row defs : main row transforms from LOCALK to LOCALI2
608*
609 CALL infog1l( m, hbl, npcol, mycol, jafirst, itmp1, localk )
610 localk = nq
611 CALL infog1l( 1, hbl, npcol, mycol, jafirst, icol1, locali2 )
612 locali2 = numroc( i2, hbl, mycol, jafirst, npcol )
613*
614* "A" col defs : main col transforms from LOCALI1 to LOCALM
615*
616 CALL infog1l( i1, hbl, nprow, myrow, iafirst, locali1, icol1 )
617 CALL infog1l( 1, hbl, nprow, myrow, iafirst, localm, icol1 )
618 icol1 = numroc( min( m+3, i ), hbl, myrow, iafirst, nprow )
619*
620* Which row & column will start the bulges
621*
622 istartrow = mod( ( m+1 ) / hbl, nprow ) + iafirst
623 istartcol = mod( ( m+1 ) / hbl, npcol ) + jafirst
624*
625 CALL infog1l( m, hbl, nprow, myrow, iafirst, ii, itmp2 )
626 CALL infog1l( m, hbl, npcol, mycol, jafirst, jj, itmp2 )
627 CALL infog1l( 1, hbl, nprow, myrow, iafirst, istop,
628 $ kp2row( 1 ) )
629 kp2row( 1 ) = numroc( m+2, hbl, myrow, iafirst, nprow )
630 CALL infog1l( 1, hbl, npcol, mycol, jafirst, istop,
631 $ kp2col( 1 ) )
632 kp2col( 1 ) = numroc( m+2, hbl, mycol, jafirst, npcol )
633*
634* Set all values for bulges. All bulges are stored in
635* intermediate steps as loops over KI. Their current "task"
636* over the global M to I-1 values is always K1(KI) to K2(KI).
637* However, because there are many bulges, K1(KI) & K2(KI) might
638* go past that range while later bulges (KI+1,KI+2,etc..) are
639* finishing up. Even if ROTN=1, in order to minimize border
640* communication sometimes K1(KI)=HBL-2 & K2(KI)=HBL-1 so both
641* border messages can be handled at once.
642*
643* Rules:
644* If MOD(K1(KI)-1,HBL) < HBL-2 then MOD(K2(KI)-1,HBL)<HBL-2
645* If MOD(K1(KI)-1,HBL) = HBL-1 then MOD(K2(KI)-1,HBL)=HBL-1
646* K2(KI)-K1(KI) <= ROTN
647*
648* We first hit a border when MOD(K1(KI)-1,HBL)=HBL-2 and we hit
649* it again when MOD(K1(KI)-1,HBL)=HBL-1.
650*
651 DO 30 ki = 1, nbulge
652 k1( ki ) = m
653 istop = min( m+rotn-1-mod( m-( m / hbl )*hbl-1, rotn ),
654 $ i-2 )
655 istop = min( istop, m+hbl-3-mod( m-1, hbl ) )
656 istop = min( istop, i2-2 )
657 istop = max( istop, m )
658 IF( ( mod( m-1, hbl ).EQ.hbl-2 ) .AND.
659 $ ( istop.LT.min( i-2, i2-2 ) ) ) THEN
660 istop = istop + 1
661 END IF
662 k2( ki ) = istop
663 icurrow( ki ) = istartrow
664 icurcol( ki ) = istartcol
665 krow( ki ) = ii
666 kcol( ki ) = jj
667 IF( ki.GT.1 )
668 $ kp2row( ki ) = kp2row( 1 )
669 IF( ki.GT.1 )
670 $ kp2col( ki ) = kp2col( 1 )
671 30 CONTINUE
672*
673* Get first transform on node who owns M+2,M+2
674*
675 DO 31 itmp1 = 1, 3
676 vcopy(itmp1) = zero
677 31 CONTINUE
678 itmp1 = istartrow
679 itmp2 = istartcol
680 CALL pclawil( itmp1, itmp2, m, a, desca, h44, h33, h43h34,
681 $ vcopy )
682 v1save = vcopy( 1 )
683 v2save = vcopy( 2 )
684 v3save = vcopy( 3 )
685*
686* The main implicit shift Francis loops over the bulges starts
687* here!
688*
689 IF( k2( ibulge ).LE.i-1 ) THEN
690 40 CONTINUE
691 IF( ( k1( ibulge ).GE.m+5 ) .AND. ( ibulge.LT.nbulge ) )
692 $ THEN
693 IF( ( mod( k2( ibulge )+2, hbl ).EQ.mod( k2( ibulge+1 )+
694 $ 2, hbl ) ) .AND. ( k1( 1 ).LE.i-1 ) ) THEN
695 h44 = s1( 2*jblk-2*ibulge, 2*jblk-2*ibulge )
696 h33 = s1( 2*jblk-2*ibulge-1, 2*jblk-2*ibulge-1 )
697 h43h34 = s1( 2*jblk-2*ibulge-1, 2*jblk-2*ibulge )*
698 $ s1( 2*jblk-2*ibulge, 2*jblk-2*ibulge-1 )
699 itmp1 = istartrow
700 itmp2 = istartcol
701 CALL pclawil( itmp1, itmp2, m, a, desca, h44, h33,
702 $ h43h34, vcopy )
703 v1save = vcopy( 1 )
704 v2save = vcopy( 2 )
705 v3save = vcopy( 3 )
706 ibulge = ibulge + 1
707 END IF
708 END IF
709*
710* When we hit a border, there are row and column transforms that
711* overlap over several processors and the code gets very
712* "congested." As a remedy, when we first hit a border, a 6x6
713* *local* matrix is generated on one node (called SMALLA) and
714* work is done on that. At the end of the border, the data is
715* passed back and everything stays a lot simpler.
716*
717 DO 120 ki = 1, ibulge
718*
719 istart = max( k1( ki ), m )
720 istop = min( k2( ki ), i-1 )
721 k = istart
722 modkm1 = mod( k-1, hbl )
723 IF( ( modkm1.GE.hbl-2 ) .AND. ( k.LE.i-1 ) ) THEN
724 DO 81 itmp1 = 1, 6
725 DO 82 itmp2 = 1, 6
726 smalla(itmp1, itmp2, ki) = zero
727 82 CONTINUE
728 81 CONTINUE
729 IF( ( modkm1.EQ.hbl-2 ) .AND. ( k.LT.i-1 ) ) THEN
730*
731* Copy 6 elements from global A(K-1:K+4,K-1:K+4)
732*
733 itmp1 = icurrow( ki )
734 itmp2 = icurcol( ki )
735 CALL pclacp3( min( 6, n-k+2 ), k-1, a, desca,
736 $ smalla( 1, 1, ki ), 6, itmp1, itmp2,
737 $ 0 )
738 END IF
739 IF( modkm1.EQ.hbl-1 ) THEN
740*
741* Copy 6 elements from global A(K-2:K+3,K-2:K+3)
742*
743 CALL infog2l( k+1, k+1, desca, nprow, npcol, myrow,
744 $ mycol, irow1, icol1, itmp1, itmp2 )
745 CALL pclacp3( min( 6, n-k+3 ), k-2, a, desca,
746 $ smalla( 1, 1, ki ), 6, itmp1, itmp2,
747 $ 0 )
748 END IF
749 END IF
750*
751*
752* CLAHQR used to have a single row application and a single
753* column application to H. Here we do something a little
754* more clever. We break each transformation down into 3
755* parts:
756* 1.) The minimum amount of work it takes to determine
757* a group of ROTN transformations (this is on
758* the critical path.) (Loops 50-120)
759* (the data is broadcast now: loops 180-240)
760* 2.) The small work it takes so that each of the rows
761* and columns is at the same place. For example,
762* all ROTN row transforms are all complete
763* through some column TMP. (Loops 250-260)
764* 3.) The majority of the row and column transforms
765* are then applied in a block fashion.
766* (row transforms are in loops 280-380)
767* (col transforms are in loops 400-540)
768*
769* Each of these three parts are further subdivided into 3
770* parts:
771* A.) Work at the start of a border when
772* MOD(ISTART-1,HBL) = HBL-2
773* B.) Work at the end of a border when
774* MOD(ISTART-1,HBL) = HBL-1
775* C.) Work in the middle of the block when
776* MOD(ISTART-1,HBL) < HBL-2
777*
778* Further optimization is met with the boolean SKIP. A border
779* communication can be broken into several parts for
780* efficient parallelism:
781* Loop over all the bulges, just sending the data out
782* Loop over all the bulges, just doing the work
783* Loop over all the bulges, just sending the data back.
784*
785*
786 IF( ( myrow.EQ.icurrow( ki ) ) .AND.
787 $ ( mycol.EQ.icurcol( ki ) ) .AND.
788 $ ( modkm1.EQ.hbl-2 ) .AND.
789 $ ( istart.LT.min( i-1, istop+1 ) ) ) THEN
790 k = istart
791 nr = min( 3, i-k+1 )
792 IF( k.GT.m ) THEN
793 CALL ccopy( nr, smalla( 2, 1, ki ), 1, vcopy, 1 )
794 ELSE
795 vcopy( 1 ) = v1save
796 vcopy( 2 ) = v2save
797 vcopy( 3 ) = v3save
798 END IF
799 CALL clarfg( nr, vcopy( 1 ), vcopy( 2 ), 1, t1copy )
800 IF( k.GT.m ) THEN
801 smalla( 2, 1, ki ) = vcopy( 1 )
802 smalla( 3, 1, ki ) = zero
803 IF( k.LT.i-1 )
804 $ smalla( 4, 1, ki ) = zero
805 ELSE IF( m.GT.l ) THEN
806*
807* Following differs in comparison to pslahqr.
808*
809 smalla( 2, 1, ki ) = smalla( 2, 1, ki ) -
810 $ conjg( t1copy )*
811 $ smalla( 2, 1, ki )
812 END IF
813 v2 = vcopy( 2 )
814 t2 = t1copy*v2
815 work( vecsidx+( k-1 )*3+1 ) = vcopy( 2 )
816 work( vecsidx+( k-1 )*3+2 ) = vcopy( 3 )
817 work( vecsidx+( k-1 )*3+3 ) = t1copy
818 IF( nr.EQ.3 ) THEN
819*
820* Do some work so next step is ready...
821*
822 t1 = t1copy
823 v3 = vcopy( 3 )
824 t3 = t1*v3
825 itmp1 = min( 6, i2+2-k )
826 itmp2 = max( i1-k+2, 1 )
827 DO 50 j = 2, itmp1
828 sum = conjg( t1 )*smalla( 2, j, ki ) +
829 $ conjg( t2 )*smalla( 3, j, ki ) +
830 $ conjg( t3 )*smalla( 4, j, ki )
831 smalla( 2, j, ki ) = smalla( 2, j, ki ) - sum
832 smalla( 3, j, ki ) = smalla( 3, j, ki ) - sum*v2
833 smalla( 4, j, ki ) = smalla( 4, j, ki ) - sum*v3
834 50 CONTINUE
835 DO 60 j = itmp2, 5
836 sum = t1*smalla( j, 2, ki ) +
837 $ t2*smalla( j, 3, ki ) +
838 $ t3*smalla( j, 4, ki )
839 smalla( j, 2, ki ) = smalla( j, 2, ki ) - sum
840 smalla( j, 3, ki ) = smalla( j, 3, ki ) -
841 $ sum*conjg( v2 )
842 smalla( j, 4, ki ) = smalla( j, 4, ki ) -
843 $ sum*conjg( v3 )
844 60 CONTINUE
845 END IF
846 END IF
847*
848 IF( ( mod( istop-1, hbl ).EQ.hbl-1 ) .AND.
849 $ ( myrow.EQ.icurrow( ki ) ) .AND.
850 $ ( mycol.EQ.icurcol( ki ) ) .AND.
851 $ ( istart.LE.min( i, istop ) ) ) THEN
852 k = istop
853 nr = min( 3, i-k+1 )
854 IF( k.GT.m ) THEN
855 CALL ccopy( nr, smalla( 3, 2, ki ), 1, vcopy, 1 )
856 ELSE
857 vcopy( 1 ) = v1save
858 vcopy( 2 ) = v2save
859 vcopy( 3 ) = v3save
860 END IF
861 CALL clarfg( nr, vcopy( 1 ), vcopy( 2 ), 1, t1copy )
862 IF( k.GT.m ) THEN
863 smalla( 3, 2, ki ) = vcopy( 1 )
864 smalla( 4, 2, ki ) = zero
865 IF( k.LT.i-1 )
866 $ smalla( 5, 2, ki ) = zero
867*
868* Set a subdiagonal to zero now if it's possible
869*
870 IF( ( k-2.GT.m ) .AND. ( mod( k-1, hbl ).GT.1 ) )
871 $ THEN
872 h11 = smalla( 1, 1, ki )
873 h10 = smalla( 2, 1, ki )
874 h22 = smalla( 2, 2, ki )
875 s = cabs1( h11 ) + cabs1( h22 )
876 IF( cabs1( h10 ).LE.max( ulp*s, smlnum ) ) THEN
877 smalla( 2, 1, ki ) = zero
878 END IF
879 END IF
880 ELSE IF( m.GT.l ) THEN
881*
882* Following differs in comparison to pslahqr.
883*
884 smalla( 3, 2, ki ) = smalla( 3, 2, ki ) -
885 $ conjg( t1copy )*
886 $ smalla( 3, 2, ki )
887 END IF
888 v2 = vcopy( 2 )
889 t2 = t1copy*v2
890 work( vecsidx+( k-1 )*3+1 ) = vcopy( 2 )
891 work( vecsidx+( k-1 )*3+2 ) = vcopy( 3 )
892 work( vecsidx+( k-1 )*3+3 ) = t1copy
893 IF( nr.EQ.3 ) THEN
894*
895* Do some work so next step is ready...
896*
897 t1 = t1copy
898 v3 = vcopy( 3 )
899 t3 = t1*v3
900 itmp1 = min( 6, i2-k+3 )
901 itmp2 = max( i1-k+3, 1 )
902 DO 70 j = 3, itmp1
903 sum = conjg( t1 )*smalla( 3, j, ki ) +
904 $ conjg( t2 )*smalla( 4, j, ki ) +
905 $ conjg( t3 )*smalla( 5, j, ki )
906 smalla( 3, j, ki ) = smalla( 3, j, ki ) - sum
907 smalla( 4, j, ki ) = smalla( 4, j, ki ) - sum*v2
908 smalla( 5, j, ki ) = smalla( 5, j, ki ) - sum*v3
909 70 CONTINUE
910 DO 80 j = itmp2, 6
911 sum = t1*smalla( j, 3, ki ) +
912 $ t2*smalla( j, 4, ki ) +
913 $ t3*smalla( j, 5, ki )
914 smalla( j, 3, ki ) = smalla( j, 3, ki ) - sum
915 smalla( j, 4, ki ) = smalla( j, 4, ki ) -
916 $ sum*conjg( v2 )
917 smalla( j, 5, ki ) = smalla( j, 5, ki ) -
918 $ sum*conjg( v3 )
919 80 CONTINUE
920 END IF
921 END IF
922*
923 IF( ( modkm1.EQ.0 ) .AND. ( istart.LE.i-1 ) .AND.
924 $ ( myrow.EQ.icurrow( ki ) ) .AND.
925 $ ( right.EQ.icurcol( ki ) ) ) THEN
926*
927* (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)
928*
929 irow1 = krow( ki )
930 icol1 = kcol( ki )
931*
932* The ELSE part of this IF needs updated VCOPY, this
933* was not necessary in PSLAHQR.
934*
935 IF( istart.GT.m ) THEN
936 vcopy( 1 ) = smalla( 4, 3, ki )
937 vcopy( 2 ) = smalla( 5, 3, ki )
938 vcopy( 3 ) = smalla( 6, 3, ki )
939 nr = min( 3, i-istart+1 )
940 CALL clarfg( nr, vcopy( 1 ), vcopy( 2 ), 1,
941 $ t1copy )
942 a( ( icol1-2 )*lda+irow1 ) = vcopy( 1 )
943 a( ( icol1-2 )*lda+irow1+1 ) = zero
944 IF( istart.LT.i-1 ) THEN
945 a( ( icol1-2 )*lda+irow1+2 ) = zero
946 END IF
947 ELSE
948*
949* If NPCOL.NE.1 THEN we need updated VCOPY.
950*
951 nr = min( 3, i-istart+1 )
952 IF( npcol.EQ.1 ) THEN
953 vcopy( 1 ) = v1save
954 vcopy( 2 ) = v2save
955 vcopy( 3 ) = v3save
956 ELSE
957*
958* Get updated VCOPY from RIGHT
959*
960 CALL cgerv2d( contxt, 3, 1, vcopy, 3, myrow,
961 $ right )
962 END IF
963 CALL clarfg( nr, vcopy( 1 ), vcopy( 2 ), 1,
964 $ t1copy )
965 IF( m.GT.l ) THEN
966*
967* Following differs in comparison to pslahqr.
968*
969 a( ( icol1-2 )*lda+irow1 ) = a( ( icol1-2 )*lda+
970 $ irow1 )*conjg( one-t1copy )
971 END IF
972 END IF
973 END IF
974*
975 IF( ( myrow.EQ.icurrow( ki ) ) .AND.
976 $ ( mycol.EQ.icurcol( ki ) ) .AND.
977 $ ( ( ( modkm1.EQ.hbl-2 ) .AND. ( istart.EQ.i-
978 $ 1 ) ) .OR. ( ( modkm1.LT.hbl-2 ) .AND. ( istart.LE.i-
979 $ 1 ) ) ) ) THEN
980*
981* (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)
982*
983 irow1 = krow( ki )
984 icol1 = kcol( ki )
985 DO 110 k = istart, istop
986*
987* Create and do these transforms
988*
989 nr = min( 3, i-k+1 )
990 IF( k.GT.m ) THEN
991 IF( mod( k-1, hbl ).EQ.0 ) THEN
992 vcopy( 1 ) = smalla( 4, 3, ki )
993 vcopy( 2 ) = smalla( 5, 3, ki )
994 vcopy( 3 ) = smalla( 6, 3, ki )
995 ELSE
996 vcopy( 1 ) = a( ( icol1-2 )*lda+irow1 )
997 vcopy( 2 ) = a( ( icol1-2 )*lda+irow1+1 )
998 IF( nr.EQ.3 ) THEN
999 vcopy( 3 ) = a( ( icol1-2 )*lda+irow1+2 )
1000 END IF
1001 END IF
1002 ELSE
1003 vcopy( 1 ) = v1save
1004 vcopy( 2 ) = v2save
1005 vcopy( 3 ) = v3save
1006 END IF
1007*
1008* Must send uptodate copy of VCOPY to left.
1009*
1010 IF( npcol.GT.1 .AND. istart.LE.m .AND.
1011 $ mod( k-1, hbl ).EQ.0 ) THEN
1012 CALL cgesd2d( contxt, 3, 1, vcopy, 3, myrow,
1013 $ left )
1014 END IF
1015 CALL clarfg( nr, vcopy( 1 ), vcopy( 2 ), 1,
1016 $ t1copy )
1017 IF( k.GT.m ) THEN
1018 IF( mod( k-1, hbl ).GT.0 ) THEN
1019 a( ( icol1-2 )*lda+irow1 ) = vcopy( 1 )
1020 a( ( icol1-2 )*lda+irow1+1 ) = zero
1021 IF( k.LT.i-1 ) THEN
1022 a( ( icol1-2 )*lda+irow1+2 ) = zero
1023 END IF
1024*
1025* Set a subdiagonal to zero now if it's possible
1026*
1027 IF( ( irow1.GT.2 ) .AND. ( icol1.GT.2 ) .AND.
1028 $ ( k-2.GT.m ) .AND. ( mod( k-1,
1029 $ hbl ).GT.1 ) ) THEN
1030 h11 = a( ( icol1-3 )*lda+irow1-2 )
1031 h10 = a( ( icol1-3 )*lda+irow1-1 )
1032 h22 = a( ( icol1-2 )*lda+irow1-1 )
1033 s = cabs1( h11 ) + cabs1( h22 )
1034 IF( cabs1( h10 ).LE.max( ulp*s, smlnum ) )
1035 $ THEN
1036 a( ( icol1-3 )*lda+irow1-1 ) = zero
1037 END IF
1038 END IF
1039 END IF
1040 ELSE IF( m.GT.l ) THEN
1041 IF( mod( k-1, hbl ).GT.0 ) THEN
1042*
1043* Following differs in comparison to pslahqr.
1044*
1045 a( ( icol1-2 )*lda+irow1 ) = a( ( icol1-2 )*
1046 $ lda+irow1 )*conjg( one-t1copy )
1047 END IF
1048 END IF
1049 v2 = vcopy( 2 )
1050 t2 = t1copy*v2
1051 work( vecsidx+( k-1 )*3+1 ) = vcopy( 2 )
1052 work( vecsidx+( k-1 )*3+2 ) = vcopy( 3 )
1053 work( vecsidx+( k-1 )*3+3 ) = t1copy
1054 t1 = t1copy
1055 IF( k.LT.istop ) THEN
1056*
1057* Do some work so next step is ready...
1058*
1059 v3 = vcopy( 3 )
1060 t3 = t1*v3
1061 DO 90 j = ( icol1-1 )*lda + irow1,
1062 $ ( min( k2( ki )+1, i-1 )+icol1-k-1 )*
1063 $ lda + irow1, lda
1064 sum = conjg( t1 )*a( j ) +
1065 $ conjg( t2 )*a( j+1 ) +
1066 $ conjg( t3 )*a( j+2 )
1067 a( j ) = a( j ) - sum
1068 a( j+1 ) = a( j+1 ) - sum*v2
1069 a( j+2 ) = a( j+2 ) - sum*v3
1070 90 CONTINUE
1071 DO 100 j = irow1 + 1, irow1 + 3
1072 sum = t1*a( ( icol1-1 )*lda+j ) +
1073 $ t2*a( icol1*lda+j ) +
1074 $ t3*a( ( icol1+1 )*lda+j )
1075 a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*lda+
1076 $ j ) - sum
1077 a( icol1*lda+j ) = a( icol1*lda+j ) -
1078 $ sum*conjg( v2 )
1079 a( ( icol1+1 )*lda+j ) = a( ( icol1+1 )*lda+
1080 $ j ) - sum*conjg( v3 )
1081 100 CONTINUE
1082 END IF
1083 irow1 = irow1 + 1
1084 icol1 = icol1 + 1
1085 110 CONTINUE
1086 END IF
1087 120 CONTINUE
1088*
1089* First part of applying the transforms is complete.
1090* Broadcasts of the Householder data is done here.
1091*
1092 DO 130 ki = 1, ibulge
1093*
1094 istart = max( k1( ki ), m )
1095 istop = min( k2( ki ), i-1 )
1096*
1097* Broadcast Householder information from the block
1098*
1099 IF( ( myrow.EQ.icurrow( ki ) ) .AND. ( npcol.GT.1 ) .AND.
1100 $ ( istart.LE.istop ) ) THEN
1101 IF( mycol.NE.icurcol( ki ) ) THEN
1102 CALL cgebr2d( contxt, 'ROW', ' ',
1103 $ 3*( istop-istart+1 ), 1,
1104 $ work( vecsidx+( istart-1 )*3+1 ),
1105 $ 3*( istop-istart+1 ), myrow,
1106 $ icurcol( ki ) )
1107 ELSE
1108 CALL cgebs2d( contxt, 'ROW', ' ',
1109 $ 3*( istop-istart+1 ), 1,
1110 $ work( vecsidx+( istart-1 )*3+1 ),
1111 $ 3*( istop-istart+1 ) )
1112 END IF
1113 END IF
1114 130 CONTINUE
1115*
1116* Now do column transforms and finish work
1117*
1118 DO 140 ki = 1, ibulge
1119*
1120 istart = max( k1( ki ), m )
1121 istop = min( k2( ki ), i-1 )
1122*
1123 IF( ( mycol.EQ.icurcol( ki ) ) .AND. ( nprow.GT.1 ) .AND.
1124 $ ( istart.LE.istop ) ) THEN
1125 IF( myrow.NE.icurrow( ki ) ) THEN
1126 CALL cgebr2d( contxt, 'COL', ' ',
1127 $ 3*( istop-istart+1 ), 1,
1128 $ work( vecsidx+( istart-1 )*3+1 ),
1129 $ 3*( istop-istart+1 ), icurrow( ki ),
1130 $ mycol )
1131 ELSE
1132 CALL cgebs2d( contxt, 'COL', ' ',
1133 $ 3*( istop-istart+1 ), 1,
1134 $ work( vecsidx+( istart-1 )*3+1 ),
1135 $ 3*( istop-istart+1 ) )
1136 END IF
1137 END IF
1138 140 CONTINUE
1139*
1140*
1141* Now do make up work to have things in block fashion
1142*
1143 DO 160 ki = 1, ibulge
1144 istart = max( k1( ki ), m )
1145 istop = min( k2( ki ), i-1 )
1146*
1147 modkm1 = mod( istart-1, hbl )
1148 IF( ( myrow.EQ.icurrow( ki ) ) .AND.
1149 $ ( mycol.EQ.icurcol( ki ) ) .AND.
1150 $ ( ( ( modkm1.EQ.hbl-2 ) .AND. ( istart.EQ.i-
1151 $ 1 ) ) .OR. ( ( modkm1.LT.hbl-2 ) .AND. ( istart.LE.i-
1152 $ 1 ) ) ) ) THEN
1153*
1154* (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)
1155*
1156 irow1 = krow( ki )
1157 icol1 = kcol( ki )
1158 DO 150 k = istart, istop
1159*
1160* Catch up on column & border work
1161*
1162 nr = min( 3, i-k+1 )
1163 v2 = work( vecsidx+( k-1 )*3+1 )
1164 v3 = work( vecsidx+( k-1 )*3+2 )
1165 t1 = work( vecsidx+( k-1 )*3+3 )
1166 t2 = t1*v2
1167 IF( k.LT.istop ) THEN
1168*
1169* Do some work so next step is ready...
1170*
1171 t3 = t1*v3
1172 CALL claref( 'Col', a, lda, .false., z, ldz,
1173 $ .false., icol1, icol1, istart,
1174 $ istop, min( istart+1, i )-k+irow1,
1175 $ irow1, liloz, lihiz,
1176 $ work( vecsidx+1 ), v2, v3, t1, t2,
1177 $ t3 )
1178 irow1 = irow1 + 1
1179 icol1 = icol1 + 1
1180 ELSE
1181 IF( ( nr.EQ.3 ) .AND. ( mod( k-1,
1182 $ hbl ).LT.hbl-2 ) ) THEN
1183 t3 = t1*v3
1184 CALL claref( 'Row', a, lda, .false., z, ldz,
1185 $ .false., irow1, irow1, istart,
1186 $ istop, icol1, min( min( k2( ki )
1187 $ +1, i-1 ), i2 )-k+icol1, liloz,
1188 $ lihiz, work( vecsidx+1 ), v2,
1189 $ v3, t1, t2, t3 )
1190 END IF
1191 END IF
1192 150 CONTINUE
1193 END IF
1194*
1195* Send SMALLA back again.
1196*
1197 k = istart
1198 modkm1 = mod( k-1, hbl )
1199 IF( ( modkm1.GE.hbl-2 ) .AND. ( k.LE.i-1 ) ) THEN
1200 IF( ( modkm1.EQ.hbl-2 ) .AND. ( k.LT.i-1 ) ) THEN
1201*
1202* Copy 6 elements from global A(K-1:K+4,K-1:K+4)
1203*
1204 itmp1 = icurrow( ki )
1205 itmp2 = icurcol( ki )
1206 CALL pclacp3( min( 6, n-k+2 ), k-1, a, desca,
1207 $ smalla( 1, 1, ki ), 6, itmp1, itmp2,
1208 $ 1 )
1209*
1210 END IF
1211 IF( modkm1.EQ.hbl-1 ) THEN
1212*
1213* Copy 6 elements from global A(K-2:K+3,K-2:K+3)
1214*
1215 itmp1 = icurrow( ki )
1216 itmp2 = icurcol( ki )
1217 CALL pclacp3( min( 6, n-k+3 ), k-2, a, desca,
1218 $ smalla( 1, 1, ki ), 6, itmp1, itmp2,
1219 $ 1 )
1220 END IF
1221 END IF
1222*
1223 160 CONTINUE
1224*
1225 170 CONTINUE
1226*
1227* Now start major set of block ROW reflections
1228*
1229 DO 180 ki = 1, ibulge
1230 IF( ( myrow.NE.icurrow( ki ) ) .AND.
1231 $ ( down.NE.icurrow( ki ) ) )GO TO 180
1232 istart = max( k1( ki ), m )
1233 istop = min( k2( ki ), i-1 )
1234*
1235 IF( ( istop.GT.istart ) .AND.
1236 $ ( mod( istart-1, hbl ).LT.hbl-2 ) .AND.
1237 $ ( icurrow( ki ).EQ.myrow ) ) THEN
1238 irow1 = min( k2( ki )+1, i-1 ) + 1
1239 CALL infog1l( irow1, hbl, npcol, mycol, jafirst,
1240 $ itmp1, itmp2 )
1241 itmp2 = locali2
1242 ii = krow( ki )
1243 CALL claref( 'Row', a, lda, wantz, z, ldz, .true., ii,
1244 $ ii, istart, istop, itmp1, itmp2, liloz,
1245 $ lihiz, work( vecsidx+1 ), v2, v3, t1, t2,
1246 $ t3 )
1247 END IF
1248 180 CONTINUE
1249*
1250 DO 220 ki = 1, ibulge
1251 IF( krow( ki ).GT.kp2row( ki ) )
1252 $ GO TO 220
1253 IF( ( myrow.NE.icurrow( ki ) ) .AND.
1254 $ ( down.NE.icurrow( ki ) ) )GO TO 220
1255 istart = max( k1( ki ), m )
1256 istop = min( k2( ki ), i-1 )
1257 IF( ( istart.EQ.istop ) .OR.
1258 $ ( mod( istart-1, hbl ).GE.hbl-2 ) .OR.
1259 $ ( icurrow( ki ).NE.myrow ) ) THEN
1260 DO 210 k = istart, istop
1261 v2 = work( vecsidx+( k-1 )*3+1 )
1262 v3 = work( vecsidx+( k-1 )*3+2 )
1263 t1 = work( vecsidx+( k-1 )*3+3 )
1264 nr = min( 3, i-k+1 )
1265 IF( ( nr.EQ.3 ) .AND. ( krow( ki ).LE.
1266 $ kp2row( ki ) ) ) THEN
1267 IF( ( k.LT.istop ) .AND.
1268 $ ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
1269 itmp1 = min( k2( ki )+1, i-1 ) + 1
1270 ELSE
1271 IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1272 itmp1 = min( k2( ki )+1, i-1 ) + 1
1273 END IF
1274 IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1275 itmp1 = min( k+4, i2 ) + 1
1276 END IF
1277 IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1278 itmp1 = min( k+3, i2 ) + 1
1279 END IF
1280 END IF
1281*
1282* Find local coor of rows K through K+2
1283*
1284 irow1 = krow( ki )
1285 irow2 = kp2row( ki )
1286 IF( ( k.GT.istart ) .AND.
1287 $ ( mod( k-1, hbl ).GE.hbl-2 ) ) THEN
1288 IF( down.EQ.icurrow( ki ) ) THEN
1289 irow1 = irow1 + 1
1290 END IF
1291 IF( myrow.EQ.icurrow( ki ) ) THEN
1292 irow2 = irow2 + 1
1293 END IF
1294 END IF
1295 CALL infog1l( itmp1, hbl, npcol, mycol, jafirst,
1296 $ icol1, icol2 )
1297 icol2 = locali2
1298 IF( ( mod( k-1, hbl ).LT.hbl-2 ) .OR.
1299 $ ( nprow.EQ.1 ) ) THEN
1300 t2 = t1*v2
1301 t3 = t1*v3
1302 CALL claref( 'Row', a, lda, wantz, z, ldz,
1303 $ .false., irow1, irow1, istart,
1304 $ istop, icol1, icol2, liloz,
1305 $ lihiz, work( vecsidx+1 ), v2,
1306 $ v3, t1, t2, t3 )
1307 END IF
1308 IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
1309 $ ( nprow.GT.1 ) ) THEN
1310 IF( irow1.NE.irow2 ) THEN
1311 CALL cgesd2d( contxt, 2, icol2-icol1+1,
1312 $ a( ( icol1-1 )*lda+irow1 ),
1313 $ lda, down, mycol )
1314 IF( skip .AND. ( istart.EQ.istop ) ) THEN
1315 CALL cgerv2d( contxt, 2, icol2-icol1+1,
1316 $ a( ( icol1-1 )*lda+
1317 $ irow1 ), lda, down,
1318 $ mycol )
1319 END IF
1320 ELSE IF( skip ) THEN
1321 CALL cgerv2d( contxt, 2, icol2-icol1+1,
1322 $ work( irbuf+1 ), 2, up,
1323 $ mycol )
1324 t2 = t1*v2
1325 t3 = t1*v3
1326 DO 190 j = icol1, icol2
1327 sum = conjg( t1 )*
1328 $ work( irbuf+2*( j-icol1 )+1 ) +
1329 $ conjg( t2 )*work( irbuf+2*
1330 $ ( j-icol1 )+2 ) +
1331 $ conjg( t3 )*a( ( j-1 )*lda+
1332 $ irow1 )
1333 work( irbuf+2*( j-icol1 )+1 )
1334 $ = work( irbuf+2*( j-icol1 )+1 ) -
1335 $ sum
1336 work( irbuf+2*( j-icol1 )+2 )
1337 $ = work( irbuf+2*( j-icol1 )+2 ) -
1338 $ sum*v2
1339 a( ( j-1 )*lda+irow1 ) = a( ( j-1 )*
1340 $ lda+irow1 ) - sum*v3
1341 190 CONTINUE
1342 IF( istart.EQ.istop ) THEN
1343 CALL cgesd2d( contxt, 2, icol2-icol1+1,
1344 $ work( irbuf+1 ), 2, up,
1345 $ mycol )
1346 END IF
1347 END IF
1348 END IF
1349 IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
1350 $ ( nprow.GT.1 ) ) THEN
1351 IF( irow1.EQ.irow2 ) THEN
1352 IF( istart.EQ.istop ) THEN
1353 CALL cgesd2d( contxt, 2, icol2-icol1+1,
1354 $ a( ( icol1-1 )*lda+irow1-
1355 $ 1 ), lda, down, mycol )
1356 END IF
1357 IF( skip ) THEN
1358 CALL cgerv2d( contxt, 2, icol2-icol1+1,
1359 $ a( ( icol1-1 )*lda+irow1-
1360 $ 1 ), lda, down, mycol )
1361 END IF
1362 ELSE IF( skip ) THEN
1363 IF( istart.EQ.istop ) THEN
1364 CALL cgerv2d( contxt, 2, icol2-icol1+1,
1365 $ work( irbuf+1 ), 2, up,
1366 $ mycol )
1367 END IF
1368 t2 = t1*v2
1369 t3 = t1*v3
1370 DO 200 j = icol1, icol2
1371 sum = conjg( t1 )*
1372 $ work( irbuf+2*( j-icol1 )+2 ) +
1373 $ conjg( t2 )*a( ( j-1 )*lda+
1374 $ irow1 ) + conjg( t3 )*
1375 $ a( ( j-1 )*lda+irow1+1 )
1376 work( irbuf+2*( j-icol1 )+2 )
1377 $ = work( irbuf+2*( j-icol1 )+2 ) -
1378 $ sum
1379 a( ( j-1 )*lda+irow1 ) = a( ( j-1 )*
1380 $ lda+irow1 ) - sum*v2
1381 a( ( j-1 )*lda+irow1+1 ) = a( ( j-1 )*
1382 $ lda+irow1+1 ) - sum*v3
1383 200 CONTINUE
1384 CALL cgesd2d( contxt, 2, icol2-icol1+1,
1385 $ work( irbuf+1 ), 2, up,
1386 $ mycol )
1387*
1388 END IF
1389 END IF
1390 END IF
1391 210 CONTINUE
1392 END IF
1393 220 CONTINUE
1394*
1395 IF( skip )
1396 $ GO TO 290
1397*
1398 DO 260 ki = 1, ibulge
1399 IF( krow( ki ).GT.kp2row( ki ) )
1400 $ GO TO 260
1401 IF( ( myrow.NE.icurrow( ki ) ) .AND.
1402 $ ( down.NE.icurrow( ki ) ) )GO TO 260
1403 istart = max( k1( ki ), m )
1404 istop = min( k2( ki ), i-1 )
1405 IF( ( istart.EQ.istop ) .OR.
1406 $ ( mod( istart-1, hbl ).GE.hbl-2 ) .OR.
1407 $ ( icurrow( ki ).NE.myrow ) ) THEN
1408 DO 250 k = istart, istop
1409 v2 = work( vecsidx+( k-1 )*3+1 )
1410 v3 = work( vecsidx+( k-1 )*3+2 )
1411 t1 = work( vecsidx+( k-1 )*3+3 )
1412 nr = min( 3, i-k+1 )
1413 IF( ( nr.EQ.3 ) .AND. ( krow( ki ).LE.
1414 $ kp2row( ki ) ) ) THEN
1415 IF( ( k.LT.istop ) .AND.
1416 $ ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
1417 itmp1 = min( k2( ki )+1, i-1 ) + 1
1418 ELSE
1419 IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1420 itmp1 = min( k2( ki )+1, i-1 ) + 1
1421 END IF
1422 IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1423 itmp1 = min( k+4, i2 ) + 1
1424 END IF
1425 IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1426 itmp1 = min( k+3, i2 ) + 1
1427 END IF
1428 END IF
1429*
1430* Find local coor of rows K through K+2
1431*
1432 irow1 = krow( ki )
1433 irow2 = kp2row( ki )
1434 IF( ( k.GT.istart ) .AND.
1435 $ ( mod( k-1, hbl ).GE.hbl-2 ) ) THEN
1436 IF( down.EQ.icurrow( ki ) ) THEN
1437 irow1 = irow1 + 1
1438 END IF
1439 IF( myrow.EQ.icurrow( ki ) ) THEN
1440 irow2 = irow2 + 1
1441 END IF
1442 END IF
1443 CALL infog1l( itmp1, hbl, npcol, mycol, jafirst,
1444 $ icol1, icol2 )
1445 icol2 = locali2
1446 IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
1447 $ ( nprow.GT.1 ) ) THEN
1448 IF( irow1.EQ.irow2 ) THEN
1449 CALL cgerv2d( contxt, 2, icol2-icol1+1,
1450 $ work( irbuf+1 ), 2, up,
1451 $ mycol )
1452 t2 = t1*v2
1453 t3 = t1*v3
1454 DO 230 j = icol1, icol2
1455 sum = conjg( t1 )*
1456 $ work( irbuf+2*( j-icol1 )+1 ) +
1457 $ conjg( t2 )*work( irbuf+2*
1458 $ ( j-icol1 )+2 ) +
1459 $ conjg( t3 )*a( ( j-1 )*lda+
1460 $ irow1 )
1461 work( irbuf+2*( j-icol1 )+1 )
1462 $ = work( irbuf+2*( j-icol1 )+1 ) -
1463 $ sum
1464 work( irbuf+2*( j-icol1 )+2 )
1465 $ = work( irbuf+2*( j-icol1 )+2 ) -
1466 $ sum*v2
1467 a( ( j-1 )*lda+irow1 ) = a( ( j-1 )*
1468 $ lda+irow1 ) - sum*v3
1469 230 CONTINUE
1470 IF( istart.EQ.istop ) THEN
1471 CALL cgesd2d( contxt, 2, icol2-icol1+1,
1472 $ work( irbuf+1 ), 2, up,
1473 $ mycol )
1474 END IF
1475 END IF
1476 END IF
1477 IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
1478 $ ( nprow.GT.1 ) ) THEN
1479 IF( irow1.NE.irow2 ) THEN
1480 IF( istart.EQ.istop ) THEN
1481 CALL cgerv2d( contxt, 2, icol2-icol1+1,
1482 $ work( irbuf+1 ), 2, up,
1483 $ mycol )
1484 END IF
1485 t2 = t1*v2
1486 t3 = t1*v3
1487 DO 240 j = icol1, icol2
1488 sum = conjg( t1 )*
1489 $ work( irbuf+2*( j-icol1 )+2 ) +
1490 $ conjg( t2 )*a( ( j-1 )*lda+
1491 $ irow1 ) + conjg( t3 )*
1492 $ a( ( j-1 )*lda+irow1+1 )
1493 work( irbuf+2*( j-icol1 )+2 )
1494 $ = work( irbuf+2*( j-icol1 )+2 ) -
1495 $ sum
1496 a( ( j-1 )*lda+irow1 ) = a( ( j-1 )*
1497 $ lda+irow1 ) - sum*v2
1498 a( ( j-1 )*lda+irow1+1 ) = a( ( j-1 )*
1499 $ lda+irow1+1 ) - sum*v3
1500 240 CONTINUE
1501 CALL cgesd2d( contxt, 2, icol2-icol1+1,
1502 $ work( irbuf+1 ), 2, up,
1503 $ mycol )
1504 END IF
1505 END IF
1506 END IF
1507 250 CONTINUE
1508 END IF
1509 260 CONTINUE
1510*
1511 DO 280 ki = 1, ibulge
1512 IF( krow( ki ).GT.kp2row( ki ) )
1513 $ GO TO 280
1514 IF( ( myrow.NE.icurrow( ki ) ) .AND.
1515 $ ( down.NE.icurrow( ki ) ) )GO TO 280
1516 istart = max( k1( ki ), m )
1517 istop = min( k2( ki ), i-1 )
1518 IF( ( istart.EQ.istop ) .OR.
1519 $ ( mod( istart-1, hbl ).GE.hbl-2 ) .OR.
1520 $ ( icurrow( ki ).NE.myrow ) ) THEN
1521 DO 270 k = istart, istop
1522 v2 = work( vecsidx+( k-1 )*3+1 )
1523 v3 = work( vecsidx+( k-1 )*3+2 )
1524 t1 = work( vecsidx+( k-1 )*3+3 )
1525 nr = min( 3, i-k+1 )
1526 IF( ( nr.EQ.3 ) .AND. ( krow( ki ).LE.
1527 $ kp2row( ki ) ) ) THEN
1528 IF( ( k.LT.istop ) .AND.
1529 $ ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
1530 itmp1 = min( k2( ki )+1, i-1 ) + 1
1531 ELSE
1532 IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1533 itmp1 = min( k2( ki )+1, i-1 ) + 1
1534 END IF
1535 IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1536 itmp1 = min( k+4, i2 ) + 1
1537 END IF
1538 IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1539 itmp1 = min( k+3, i2 ) + 1
1540 END IF
1541 END IF
1542*
1543* Find local coor of rows K through K+2
1544*
1545 irow1 = krow( ki )
1546 irow2 = kp2row( ki )
1547 IF( ( k.GT.istart ) .AND.
1548 $ ( mod( k-1, hbl ).GE.hbl-2 ) ) THEN
1549 IF( down.EQ.icurrow( ki ) ) THEN
1550 irow1 = irow1 + 1
1551 END IF
1552 IF( myrow.EQ.icurrow( ki ) ) THEN
1553 irow2 = irow2 + 1
1554 END IF
1555 END IF
1556 CALL infog1l( itmp1, hbl, npcol, mycol, jafirst,
1557 $ icol1, icol2 )
1558 icol2 = locali2
1559 IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
1560 $ ( nprow.GT.1 ) ) THEN
1561 IF( irow1.NE.irow2 ) THEN
1562 IF( istart.EQ.istop ) THEN
1563 CALL cgerv2d( contxt, 2, icol2-icol1+1,
1564 $ a( ( icol1-1 )*lda+
1565 $ irow1 ), lda, down,
1566 $ mycol )
1567 END IF
1568 END IF
1569 END IF
1570 IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
1571 $ ( nprow.GT.1 ) ) THEN
1572 IF( irow1.EQ.irow2 ) THEN
1573 CALL cgerv2d( contxt, 2, icol2-icol1+1,
1574 $ a( ( icol1-1 )*lda+irow1-
1575 $ 1 ), lda, down, mycol )
1576 END IF
1577 END IF
1578 END IF
1579 270 CONTINUE
1580 END IF
1581 280 CONTINUE
1582*
1583 290 CONTINUE
1584*
1585* Now start major set of block COL reflections
1586*
1587 DO 300 ki = 1, ibulge
1588 IF( ( mycol.NE.icurcol( ki ) ) .AND.
1589 $ ( right.NE.icurcol( ki ) ) )GO TO 300
1590 istart = max( k1( ki ), m )
1591 istop = min( k2( ki ), i-1 )
1592*
1593 IF( ( ( mod( istart-1, hbl ).LT.hbl-2 ) .OR. ( npcol.EQ.
1594 $ 1 ) ) .AND. ( icurcol( ki ).EQ.mycol ) .AND.
1595 $ ( i-istop+1.GE.3 ) ) THEN
1596 k = istart
1597 IF( ( k.LT.istop ) .AND. ( mod( k-1,
1598 $ hbl ).LT.hbl-2 ) ) THEN
1599 itmp1 = min( istart+1, i ) - 1
1600 ELSE
1601 IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1602 itmp1 = min( k+3, i )
1603 END IF
1604 IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1605 itmp1 = max( i1, k-1 ) - 1
1606 END IF
1607 IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1608 itmp1 = max( i1, k-2 ) - 1
1609 END IF
1610 END IF
1611*
1612 icol1 = kcol( ki )
1613 CALL infog1l( i1, hbl, nprow, myrow, iafirst, irow1,
1614 $ irow2 )
1615 irow2 = numroc( itmp1, hbl, myrow, iafirst, nprow )
1616 IF( irow1.LE.irow2 ) THEN
1617 itmp2 = irow2
1618 ELSE
1619 itmp2 = -1
1620 END IF
1621 CALL claref( 'Col', a, lda, wantz, z, ldz, .true.,
1622 $ icol1, icol1, istart, istop, irow1,
1623 $ irow2, liloz, lihiz, work( vecsidx+1 ),
1624 $ v2, v3, t1, t2, t3 )
1625 k = istop
1626 IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1627*
1628* Do from ITMP1+1 to MIN(K+3,I)
1629*
1630 IF( mod( k-1, hbl ).LT.hbl-3 ) THEN
1631 irow1 = itmp2 + 1
1632 IF( mod( ( itmp1 / hbl ), nprow ).EQ.myrow )
1633 $ THEN
1634 IF( itmp2.GT.0 ) THEN
1635 irow2 = itmp2 + min( k+3, i ) - itmp1
1636 ELSE
1637 irow2 = irow1 - 1
1638 END IF
1639 ELSE
1640 irow2 = irow1 - 1
1641 END IF
1642 ELSE
1643 CALL infog1l( itmp1+1, hbl, nprow, myrow,
1644 $ iafirst, irow1, irow2 )
1645 irow2 = numroc( min( k+3, i ), hbl, myrow,
1646 $ iafirst, nprow )
1647 END IF
1648 v2 = work( vecsidx+( k-1 )*3+1 )
1649 v3 = work( vecsidx+( k-1 )*3+2 )
1650 t1 = work( vecsidx+( k-1 )*3+3 )
1651 t2 = t1*v2
1652 t3 = t1*v3
1653 icol1 = kcol( ki ) + istop - istart
1654 CALL claref( 'Col', a, lda, .false., z, ldz,
1655 $ .false., icol1, icol1, istart, istop,
1656 $ irow1, irow2, liloz, lihiz,
1657 $ work( vecsidx+1 ), v2, v3, t1, t2,
1658 $ t3 )
1659 END IF
1660 END IF
1661 300 CONTINUE
1662*
1663 DO 360 ki = 1, ibulge
1664 IF( kcol( ki ).GT.kp2col( ki ) )
1665 $ GO TO 360
1666 IF( ( mycol.NE.icurcol( ki ) ) .AND.
1667 $ ( right.NE.icurcol( ki ) ) )GO TO 360
1668 istart = max( k1( ki ), m )
1669 istop = min( k2( ki ), i-1 )
1670 IF( mod( istart-1, hbl ).GE.hbl-2 ) THEN
1671*
1672* INFO is found in a buffer
1673*
1674 ispec = 1
1675 ELSE
1676*
1677* All INFO is local
1678*
1679 ispec = 0
1680 END IF
1681 DO 350 k = istart, istop
1682*
1683 v2 = work( vecsidx+( k-1 )*3+1 )
1684 v3 = work( vecsidx+( k-1 )*3+2 )
1685 t1 = work( vecsidx+( k-1 )*3+3 )
1686 nr = min( 3, i-k+1 )
1687 IF( ( nr.EQ.3 ) .AND. ( kcol( ki ).LE.kp2col( ki ) ) )
1688 $ THEN
1689*
1690 IF( ( k.LT.istop ) .AND.
1691 $ ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
1692 itmp1 = min( istart+1, i ) - 1
1693 ELSE
1694 IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1695 itmp1 = min( k+3, i )
1696 END IF
1697 IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1698 itmp1 = max( i1, k-1 ) - 1
1699 END IF
1700 IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1701 itmp1 = max( i1, k-2 ) - 1
1702 END IF
1703 END IF
1704 IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1705 icol1 = kcol( ki ) + k - istart
1706 icol2 = kp2col( ki ) + k - istart
1707 ELSE
1708 icol1 = kcol( ki )
1709 icol2 = kp2col( ki )
1710 IF( k.GT.istart ) THEN
1711 IF( right.EQ.icurcol( ki ) ) THEN
1712 icol1 = icol1 + 1
1713 END IF
1714 IF( mycol.EQ.icurcol( ki ) ) THEN
1715 icol2 = icol2 + 1
1716 END IF
1717 END IF
1718 END IF
1719 CALL infog1l( i1, hbl, nprow, myrow, iafirst,
1720 $ irow1, irow2 )
1721 irow2 = numroc( itmp1, hbl, myrow, iafirst, nprow )
1722 IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
1723 $ ( npcol.GT.1 ) ) THEN
1724 IF( icol1.NE.icol2 ) THEN
1725 CALL cgesd2d( contxt, irow2-irow1+1, 2,
1726 $ a( ( icol1-1 )*lda+irow1 ),
1727 $ lda, myrow, right )
1728 IF( ( istart.EQ.istop ) .AND. skip ) THEN
1729 CALL cgerv2d( contxt, irow2-irow1+1, 2,
1730 $ a( ( icol1-1 )*lda+irow1 ),
1731 $ lda, myrow, right )
1732 END IF
1733 ELSE IF( skip ) THEN
1734 t2 = t1*v2
1735 t3 = t1*v3
1736 CALL cgerv2d( contxt, irow2-irow1+1, 2,
1737 $ work( icbuf+1 ), irow2-irow1+1,
1738 $ myrow, left )
1739 ii = icbuf - irow1 + 1
1740 jj = icbuf + irow2 - 2*irow1 + 2
1741 DO 310 j = irow1, irow2
1742 sum = t1*work( ii+j ) + t2*work( jj+j ) +
1743 $ t3*a( ( icol1-1 )*lda+j )
1744 work( ii+j ) = work( ii+j ) - sum
1745 work( jj+j ) = work( jj+j ) -
1746 $ sum*conjg( v2 )
1747 a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*
1748 $ lda+j ) - sum*conjg( v3 )
1749 310 CONTINUE
1750 IF( istart.EQ.istop ) THEN
1751 CALL cgesd2d( contxt, irow2-irow1+1, 2,
1752 $ work( icbuf+1 ),
1753 $ irow2-irow1+1, myrow, left )
1754 END IF
1755 END IF
1756 END IF
1757 IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
1758 $ ( npcol.GT.1 ) ) THEN
1759 IF( icol1.EQ.icol2 ) THEN
1760 IF( istart.EQ.istop ) THEN
1761 CALL cgesd2d( contxt, irow2-irow1+1, 2,
1762 $ a( ( icol1-2 )*lda+irow1 ),
1763 $ lda, myrow, right )
1764 END IF
1765 IF( skip ) THEN
1766 CALL cgerv2d( contxt, irow2-irow1+1, 2,
1767 $ a( ( icol1-2 )*lda+irow1 ),
1768 $ lda, myrow, right )
1769 END IF
1770 ELSE IF( skip ) THEN
1771 IF( istart.EQ.istop ) THEN
1772 CALL cgerv2d( contxt, irow2-irow1+1, 2,
1773 $ work( icbuf+1 ),
1774 $ irow2-irow1+1, myrow, left )
1775 END IF
1776 t2 = t1*v2
1777 t3 = t1*v3
1778 ii = icbuf + irow2 - 2*irow1 + 2
1779 DO 320 j = irow1, irow2
1780 sum = t1*work( j+ii ) +
1781 $ t2*a( ( icol1-1 )*lda+j ) +
1782 $ t3*a( icol1*lda+j )
1783 work( j+ii ) = work( j+ii ) - sum
1784 a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*
1785 $ lda+j ) - sum*conjg( v2 )
1786 a( icol1*lda+j ) = a( icol1*lda+j ) -
1787 $ sum*conjg( v3 )
1788 320 CONTINUE
1789 CALL cgesd2d( contxt, irow2-irow1+1, 2,
1790 $ work( icbuf+1 ), irow2-irow1+1,
1791 $ myrow, left )
1792 END IF
1793 END IF
1794*
1795* If we want Z and we haven't already done any Z
1796*
1797 IF( ( wantz ) .AND. ( mod( k-1,
1798 $ hbl ).GE.hbl-2 ) .AND. ( npcol.GT.1 ) ) THEN
1799*
1800* Accumulate transformations in the matrix Z
1801*
1802 irow1 = liloz
1803 irow2 = lihiz
1804 IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1805 IF( icol1.NE.icol2 ) THEN
1806 CALL cgesd2d( contxt, irow2-irow1+1, 2,
1807 $ z( ( icol1-1 )*ldz+irow1 ),
1808 $ ldz, myrow, right )
1809 IF( ( istart.EQ.istop ) .AND. skip ) THEN
1810 CALL cgerv2d( contxt, irow2-irow1+1, 2,
1811 $ z( ( icol1-1 )*ldz+
1812 $ irow1 ), ldz, myrow,
1813 $ right )
1814 END IF
1815 ELSE IF( skip ) THEN
1816 CALL cgerv2d( contxt, irow2-irow1+1, 2,
1817 $ work( izbuf+1 ),
1818 $ irow2-irow1+1, myrow, left )
1819 t2 = t1*v2
1820 t3 = t1*v3
1821 icol1 = ( icol1-1 )*ldz
1822 ii = izbuf - irow1 + 1
1823 jj = izbuf + irow2 - 2*irow1 + 2
1824 DO 330 j = irow1, irow2
1825 sum = t1*work( ii+j ) +
1826 $ t2*work( jj+j ) + t3*z( icol1+j )
1827 work( ii+j ) = work( ii+j ) - sum
1828 work( jj+j ) = work( jj+j ) -
1829 $ sum*conjg( v2 )
1830 z( icol1+j ) = z( icol1+j ) -
1831 $ sum*conjg( v3 )
1832 330 CONTINUE
1833 IF( istart.EQ.istop ) THEN
1834 CALL cgesd2d( contxt, irow2-irow1+1, 2,
1835 $ work( izbuf+1 ),
1836 $ irow2-irow1+1, myrow,
1837 $ left )
1838 END IF
1839 END IF
1840 END IF
1841 IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1842 IF( icol1.EQ.icol2 ) THEN
1843 IF( istart.EQ.istop ) THEN
1844 CALL cgesd2d( contxt, irow2-irow1+1, 2,
1845 $ z( ( icol1-2 )*ldz+
1846 $ irow1 ), ldz, myrow,
1847 $ right )
1848 END IF
1849 IF( skip ) THEN
1850 CALL cgerv2d( contxt, irow2-irow1+1, 2,
1851 $ z( ( icol1-2 )*ldz+
1852 $ irow1 ), ldz, myrow,
1853 $ right )
1854 END IF
1855 ELSE IF( skip ) THEN
1856 IF( istart.EQ.istop ) THEN
1857 CALL cgerv2d( contxt, irow2-irow1+1, 2,
1858 $ work( izbuf+1 ),
1859 $ irow2-irow1+1, myrow,
1860 $ left )
1861 END IF
1862 t2 = t1*v2
1863 t3 = t1*v3
1864 icol1 = ( icol1-1 )*ldz
1865 ii = izbuf + irow2 - 2*irow1 + 2
1866 DO 340 j = irow1, irow2
1867 sum = t1*work( ii+j ) +
1868 $ t2*z( j+icol1 ) +
1869 $ t3*z( j+icol1+ldz )
1870 work( ii+j ) = work( ii+j ) - sum
1871 z( j+icol1 ) = z( j+icol1 ) -
1872 $ sum*conjg( v2 )
1873 z( j+icol1+ldz ) = z( j+icol1+ldz ) -
1874 $ sum*conjg( v3 )
1875 340 CONTINUE
1876 CALL cgesd2d( contxt, irow2-irow1+1, 2,
1877 $ work( izbuf+1 ),
1878 $ irow2-irow1+1, myrow, left )
1879 END IF
1880 END IF
1881 END IF
1882 END IF
1883 350 CONTINUE
1884 360 CONTINUE
1885*
1886 IF( skip )
1887 $ GO TO 450
1888*
1889 DO 420 ki = 1, ibulge
1890 IF( kcol( ki ).GT.kp2col( ki ) )
1891 $ GO TO 420
1892 IF( ( mycol.NE.icurcol( ki ) ) .AND.
1893 $ ( right.NE.icurcol( ki ) ) )GO TO 420
1894 istart = max( k1( ki ), m )
1895 istop = min( k2( ki ), i-1 )
1896 IF( mod( istart-1, hbl ).GE.hbl-2 ) THEN
1897*
1898* INFO is found in a buffer
1899*
1900 ispec = 1
1901 ELSE
1902*
1903* All INFO is local
1904*
1905 ispec = 0
1906 END IF
1907 DO 410 k = istart, istop
1908*
1909 v2 = work( vecsidx+( k-1 )*3+1 )
1910 v3 = work( vecsidx+( k-1 )*3+2 )
1911 t1 = work( vecsidx+( k-1 )*3+3 )
1912 nr = min( 3, i-k+1 )
1913 IF( ( nr.EQ.3 ) .AND. ( kcol( ki ).LE.kp2col( ki ) ) )
1914 $ THEN
1915*
1916 IF( ( k.LT.istop ) .AND.
1917 $ ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
1918 itmp1 = min( istart+1, i ) - 1
1919 ELSE
1920 IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1921 itmp1 = min( k+3, i )
1922 END IF
1923 IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
1924 itmp1 = max( i1, k-1 ) - 1
1925 END IF
1926 IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
1927 itmp1 = max( i1, k-2 ) - 1
1928 END IF
1929 END IF
1930 IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
1931 icol1 = kcol( ki ) + k - istart
1932 icol2 = kp2col( ki ) + k - istart
1933 ELSE
1934 icol1 = kcol( ki )
1935 icol2 = kp2col( ki )
1936 IF( k.GT.istart ) THEN
1937 IF( right.EQ.icurcol( ki ) ) THEN
1938 icol1 = icol1 + 1
1939 END IF
1940 IF( mycol.EQ.icurcol( ki ) ) THEN
1941 icol2 = icol2 + 1
1942 END IF
1943 END IF
1944 END IF
1945 CALL infog1l( i1, hbl, nprow, myrow, iafirst,
1946 $ irow1, irow2 )
1947 irow2 = numroc( itmp1, hbl, myrow, iafirst, nprow )
1948 IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
1949 $ ( npcol.GT.1 ) ) THEN
1950 IF( icol1.EQ.icol2 ) THEN
1951 CALL cgerv2d( contxt, irow2-irow1+1, 2,
1952 $ work( icbuf+1 ), irow2-irow1+1,
1953 $ myrow, left )
1954 t2 = t1*v2
1955 t3 = t1*v3
1956 ii = icbuf - irow1 + 1
1957 jj = icbuf + irow2 - 2*irow1 + 2
1958 DO 370 j = irow1, irow2
1959 sum = t1*work( ii+j ) + t2*work( jj+j ) +
1960 $ t3*a( ( icol1-1 )*lda+j )
1961 work( ii+j ) = work( ii+j ) - sum
1962 work( jj+j ) = work( jj+j ) -
1963 $ sum*conjg( v2 )
1964 a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*
1965 $ lda+j ) - sum*conjg( v3 )
1966 370 CONTINUE
1967 IF( istart.EQ.istop ) THEN
1968 CALL cgesd2d( contxt, irow2-irow1+1, 2,
1969 $ work( icbuf+1 ),
1970 $ irow2-irow1+1, myrow, left )
1971 END IF
1972 END IF
1973 END IF
1974 IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
1975 $ ( npcol.GT.1 ) ) THEN
1976 IF( icol1.NE.icol2 ) THEN
1977 IF( istart.EQ.istop ) THEN
1978 CALL cgerv2d( contxt, irow2-irow1+1, 2,
1979 $ work( icbuf+1 ),
1980 $ irow2-irow1+1, myrow, left )
1981 END IF
1982 t2 = t1*v2
1983 t3 = t1*v3
1984 ii = icbuf + irow2 - 2*irow1 + 2
1985 DO 380 j = irow1, irow2
1986 sum = t1*work( j+ii ) +
1987 $ t2*a( ( icol1-1 )*lda+j ) +
1988 $ t3*a( icol1*lda+j )
1989 work( j+ii ) = work( j+ii ) - sum
1990 a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*
1991 $ lda+j ) - sum*conjg( v2 )
1992 a( icol1*lda+j ) = a( icol1*lda+j ) -
1993 $ sum*conjg( v3 )
1994 380 CONTINUE
1995 CALL cgesd2d( contxt, irow2-irow1+1, 2,
1996 $ work( icbuf+1 ), irow2-irow1+1,
1997 $ myrow, left )
1998 END IF
1999 END IF
2000*
2001*
2002* If we want Z and we haven't already done any Z
2003 IF( ( wantz ) .AND. ( mod( k-1,
2004 $ hbl ).GE.hbl-2 ) .AND. ( npcol.GT.1 ) ) THEN
2005*
2006* Accumulate transformations in the matrix Z
2007*
2008 irow1 = liloz
2009 irow2 = lihiz
2010 IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
2011 IF( icol1.EQ.icol2 ) THEN
2012 CALL cgerv2d( contxt, irow2-irow1+1, 2,
2013 $ work( izbuf+1 ),
2014 $ irow2-irow1+1, myrow, left )
2015 t2 = t1*v2
2016 t3 = t1*v3
2017 icol1 = ( icol1-1 )*ldz
2018 ii = izbuf - irow1 + 1
2019 jj = izbuf + irow2 - 2*irow1 + 2
2020 DO 390 j = irow1, irow2
2021 sum = t1*work( ii+j ) +
2022 $ t2*work( jj+j ) + t3*z( icol1+j )
2023 work( ii+j ) = work( ii+j ) - sum
2024 work( jj+j ) = work( jj+j ) -
2025 $ sum*conjg( v2 )
2026 z( icol1+j ) = z( icol1+j ) -
2027 $ sum*conjg( v3 )
2028 390 CONTINUE
2029 IF( istart.EQ.istop ) THEN
2030 CALL cgesd2d( contxt, irow2-irow1+1, 2,
2031 $ work( izbuf+1 ),
2032 $ irow2-irow1+1, myrow,
2033 $ left )
2034 END IF
2035 END IF
2036 END IF
2037 IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
2038 IF( icol1.NE.icol2 ) THEN
2039 IF( istart.EQ.istop ) THEN
2040 CALL cgerv2d( contxt, irow2-irow1+1, 2,
2041 $ work( izbuf+1 ),
2042 $ irow2-irow1+1, myrow,
2043 $ left )
2044 END IF
2045 t2 = t1*v2
2046 t3 = t1*v3
2047 icol1 = ( icol1-1 )*ldz
2048 ii = izbuf + irow2 - 2*irow1 + 2
2049 DO 400 j = irow1, irow2
2050 sum = t1*work( ii+j ) +
2051 $ t2*z( j+icol1 ) +
2052 $ t3*z( j+icol1+ldz )
2053 work( ii+j ) = work( ii+j ) - sum
2054 z( j+icol1 ) = z( j+icol1 ) -
2055 $ sum*conjg( v2 )
2056 z( j+icol1+ldz ) = z( j+icol1+ldz ) -
2057 $ sum*conjg( v3 )
2058 400 CONTINUE
2059 CALL cgesd2d( contxt, irow2-irow1+1, 2,
2060 $ work( izbuf+1 ),
2061 $ irow2-irow1+1, myrow, left )
2062 END IF
2063 END IF
2064 END IF
2065 END IF
2066 410 CONTINUE
2067 420 CONTINUE
2068*
2069 DO 440 ki = 1, ibulge
2070 IF( kcol( ki ).GT.kp2col( ki ) )
2071 $ GO TO 440
2072 IF( ( mycol.NE.icurcol( ki ) ) .AND.
2073 $ ( right.NE.icurcol( ki ) ) )GO TO 440
2074 istart = max( k1( ki ), m )
2075 istop = min( k2( ki ), i-1 )
2076 IF( mod( istart-1, hbl ).GE.hbl-2 ) THEN
2077*
2078* INFO is found in a buffer
2079*
2080 ispec = 1
2081 ELSE
2082*
2083* All INFO is local
2084*
2085 ispec = 0
2086 END IF
2087 DO 430 k = istart, istop
2088*
2089 v2 = work( vecsidx+( k-1 )*3+1 )
2090 v3 = work( vecsidx+( k-1 )*3+2 )
2091 t1 = work( vecsidx+( k-1 )*3+3 )
2092 nr = min( 3, i-k+1 )
2093 IF( ( nr.EQ.3 ) .AND. ( kcol( ki ).LE.kp2col( ki ) ) )
2094 $ THEN
2095*
2096 IF( ( k.LT.istop ) .AND.
2097 $ ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
2098 itmp1 = min( istart+1, i ) - 1
2099 ELSE
2100 IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
2101 itmp1 = min( k+3, i )
2102 END IF
2103 IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
2104 itmp1 = max( i1, k-1 ) - 1
2105 END IF
2106 IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
2107 itmp1 = max( i1, k-2 ) - 1
2108 END IF
2109 END IF
2110 IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
2111 icol1 = kcol( ki ) + k - istart
2112 icol2 = kp2col( ki ) + k - istart
2113 ELSE
2114 icol1 = kcol( ki )
2115 icol2 = kp2col( ki )
2116 IF( k.GT.istart ) THEN
2117 IF( right.EQ.icurcol( ki ) ) THEN
2118 icol1 = icol1 + 1
2119 END IF
2120 IF( mycol.EQ.icurcol( ki ) ) THEN
2121 icol2 = icol2 + 1
2122 END IF
2123 END IF
2124 END IF
2125 CALL infog1l( i1, hbl, nprow, myrow, iafirst,
2126 $ irow1, irow2 )
2127 irow2 = numroc( itmp1, hbl, myrow, iafirst, nprow )
2128 IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
2129 $ ( npcol.GT.1 ) ) THEN
2130 IF( icol1.NE.icol2 ) THEN
2131 IF( istart.EQ.istop ) THEN
2132 CALL cgerv2d( contxt, irow2-irow1+1, 2,
2133 $ a( ( icol1-1 )*lda+irow1 ),
2134 $ lda, myrow, right )
2135 END IF
2136 END IF
2137 END IF
2138 IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
2139 $ ( npcol.GT.1 ) ) THEN
2140 IF( icol1.EQ.icol2 ) THEN
2141 CALL cgerv2d( contxt, irow2-irow1+1, 2,
2142 $ a( ( icol1-2 )*lda+irow1 ),
2143 $ lda, myrow, right )
2144 END IF
2145 END IF
2146*
2147* If we want Z and we haven't already done any Z
2148*
2149 IF( ( wantz ) .AND. ( mod( k-1,
2150 $ hbl ).GE.hbl-2 ) .AND. ( npcol.GT.1 ) ) THEN
2151*
2152* Accumulate transformations in the matrix Z
2153*
2154 irow1 = liloz
2155 irow2 = lihiz
2156 IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
2157 IF( icol1.NE.icol2 ) THEN
2158 IF( istart.EQ.istop ) THEN
2159 CALL cgerv2d( contxt, irow2-irow1+1, 2,
2160 $ z( ( icol1-1 )*ldz+
2161 $ irow1 ), ldz, myrow,
2162 $ right )
2163 END IF
2164 END IF
2165 END IF
2166 IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
2167 IF( icol1.EQ.icol2 ) THEN
2168 CALL cgerv2d( contxt, irow2-irow1+1, 2,
2169 $ z( ( icol1-2 )*ldz+irow1 ),
2170 $ ldz, myrow, right )
2171 END IF
2172 END IF
2173 END IF
2174 END IF
2175 430 CONTINUE
2176 440 CONTINUE
2177*
2178* Column work done
2179*
2180 450 CONTINUE
2181*
2182* Now do NR=2 work
2183*
2184 DO 530 ki = 1, ibulge
2185 istart = max( k1( ki ), m )
2186 istop = min( k2( ki ), i-1 )
2187 IF( mod( istart-1, hbl ).GE.hbl-2 ) THEN
2188*
2189* INFO is found in a buffer
2190*
2191 ispec = 1
2192 ELSE
2193*
2194* All INFO is local
2195*
2196 ispec = 0
2197 END IF
2198*
2199 DO 520 k = istart, istop
2200*
2201 v2 = work( vecsidx+( k-1 )*3+1 )
2202 v3 = work( vecsidx+( k-1 )*3+2 )
2203 t1 = work( vecsidx+( k-1 )*3+3 )
2204 nr = min( 3, i-k+1 )
2205 IF( nr.EQ.2 ) THEN
2206 IF ( icurrow( ki ).EQ.myrow ) THEN
2207 t2 = t1*v2
2208 END IF
2209 IF ( icurcol( ki ).EQ.mycol ) THEN
2210 t2 = t1*v2
2211 END IF
2212*
2213* Apply G from the left to transform the rows of the matrix
2214* in columns K to I2.
2215*
2216 CALL infog1l( k, hbl, npcol, mycol, jafirst, liloh,
2217 $ lihih )
2218 lihih = locali2
2219 CALL infog1l( 1, hbl, nprow, myrow, iafirst, itmp2,
2220 $ itmp1 )
2221 itmp1 = numroc( k+1, hbl, myrow, iafirst, nprow )
2222 IF( icurrow( ki ).EQ.myrow ) THEN
2223 IF( ( ispec.EQ.0 ) .OR. ( nprow.EQ.1 ) .OR.
2224 $ ( mod( k-1, hbl ).EQ.hbl-2 ) ) THEN
2225 itmp1 = itmp1 - 1
2226 DO 460 j = ( liloh-1 )*lda,
2227 $ ( lihih-1 )*lda, lda
2228 sum = conjg( t1 )*a( itmp1+j ) +
2229 $ conjg( t2 )*a( itmp1+1+j )
2230 a( itmp1+j ) = a( itmp1+j ) - sum
2231 a( itmp1+1+j ) = a( itmp1+1+j ) - sum*v2
2232 460 CONTINUE
2233 ELSE
2234 IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
2235 CALL cgerv2d( contxt, 1, lihih-liloh+1,
2236 $ work( irbuf+1 ), 1, up,
2237 $ mycol )
2238 DO 470 j = liloh, lihih
2239 sum = conjg( t1 )*
2240 $ work( irbuf+j-liloh+1 ) +
2241 $ conjg( t2 )*a( ( j-1 )*lda+
2242 $ itmp1 )
2243 work( irbuf+j-liloh+1 ) = work( irbuf+
2244 $ j-liloh+1 ) - sum
2245 a( ( j-1 )*lda+itmp1 ) = a( ( j-1 )*
2246 $ lda+itmp1 ) - sum*v2
2247 470 CONTINUE
2248 CALL cgesd2d( contxt, 1, lihih-liloh+1,
2249 $ work( irbuf+1 ), 1, up,
2250 $ mycol )
2251 END IF
2252 END IF
2253 ELSE
2254 IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
2255 $ ( icurrow( ki ).EQ.down ) ) THEN
2256 CALL cgesd2d( contxt, 1, lihih-liloh+1,
2257 $ a( ( liloh-1 )*lda+itmp1 ),
2258 $ lda, down, mycol )
2259 CALL cgerv2d( contxt, 1, lihih-liloh+1,
2260 $ a( ( liloh-1 )*lda+itmp1 ),
2261 $ lda, down, mycol )
2262 END IF
2263 END IF
2264*
2265* Apply G from the right to transform the columns of the
2266* matrix in rows I1 to MIN(K+3,I).
2267*
2268 CALL infog1l( i1, hbl, nprow, myrow, iafirst,
2269 $ liloh, lihih )
2270 lihih = numroc( i, hbl, myrow, iafirst, nprow )
2271*
2272 IF( icurcol( ki ).EQ.mycol ) THEN
2273* LOCAL A(LILOZ:LIHIZ,KCOL:KCOL+2)
2274 IF( ( ispec.EQ.0 ) .OR. ( npcol.EQ.1 ) .OR.
2275 $ ( mod( k-1, hbl ).EQ.hbl-2 ) ) THEN
2276 CALL infog1l( k, hbl, npcol, mycol, jafirst,
2277 $ itmp1, itmp2 )
2278 itmp2 = numroc( k+1, hbl, mycol, jafirst,
2279 $ npcol )
2280 DO 480 j = liloh, lihih
2281 sum = t1*a( ( itmp1-1 )*lda+j ) +
2282 $ t2*a( itmp1*lda+j )
2283 a( ( itmp1-1 )*lda+j ) = a( ( itmp1-1 )*
2284 $ lda+j ) - sum
2285 a( itmp1*lda+j ) = a( itmp1*lda+j ) -
2286 $ sum*conjg( v2 )
2287 480 CONTINUE
2288 ELSE
2289 itmp1 = kcol( ki )
2290 IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
2291 CALL cgerv2d( contxt, lihih-liloh+1, 1,
2292 $ work( icbuf+1 ),
2293 $ lihih-liloh+1, myrow, left )
2294 DO 490 j = liloh, lihih
2295 sum = t1*work( icbuf+j ) +
2296 $ t2*a( ( itmp1-1 )*lda+j )
2297 work( icbuf+j ) = work( icbuf+j ) - sum
2298 a( ( itmp1-1 )*lda+j )
2299 $ = a( ( itmp1-1 )*lda+j ) -
2300 $ sum*conjg( v2 )
2301 490 CONTINUE
2302 CALL cgesd2d( contxt, lihih-liloh+1, 1,
2303 $ work( icbuf+1 ),
2304 $ lihih-liloh+1, myrow, left )
2305 END IF
2306 END IF
2307 ELSE
2308 IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
2309 $ ( icurcol( ki ).EQ.right ) ) THEN
2310 itmp1 = kcol( ki )
2311 CALL cgesd2d( contxt, lihih-liloh+1, 1,
2312 $ a( ( itmp1-1 )*lda+liloh ),
2313 $ lda, myrow, right )
2314 CALL infog1l( k, hbl, npcol, mycol, jafirst,
2315 $ itmp1, itmp2 )
2316 itmp2 = numroc( k+1, hbl, mycol, jafirst,
2317 $ npcol )
2318 CALL cgerv2d( contxt, lihih-liloh+1, 1,
2319 $ a( ( itmp1-1 )*lda+liloh ),
2320 $ lda, myrow, right )
2321 END IF
2322 END IF
2323*
2324 IF( wantz ) THEN
2325*
2326* Accumulate transformations in the matrix Z
2327*
2328 IF( icurcol( ki ).EQ.mycol ) THEN
2329* LOCAL Z(LILOZ:LIHIZ,KCOL:KCOL+2)
2330 IF( ( ispec.EQ.0 ) .OR. ( npcol.EQ.1 ) .OR.
2331 $ ( mod( k-1, hbl ).EQ.hbl-2 ) ) THEN
2332 itmp1 = kcol( ki ) + k - istart
2333 itmp1 = ( itmp1-1 )*ldz
2334 DO 500 j = liloz, lihiz
2335 sum = t1*z( j+itmp1 ) +
2336 $ t2*z( j+itmp1+ldz )
2337 z( j+itmp1 ) = z( j+itmp1 ) - sum
2338 z( j+itmp1+ldz ) = z( j+itmp1+ldz ) -
2339 $ sum*conjg( v2 )
2340 500 CONTINUE
2341 ELSE
2342 itmp1 = kcol( ki )
2343* IF WE ACTUALLY OWN COLUMN K
2344 IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
2345 CALL cgerv2d( contxt, lihiz-liloz+1, 1,
2346 $ work( izbuf+1 ), ldz,
2347 $ myrow, left )
2348 itmp1 = ( itmp1-1 )*ldz
2349 DO 510 j = liloz, lihiz
2350 sum = t1*work( izbuf+j ) +
2351 $ t2*z( j+itmp1 )
2352 work( izbuf+j ) = work( izbuf+j ) -
2353 $ sum
2354 z( j+itmp1 ) = z( j+itmp1 ) -
2355 $ sum*conjg( v2 )
2356 510 CONTINUE
2357 CALL cgesd2d( contxt, lihiz-liloz+1, 1,
2358 $ work( izbuf+1 ), ldz,
2359 $ myrow, left )
2360 END IF
2361 END IF
2362 ELSE
2363*
2364* NO WORK BUT NEED TO UPDATE ANYWAY????
2365*
2366 IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
2367 $ ( icurcol( ki ).EQ.right ) ) THEN
2368 itmp1 = kcol( ki )
2369 itmp1 = ( itmp1-1 )*ldz
2370 CALL cgesd2d( contxt, lihiz-liloz+1, 1,
2371 $ z( liloz+itmp1 ), ldz,
2372 $ myrow, right )
2373 CALL cgerv2d( contxt, lihiz-liloz+1, 1,
2374 $ z( liloz+itmp1 ), ldz,
2375 $ myrow, right )
2376 END IF
2377 END IF
2378 END IF
2379 END IF
2380 520 CONTINUE
2381*
2382* Adjust local information for this bulge
2383*
2384 IF( nprow.EQ.1 ) THEN
2385 krow( ki ) = krow( ki ) + k2( ki ) - k1( ki ) + 1
2386 kp2row( ki ) = kp2row( ki ) + k2( ki ) - k1( ki ) + 1
2387 END IF
2388 IF( ( mod( k1( ki )-1, hbl ).LT.hbl-2 ) .AND.
2389 $ ( icurrow( ki ).EQ.myrow ) .AND. ( nprow.GT.1 ) )
2390 $ THEN
2391 krow( ki ) = krow( ki ) + k2( ki ) - k1( ki ) + 1
2392 END IF
2393 IF( ( mod( k2( ki ), hbl ).LT.hbl-2 ) .AND.
2394 $ ( icurrow( ki ).EQ.myrow ) .AND. ( nprow.GT.1 ) )
2395 $ THEN
2396 kp2row( ki ) = kp2row( ki ) + k2( ki ) - k1( ki ) + 1
2397 END IF
2398 IF( ( mod( k1( ki )-1, hbl ).GE.hbl-2 ) .AND.
2399 $ ( ( myrow.EQ.icurrow( ki ) ) .OR. ( down.EQ.
2400 $ icurrow( ki ) ) ) .AND. ( nprow.GT.1 ) ) THEN
2401 CALL infog1l( k2( ki )+1, hbl, nprow, myrow, iafirst,
2402 $ krow( ki ), itmp2 )
2403 END IF
2404 IF( ( mod( k2( ki ), hbl ).GE.hbl-2 ) .AND.
2405 $ ( ( myrow.EQ.icurrow( ki ) ) .OR. ( up.EQ.
2406 $ icurrow( ki ) ) ) .AND. ( nprow.GT.1 ) ) THEN
2407 kp2row( ki ) = numroc( k2( ki )+3, hbl, myrow,
2408 $ iafirst, nprow )
2409 END IF
2410 IF( npcol.EQ.1 ) THEN
2411 kcol( ki ) = kcol( ki ) + k2( ki ) - k1( ki ) + 1
2412 kp2col( ki ) = kp2col( ki ) + k2( ki ) - k1( ki ) + 1
2413 END IF
2414 IF( ( mod( k1( ki )-1, hbl ).LT.hbl-2 ) .AND.
2415 $ ( icurcol( ki ).EQ.mycol ) .AND. ( npcol.GT.1 ) )
2416 $ THEN
2417 kcol( ki ) = kcol( ki ) + k2( ki ) - k1( ki ) + 1
2418 END IF
2419 IF( ( mod( k2( ki ), hbl ).LT.hbl-2 ) .AND.
2420 $ ( icurcol( ki ).EQ.mycol ) .AND. ( npcol.GT.1 ) )
2421 $ THEN
2422 kp2col( ki ) = kp2col( ki ) + k2( ki ) - k1( ki ) + 1
2423 END IF
2424 IF( ( mod( k1( ki )-1, hbl ).GE.hbl-2 ) .AND.
2425 $ ( ( mycol.EQ.icurcol( ki ) ) .OR. ( right.EQ.
2426 $ icurcol( ki ) ) ) .AND. ( npcol.GT.1 ) ) THEN
2427 CALL infog1l( k2( ki )+1, hbl, npcol, mycol, jafirst,
2428 $ kcol( ki ), itmp2 )
2429 END IF
2430 IF( ( mod( k2( ki ), hbl ).GE.hbl-2 ) .AND.
2431 $ ( ( mycol.EQ.icurcol( ki ) ) .OR. ( left.EQ.
2432 $ icurcol( ki ) ) ) .AND. ( npcol.GT.1 ) ) THEN
2433 kp2col( ki ) = numroc( k2( ki )+3, hbl, mycol,
2434 $ jafirst, npcol )
2435 END IF
2436 k1( ki ) = k2( ki ) + 1
2437 istop = min( k1( ki )+rotn-mod( k1( ki ), rotn ), i-2 )
2438 istop = min( istop, k1( ki )+hbl-3-
2439 $ mod( k1( ki )-1, hbl ) )
2440 istop = min( istop, i2-2 )
2441 istop = max( istop, k1( ki ) )
2442 IF( ( mod( k1( ki )-1, hbl ).EQ.hbl-2 ) .AND.
2443 $ ( istop.LT.min( i-2, i2-2 ) ) ) THEN
2444 istop = istop + 1
2445 END IF
2446 k2( ki ) = istop
2447 IF( k1( ki ).LE.istop ) THEN
2448 IF( ( mod( k1( ki )-1, hbl ).EQ.hbl-2 ) .AND.
2449 $ ( i-k1( ki ).GT.1 ) ) THEN
2450*
2451* Next step switches rows & cols
2452*
2453 icurrow( ki ) = mod( icurrow( ki )+1, nprow )
2454 icurcol( ki ) = mod( icurcol( ki )+1, npcol )
2455 END IF
2456 END IF
2457 530 CONTINUE
2458*
2459 IF( k2( ibulge ).LE.i-1 )
2460 $ GO TO 40
2461 END IF
2462*
2463 540 CONTINUE
2464*
2465* Failure to converge in remaining number of iterations
2466*
2467 info = i
2468 RETURN
2469*
2470 550 CONTINUE
2471*
2472 IF( l.EQ.i ) THEN
2473*
2474* H(I,I-1) is negligible: one eigenvalue has converged.
2475*
2476 CALL infog2l( i, i, desca, nprow, npcol, myrow, mycol, irow,
2477 $ icol, itmp1, itmp2 )
2478 IF( ( myrow.EQ.itmp1 ) .AND. ( mycol.EQ.itmp2 ) ) THEN
2479 w( i ) = a( ( icol-1 )*lda+irow )
2480 ELSE
2481 w( i ) = zero
2482 END IF
2483 ELSE IF( l.EQ.i-1 ) THEN
2484*
2485* H(I-1,I-2) is negligible: a pair of eigenvalues have converged.
2486*
2487 CALL pclacp3( 2, i-1, a, desca, s1, 2*iblk, -1, -1, 0 )
2488 CALL clanv2( s1( 1, 1 ), s1( 1, 2 ), s1( 2, 1 ), s1( 2, 2 ),
2489 $ w( i-1 ), w( i ), cs, sn )
2490 CALL pclacp3( 2, i-1, a, desca, s1, 2*iblk, 0, 0, 1 )
2491*
2492 IF( node.NE.0 ) THEN
2493* Erase the eigenvalues other eigenvalues
2494 w( i-1 ) = zero
2495 w( i ) = zero
2496 END IF
2497*
2498 IF( wantt ) THEN
2499*
2500* Apply the transformation to A.
2501*
2502 IF( i2.GT.i ) THEN
2503 CALL pcrot( i2-i, a, i-1, i+1, desca, n, a, i, i+1,
2504 $ desca, n, cs, sn )
2505 END IF
2506 CALL pcrot( i-i1-1, a, i1, i-1, desca, 1, a, i1, i, desca,
2507 $ 1, cs, conjg( sn ) )
2508 END IF
2509 IF( wantz ) THEN
2510*
2511* Apply the transformation to Z.
2512*
2513 CALL pcrot( nz, z, iloz, i-1, descz, 1, z, iloz, i, descz,
2514 $ 1, cs, conjg( sn ) )
2515 END IF
2516*
2517 ELSE
2518*
2519* Find the eigenvalues in H(L:I,L:I), L < I-1
2520*
2521 jblk = i - l + 1
2522 IF( jblk.LE.2*iblk ) THEN
2523 CALL pclacp3( i-l+1, l, a, desca, s1, 2*iblk, 0, 0, 0 )
2524 CALL clahqr2( .false., .false., jblk, 1, jblk, s1, 2*iblk,
2525 $ w( l ), 1, jblk, z, ldz, ierr )
2526 IF( node.NE.0 ) THEN
2527*
2528* Erase the eigenvalues
2529*
2530 DO 560 k = l, i
2531 w( k ) = zero
2532 560 CONTINUE
2533 END IF
2534 END IF
2535 END IF
2536*
2537* Decrement number of remaining iterations, and return to start of
2538* the main loop with new value of I.
2539*
2540 itn = itn - its
2541 i = l - 1
2542 GO TO 10
2543*
2544 570 CONTINUE
2545 CALL cgsum2d( contxt, 'All', ' ', n, 1, w, n, -1, -1 )
2546 RETURN
2547*
2548* END OF PCLAHQR
2549*
2550 END
subroutine clahqr2(wantt, wantz, n, ilo, ihi, h, ldh, w, iloz, ihiz, z, ldz, info)
Definition clahqr2.f:3
subroutine clamsh(s, lds, nbulge, jblk, h, ldh, n, ulp)
Definition clamsh.f:2
subroutine clanv2(a, b, c, d, rt1, rt2, cs, sn)
Definition clanv2.f:2
subroutine claref(type, a, lda, wantz, z, ldz, block, irow1, icol1, istart, istop, itmp1, itmp2, liloz, lihiz, vecs, v2, v3, t1, t2, t3)
Definition claref.f:4
subroutine infog1l(gindx, nb, nprocs, myroc, isrcproc, lindx, rocsrc)
Definition infog1l.f:3
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pclaconsb(a, desca, i, l, m, h44, h33, h43h34, buf, lwork)
Definition pclaconsb.f:3
subroutine pclacp3(m, i, a, desca, b, ldb, ii, jj, rev)
Definition pclacp3.f:2
subroutine pclahqr(wantt, wantz, n, ilo, ihi, a, desca, w, iloz, ihiz, z, descz, work, lwork, iwork, ilwork, info)
Definition pclahqr.f:4
subroutine pclasmsub(a, desca, i, l, k, smlnum, buf, lwork)
Definition pclasmsub.f:2
subroutine pclawil(ii, jj, m, a, desca, h44, h33, h43h34, v)
Definition pclawil.f:2
subroutine pslabad(ictxt, small, large)
Definition pslabad.f:2
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2