SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pslaqr5.f
Go to the documentation of this file.
1 SUBROUTINE pslaqr5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
2 $ SR, SI, H, DESCH, ILOZ, IHIZ, Z, DESCZ, WORK,
3 $ LWORK, IWORK, LIWORK )
4*
5* Contribution from the Department of Computing Science and HPC2N,
6* Umea University, Sweden
7*
8* -- ScaLAPACK auxiliary routine (version 2.0.2) --
9* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
10* May 1 2012
11*
12 IMPLICIT NONE
13*
14* .. Scalar Arguments ..
15 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, N, NSHFTS,
16 $ LWORK, LIWORK
17 LOGICAL WANTT, WANTZ
18* ..
19* .. Array Arguments ..
20 INTEGER DESCH( * ), DESCZ( * ), IWORK( * )
21 REAL H( * ), SI( * ), SR( * ), Z( * ), WORK( * )
22* ..
23*
24* Purpose
25* =======
26*
27* This auxiliary subroutine called by PSLAQR0 performs a
28* single small-bulge multi-shift QR sweep by chasing separated
29* groups of bulges along the main block diagonal of H.
30*
31* WANTT (global input) logical scalar
32* WANTT = .TRUE. if the quasi-triangular Schur factor
33* is being computed. WANTT is set to .FALSE. otherwise.
34*
35* WANTZ (global input) logical scalar
36* WANTZ = .TRUE. if the orthogonal Schur factor is being
37* computed. WANTZ is set to .FALSE. otherwise.
38*
39* KACC22 (global input) integer with value 0, 1, or 2.
40* Specifies the computation mode of far-from-diagonal
41* orthogonal updates.
42* = 1: PSLAQR5 accumulates reflections and uses matrix-matrix
43* multiply to update the far-from-diagonal matrix entries.
44* = 2: PSLAQR5 accumulates reflections, uses matrix-matrix
45* multiply to update the far-from-diagonal matrix entries,
46* and takes advantage of 2-by-2 block structure during
47* matrix multiplies.
48*
49* N (global input) integer scalar
50* N is the order of the Hessenberg matrix H upon which this
51* subroutine operates.
52*
53* KTOP (global input) integer scalar
54* KBOT (global input) integer scalar
55* These are the first and last rows and columns of an
56* isolated diagonal block upon which the QR sweep is to be
57* applied. It is assumed without a check that
58* either KTOP = 1 or H(KTOP,KTOP-1) = 0
59* and
60* either KBOT = N or H(KBOT+1,KBOT) = 0.
61*
62* NSHFTS (global input) integer scalar
63* NSHFTS gives the number of simultaneous shifts. NSHFTS
64* must be positive and even.
65*
66* SR (global input) REAL array of size (NSHFTS)
67* SI (global input) REAL array of size (NSHFTS)
68* SR contains the real parts and SI contains the imaginary
69* parts of the NSHFTS shifts of origin that define the
70* multi-shift QR sweep.
71*
72* H (local input/output) REAL array of size
73* (DESCH(LLD_),*)
74* On input H contains a Hessenberg matrix. On output a
75* multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
76* to the isolated diagonal block in rows and columns KTOP
77* through KBOT.
78*
79* DESCH (global and local input) INTEGER array of dimension DLEN_.
80* The array descriptor for the distributed matrix H.
81*
82* ILOZ (global input) INTEGER
83* IHIZ (global input) INTEGER
84* Specify the rows of Z to which transformations must be
85* applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
86*
87* Z (local input/output) REAL array of size
88* (DESCZ(LLD_),*)
89* If WANTZ = .TRUE., then the QR Sweep orthogonal
90* similarity transformation is accumulated into
91* Z(ILOZ:IHIZ,ILO:IHI) from the right.
92* If WANTZ = .FALSE., then Z is unreferenced.
93*
94* DESCZ (global and local input) INTEGER array of dimension DLEN_.
95* The array descriptor for the distributed matrix Z.
96*
97* WORK (local workspace) REAL array, dimension(DWORK)
98*
99* LWORK (local input) INTEGER
100* The length of the workspace array WORK.
101*
102* IWORK (local workspace) INTEGER array, dimension (LIWORK)
103*
104* LIWORK (local input) INTEGER
105* The length of the workspace array IWORK.
106*
107* ================================================================
108* Based on contributions by
109* Robert Granat, Department of Computing Science and HPC2N,
110* University of Umea, Sweden.
111*
112* ============================================================
113* References:
114* K. Braman, R. Byers, and R. Mathias,
115* The Multi-Shift QR Algorithm Part I: Maintaining Well Focused
116* Shifts, and Level 3 Performance.
117* SIAM J. Matrix Anal. Appl., 23(4):929--947, 2002.
118*
119* R. Granat, B. Kagstrom, and D. Kressner,
120* A Novel Parallel QR Algorithm for Hybrid Distributed Momory HPC
121* Systems.
122* SIAM J. Sci. Comput., 32(4):2345--2378, 2010.
123*
124* ============================================================
125* .. Parameters ..
126 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
127 $ LLD_, MB_, M_, NB_, N_, RSRC_
128 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
129 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
130 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
131 REAL ZERO, ONE
132 PARAMETER ( ZERO = 0.0e0, one = 1.0e0 )
133 INTEGER NTINY
134 parameter( ntiny = 11 )
135* ..
136* .. Local Scalars ..
137 REAL ALPHA, BETA, H11, H12, H21, H22, REFSUM,
138 $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2,
139 $ ulp, tau, elem, stamp, ddum, orth
140 INTEGER I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,
141 $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS,
142 $ m, m22, mbot, mend, mstart, mtop, nbmps, ndcol,
143 $ ns, nu, lldh, lldz, lldu, lldv, lldw, lldwh,
144 $ info, ictxt, nprow, npcol, nb, iroffh, itop,
145 $ nwin, myrow, mycol, lns, numwin, lkacc22,
146 $ lchain, win, idonejob, ipnext, anmwin, lenrbuf,
147 $ lencbuf, ichoff, lrsrc, lcsrc, lktop, lkbot,
148 $ ii, jj, swin, ewin, lnwin, dim, llktop, llkbot,
149 $ ipv, ipu, iph, ipw, ku, kwh, kwv, nve, lks,
150 $ idum, nho, dir, winid, indx, iloc, jloc, rsrc1,
151 $ csrc1, rsrc2, csrc2, rsrc3, csrc3, rsrc4, ipuu,
152 $ csrc4, lrows, lcols, indxs, ks, jloc1, iloc1,
153 $ lktop1, lktop2, wchunk, numchunk, oddeven,
154 $ chunknum, dim1, dim4, ipw3, hrows, zrows,
155 $ hcols, ipw1, ipw2, rsrc, east, jloc4, iloc4,
156 $ west, csrc, south, norht, indxe, north,
157 $ ihh, ipiw, lkbot1, nprocs, liroffh,
158 $ winfin, rws3, cls3, indx2, hrows2,
159 $ zrows2, hcols2, mnrbuf,
160 $ mxrbuf, mncbuf, mxcbuf, lwkopt
161 LOGICAL BLK22, BMP22, INTRO, DONEJOB, ODDNPROW,
162 $ ODDNPCOL, LQUERY, BCDONE
163 CHARACTER JBCMPZ*2, JOB
164* ..
165* .. External Functions ..
166 LOGICAL LSAME
167 INTEGER PILAENVX, ICEIL, INDXG2P, INDXG2L, NUMROC
168 REAL SLAMCH, SLANGE
169 EXTERNAL slamch, pilaenvx, iceil, indxg2p, indxg2l,
170 $ numroc, lsame, slange
171* ..
172* .. Intrinsic Functions ..
173 INTRINSIC abs, float, max, min, mod
174* ..
175* .. Local Arrays ..
176 REAL VT( 3 )
177* ..
178* .. External Subroutines ..
179 EXTERNAL sgemm, slabad, slamov, slaqr1, slarfg, slaset,
180 $ strmm, slaqr6
181* ..
182* .. Executable Statements ..
183*
184 info = 0
185 ictxt = desch( ctxt_ )
186 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
187 nprocs = nprow*npcol
188 lldh = desch( lld_ )
189 lldz = descz( lld_ )
190 nb = desch( mb_ )
191 iroffh = mod( ktop - 1, nb )
192 lquery = lwork.EQ.-1 .OR. liwork.EQ.-1
193*
194* If there are no shifts, then there is nothing to do.
195*
196 IF( .NOT. lquery .AND. nshfts.LT.2 )
197 $ RETURN
198*
199* If the active block is empty or 1-by-1, then there
200* is nothing to do.
201*
202 IF( .NOT. lquery .AND. ktop.GE.kbot )
203 $ RETURN
204*
205* Shuffle shifts into pairs of real shifts and pairs of
206* complex conjugate shifts assuming complex conjugate
207* shifts are already adjacent to one another.
208*
209 IF( .NOT. lquery ) THEN
210 DO 10 i = 1, nshfts - 2, 2
211 IF( si( i ).NE.-si( i+1 ) ) THEN
212*
213 swap = sr( i )
214 sr( i ) = sr( i+1 )
215 sr( i+1 ) = sr( i+2 )
216 sr( i+2 ) = swap
217*
218 swap = si( i )
219 si( i ) = si( i+1 )
220 si( i+1 ) = si( i+2 )
221 si( i+2 ) = swap
222 END IF
223 10 CONTINUE
224 END IF
225*
226* NSHFTS is supposed to be even, but if is odd,
227* then simply reduce it by one. The shuffle above
228* ensures that the dropped shift is real and that
229* the remaining shifts are paired.
230*
231 ns = nshfts - mod( nshfts, 2 )
232*
233* Extract the size of the computational window.
234*
235 nwin = pilaenvx( ictxt, 19, 'PSLAQR5', jbcmpz, n, nb, nb, nb )
236 nwin = min( nwin, kbot-ktop+1 )
237*
238* Adjust number of simultaneous shifts if it exceeds the limit
239* set by the number of diagonal blocks in the active submatrix
240* H(KTOP:KBOT,KTOP:KBOT).
241*
242 ns = max( 2, min( ns, iceil( kbot-ktop+1, nb )*nwin/3 ) )
243 ns = ns - mod( ns, 2 )
244
245*
246* Decide the number of simultaneous computational windows
247* from the number of shifts - each window should contain up to
248* (NWIN / 3) shifts. Also compute the number of shifts per
249* window and make sure that number is even.
250*
251 lns = min( max( 2, nwin / 3 ), max( 2, ns / min(nprow,npcol) ) )
252 lns = lns - mod( lns, 2 )
253 numwin = max( 1, min( iceil( ns, lns ),
254 $ iceil( kbot-ktop+1, nb ) - 1 ) )
255 IF( nprow.NE.npcol ) THEN
256 numwin = min( numwin, min(nprow,npcol) )
257 lns = min( lns, max( 2, ns / min(nprow,npcol) ) )
258 lns = lns - mod( lns, 2 )
259 END IF
260*
261* Machine constants for deflation.
262*
263 safmin = slamch( 'SAFE MINIMUM' )
264 safmax = one / safmin
265 CALL slabad( safmin, safmax )
266 ulp = slamch( 'PRECISION' )
267 smlnum = safmin*( float( n ) / ulp )
268*
269* Use accumulated reflections to update far-from-diagonal
270* entries on a local level?
271*
272 IF( lns.LT.14 ) THEN
273 lkacc22 = 1
274 ELSE
275 lkacc22 = 2
276 END IF
277*
278* If so, exploit the 2-by-2 block structure?
279* ( Usually it is not efficient to exploit the 2-by-2 structure
280* because the block size is too small. )
281*
282 blk22 = ( lns.GT.2 ) .AND. ( kacc22.EQ.2 )
283*
284* Clear trash.
285*
286 IF( .NOT. lquery .AND. ktop+2.LE.kbot )
287 $ CALL pselset( h, ktop+2, ktop, desch, zero )
288*
289* NBMPS = number of 2-shift bulges in each chain
290*
291 nbmps = lns / 2
292*
293* KDU = width of slab
294*
295 kdu = 6*nbmps - 3
296*
297* LCHAIN = length of each chain
298*
299 lchain = 3 * nbmps + 1
300*
301* Check if workspace query.
302*
303 IF( lquery ) THEN
304 hrows = numroc( n, nb, myrow, desch(rsrc_), nprow )
305 hcols = numroc( n, nb, mycol, desch(csrc_), npcol )
306 lwkopt = (5+2*numwin)*nb**2 + 2*hrows*nb + hcols*nb +
307 $ max( hrows*nb, hcols*nb )
308 work(1) = float(lwkopt)
309 iwork(1) = 5*numwin
310 RETURN
311 END IF
312*
313* Check if KTOP and KBOT are valid.
314*
315 IF( ktop.LT.1 .OR. kbot.GT.n ) RETURN
316*
317* Create and chase NUMWIN chains of NBMPS bulges.
318*
319* Set up window introduction.
320*
321 anmwin = 0
322 intro = .true.
323 ipiw = 1
324*
325* Main loop:
326* While-loop over the computational windows which is
327* terminated when all windows have been introduced,
328* chased down to the bottom of the considered submatrix
329* and chased off.
330*
331 20 CONTINUE
332*
333* Set up next window as long as we have less than the prescribed
334* number of windows. Each window is described an integer quadruple:
335* 1. Local value of KTOP (below denoted by LKTOP)
336* 2. Local value of KBOT (below denoted by LKBOT)
337* 3-4. Processor indices (LRSRC,LCSRC) associated with the window.
338* (5. Mark that decides if a window is fully processed or not)
339*
340* Notice - the next window is only introduced if the first block
341* in the active submatrix does not contain any other windows.
342*
343 IF( anmwin.GT.0 ) THEN
344 lktop = iwork( 1+(anmwin-1)*5 )
345 ELSE
346 lktop = ktop
347 END IF
348 IF( intro .AND. (anmwin.EQ.0 .OR. lktop.GT.iceil(ktop,nb)*nb) )
349 $ THEN
350 anmwin = anmwin + 1
351*
352* Structure of IWORK:
353* IWORK( 1+(WIN-1)*5 ): start position
354* IWORK( 2+(WIN-1)*5 ): stop position
355* IWORK( 3+(WIN-1)*5 ): processor row id
356* IWORK( 4+(WIN-1)*5 ): processor col id
357* IWORK( 5+(WIN-1)*5 ): window status (0, 1, or 2)
358*
359 iwork( 1+(anmwin-1)*5 ) = ktop
360 iwork( 2+(anmwin-1)*5 ) = ktop +
361 $ min( nwin,nb-iroffh,kbot-ktop+1 ) - 1
362 iwork( 3+(anmwin-1)*5 ) = indxg2p( iwork(1+(anmwin-1)*5), nb,
363 $ myrow, desch(rsrc_), nprow )
364 iwork( 4+(anmwin-1)*5 ) = indxg2p( iwork(2+(anmwin-1)*5), nb,
365 $ mycol, desch(csrc_), npcol )
366 iwork( 5+(anmwin-1)*5 ) = 0
367 ipiw = 6+(anmwin-1)*5
368 IF( anmwin.EQ.numwin ) intro = .false.
369 END IF
370*
371* Do-loop over the number of windows.
372*
373 ipnext = 1
374 donejob = .false.
375 idonejob = 0
376 lenrbuf = 0
377 lencbuf = 0
378 ichoff = 0
379 DO 40 win = 1, anmwin
380*
381* Extract window information to simplify the rest.
382*
383 lrsrc = iwork( 3+(win-1)*5 )
384 lcsrc = iwork( 4+(win-1)*5 )
385 lktop = iwork( 1+(win-1)*5 )
386 lkbot = iwork( 2+(win-1)*5 )
387 lnwin = lkbot - lktop + 1
388*
389* Check if anything to do for current window, i.e., if the local
390* chain of bulges has reached the next block border etc.
391*
392 IF( iwork(5+(win-1)*5).LT.2 .AND. lnwin.GT.1 .AND.
393 $ (lnwin.GT.lchain .OR. lkbot.EQ.kbot ) ) THEN
394 liroffh = mod(lktop-1,nb)
395 swin = lktop-liroffh
396 ewin = min(kbot,lktop-liroffh+nb-1)
397 dim = ewin-swin+1
398 IF( dim.LE.ntiny .AND. .NOT.lkbot.EQ.kbot ) THEN
399 iwork( 5+(win-1)*5 ) = 2
400 GO TO 45
401 END IF
402 idonejob = 1
403 IF( iwork(5+(win-1)*5).EQ.0 ) THEN
404 iwork(5+(win-1)*5) = 1
405 END IF
406*
407* Let the process that owns the corresponding window do the
408* local bulge chase.
409*
410 IF( myrow.EQ.lrsrc .AND. mycol.EQ.lcsrc ) THEN
411*
412* Set the kind of job to do in SLAQR6:
413* 1. JOB = 'I': Introduce and chase bulges in window WIN
414* 2. JOB = 'C': Chase bulges from top to bottom of window WIN
415* 3. JOB = 'O': Chase bulges off window WIN
416* 4. JOB = 'A': All of 1-3 above is done - this will for
417* example happen for very small active
418* submatrices (like 2-by-2)
419*
420 llkbot = llktop + lnwin - 1
421 IF( lktop.EQ.ktop .AND. lkbot.EQ.kbot ) THEN
422 job = 'All steps'
423 ichoff = 1
424 ELSEIF( lktop.EQ.ktop ) THEN
425 job = 'Introduce and chase'
426 ELSEIF( lkbot.EQ.kbot ) THEN
427 job = 'Off-chase bulges'
428 ichoff = 1
429 ELSE
430 job = 'Chase bulges'
431 END IF
432*
433* Copy submatrix of H corresponding to window WIN into
434* workspace and set out additional workspace for storing
435* orthogonal transformations. This submatrix must be at
436* least (NTINY+1)-by-(NTINY+1) to fit into SLAQR6 - if not,
437* abort and go for cross border bulge chasing with this
438* particular window.
439*
440 ii = indxg2l( swin, nb, myrow, desch(rsrc_), nprow )
441 jj = indxg2l( swin, nb, mycol, desch(csrc_), npcol )
442 llktop = 1 + liroffh
443 llkbot = llktop + lnwin - 1
444*
445 ipu = ipnext
446 iph = ipu + lnwin**2
447 ipuu = iph + max(ntiny+1,dim)**2
448 ipv = ipuu + max(ntiny+1,dim)**2
449 ipnext = iph
450*
451 IF( lsame( job, 'A' ) .OR. lsame( job, 'O' ) .AND.
452 $ dim.LT.ntiny+1 ) THEN
453 CALL slaset( 'All', ntiny+1, ntiny+1, zero, one,
454 $ work(iph), ntiny+1 )
455 END IF
456 CALL slamov( 'Upper', dim, dim, h(ii+(jj-1)*lldh), lldh,
457 $ work(iph), max(ntiny+1,dim) )
458 CALL scopy( dim-1, h(ii+(jj-1)*lldh+1), lldh+1,
459 $ work(iph+1), max(ntiny+1,dim)+1 )
460 IF( lsame( job, 'C' ) .OR. lsame( job, 'O') ) THEN
461 CALL scopy( dim-2, h(ii+(jj-1)*lldh+2), lldh+1,
462 $ work(iph+2), max(ntiny+1,dim)+1 )
463 CALL scopy( dim-3, h(ii+(jj-1)*lldh+3), lldh+1,
464 $ work(iph+3), max(ntiny+1,dim)+1 )
465 CALL slaset( 'Lower', dim-4, dim-4, zero,
466 $ zero, work(iph+4), max(ntiny+1,dim) )
467 ELSE
468 CALL slaset( 'Lower', dim-2, dim-2, zero,
469 $ zero, work(iph+2), max(ntiny+1,dim) )
470 END IF
471*
472 ku = max(ntiny+1,dim) - kdu + 1
473 kwh = kdu + 1
474 nho = ( max(ntiny+1,dim)-kdu+1-4 ) - ( kdu+1 ) + 1
475 kwv = kdu + 4
476 nve = max(ntiny+1,dim) - kdu - kwv + 1
477 CALL slaset( 'All', max(ntiny+1,dim),
478 $ max(ntiny+1,dim), zero, one, work(ipuu),
479 $ max(ntiny+1,dim) )
480*
481* Small-bulge multi-shift QR sweep.
482*
483 lks = max( 1, ns - win*lns + 1 )
484 CALL slaqr6( job, wantt, .true., lkacc22,
485 $ max(ntiny+1,dim), llktop, llkbot, lns, sr( lks ),
486 $ si( lks ), work(iph), max(ntiny+1,dim), llktop,
487 $ llkbot, work(ipuu), max(ntiny+1,dim), work(ipu),
488 $ 3, work( iph+ku-1 ),
489 $ max(ntiny+1,dim), nve, work( iph+kwv-1 ),
490 $ max(ntiny+1,dim), nho, work( iph-1+ku+(kwh-1)*
491 $ max(ntiny+1,dim) ), max(ntiny+1,dim) )
492*
493* Copy submatrix of H back.
494*
495 CALL slamov( 'Upper', dim, dim, work(iph),
496 $ max(ntiny+1,dim), h(ii+(jj-1)*lldh), lldh )
497 CALL scopy( dim-1, work(iph+1), max(ntiny+1,dim)+1,
498 $ h(ii+(jj-1)*lldh+1), lldh+1 )
499 IF( lsame( job, 'I' ) .OR. lsame( job, 'C' ) ) THEN
500 CALL scopy( dim-2, work(iph+2), dim+1,
501 $ h(ii+(jj-1)*lldh+2), lldh+1 )
502 CALL scopy( dim-3, work(iph+3), dim+1,
503 $ h(ii+(jj-1)*lldh+3), lldh+1 )
504 ELSE
505 CALL slaset( 'Lower', dim-2, dim-2, zero,
506 $ zero, h(ii+(jj-1)*lldh+2), lldh )
507 END IF
508*
509* Copy actual submatrix of U to the correct place
510* of the buffer.
511*
512 CALL slamov( 'All', lnwin, lnwin,
513 $ work(ipuu+(max(ntiny+1,dim)*liroffh)+liroffh),
514 $ max(ntiny+1,dim), work(ipu), lnwin )
515 END IF
516*
517* In case the local submatrix was smaller than
518* (NTINY+1)-by-(NTINY+1) we go here and proceed.
519*
520 45 CONTINUE
521 ELSE
522 iwork( 5+(win-1)*5 ) = 2
523 END IF
524*
525* Increment counter for buffers of orthogonal transformations.
526*
527 IF( myrow.EQ.lrsrc .OR. mycol.EQ.lcsrc ) THEN
528 IF( idonejob.EQ.1 .AND. iwork(5+(win-1)*5).LT.2 ) THEN
529 IF( myrow.EQ.lrsrc ) lenrbuf = lenrbuf + lnwin*lnwin
530 IF( mycol.EQ.lcsrc ) lencbuf = lencbuf + lnwin*lnwin
531 END IF
532 END IF
533 40 CONTINUE
534*
535* Did some work in the above do-loop?
536*
537 CALL igsum2d( ictxt, 'All', '1-Tree', 1, 1, idonejob, 1, -1, -1 )
538 donejob = idonejob.GT.0
539*
540* Chased off bulges from first window?
541*
542 IF( nprocs.GT.1 )
543 $ CALL igamx2d( ictxt, 'All', '1-Tree', 1, 1, ichoff, 1, -1,
544 $ -1, -1, -1, -1 )
545*
546* If work was done in the do-loop over local windows, perform
547* updates, otherwise go for cross border bulge chasing and updates.
548*
549 IF( donejob ) THEN
550*
551* Broadcast orthogonal transformations.
552*
553 49 CONTINUE
554 IF( lenrbuf.GT.0 .OR. lencbuf.GT.0 ) THEN
555 DO 50 dir = 1, 2
556 bcdone = .false.
557 DO 60 win = 1, anmwin
558 IF( ( lenrbuf.EQ.0 .AND. lencbuf.EQ.0 ) .OR.
559 $ bcdone ) GO TO 62
560 lrsrc = iwork( 3+(win-1)*5 )
561 lcsrc = iwork( 4+(win-1)*5 )
562 IF( myrow.EQ.lrsrc .AND. mycol.EQ.lcsrc ) THEN
563 IF( dir.EQ.1 .AND. lenrbuf.GT.0 .AND.
564 $ npcol.GT.1 ) THEN
565 CALL sgebs2d( ictxt, 'Row', '1-Tree', lenrbuf,
566 $ 1, work, lenrbuf )
567 ELSEIF( dir.EQ.2 .AND. lencbuf.GT.0 .AND.
568 $ nprow.GT.1 ) THEN
569 CALL sgebs2d( ictxt, 'Col', '1-Tree', lencbuf,
570 $ 1, work, lencbuf )
571 END IF
572 IF( lenrbuf.GT.0 )
573 $ CALL slamov( 'All', lenrbuf, 1, work, lenrbuf,
574 $ work(1+lenrbuf), lencbuf )
575 bcdone = .true.
576 ELSEIF( myrow.EQ.lrsrc .AND. dir.EQ.1 ) THEN
577 IF( lenrbuf.GT.0 .AND. npcol.GT.1 ) THEN
578 CALL sgebr2d( ictxt, 'Row', '1-Tree', lenrbuf,
579 $ 1, work, lenrbuf, lrsrc, lcsrc )
580 bcdone = .true.
581 END IF
582 ELSEIF( mycol.EQ.lcsrc .AND. dir.EQ.2 ) THEN
583 IF( lencbuf.GT.0 .AND. nprow.GT.1 ) THEN
584 CALL sgebr2d( ictxt, 'Col', '1-Tree', lencbuf,
585 $ 1, work(1+lenrbuf), lencbuf, lrsrc, lcsrc )
586 bcdone = .true.
587 END IF
588 END IF
589 62 CONTINUE
590 60 CONTINUE
591 50 CONTINUE
592 END IF
593*
594* Compute updates - make sure to skip windows that was skipped
595* regarding local bulge chasing.
596*
597 DO 65 dir = 1, 2
598 winid = 0
599 IF( dir.EQ.1 ) THEN
600 ipnext = 1
601 ELSE
602 ipnext = 1 + lenrbuf
603 END IF
604 DO 70 win = 1, anmwin
605 IF( iwork( 5+(win-1)*5 ).EQ.2 ) GO TO 75
606 lrsrc = iwork( 3+(win-1)*5 )
607 lcsrc = iwork( 4+(win-1)*5 )
608 lktop = iwork( 1+(win-1)*5 )
609 lkbot = iwork( 2+(win-1)*5 )
610 lnwin = lkbot - lktop + 1
611 IF( (myrow.EQ.lrsrc.AND.lenrbuf.GT.0.AND.dir.EQ.1) .OR.
612 $ (mycol.EQ.lcsrc.AND.lencbuf.GT.0.AND.dir.EQ.2 ) )
613 $ THEN
614*
615* Set up workspaces.
616*
617 ipu = ipnext
618 ipnext = ipu + lnwin*lnwin
619 ipw = 1 + lenrbuf + lencbuf
620 liroffh = mod(lktop-1,nb)
621 winid = winid + 1
622*
623* Recompute JOB to see if block structure of U could
624* possibly be exploited or not.
625*
626 IF( lktop.EQ.ktop .AND. lkbot.EQ.kbot ) THEN
627 job = 'All steps'
628 ELSEIF( lktop.EQ.ktop ) THEN
629 job = 'Introduce and chase'
630 ELSEIF( lkbot.EQ.kbot ) THEN
631 job = 'Off-chase bulges'
632 ELSE
633 job = 'Chase bulges'
634 END IF
635 END IF
636*
637* Use U to update far-from-diagonal entries in H.
638* If required, use U to update Z as well.
639*
640 IF( .NOT. blk22 .OR. .NOT. lsame(job,'C')
641 $ .OR. lns.LE.2 ) THEN
642*
643 IF( dir.EQ.2 .AND. lencbuf.GT.0 .AND.
644 $ mycol.EQ.lcsrc ) THEN
645 IF( wantt ) THEN
646 DO 80 indx = 1, lktop-liroffh-1, nb
647 CALL infog2l( indx, lktop, desch, nprow,
648 $ npcol, myrow, mycol, iloc, jloc, rsrc1,
649 $ csrc1 )
650 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN
651 lrows = min( nb, lktop-indx )
652 CALL sgemm('No transpose', 'No transpose',
653 $ lrows, lnwin, lnwin, one,
654 $ h((jloc-1)*lldh+iloc), lldh,
655 $ work( ipu ), lnwin, zero,
656 $ work(ipw),
657 $ lrows )
658 CALL slamov( 'All', lrows, lnwin,
659 $ work(ipw), lrows,
660 $ h((jloc-1)*lldh+iloc), lldh )
661 END IF
662 80 CONTINUE
663 END IF
664 IF( wantz ) THEN
665 DO 90 indx = 1, n, nb
666 CALL infog2l( indx, lktop, descz, nprow,
667 $ npcol, myrow, mycol, iloc, jloc, rsrc1,
668 $ csrc1 )
669 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN
670 lrows = min(nb,n-indx+1)
671 CALL sgemm( 'No transpose',
672 $ 'No transpose', lrows, lnwin, lnwin,
673 $ one, z((jloc-1)*lldz+iloc), lldz,
674 $ work( ipu ), lnwin, zero,
675 $ work(ipw), lrows )
676 CALL slamov( 'All', lrows, lnwin,
677 $ work(ipw), lrows,
678 $ z((jloc-1)*lldz+iloc), lldz )
679 END IF
680 90 CONTINUE
681 END IF
682 END IF
683*
684* Update the rows of H affected by the bulge-chase.
685*
686 IF( dir.EQ.1 .AND. lenrbuf.GT.0 .AND.
687 $ myrow.EQ.lrsrc ) THEN
688 IF( wantt ) THEN
689 IF( iceil(lkbot,nb).EQ.iceil(kbot,nb) ) THEN
690 lcols = min(iceil(kbot,nb)*nb,n) - kbot
691 ELSE
692 lcols = 0
693 END IF
694 IF( lcols.GT.0 ) THEN
695 indx = kbot + 1
696 CALL infog2l( lktop, indx, desch, nprow,
697 $ npcol, myrow, mycol, iloc, jloc,
698 $ rsrc1, csrc1 )
699 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN
700 CALL sgemm( 'Transpose', 'No Transpose',
701 $ lnwin, lcols, lnwin, one, work(ipu),
702 $ lnwin, h((jloc-1)*lldh+iloc), lldh,
703 $ zero, work(ipw), lnwin )
704 CALL slamov( 'All', lnwin, lcols,
705 $ work(ipw), lnwin,
706 $ h((jloc-1)*lldh+iloc), lldh )
707 END IF
708 END IF
709 93 CONTINUE
710 indxs = iceil(lkbot,nb)*nb + 1
711 DO 95 indx = indxs, n, nb
712 CALL infog2l( lktop, indx,
713 $ desch, nprow, npcol, myrow, mycol,
714 $ iloc, jloc, rsrc1, csrc1 )
715 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN
716 lcols = min( nb, n-indx+1 )
717 CALL sgemm( 'Transpose', 'No Transpose',
718 $ lnwin, lcols, lnwin, one, work(ipu),
719 $ lnwin, h((jloc-1)*lldh+iloc), lldh,
720 $ zero, work(ipw),
721 $ lnwin )
722 CALL slamov( 'All', lnwin, lcols,
723 $ work(ipw), lnwin,
724 $ h((jloc-1)*lldh+iloc), lldh )
725 END IF
726 95 CONTINUE
727 END IF
728 END IF
729 ELSE
730 ks = lnwin-lns/2*3
731*
732* The LNWIN-by-LNWIN matrix U containing the accumulated
733* orthogonal transformations has the following structure:
734*
735* [ U11 U12 ]
736* U = [ ],
737* [ U21 U22 ]
738*
739* where U21 is KS-by-KS upper triangular and U12 is
740* (LNWIN-KS)-by-(LNWIN-KS) lower triangular.
741* Here, KS = LNS.
742*
743* Update the columns of H and Z affected by the bulge
744* chasing.
745*
746* Compute H2*U21 + H1*U11 in workspace.
747*
748 IF( dir.EQ.2 .AND. lencbuf.GT.0 .AND.
749 $ mycol.EQ.lcsrc ) THEN
750 IF( wantt ) THEN
751 DO 100 indx = 1, lktop-liroffh-1, nb
752 CALL infog2l( indx, lktop, desch, nprow,
753 $ npcol, myrow, mycol, iloc, jloc, rsrc1,
754 $ csrc1 )
755 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN
756 jloc1 = indxg2l( lktop+lnwin-ks, nb,
757 $ mycol, desch( csrc_ ), npcol )
758 lrows = min( nb, lktop-indx )
759 CALL slamov( 'All', lrows, ks,
760 $ h((jloc1-1)*lldh+iloc ), lldh,
761 $ work(ipw), lrows )
762 CALL strmm( 'Right', 'Upper',
763 $ 'No transpose','Non-unit', lrows,
764 $ ks, one, work( ipu+lnwin-ks ), lnwin,
765 $ work(ipw), lrows )
766 CALL sgemm('No transpose', 'No transpose',
767 $ lrows, ks, lnwin-ks, one,
768 $ h((jloc-1)*lldh+iloc), lldh,
769 $ work( ipu ), lnwin, one, work(ipw),
770 $ lrows )
771*
772* Compute H1*U12 + H2*U22 in workspace.
773*
774 CALL slamov( 'All', lrows, lnwin-ks,
775 $ h((jloc-1)*lldh+iloc), lldh,
776 $ work( ipw+ks*lrows ), lrows )
777 CALL strmm( 'Right', 'Lower',
778 $ 'No transpose', 'Non-Unit',
779 $ lrows, lnwin-ks, one,
780 $ work( ipu+lnwin*ks ), lnwin,
781 $ work( ipw+ks*lrows ), lrows )
782 CALL sgemm('No transpose', 'No transpose',
783 $ lrows, lnwin-ks, ks, one,
784 $ h((jloc1-1)*lldh+iloc), lldh,
785 $ work( ipu+lnwin*ks+lnwin-ks ), lnwin,
786 $ one, work( ipw+ks*lrows ), lrows )
787*
788* Copy workspace to H.
789*
790 CALL slamov( 'All', lrows, lnwin,
791 $ work(ipw), lrows,
792 $ h((jloc-1)*lldh+iloc), lldh )
793 END IF
794 100 CONTINUE
795 END IF
796*
797 IF( wantz ) THEN
798*
799* Compute Z2*U21 + Z1*U11 in workspace.
800*
801 DO 110 indx = 1, n, nb
802 CALL infog2l( indx, lktop, descz, nprow,
803 $ npcol, myrow, mycol, iloc, jloc, rsrc1,
804 $ csrc1 )
805 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN
806 jloc1 = indxg2l( lktop+lnwin-ks, nb,
807 $ mycol, descz( csrc_ ), npcol )
808 lrows = min(nb,n-indx+1)
809 CALL slamov( 'All', lrows, ks,
810 $ z((jloc1-1)*lldz+iloc ), lldz,
811 $ work(ipw), lrows )
812 CALL strmm( 'Right', 'Upper',
813 $ 'No transpose', 'Non-unit',
814 $ lrows, ks, one, work( ipu+lnwin-ks ),
815 $ lnwin, work(ipw), lrows )
816 CALL sgemm( 'No transpose',
817 $ 'No transpose', lrows, ks, lnwin-ks,
818 $ one, z((jloc-1)*lldz+iloc), lldz,
819 $ work( ipu ), lnwin, one, work(ipw),
820 $ lrows )
821*
822* Compute Z1*U12 + Z2*U22 in workspace.
823*
824 CALL slamov( 'All', lrows, lnwin-ks,
825 $ z((jloc-1)*lldz+iloc), lldz,
826 $ work( ipw+ks*lrows ), lrows)
827 CALL strmm( 'Right', 'Lower',
828 $ 'No transpose', 'Non-unit',
829 $ lrows, lnwin-ks, one,
830 $ work( ipu+lnwin*ks ), lnwin,
831 $ work( ipw+ks*lrows ), lrows )
832 CALL sgemm( 'No transpose',
833 $ 'No transpose', lrows, lnwin-ks, ks,
834 $ one, z((jloc1-1)*lldz+iloc), lldz,
835 $ work( ipu+lnwin*ks+lnwin-ks ), lnwin,
836 $ one, work( ipw+ks*lrows ),
837 $ lrows )
838*
839* Copy workspace to Z.
840*
841 CALL slamov( 'All', lrows, lnwin,
842 $ work(ipw), lrows,
843 $ z((jloc-1)*lldz+iloc), lldz )
844 END IF
845 110 CONTINUE
846 END IF
847 END IF
848*
849 IF( dir.EQ.1 .AND. lenrbuf.GT.0 .AND.
850 $ myrow.EQ.lrsrc ) THEN
851 IF( wantt ) THEN
852 indxs = iceil(lkbot,nb)*nb + 1
853 DO 120 indx = indxs, n, nb
854 CALL infog2l( lktop, indx,
855 $ desch, nprow, npcol, myrow, mycol, iloc,
856 $ jloc, rsrc1, csrc1 )
857 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN
858*
859* Compute U21**T*H2 + U11**T*H1 in workspace.
860*
861 iloc1 = indxg2l( lktop+lnwin-ks, nb,
862 $ myrow, desch( rsrc_ ), nprow )
863 lcols = min( nb, n-indx+1 )
864 CALL slamov( 'All', ks, lcols,
865 $ h((jloc-1)*lldh+iloc1), lldh,
866 $ work(ipw), lnwin )
867 CALL strmm( 'Left', 'Upper', 'Transpose',
868 $ 'Non-unit', ks, lcols, one,
869 $ work( ipu+lnwin-ks ), lnwin,
870 $ work(ipw), lnwin )
871 CALL sgemm( 'Transpose', 'No transpose',
872 $ ks, lcols, lnwin-ks, one, work(ipu),
873 $ lnwin, h((jloc-1)*lldh+iloc), lldh,
874 $ one, work(ipw), lnwin )
875*
876* Compute U12**T*H1 + U22**T*H2 in workspace.
877*
878 CALL slamov( 'All', lnwin-ks, lcols,
879 $ h((jloc-1)*lldh+iloc), lldh,
880 $ work( ipw+ks ), lnwin )
881 CALL strmm( 'Left', 'Lower', 'Transpose',
882 $ 'Non-unit', lnwin-ks, lcols, one,
883 $ work( ipu+lnwin*ks ), lnwin,
884 $ work( ipw+ks ), lnwin )
885 CALL sgemm( 'Transpose', 'No Transpose',
886 $ lnwin-ks, lcols, ks, one,
887 $ work( ipu+lnwin*ks+lnwin-ks ), lnwin,
888 $ h((jloc-1)*lldh+iloc1), lldh,
889 $ one, work( ipw+ks ), lnwin )
890*
891* Copy workspace to H.
892*
893 CALL slamov( 'All', lnwin, lcols,
894 $ work(ipw), lnwin,
895 $ h((jloc-1)*lldh+iloc), lldh )
896 END IF
897 120 CONTINUE
898 END IF
899 END IF
900 END IF
901*
902* Update position information about current window.
903*
904 IF( dir.EQ.2 ) THEN
905 IF( lkbot.EQ.kbot ) THEN
906 lktop = kbot+1
907 lkbot = kbot+1
908 iwork( 1+(win-1)*5 ) = lktop
909 iwork( 2+(win-1)*5 ) = lkbot
910 iwork( 5+(win-1)*5 ) = 2
911 ELSE
912 lktop = min( lktop + lnwin - lchain,
913 $ iceil( lktop, nb )*nb - lchain + 1,
914 $ kbot )
915 iwork( 1+(win-1)*5 ) = lktop
916 lkbot = min( lkbot + lnwin - lchain,
917 $ iceil( lkbot, nb )*nb, kbot )
918 iwork( 2+(win-1)*5 ) = lkbot
919 lnwin = lkbot-lktop+1
920 IF( lnwin.EQ.lchain ) iwork(5+(win-1)*5) = 2
921 END IF
922 END IF
923 75 CONTINUE
924 70 CONTINUE
925 65 CONTINUE
926*
927* If bulges were chasen off from first window, the window is
928* removed.
929*
930 IF( ichoff.GT.0 ) THEN
931 DO 128 win = 2, anmwin
932 iwork( 1+(win-2)*5 ) = iwork( 1+(win-1)*5 )
933 iwork( 2+(win-2)*5 ) = iwork( 2+(win-1)*5 )
934 iwork( 3+(win-2)*5 ) = iwork( 3+(win-1)*5 )
935 iwork( 4+(win-2)*5 ) = iwork( 4+(win-1)*5 )
936 iwork( 5+(win-2)*5 ) = iwork( 5+(win-1)*5 )
937 128 CONTINUE
938 anmwin = anmwin - 1
939 ipiw = 6+(anmwin-1)*5
940 END IF
941*
942* If we have no more windows, return.
943*
944 IF( anmwin.LT.1 ) RETURN
945*
946 ELSE
947*
948* Set up windows such that as many bulges as possible can be
949* moved over the border to the next block. Make sure that the
950* cross border window is at least (NTINY+1)-by-(NTINY+1), unless
951* we are chasing off the bulges from the last window. This is
952* accomplished by setting the bottom index LKBOT such that the
953* local window has the correct size.
954*
955* If LKBOT then becomes larger than KBOT, the endpoint of the whole
956* global submatrix, or LKTOP from a window located already residing
957* at the other side of the border, this is taken care of by some
958* dirty tricks.
959*
960 DO 130 win = 1, anmwin
961 lktop1 = iwork( 1+(win-1)*5 )
962 lkbot = iwork( 2+(win-1)*5 )
963 lnwin = max( 6, min( lkbot - lktop1 + 1, lchain ) )
964 lkbot1 = max( min( kbot, iceil(lktop1,nb)*nb+lchain),
965 $ min( kbot, min( lktop1+2*lnwin-1,
966 $ (iceil(lktop1,nb)+1)*nb ) ) )
967 iwork( 2+(win-1)*5 ) = lkbot1
968 130 CONTINUE
969 ichoff = 0
970*
971* Keep a record over what windows that were moved over the borders
972* such that we can delay some windows due to lack of space on the
973* other side of the border; we do not want to leave any of the
974* bulges behind...
975*
976* IWORK( 5+(WIN-1)*5 ) = 0: window WIN has not been processed
977* IWORK( 5+(WIN-1)*5 ) = 1: window WIN is being processed (need to
978* know for updates)
979* IWORK( 5+(WIN-1)*5 ) = 2: window WIN has been fully processed
980*
981* So, start by marking all windows as not processed.
982*
983 DO 135 win = 1, anmwin
984 iwork( 5+(win-1)*5 ) = 0
985 135 CONTINUE
986*
987* Do the cross border bulge-chase as follows: Start from the
988* first window (the one that is closest to be chased off the
989* diagonal of H) and take the odd windows first followed by the
990* even ones. To not get into hang-problems on processor meshes
991* with at least one odd dimension, the windows will in such a case
992* be processed in chunks of {the minimum odd process dimension}-1
993* windows to avoid overlapping processor scopes in forming the
994* cross border computational windows and the cross border update
995* regions.
996*
997 wchunk = max( 1, min( anmwin, nprow-1, npcol-1 ) )
998 numchunk = iceil( anmwin, wchunk )
999*
1000* Based on the computed chunk of windows, start working with
1001* crossborder bulge-chasing. Repeat this as long as there is
1002* still work left to do (137 is a kind of do-while statement).
1003*
1004 137 CONTINUE
1005*
1006* Zero out LENRBUF and LENCBUF each time we restart this loop.
1007*
1008 lenrbuf = 0
1009 lencbuf = 0
1010*
1011 DO 140 oddeven = 1, min( 2, anmwin )
1012 DO 150 chunknum = 1, numchunk
1013 ipnext = 1
1014 DO 160 win = oddeven+(chunknum-1)*wchunk,
1015 $ min(anmwin,max(1,oddeven+(chunknum)*wchunk-1)), 2
1016*
1017* Get position and size of the WIN:th active window and
1018* make sure that we skip the cross border bulge for this
1019* window if the window is not shared between several data
1020* layout blocks (and processors).
1021*
1022* Also, delay windows that do not have sufficient size of
1023* the other side of the border. Moreover, make sure to skip
1024* windows that was already processed in the last round of
1025* the do-while loop (137).
1026*
1027 IF( iwork( 5+(win-1)*5 ).EQ.2 ) GO TO 165
1028 lktop = iwork( 1+(win-1)*5 )
1029 lkbot = iwork( 2+(win-1)*5 )
1030 IF( win.GT.1 ) THEN
1031 lktop2 = iwork( 1+(win-2)*5 )
1032 ELSE
1033 lktop2 = kbot+1
1034 END IF
1035 IF( iceil(lktop,nb).EQ.iceil(lkbot,nb) .OR.
1036 $ lkbot.GE.lktop2 ) GO TO 165
1037 lnwin = lkbot - lktop + 1
1038 IF( lnwin.LE.ntiny .AND. lkbot.NE.kbot .AND.
1039 $ .NOT. mod(lkbot,nb).EQ.0 ) GO TO 165
1040*
1041* If window is going to be processed, mark it as processed.
1042*
1043 iwork( 5+(win-1)*5 ) = 1
1044*
1045* Extract processors for current cross border window,
1046* as below:
1047*
1048* 1 | 2
1049* --+--
1050* 3 | 4
1051*
1052 rsrc1 = iwork( 3+(win-1)*5 )
1053 csrc1 = iwork( 4+(win-1)*5 )
1054 rsrc2 = rsrc1
1055 csrc2 = mod( csrc1+1, npcol )
1056 rsrc3 = mod( rsrc1+1, nprow )
1057 csrc3 = csrc1
1058 rsrc4 = mod( rsrc1+1, nprow )
1059 csrc4 = mod( csrc1+1, npcol )
1060*
1061* Form group of four processors for cross border window.
1062*
1063 IF( ( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) .OR.
1064 $ ( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) .OR.
1065 $ ( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) .OR.
1066 $ ( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) ) THEN
1067*
1068* Compute the upper and lower parts of the active
1069* window.
1070*
1071 dim1 = nb - mod(lktop-1,nb)
1072 dim4 = lnwin - dim1
1073*
1074* Temporarily compute a new value of the size of the
1075* computational window that is larger than or equal to
1076* NTINY+1; call the *real* value DIM.
1077*
1078 dim = lnwin
1079 lnwin = max(ntiny+1,lnwin)
1080*
1081* Divide workspace.
1082*
1083 ipu = ipnext
1084 iph = ipu + dim**2
1085 ipuu = iph + lnwin**2
1086 ipv = ipuu + lnwin**2
1087 ipnext = iph
1088 IF( dim.LT.lnwin ) THEN
1089 CALL slaset( 'All', lnwin, lnwin, zero,
1090 $ one, work( iph ), lnwin )
1091 ELSE
1092 CALL slaset( 'All', dim, dim, zero,
1093 $ zero, work( iph ), lnwin )
1094 END IF
1095*
1096* Form the active window.
1097*
1098 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN
1099 iloc = indxg2l( lktop, nb, myrow,
1100 $ desch( rsrc_ ), nprow )
1101 jloc = indxg2l( lktop, nb, mycol,
1102 $ desch( csrc_ ), npcol )
1103 CALL slamov( 'All', dim1, dim1,
1104 $ h((jloc-1)*lldh+iloc), lldh, work(iph),
1105 $ lnwin )
1106 IF( rsrc1.NE.rsrc4 .OR. csrc1.NE.csrc4 ) THEN
1107* Proc#1 <==> Proc#4
1108 CALL sgesd2d( ictxt, dim1, dim1,
1109 $ work(iph), lnwin, rsrc4, csrc4 )
1110 CALL sgerv2d( ictxt, dim4, dim4,
1111 $ work(iph+dim1*lnwin+dim1),
1112 $ lnwin, rsrc4, csrc4 )
1113 END IF
1114 END IF
1115 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN
1116 iloc = indxg2l( lktop+dim1, nb, myrow,
1117 $ desch( rsrc_ ), nprow )
1118 jloc = indxg2l( lktop+dim1, nb, mycol,
1119 $ desch( csrc_ ), npcol )
1120 CALL slamov( 'All', dim4, dim4,
1121 $ h((jloc-1)*lldh+iloc), lldh,
1122 $ work(iph+dim1*lnwin+dim1),
1123 $ lnwin )
1124 IF( rsrc4.NE.rsrc1 .OR. csrc4.NE.csrc1 ) THEN
1125* Proc#4 <==> Proc#1
1126 CALL sgesd2d( ictxt, dim4, dim4,
1127 $ work(iph+dim1*lnwin+dim1),
1128 $ lnwin, rsrc1, csrc1 )
1129 CALL sgerv2d( ictxt, dim1, dim1,
1130 $ work(iph), lnwin, rsrc1, csrc1 )
1131 END IF
1132 END IF
1133 IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) THEN
1134 iloc = indxg2l( lktop, nb, myrow,
1135 $ desch( rsrc_ ), nprow )
1136 jloc = indxg2l( lktop+dim1, nb, mycol,
1137 $ desch( csrc_ ), npcol )
1138 CALL slamov( 'All', dim1, dim4,
1139 $ h((jloc-1)*lldh+iloc), lldh,
1140 $ work(iph+dim1*lnwin), lnwin )
1141 IF( rsrc2.NE.rsrc1 .OR. csrc2.NE.csrc1 ) THEN
1142* Proc#2 ==> Proc#1
1143 CALL sgesd2d( ictxt, dim1, dim4,
1144 $ work(iph+dim1*lnwin),
1145 $ lnwin, rsrc1, csrc1 )
1146 END IF
1147 END IF
1148 IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) THEN
1149 IF( rsrc2.NE.rsrc4 .OR. csrc2.NE.csrc4 ) THEN
1150* Proc#2 ==> Proc#4
1151 CALL sgesd2d( ictxt, dim1, dim4,
1152 $ work(iph+dim1*lnwin),
1153 $ lnwin, rsrc4, csrc4 )
1154 END IF
1155 END IF
1156 IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) THEN
1157 iloc = indxg2l( lktop+dim1, nb, myrow,
1158 $ desch( rsrc_ ), nprow )
1159 jloc = indxg2l( lktop+dim1-1, nb, mycol,
1160 $ desch( csrc_ ), npcol )
1161 CALL slamov( 'All', 1, 1,
1162 $ h((jloc-1)*lldh+iloc), lldh,
1163 $ work(iph+(dim1-1)*lnwin+dim1),
1164 $ lnwin )
1165 IF( rsrc3.NE.rsrc1 .OR. csrc3.NE.csrc1 ) THEN
1166* Proc#3 ==> Proc#1
1167 CALL sgesd2d( ictxt, 1, 1,
1168 $ work(iph+(dim1-1)*lnwin+dim1),
1169 $ lnwin, rsrc1, csrc1 )
1170 END IF
1171 END IF
1172 IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) THEN
1173 IF( rsrc3.NE.rsrc4 .OR. csrc3.NE.csrc4 ) THEN
1174* Proc#3 ==> Proc#4
1175 CALL sgesd2d( ictxt, 1, 1,
1176 $ work(iph+(dim1-1)*lnwin+dim1),
1177 $ lnwin, rsrc4, csrc4 )
1178 END IF
1179 END IF
1180 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN
1181 IF( rsrc1.NE.rsrc2 .OR. csrc1.NE.csrc2 ) THEN
1182* Proc#1 <== Proc#2
1183 CALL sgerv2d( ictxt, dim1, dim4,
1184 $ work(iph+dim1*lnwin),
1185 $ lnwin, rsrc2, csrc2 )
1186 END IF
1187 IF( rsrc1.NE.rsrc3 .OR. csrc1.NE.csrc3 ) THEN
1188* Proc#1 <== Proc#3
1189 CALL sgerv2d( ictxt, 1, 1,
1190 $ work(iph+(dim1-1)*lnwin+dim1),
1191 $ lnwin, rsrc3, csrc3 )
1192 END IF
1193 END IF
1194 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN
1195 IF( rsrc4.NE.rsrc2 .OR. csrc4.NE.csrc2 ) THEN
1196* Proc#4 <== Proc#2
1197 CALL sgerv2d( ictxt, dim1, dim4,
1198 $ work(iph+dim1*lnwin),
1199 $ lnwin, rsrc2, csrc2 )
1200 END IF
1201 IF( rsrc4.NE.rsrc3 .OR. csrc4.NE.csrc3 ) THEN
1202* Proc#4 <== Proc#3
1203 CALL sgerv2d( ictxt, 1, 1,
1204 $ work(iph+(dim1-1)*lnwin+dim1),
1205 $ lnwin, rsrc3, csrc3 )
1206 END IF
1207 END IF
1208*
1209* Prepare for call to SLAQR6 - it could happen that no
1210* bulges where introduced in the pre-cross border step
1211* since the chain was too long to fit in the top-left
1212* part of the cross border window. In such a case, the
1213* bulges are introduced here instead. It could also
1214* happen that the bottom-right part is too small to hold
1215* the whole chain -- in such a case, the bulges are
1216* chasen off immediately, as well.
1217*
1218 IF( (myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1) .OR.
1219 $ (myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4) ) THEN
1220 IF( lktop.EQ.ktop .AND. lkbot.EQ.kbot .AND.
1221 $ (dim1.LE.lchain .OR. dim1.LE.ntiny ) ) THEN
1222 job = 'All steps'
1223 ichoff = 1
1224 ELSEIF( lktop.EQ.ktop .AND.
1225 $ ( dim1.LE.lchain .OR. dim1.LE.ntiny ) ) THEN
1226 job = 'Introduce and chase'
1227 ELSEIF( lkbot.EQ.kbot ) THEN
1228 job = 'Off-chase bulges'
1229 ichoff = 1
1230 ELSE
1231 job = 'Chase bulges'
1232 END IF
1233 ku = lnwin - kdu + 1
1234 kwh = kdu + 1
1235 nho = ( lnwin-kdu+1-4 ) - ( kdu+1 ) + 1
1236 kwv = kdu + 4
1237 nve = lnwin - kdu - kwv + 1
1238 CALL slaset( 'All', lnwin, lnwin,
1239 $ zero, one, work(ipuu), lnwin )
1240*
1241* Small-bulge multi-shift QR sweep.
1242*
1243 lks = max(1, ns - win*lns + 1)
1244 CALL slaqr6( job, wantt, .true., lkacc22, lnwin,
1245 $ 1, dim, lns, sr( lks ), si( lks ),
1246 $ work(iph), lnwin, 1, dim,
1247 $ work(ipuu), lnwin, work(ipu), 3,
1248 $ work( iph+ku-1 ), lnwin, nve,
1249 $ work( iph+kwv-1 ), lnwin, nho,
1250 $ work( iph-1+ku+(kwh-1)*lnwin ), lnwin )
1251*
1252* Copy local submatrices of H back to global matrix.
1253*
1254 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN
1255 iloc = indxg2l( lktop, nb, myrow,
1256 $ desch( rsrc_ ), nprow )
1257 jloc = indxg2l( lktop, nb, mycol,
1258 $ desch( csrc_ ), npcol )
1259 CALL slamov( 'All', dim1, dim1, work(iph),
1260 $ lnwin, h((jloc-1)*lldh+iloc),
1261 $ lldh )
1262 END IF
1263 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN
1264 iloc = indxg2l( lktop+dim1, nb, myrow,
1265 $ desch( rsrc_ ), nprow )
1266 jloc = indxg2l( lktop+dim1, nb, mycol,
1267 $ desch( csrc_ ), npcol )
1268 CALL slamov( 'All', dim4, dim4,
1269 $ work(iph+dim1*lnwin+dim1),
1270 $ lnwin, h((jloc-1)*lldh+iloc), lldh )
1271 END IF
1272*
1273* Copy actual submatrix of U to the correct place of
1274* the buffer.
1275*
1276 CALL slamov( 'All', dim, dim,
1277 $ work(ipuu), lnwin, work(ipu), dim )
1278 END IF
1279*
1280* Return data to process 2 and 3.
1281*
1282 rws3 = min(3,dim4)
1283 cls3 = min(3,dim1)
1284 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN
1285 IF( rsrc1.NE.rsrc3 .OR. csrc1.NE.csrc3 ) THEN
1286* Proc#1 ==> Proc#3
1287 CALL sgesd2d( ictxt, rws3, cls3,
1288 $ work( iph+(dim1-cls3)*lnwin+dim1 ),
1289 $ lnwin, rsrc3, csrc3 )
1290 END IF
1291 END IF
1292 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN
1293 IF( rsrc4.NE.rsrc2 .OR. csrc4.NE.csrc2 ) THEN
1294* Proc#4 ==> Proc#2
1295 CALL sgesd2d( ictxt, dim1, dim4,
1296 $ work( iph+dim1*lnwin),
1297 $ lnwin, rsrc2, csrc2 )
1298 END IF
1299 END IF
1300 IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) THEN
1301 iloc = indxg2l( lktop, nb, myrow,
1302 $ desch( rsrc_ ), nprow )
1303 jloc = indxg2l( lktop+dim1, nb, mycol,
1304 $ desch( csrc_ ), npcol )
1305 IF( rsrc2.NE.rsrc4 .OR. csrc2.NE.csrc4 ) THEN
1306* Proc#2 <== Proc#4
1307 CALL sgerv2d( ictxt, dim1, dim4,
1308 $ work(iph+dim1*lnwin),
1309 $ lnwin, rsrc4, csrc4 )
1310 END IF
1311 CALL slamov( 'All', dim1, dim4,
1312 $ work( iph+dim1*lnwin ), lnwin,
1313 $ h((jloc-1)*lldh+iloc), lldh )
1314 END IF
1315 IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) THEN
1316 iloc = indxg2l( lktop+dim1, nb, myrow,
1317 $ desch( rsrc_ ), nprow )
1318 jloc = indxg2l( lktop+dim1-cls3, nb, mycol,
1319 $ desch( csrc_ ), npcol )
1320 IF( rsrc3.NE.rsrc1 .OR. csrc3.NE.csrc1 ) THEN
1321* Proc#3 <== Proc#1
1322 CALL sgerv2d( ictxt, rws3, cls3,
1323 $ work( iph+(dim1-cls3)*lnwin+dim1 ),
1324 $ lnwin, rsrc1, csrc1 )
1325 END IF
1326 CALL slamov( 'Upper', rws3, cls3,
1327 $ work( iph+(dim1-cls3)*lnwin+dim1 ),
1328 $ lnwin, h((jloc-1)*lldh+iloc),
1329 $ lldh )
1330 IF( rws3.GT.1 .AND. cls3.GT.1 ) THEN
1331 elem = work( iph+(dim1-cls3)*lnwin+dim1+1 )
1332 IF( elem.NE.zero ) THEN
1333 CALL slamov( 'Lower', rws3-1, cls3-1,
1334 $ work( iph+(dim1-cls3)*lnwin+dim1+1 ),
1335 $ lnwin, h((jloc-1)*lldh+iloc+1), lldh )
1336 END IF
1337 END IF
1338 END IF
1339*
1340* Restore correct value of LNWIN.
1341*
1342 lnwin = dim
1343*
1344 END IF
1345*
1346* Increment counter for buffers of orthogonal
1347* transformations.
1348*
1349 IF( myrow.EQ.rsrc1 .OR. mycol.EQ.csrc1 .OR.
1350 $ myrow.EQ.rsrc4 .OR. mycol.EQ.csrc4 ) THEN
1351 IF( myrow.EQ.rsrc1 .OR. myrow.EQ.rsrc4 )
1352 $ lenrbuf = lenrbuf + lnwin*lnwin
1353 IF( mycol.EQ.csrc1 .OR. mycol.EQ.csrc4 )
1354 $ lencbuf = lencbuf + lnwin*lnwin
1355 END IF
1356*
1357* If no cross border bulge chasing was performed for the
1358* current WIN:th window, the processor jump to this point
1359* and consider the next one.
1360*
1361 165 CONTINUE
1362*
1363 160 CONTINUE
1364*
1365* Broadcast orthogonal transformations -- this will only happen
1366* if the buffer associated with the orthogonal transformations
1367* is not empty (controlled by LENRBUF, for row-wise
1368* broadcasts, and LENCBUF, for column-wise broadcasts).
1369*
1370 DO 170 dir = 1, 2
1371 bcdone = .false.
1372 DO 180 win = oddeven+(chunknum-1)*wchunk,
1373 $ min(anmwin,max(1,oddeven+(chunknum)*wchunk-1)), 2
1374 IF( ( lenrbuf.EQ.0 .AND. lencbuf.EQ.0 ) .OR.
1375 $ bcdone ) GO TO 185
1376 rsrc1 = iwork( 3+(win-1)*5 )
1377 csrc1 = iwork( 4+(win-1)*5 )
1378 rsrc4 = mod( rsrc1+1, nprow )
1379 csrc4 = mod( csrc1+1, npcol )
1380 IF( ( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) .OR.
1381 $ ( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) ) THEN
1382 IF( dir.EQ.1 .AND. lenrbuf.GT.0 .AND.
1383 $ npcol.GT.1 .AND. nprocs.GT.2 ) THEN
1384 IF( myrow.EQ.rsrc1 .OR. ( myrow.EQ.rsrc4
1385 $ .AND. rsrc4.NE.rsrc1 ) ) THEN
1386 CALL sgebs2d( ictxt, 'Row', '1-Tree',
1387 $ lenrbuf, 1, work, lenrbuf )
1388 ELSE
1389 CALL sgebr2d( ictxt, 'Row', '1-Tree',
1390 $ lenrbuf, 1, work, lenrbuf, rsrc1,
1391 $ csrc1 )
1392 END IF
1393 ELSEIF( dir.EQ.2 .AND. lencbuf.GT.0 .AND.
1394 $ nprow.GT.1 .AND. nprocs.GT.2 ) THEN
1395 IF( mycol.EQ.csrc1 .OR. ( mycol.EQ.csrc4
1396 $ .AND. csrc4.NE.csrc1 ) ) THEN
1397 CALL sgebs2d( ictxt, 'Col', '1-Tree',
1398 $ lencbuf, 1, work, lencbuf )
1399 ELSE
1400 CALL sgebr2d( ictxt, 'Col', '1-Tree',
1401 $ lencbuf, 1, work(1+lenrbuf), lencbuf,
1402 $ rsrc1, csrc1 )
1403 END IF
1404 END IF
1405 IF( lenrbuf.GT.0 .AND. ( mycol.EQ.csrc1 .OR.
1406 $ ( mycol.EQ.csrc4 .AND. csrc4.NE.csrc1 ) ) )
1407 $ CALL slamov( 'All', lenrbuf, 1, work, lenrbuf,
1408 $ work(1+lenrbuf), lencbuf )
1409 bcdone = .true.
1410 ELSEIF( myrow.EQ.rsrc1 .AND. dir.EQ.1 ) THEN
1411 IF( lenrbuf.GT.0 .AND. npcol.GT.1 )
1412 $ CALL sgebr2d( ictxt, 'Row', '1-Tree', lenrbuf,
1413 $ 1, work, lenrbuf, rsrc1, csrc1 )
1414 bcdone = .true.
1415 ELSEIF( mycol.EQ.csrc1 .AND. dir.EQ.2 ) THEN
1416 IF( lencbuf.GT.0 .AND. nprow.GT.1 )
1417 $ CALL sgebr2d( ictxt, 'Col', '1-Tree', lencbuf,
1418 $ 1, work(1+lenrbuf), lencbuf, rsrc1, csrc1 )
1419 bcdone = .true.
1420 ELSEIF( myrow.EQ.rsrc4 .AND. dir.EQ.1 ) THEN
1421 IF( lenrbuf.GT.0 .AND. npcol.GT.1 )
1422 $ CALL sgebr2d( ictxt, 'Row', '1-Tree', lenrbuf,
1423 $ 1, work, lenrbuf, rsrc4, csrc4 )
1424 bcdone = .true.
1425 ELSEIF( mycol.EQ.csrc4 .AND. dir.EQ.2 ) THEN
1426 IF( lencbuf.GT.0 .AND. nprow.GT.1 )
1427 $ CALL sgebr2d( ictxt, 'Col', '1-Tree', lencbuf,
1428 $ 1, work(1+lenrbuf), lencbuf, rsrc4, csrc4 )
1429 bcdone = .true.
1430 END IF
1431 185 CONTINUE
1432 180 CONTINUE
1433 170 CONTINUE
1434*
1435* Prepare for computing cross border updates by exchanging
1436* data in cross border update regions in H and Z.
1437*
1438 DO 190 dir = 1, 2
1439 winid = 0
1440 ipw3 = 1
1441 DO 200 win = oddeven+(chunknum-1)*wchunk,
1442 $ min(anmwin,max(1,oddeven+(chunknum)*wchunk-1)), 2
1443 IF( iwork( 5+(win-1)*5 ).NE.1 ) GO TO 205
1444*
1445* Make sure this part of the code is only executed when
1446* there has been some work performed on the WIN:th
1447* window.
1448*
1449 lktop = iwork( 1+(win-1)*5 )
1450 lkbot = iwork( 2+(win-1)*5 )
1451*
1452* Extract processor indices associated with
1453* the current window.
1454*
1455 rsrc1 = iwork( 3+(win-1)*5 )
1456 csrc1 = iwork( 4+(win-1)*5 )
1457 rsrc4 = mod( rsrc1+1, nprow )
1458 csrc4 = mod( csrc1+1, npcol )
1459*
1460* Compute local number of rows and columns
1461* of H and Z to exchange.
1462*
1463 IF(((mycol.EQ.csrc1.OR.mycol.EQ.csrc4).AND.dir.EQ.2)
1464 $ .OR.((myrow.EQ.rsrc1.OR.myrow.EQ.rsrc4).AND.
1465 $ dir.EQ.1)) THEN
1466 winid = winid + 1
1467 lnwin = lkbot - lktop + 1
1468 ipu = ipnext
1469 dim1 = nb - mod(lktop-1,nb)
1470 dim4 = lnwin - dim1
1471 ipnext = ipu + lnwin*lnwin
1472 IF( dir.EQ.2 ) THEN
1473 IF( wantz ) THEN
1474 zrows = numroc( n, nb, myrow, descz( rsrc_ ),
1475 $ nprow )
1476 ELSE
1477 zrows = 0
1478 END IF
1479 IF( wantt ) THEN
1480 hrows = numroc( lktop-1, nb, myrow,
1481 $ desch( rsrc_ ), nprow )
1482 ELSE
1483 hrows = 0
1484 END IF
1485 ELSE
1486 zrows = 0
1487 hrows = 0
1488 END IF
1489 IF( dir.EQ.1 ) THEN
1490 IF( wantt ) THEN
1491 hcols = numroc( n - (lktop+dim1-1), nb,
1492 $ mycol, csrc4, npcol )
1493 IF( mycol.EQ.csrc4 ) hcols = hcols - dim4
1494 ELSE
1495 hcols = 0
1496 END IF
1497 ELSE
1498 hcols = 0
1499 END IF
1500 ipw = max( 1 + lenrbuf + lencbuf, ipw3 )
1501 ipw1 = ipw + hrows * lnwin
1502 IF( wantz ) THEN
1503 ipw2 = ipw1 + lnwin * hcols
1504 ipw3 = ipw2 + zrows * lnwin
1505 ELSE
1506 ipw3 = ipw1 + lnwin * hcols
1507 END IF
1508 END IF
1509*
1510* Let each process row and column involved in the updates
1511* exchange data in H and Z with their neighbours.
1512*
1513 IF( dir.EQ.2 .AND. wantt .AND. lencbuf.GT.0 ) THEN
1514 IF( mycol.EQ.csrc1 .OR. mycol.EQ.csrc4 ) THEN
1515 DO 210 indx = 1, nprow
1516 IF( mycol.EQ.csrc1 ) THEN
1517 CALL infog2l( 1+(indx-1)*nb, lktop, desch,
1518 $ nprow, npcol, myrow, mycol, iloc,
1519 $ jloc1, rsrc, csrc1 )
1520 IF( myrow.EQ.rsrc ) THEN
1521 CALL slamov( 'All', hrows, dim1,
1522 $ h((jloc1-1)*lldh+iloc), lldh,
1523 $ work(ipw), hrows )
1524 IF( npcol.GT.1 ) THEN
1525 east = mod( mycol + 1, npcol )
1526 CALL sgesd2d( ictxt, hrows, dim1,
1527 $ work(ipw), hrows, rsrc, east )
1528 CALL sgerv2d( ictxt, hrows, dim4,
1529 $ work(ipw+hrows*dim1), hrows,
1530 $ rsrc, east )
1531 END IF
1532 END IF
1533 END IF
1534 IF( mycol.EQ.csrc4 ) THEN
1535 CALL infog2l( 1+(indx-1)*nb, lktop+dim1,
1536 $ desch, nprow, npcol, myrow, mycol,
1537 $ iloc, jloc4, rsrc, csrc4 )
1538 IF( myrow.EQ.rsrc ) THEN
1539 CALL slamov( 'All', hrows, dim4,
1540 $ h((jloc4-1)*lldh+iloc), lldh,
1541 $ work(ipw+hrows*dim1), hrows )
1542 IF( npcol.GT.1 ) THEN
1543 west = mod( mycol - 1 + npcol,
1544 $ npcol )
1545 CALL sgesd2d( ictxt, hrows, dim4,
1546 $ work(ipw+hrows*dim1), hrows,
1547 $ rsrc, west )
1548 CALL sgerv2d( ictxt, hrows, dim1,
1549 $ work(ipw), hrows, rsrc, west )
1550 END IF
1551 END IF
1552 END IF
1553 210 CONTINUE
1554 END IF
1555 END IF
1556*
1557 IF( dir.EQ.1 .AND. wantt .AND. lenrbuf.GT.0 ) THEN
1558 IF( myrow.EQ.rsrc1 .OR. myrow.EQ.rsrc4 ) THEN
1559 DO 220 indx = 1, npcol
1560 IF( myrow.EQ.rsrc1 ) THEN
1561 IF( indx.EQ.1 ) THEN
1562 IF( lkbot.LT.n ) THEN
1563 CALL infog2l( lktop, lkbot+1, desch,
1564 $ nprow, npcol, myrow, mycol,
1565 $ iloc1, jloc, rsrc1, csrc )
1566 ELSE
1567 csrc = -1
1568 END IF
1569 ELSEIF( mod(lkbot,nb).NE.0 ) THEN
1570 CALL infog2l( lktop,
1571 $ (iceil(lkbot,nb)+(indx-2))*nb+1,
1572 $ desch, nprow, npcol, myrow, mycol,
1573 $ iloc1, jloc, rsrc1, csrc )
1574 ELSE
1575 CALL infog2l( lktop,
1576 $ (iceil(lkbot,nb)+(indx-1))*nb+1,
1577 $ desch, nprow, npcol, myrow, mycol,
1578 $ iloc1, jloc, rsrc1, csrc )
1579 END IF
1580 IF( mycol.EQ.csrc ) THEN
1581 CALL slamov( 'All', dim1, hcols,
1582 $ h((jloc-1)*lldh+iloc1), lldh,
1583 $ work(ipw1), lnwin )
1584 IF( nprow.GT.1 ) THEN
1585 south = mod( myrow + 1, nprow )
1586 CALL sgesd2d( ictxt, dim1, hcols,
1587 $ work(ipw1), lnwin, south,
1588 $ csrc )
1589 CALL sgerv2d( ictxt, dim4, hcols,
1590 $ work(ipw1+dim1), lnwin, south,
1591 $ csrc )
1592 END IF
1593 END IF
1594 END IF
1595 IF( myrow.EQ.rsrc4 ) THEN
1596 IF( indx.EQ.1 ) THEN
1597 IF( lkbot.LT.n ) THEN
1598 CALL infog2l( lktop+dim1, lkbot+1,
1599 $ desch, nprow, npcol, myrow,
1600 $ mycol, iloc4, jloc, rsrc4,
1601 $ csrc )
1602 ELSE
1603 csrc = -1
1604 END IF
1605 ELSEIF( mod(lkbot,nb).NE.0 ) THEN
1606 CALL infog2l( lktop+dim1,
1607 $ (iceil(lkbot,nb)+(indx-2))*nb+1,
1608 $ desch, nprow, npcol, myrow, mycol,
1609 $ iloc4, jloc, rsrc4, csrc )
1610 ELSE
1611 CALL infog2l( lktop+dim1,
1612 $ (iceil(lkbot,nb)+(indx-1))*nb+1,
1613 $ desch, nprow, npcol, myrow, mycol,
1614 $ iloc4, jloc, rsrc4, csrc )
1615 END IF
1616 IF( mycol.EQ.csrc ) THEN
1617 CALL slamov( 'All', dim4, hcols,
1618 $ h((jloc-1)*lldh+iloc4), lldh,
1619 $ work(ipw1+dim1), lnwin )
1620 IF( nprow.GT.1 ) THEN
1621 north = mod( myrow - 1 + nprow,
1622 $ nprow )
1623 CALL sgesd2d( ictxt, dim4, hcols,
1624 $ work(ipw1+dim1), lnwin, north,
1625 $ csrc )
1626 CALL sgerv2d( ictxt, dim1, hcols,
1627 $ work(ipw1), lnwin, north,
1628 $ csrc )
1629 END IF
1630 END IF
1631 END IF
1632 220 CONTINUE
1633 END IF
1634 END IF
1635*
1636 IF( dir.EQ.2 .AND. wantz .AND. lencbuf.GT.0) THEN
1637 IF( mycol.EQ.csrc1 .OR. mycol.EQ.csrc4 ) THEN
1638 DO 230 indx = 1, nprow
1639 IF( mycol.EQ.csrc1 ) THEN
1640 CALL infog2l( 1+(indx-1)*nb, lktop,
1641 $ descz, nprow, npcol, myrow, mycol,
1642 $ iloc, jloc1, rsrc, csrc1 )
1643 IF( myrow.EQ.rsrc ) THEN
1644 CALL slamov( 'All', zrows, dim1,
1645 $ z((jloc1-1)*lldz+iloc), lldz,
1646 $ work(ipw2), zrows )
1647 IF( npcol.GT.1 ) THEN
1648 east = mod( mycol + 1, npcol )
1649 CALL sgesd2d( ictxt, zrows, dim1,
1650 $ work(ipw2), zrows, rsrc,
1651 $ east )
1652 CALL sgerv2d( ictxt, zrows, dim4,
1653 $ work(ipw2+zrows*dim1),
1654 $ zrows, rsrc, east )
1655 END IF
1656 END IF
1657 END IF
1658 IF( mycol.EQ.csrc4 ) THEN
1659 CALL infog2l( 1+(indx-1)*nb,
1660 $ lktop+dim1, descz, nprow, npcol,
1661 $ myrow, mycol, iloc, jloc4, rsrc,
1662 $ csrc4 )
1663 IF( myrow.EQ.rsrc ) THEN
1664 CALL slamov( 'All', zrows, dim4,
1665 $ z((jloc4-1)*lldz+iloc), lldz,
1666 $ work(ipw2+zrows*dim1), zrows )
1667 IF( npcol.GT.1 ) THEN
1668 west = mod( mycol - 1 + npcol,
1669 $ npcol )
1670 CALL sgesd2d( ictxt, zrows, dim4,
1671 $ work(ipw2+zrows*dim1),
1672 $ zrows, rsrc, west )
1673 CALL sgerv2d( ictxt, zrows, dim1,
1674 $ work(ipw2), zrows, rsrc,
1675 $ west )
1676 END IF
1677 END IF
1678 END IF
1679 230 CONTINUE
1680 END IF
1681 END IF
1682*
1683* If no exchanges was performed for the current window,
1684* all processors jump to this point and try the next
1685* one.
1686*
1687 205 CONTINUE
1688*
1689 200 CONTINUE
1690*
1691* Compute crossborder bulge-chase updates.
1692*
1693 winid = 0
1694 IF( dir.EQ.1 ) THEN
1695 ipnext = 1
1696 ELSE
1697 ipnext = 1 + lenrbuf
1698 END IF
1699 ipw3 = 1
1700 DO 240 win = oddeven+(chunknum-1)*wchunk,
1701 $ min(anmwin,max(1,oddeven+(chunknum)*wchunk-1)), 2
1702 IF( iwork( 5+(win-1)*5 ).NE.1 ) GO TO 245
1703*
1704* Only perform this part of the code if there was really
1705* some work performed on the WIN:th window.
1706*
1707 lktop = iwork( 1+(win-1)*5 )
1708 lkbot = iwork( 2+(win-1)*5 )
1709 lnwin = lkbot - lktop + 1
1710*
1711* Extract the processor indices associated with
1712* the current window.
1713*
1714 rsrc1 = iwork( 3+(win-1)*5 )
1715 csrc1 = iwork( 4+(win-1)*5 )
1716 rsrc4 = mod( rsrc1+1, nprow )
1717 csrc4 = mod( csrc1+1, npcol )
1718*
1719 IF(((mycol.EQ.csrc1.OR.mycol.EQ.csrc4).AND.dir.EQ.2)
1720 $ .OR.((myrow.EQ.rsrc1.OR.myrow.EQ.rsrc4).AND.
1721 $ dir.EQ.1)) THEN
1722*
1723* Set up workspaces.
1724*
1725 winid = winid + 1
1726 lktop = iwork( 1+(win-1)*5 )
1727 lkbot = iwork( 2+(win-1)*5 )
1728 lnwin = lkbot - lktop + 1
1729 dim1 = nb - mod(lktop-1,nb)
1730 dim4 = lnwin - dim1
1731 ipu = ipnext + (winid-1)*lnwin*lnwin
1732 IF( dir.EQ.2 ) THEN
1733 IF( wantz ) THEN
1734 zrows = numroc( n, nb, myrow, descz( rsrc_ ),
1735 $ nprow )
1736 ELSE
1737 zrows = 0
1738 END IF
1739 IF( wantt ) THEN
1740 hrows = numroc( lktop-1, nb, myrow,
1741 $ desch( rsrc_ ), nprow )
1742 ELSE
1743 hrows = 0
1744 END IF
1745 ELSE
1746 zrows = 0
1747 hrows = 0
1748 END IF
1749 IF( dir.EQ.1 ) THEN
1750 IF( wantt ) THEN
1751 hcols = numroc( n - (lktop+dim1-1), nb,
1752 $ mycol, csrc4, npcol )
1753 IF( mycol.EQ.csrc4 ) hcols = hcols - dim4
1754 ELSE
1755 hcols = 0
1756 END IF
1757 ELSE
1758 hcols = 0
1759 END IF
1760*
1761* IPW = local copy of overlapping column block of H
1762* IPW1 = local copy of overlapping row block of H
1763* IPW2 = local copy of overlapping column block of Z
1764* IPW3 = workspace for right hand side of matrix
1765* multiplication
1766*
1767 ipw = max( 1 + lenrbuf + lencbuf, ipw3 )
1768 ipw1 = ipw + hrows * lnwin
1769 IF( wantz ) THEN
1770 ipw2 = ipw1 + lnwin * hcols
1771 ipw3 = ipw2 + zrows * lnwin
1772 ELSE
1773 ipw3 = ipw1 + lnwin * hcols
1774 END IF
1775*
1776* Recompute job to see if special structure of U
1777* could possibly be exploited.
1778*
1779 IF( lktop.EQ.ktop .AND. lkbot.EQ.kbot ) THEN
1780 job = 'All steps'
1781 ELSEIF( lktop.EQ.ktop .AND.
1782 $ ( dim1.LT.lchain+1 .OR. dim1.LE.ntiny ) )
1783 $ THEN
1784 job = 'Introduce and chase'
1785 ELSEIF( lkbot.EQ.kbot ) THEN
1786 job = 'Off-chase bulges'
1787 ELSE
1788 job = 'Chase bulges'
1789 END IF
1790 END IF
1791*
1792* Test if to exploit sparsity structure of
1793* orthogonal matrix U.
1794*
1795 ks = dim1+dim4-lns/2*3
1796 IF( .NOT. blk22 .OR. dim1.NE.ks .OR.
1797 $ dim4.NE.ks .OR. lsame(job,'I') .OR.
1798 $ lsame(job,'O') .OR. lns.LE.2 ) THEN
1799*
1800* Update the columns of H and Z.
1801*
1802 IF( dir.EQ.2 .AND. wantt .AND. lencbuf.GT.0 ) THEN
1803 DO 250 indx = 1, min(lktop-1,1+(nprow-1)*nb), nb
1804 IF( mycol.EQ.csrc1 ) THEN
1805 CALL infog2l( indx, lktop, desch, nprow,
1806 $ npcol, myrow, mycol, iloc, jloc,
1807 $ rsrc, csrc1 )
1808 IF( myrow.EQ.rsrc ) THEN
1809 CALL sgemm( 'No transpose',
1810 $ 'No transpose', hrows, dim1,
1811 $ lnwin, one, work( ipw ), hrows,
1812 $ work( ipu ), lnwin, zero,
1813 $ work(ipw3), hrows )
1814 CALL slamov( 'All', hrows, dim1,
1815 $ work(ipw3), hrows,
1816 $ h((jloc-1)*lldh+iloc), lldh )
1817 END IF
1818 END IF
1819 IF( mycol.EQ.csrc4 ) THEN
1820 CALL infog2l( indx, lktop+dim1, desch,
1821 $ nprow, npcol, myrow, mycol, iloc,
1822 $ jloc, rsrc, csrc4 )
1823 IF( myrow.EQ.rsrc ) THEN
1824 CALL sgemm( 'No transpose',
1825 $ 'No transpose', hrows, dim4,
1826 $ lnwin, one, work( ipw ), hrows,
1827 $ work( ipu+lnwin*dim1 ), lnwin,
1828 $ zero, work(ipw3), hrows )
1829 CALL slamov( 'All', hrows, dim4,
1830 $ work(ipw3), hrows,
1831 $ h((jloc-1)*lldh+iloc), lldh )
1832 END IF
1833 END IF
1834 250 CONTINUE
1835 END IF
1836*
1837 IF( dir.EQ.2 .AND. wantz .AND. lencbuf.GT.0 ) THEN
1838 DO 260 indx = 1, min(n,1+(nprow-1)*nb), nb
1839 IF( mycol.EQ.csrc1 ) THEN
1840 CALL infog2l( indx, lktop, descz, nprow,
1841 $ npcol, myrow, mycol, iloc, jloc,
1842 $ rsrc, csrc1 )
1843 IF( myrow.EQ.rsrc ) THEN
1844 CALL sgemm( 'No transpose',
1845 $ 'No transpose', zrows, dim1,
1846 $ lnwin, one, work( ipw2 ),
1847 $ zrows, work( ipu ), lnwin,
1848 $ zero, work(ipw3), zrows )
1849 CALL slamov( 'All', zrows, dim1,
1850 $ work(ipw3), zrows,
1851 $ z((jloc-1)*lldz+iloc), lldz )
1852 END IF
1853 END IF
1854 IF( mycol.EQ.csrc4 ) THEN
1855 CALL infog2l( indx, lktop+dim1, descz,
1856 $ nprow, npcol, myrow, mycol, iloc,
1857 $ jloc, rsrc, csrc4 )
1858 IF( myrow.EQ.rsrc ) THEN
1859 CALL sgemm( 'No transpose',
1860 $ 'No transpose', zrows, dim4,
1861 $ lnwin, one, work( ipw2 ),
1862 $ zrows,
1863 $ work( ipu+lnwin*dim1 ), lnwin,
1864 $ zero, work(ipw3), zrows )
1865 CALL slamov( 'All', zrows, dim4,
1866 $ work(ipw3), zrows,
1867 $ z((jloc-1)*lldz+iloc), lldz )
1868 END IF
1869 END IF
1870 260 CONTINUE
1871 END IF
1872*
1873* Update the rows of H.
1874*
1875 IF( dir.EQ.1 .AND. wantt .AND. lenrbuf.GT.0 ) THEN
1876 IF( lkbot.LT.n ) THEN
1877 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc4 .AND.
1878 $ mod(lkbot,nb).NE.0 ) THEN
1879 indx = lkbot + 1
1880 CALL infog2l( lktop, indx, desch, nprow,
1881 $ npcol, myrow, mycol, iloc, jloc,
1882 $ rsrc1, csrc4 )
1883 CALL sgemm( 'Transpose', 'No Transpose',
1884 $ dim1, hcols, lnwin, one, work(ipu),
1885 $ lnwin, work( ipw1 ), lnwin, zero,
1886 $ work(ipw3), dim1 )
1887 CALL slamov( 'All', dim1, hcols,
1888 $ work(ipw3), dim1,
1889 $ h((jloc-1)*lldh+iloc), lldh )
1890 END IF
1891 IF( myrow.EQ.rsrc4.AND.mycol.EQ.csrc4 .AND.
1892 $ mod(lkbot,nb).NE.0 ) THEN
1893 indx = lkbot + 1
1894 CALL infog2l( lktop+dim1, indx, desch,
1895 $ nprow, npcol, myrow, mycol, iloc,
1896 $ jloc, rsrc4, csrc4 )
1897 CALL sgemm( 'Transpose', 'No Transpose',
1898 $ dim4, hcols, lnwin, one,
1899 $ work( ipu+dim1*lnwin ), lnwin,
1900 $ work( ipw1), lnwin, zero,
1901 $ work(ipw3), dim4 )
1902 CALL slamov( 'All', dim4, hcols,
1903 $ work(ipw3), dim4,
1904 $ h((jloc-1)*lldh+iloc), lldh )
1905 END IF
1906 indxs = iceil(lkbot,nb)*nb + 1
1907 IF( mod(lkbot,nb).NE.0 ) THEN
1908 indxe = min(n,indxs+(npcol-2)*nb)
1909 ELSE
1910 indxe = min(n,indxs+(npcol-1)*nb)
1911 END IF
1912 DO 270 indx = indxs, indxe, nb
1913 IF( myrow.EQ.rsrc1 ) THEN
1914 CALL infog2l( lktop, indx, desch,
1915 $ nprow, npcol, myrow, mycol, iloc,
1916 $ jloc, rsrc1, csrc )
1917 IF( mycol.EQ.csrc ) THEN
1918 CALL sgemm( 'Transpose',
1919 $ 'No Transpose', dim1, hcols,
1920 $ lnwin, one, work( ipu ), lnwin,
1921 $ work( ipw1 ), lnwin, zero,
1922 $ work(ipw3), dim1 )
1923 CALL slamov( 'All', dim1, hcols,
1924 $ work(ipw3), dim1,
1925 $ h((jloc-1)*lldh+iloc), lldh )
1926 END IF
1927 END IF
1928 IF( myrow.EQ.rsrc4 ) THEN
1929 CALL infog2l( lktop+dim1, indx, desch,
1930 $ nprow, npcol, myrow, mycol, iloc,
1931 $ jloc, rsrc4, csrc )
1932 IF( mycol.EQ.csrc ) THEN
1933 CALL sgemm( 'Transpose',
1934 $ 'No Transpose', dim4, hcols,
1935 $ lnwin, one,
1936 $ work( ipu+lnwin*dim1 ), lnwin,
1937 $ work( ipw1 ), lnwin,
1938 $ zero, work(ipw3), dim4 )
1939 CALL slamov( 'All', dim4, hcols,
1940 $ work(ipw3), dim4,
1941 $ h((jloc-1)*lldh+iloc), lldh )
1942 END IF
1943 END IF
1944 270 CONTINUE
1945 END IF
1946 END IF
1947 ELSE
1948*
1949* Update the columns of H and Z.
1950*
1951* Compute H2*U21 + H1*U11 on the left side of the border.
1952*
1953 IF( dir.EQ.2 .AND. wantt .AND. lencbuf.GT.0 ) THEN
1954 indxe = min(lktop-1,1+(nprow-1)*nb)
1955 DO 280 indx = 1, indxe, nb
1956 IF( mycol.EQ.csrc1 ) THEN
1957 CALL infog2l( indx, lktop, desch, nprow,
1958 $ npcol, myrow, mycol, iloc, jloc,
1959 $ rsrc, csrc1 )
1960 IF( myrow.EQ.rsrc ) THEN
1961 CALL slamov( 'All', hrows, ks,
1962 $ work( ipw+hrows*dim4), hrows,
1963 $ work(ipw3), hrows )
1964 CALL strmm( 'Right', 'Upper',
1965 $ 'No transpose',
1966 $ 'Non-unit', hrows, ks, one,
1967 $ work( ipu+dim4 ), lnwin,
1968 $ work(ipw3), hrows )
1969 CALL sgemm( 'No transpose',
1970 $ 'No transpose', hrows, ks, dim4,
1971 $ one, work( ipw ), hrows,
1972 $ work( ipu ), lnwin, one,
1973 $ work(ipw3), hrows )
1974 CALL slamov( 'All', hrows, ks,
1975 $ work(ipw3), hrows,
1976 $ h((jloc-1)*lldh+iloc), lldh )
1977 END IF
1978 END IF
1979*
1980* Compute H1*U12 + H2*U22 on the right side of
1981* the border.
1982*
1983 IF( mycol.EQ.csrc4 ) THEN
1984 CALL infog2l( indx, lktop+dim1, desch,
1985 $ nprow, npcol, myrow, mycol, iloc,
1986 $ jloc, rsrc, csrc4 )
1987 IF( myrow.EQ.rsrc ) THEN
1988 CALL slamov( 'All', hrows, dim4,
1989 $ work(ipw), hrows, work( ipw3 ),
1990 $ hrows )
1991 CALL strmm( 'Right', 'Lower',
1992 $ 'No transpose',
1993 $ 'Non-unit', hrows, dim4, one,
1994 $ work( ipu+lnwin*ks ), lnwin,
1995 $ work( ipw3 ), hrows )
1996 CALL sgemm( 'No transpose',
1997 $ 'No transpose', hrows, dim4, ks,
1998 $ one, work( ipw+hrows*dim4),
1999 $ hrows,
2000 $ work( ipu+lnwin*ks+dim4 ), lnwin,
2001 $ one, work( ipw3 ), hrows )
2002 CALL slamov( 'All', hrows, dim4,
2003 $ work(ipw3), hrows,
2004 $ h((jloc-1)*lldh+iloc), lldh )
2005 END IF
2006 END IF
2007 280 CONTINUE
2008 END IF
2009*
2010 IF( dir.EQ.2 .AND. wantz .AND. lencbuf.GT.0 ) THEN
2011*
2012* Compute Z2*U21 + Z1*U11 on the left side
2013* of border.
2014*
2015 indxe = min(n,1+(nprow-1)*nb)
2016 DO 290 indx = 1, indxe, nb
2017 IF( mycol.EQ.csrc1 ) THEN
2018 CALL infog2l( indx, i, descz, nprow,
2019 $ npcol, myrow, mycol, iloc, jloc,
2020 $ rsrc, csrc1 )
2021 IF( myrow.EQ.rsrc ) THEN
2022 CALL slamov( 'All', zrows, ks,
2023 $ work( ipw2+zrows*dim4),
2024 $ zrows, work(ipw3), zrows )
2025 CALL strmm( 'Right', 'Upper',
2026 $ 'No transpose',
2027 $ 'Non-unit', zrows, ks, one,
2028 $ work( ipu+dim4 ), lnwin,
2029 $ work(ipw3), zrows )
2030 CALL sgemm( 'No transpose',
2031 $ 'No transpose', zrows, ks,
2032 $ dim4, one, work( ipw2 ),
2033 $ zrows, work( ipu ), lnwin,
2034 $ one, work(ipw3), zrows )
2035 CALL slamov( 'All', zrows, ks,
2036 $ work(ipw3), zrows,
2037 $ z((jloc-1)*lldz+iloc), lldz )
2038 END IF
2039 END IF
2040*
2041* Compute Z1*U12 + Z2*U22 on the right side
2042* of border.
2043*
2044 IF( mycol.EQ.csrc4 ) THEN
2045 CALL infog2l( indx, i+dim1, descz,
2046 $ nprow, npcol, myrow, mycol, iloc,
2047 $ jloc, rsrc, csrc4 )
2048 IF( myrow.EQ.rsrc ) THEN
2049 CALL slamov( 'All', zrows, dim4,
2050 $ work(ipw2), zrows,
2051 $ work( ipw3 ), zrows )
2052 CALL strmm( 'Right', 'Lower',
2053 $ 'No transpose',
2054 $ 'Non-unit', zrows, dim4,
2055 $ one, work( ipu+lnwin*ks ),
2056 $ lnwin, work( ipw3 ), zrows )
2057 CALL sgemm( 'No transpose',
2058 $ 'No transpose', zrows, dim4,
2059 $ ks, one,
2060 $ work( ipw2+zrows*(dim4)),
2061 $ zrows,
2062 $ work( ipu+lnwin*ks+dim4 ),
2063 $ lnwin, one, work( ipw3 ),
2064 $ zrows )
2065 CALL slamov( 'All', zrows, dim4,
2066 $ work(ipw3), zrows,
2067 $ z((jloc-1)*lldz+iloc), lldz )
2068 END IF
2069 END IF
2070 290 CONTINUE
2071 END IF
2072*
2073 IF( dir.EQ.1 .AND. wantt .AND. lenrbuf.GT.0) THEN
2074 IF ( lkbot.LT.n ) THEN
2075*
2076* Compute U21**T*H2 + U11**T*H1 on the upper
2077* side of the border.
2078*
2079 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc4.AND.
2080 $ mod(lkbot,nb).NE.0 ) THEN
2081 indx = lkbot + 1
2082 CALL infog2l( lktop, indx, desch, nprow,
2083 $ npcol, myrow, mycol, iloc, jloc,
2084 $ rsrc1, csrc4 )
2085 CALL slamov( 'All', ks, hcols,
2086 $ work( ipw1+dim4 ), lnwin,
2087 $ work(ipw3), ks )
2088 CALL strmm( 'Left', 'Upper', 'Transpose',
2089 $ 'Non-unit', ks, hcols, one,
2090 $ work( ipu+dim4 ), lnwin,
2091 $ work(ipw3), ks )
2092 CALL sgemm( 'Transpose', 'No transpose',
2093 $ ks, hcols, dim4, one, work(ipu),
2094 $ lnwin, work(ipw1), lnwin,
2095 $ one, work(ipw3), ks )
2096 CALL slamov( 'All', ks, hcols,
2097 $ work(ipw3), ks,
2098 $ h((jloc-1)*lldh+iloc), lldh )
2099 END IF
2100*
2101* Compute U12**T*H1 + U22**T*H2 one the lower
2102* side of the border.
2103*
2104 IF( myrow.EQ.rsrc4.AND.mycol.EQ.csrc4.AND.
2105 $ mod(lkbot,nb).NE.0 ) THEN
2106 indx = lkbot + 1
2107 CALL infog2l( lktop+dim1, indx, desch,
2108 $ nprow, npcol, myrow, mycol, iloc,
2109 $ jloc, rsrc4, csrc4 )
2110 CALL slamov( 'All', dim4, hcols,
2111 $ work( ipw1 ), lnwin,
2112 $ work( ipw3 ), dim4 )
2113 CALL strmm( 'Left', 'Lower', 'Transpose',
2114 $ 'Non-unit', dim4, hcols, one,
2115 $ work( ipu+lnwin*ks ), lnwin,
2116 $ work( ipw3 ), dim4 )
2117 CALL sgemm( 'Transpose', 'No Transpose',
2118 $ dim4, hcols, ks, one,
2119 $ work( ipu+lnwin*ks+dim4 ), lnwin,
2120 $ work( ipw1+dim1 ), lnwin,
2121 $ one, work( ipw3), dim4 )
2122 CALL slamov( 'All', dim4, hcols,
2123 $ work(ipw3), dim4,
2124 $ h((jloc-1)*lldh+iloc), lldh )
2125 END IF
2126*
2127* Compute U21**T*H2 + U11**T*H1 on upper side
2128* on border.
2129*
2130 indxs = iceil(lkbot,nb)*nb+1
2131 IF( mod(lkbot,nb).NE.0 ) THEN
2132 indxe = min(n,indxs+(npcol-2)*nb)
2133 ELSE
2134 indxe = min(n,indxs+(npcol-1)*nb)
2135 END IF
2136 DO 300 indx = indxs, indxe, nb
2137 IF( myrow.EQ.rsrc1 ) THEN
2138 CALL infog2l( lktop, indx, desch,
2139 $ nprow, npcol, myrow, mycol, iloc,
2140 $ jloc, rsrc1, csrc )
2141 IF( mycol.EQ.csrc ) THEN
2142 CALL slamov( 'All', ks, hcols,
2143 $ work( ipw1+dim4 ), lnwin,
2144 $ work(ipw3), ks )
2145 CALL strmm( 'Left', 'Upper',
2146 $ 'Transpose', 'Non-unit',
2147 $ ks, hcols, one,
2148 $ work( ipu+dim4 ), lnwin,
2149 $ work(ipw3), ks )
2150 CALL sgemm( 'Transpose',
2151 $ 'No transpose', ks, hcols,
2152 $ dim4, one, work(ipu), lnwin,
2153 $ work(ipw1), lnwin, one,
2154 $ work(ipw3), ks )
2155 CALL slamov( 'All', ks, hcols,
2156 $ work(ipw3), ks,
2157 $ h((jloc-1)*lldh+iloc), lldh )
2158 END IF
2159 END IF
2160*
2161* Compute U12**T*H1 + U22**T*H2 on lower
2162* side of border.
2163*
2164 IF( myrow.EQ.rsrc4 ) THEN
2165 CALL infog2l( lktop+dim1, indx, desch,
2166 $ nprow, npcol, myrow, mycol, iloc,
2167 $ jloc, rsrc4, csrc )
2168 IF( mycol.EQ.csrc ) THEN
2169 CALL slamov( 'All', dim4, hcols,
2170 $ work( ipw1 ), lnwin,
2171 $ work( ipw3 ), dim4 )
2172 CALL strmm( 'Left', 'Lower',
2173 $ 'Transpose','Non-unit',
2174 $ dim4, hcols, one,
2175 $ work( ipu+lnwin*ks ), lnwin,
2176 $ work( ipw3 ), dim4 )
2177 CALL sgemm( 'Transpose',
2178 $ 'No Transpose', dim4, hcols,
2179 $ ks, one,
2180 $ work( ipu+lnwin*ks+dim4 ),
2181 $ lnwin, work( ipw1+dim1 ),
2182 $ lnwin, one, work( ipw3),
2183 $ dim4 )
2184 CALL slamov( 'All', dim4, hcols,
2185 $ work(ipw3), dim4,
2186 $ h((jloc-1)*lldh+iloc), lldh )
2187 END IF
2188 END IF
2189 300 CONTINUE
2190 END IF
2191 END IF
2192 END IF
2193*
2194* Update window information - mark processed windows are
2195* completed.
2196*
2197 IF( dir.EQ.2 ) THEN
2198 IF( lkbot.EQ.kbot ) THEN
2199 lktop = kbot+1
2200 lkbot = kbot+1
2201 iwork( 1+(win-1)*5 ) = lktop
2202 iwork( 2+(win-1)*5 ) = lkbot
2203 ELSE
2204 lktop = min( lktop + lnwin - lchain,
2205 $ min( kbot, iceil( lkbot, nb )*nb ) -
2206 $ lchain + 1 )
2207 iwork( 1+(win-1)*5 ) = lktop
2208 lkbot = min( max( lkbot + lnwin - lchain,
2209 $ lktop + nwin - 1), min( kbot,
2210 $ iceil( lkbot, nb )*nb ) )
2211 iwork( 2+(win-1)*5 ) = lkbot
2212 END IF
2213 IF( iwork( 5+(win-1)*5 ).EQ.1 )
2214 $ iwork( 5+(win-1)*5 ) = 2
2215 iwork( 3+(win-1)*5 ) = rsrc4
2216 iwork( 4+(win-1)*5 ) = csrc4
2217 END IF
2218*
2219* If nothing was done for the WIN:th window, all
2220* processors come here and consider the next one
2221* instead.
2222*
2223 245 CONTINUE
2224 240 CONTINUE
2225 190 CONTINUE
2226 150 CONTINUE
2227 140 CONTINUE
2228*
2229* Chased off bulges from first window?
2230*
2231 IF( nprocs.GT.1 )
2232 $ CALL igamx2d( ictxt, 'All', '1-Tree', 1, 1, ichoff, 1,
2233 $ -1, -1, -1, -1, -1 )
2234*
2235* If the bulge was chasen off from first window it is removed.
2236*
2237 IF( ichoff.GT.0 ) THEN
2238 DO 198 win = 2, anmwin
2239 iwork( 1+(win-2)*5 ) = iwork( 1+(win-1)*5 )
2240 iwork( 2+(win-2)*5 ) = iwork( 2+(win-1)*5 )
2241 iwork( 3+(win-2)*5 ) = iwork( 3+(win-1)*5 )
2242 iwork( 4+(win-2)*5 ) = iwork( 4+(win-1)*5 )
2243 198 CONTINUE
2244 anmwin = anmwin - 1
2245 ipiw = 6+(anmwin-1)*5
2246 END IF
2247*
2248* If we have no more windows, return.
2249*
2250 IF( anmwin.LT.1 ) RETURN
2251*
2252* Check for any more windows to bring over the border.
2253*
2254 winfin = 0
2255 DO 199 win = 1, anmwin
2256 winfin = winfin+iwork( 5+(win-1)*5 )
2257 199 CONTINUE
2258 IF( winfin.LT.2*anmwin ) GO TO 137
2259*
2260* Zero out process mark for each window - this is legal now when
2261* the process starts over with local bulge-chasing etc.
2262*
2263 DO 201 win = 1, anmwin
2264 iwork( 5+(win-1)*5 ) = 0
2265 201 CONTINUE
2266*
2267 END IF
2268*
2269* Go back to local bulge-chase and see if there is more work to do.
2270*
2271 GO TO 20
2272*
2273* End of PSLAQR5
2274*
2275 END
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pselset(a, ia, ja, desca, alpha)
Definition pselset.f:2
subroutine pslaqr5(wantt, wantz, kacc22, n, ktop, kbot, nshfts, sr, si, h, desch, iloz, ihiz, z, descz, work, lwork, iwork, liwork)
Definition pslaqr5.f:4
subroutine slaqr6(job, wantt, wantz, kacc22, n, ktop, kbot, nshfts, sr, si, h, ldh, iloz, ihiz, z, ldz, v, ldv, u, ldu, nv, wv, ldwv, nh, wh, ldwh)
Definition slaqr6.f:4