SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pslaqr3.f
Go to the documentation of this file.
1 RECURSIVE SUBROUTINE pslaqr3( WANTT, WANTZ, N, KTOP, KBOT, NW, H,
2 $ DESCH, ILOZ, IHIZ, Z, DESCZ, NS, ND,
3 $ SR, SI, V, DESCV, NH, T, DESCT, NV,
4 $ WV, DESCW, WORK, LWORK, IWORK,
5 $ LIWORK, RECLEVEL )
6*
7* Contribution from the Department of Computing Science and HPC2N,
8* Umea University, Sweden
9*
10* -- ScaLAPACK auxiliary routine (version 2.0.1) --
11* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
12* Univ. of Colorado Denver and University of California, Berkeley.
13* January, 2012
14*
15 IMPLICIT NONE
16*
17* .. Scalar Arguments ..
18 INTEGER ihiz, iloz, kbot, ktop, lwork, n, nd, nh, ns,
19 $ nv, nw, liwork, reclevel
20 LOGICAL wantt, wantz
21* ..
22* .. Array Arguments ..
23 INTEGER desch( * ), descz( * ), desct( * ), descv( * ),
24 $ descw( * ), iwork( * )
25 REAL h( * ), si( kbot ), sr( kbot ), t( * ),
26 $ v( * ), work( * ), wv( * ),
27 $ z( * )
28* ..
29*
30* Purpose
31* =======
32*
33* Aggressive early deflation:
34*
35* This subroutine accepts as input an upper Hessenberg matrix H and
36* performs an orthogonal similarity transformation designed to detect
37* and deflate fully converged eigenvalues from a trailing principal
38* submatrix. On output H has been overwritten by a new Hessenberg
39* matrix that is a perturbation of an orthogonal similarity
40* transformation of H. It is to be hoped that the final version of H
41* has many zero subdiagonal entries.
42*
43* Notes
44* =====
45*
46* Each global data object is described by an associated description
47* vector. This vector stores the information required to establish
48* the mapping between an object element and its corresponding process
49* and memory location.
50*
51* Let A be a generic term for any 2D block cyclicly distributed array.
52* Such a global array has an associated description vector DESCA.
53* In the following comments, the character _ should be read as
54* "of the global array".
55*
56* NOTATION STORED IN EXPLANATION
57* --------------- -------------- --------------------------------------
58* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
59* DTYPE_A = 1.
60* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
61* the BLACS process grid A is distribu-
62* ted over. The context itself is glo-
63* bal, but the handle (the integer
64* value) may vary.
65* M_A (global) DESCA( M_ ) The number of rows in the global
66* array A.
67* N_A (global) DESCA( N_ ) The number of columns in the global
68* array A.
69* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
70* the rows of the array.
71* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
72* the columns of the array.
73* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
74* row of the array A is distributed.
75* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
76* first column of the array A is
77* distributed.
78* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
79* array. LLD_A >= MAX(1,LOCr(M_A)).
80*
81* Let K be the number of rows or columns of a distributed matrix,
82* and assume that its process grid has dimension p x q.
83* LOCr( K ) denotes the number of elements of K that a process
84* would receive if K were distributed over the p processes of its
85* process column.
86* Similarly, LOCc( K ) denotes the number of elements of K that a
87* process would receive if K were distributed over the q processes of
88* its process row.
89* The values of LOCr() and LOCc() may be determined via a call to the
90* ScaLAPACK tool function, NUMROC:
91* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
92* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
93* An upper bound for these quantities may be computed by:
94* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
95* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
96*
97* Arguments
98* =========
99*
100* WANTT (global input) LOGICAL
101* If .TRUE., then the Hessenberg matrix H is fully updated
102* so that the quasi-triangular Schur factor may be
103* computed (in cooperation with the calling subroutine).
104* If .FALSE., then only enough of H is updated to preserve
105* the eigenvalues.
106*
107* WANTZ (global input) LOGICAL
108* If .TRUE., then the orthogonal matrix Z is updated so
109* so that the orthogonal Schur factor may be computed
110* (in cooperation with the calling subroutine).
111* If .FALSE., then Z is not referenced.
112*
113* N (global input) INTEGER
114* The order of the matrix H and (if WANTZ is .TRUE.) the
115* order of the orthogonal matrix Z.
116*
117* KTOP (global input) INTEGER
118* It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
119* KBOT and KTOP together determine an isolated block
120* along the diagonal of the Hessenberg matrix.
121*
122* KBOT (global input) INTEGER
123* It is assumed without a check that either
124* KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together
125* determine an isolated block along the diagonal of the
126* Hessenberg matrix.
127*
128* NW (global input) INTEGER
129* Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1).
130*
131* H (local input/output) REAL array, dimension
132* (DESCH(LLD_),*)
133* On input the initial N-by-N section of H stores the
134* Hessenberg matrix undergoing aggressive early deflation.
135* On output H has been transformed by an orthogonal
136* similarity transformation, perturbed, and the returned
137* to Hessenberg form that (it is to be hoped) has some
138* zero subdiagonal entries.
139*
140* DESCH (global and local input) INTEGER array of dimension DLEN_.
141* The array descriptor for the distributed matrix H.
142*
143* ILOZ (global input) INTEGER
144* IHIZ (global input) INTEGER
145* Specify the rows of Z to which transformations must be
146* applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
147*
148* Z (input/output) REAL array, dimension
149* (DESCH(LLD_),*)
150* IF WANTZ is .TRUE., then on output, the orthogonal
151* similarity transformation mentioned above has been
152* accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
153* If WANTZ is .FALSE., then Z is unreferenced.
154*
155* DESCZ (global and local input) INTEGER array of dimension DLEN_.
156* The array descriptor for the distributed matrix Z.
157*
158* NS (global output) INTEGER
159* The number of unconverged (ie approximate) eigenvalues
160* returned in SR and SI that may be used as shifts by the
161* calling subroutine.
162*
163* ND (global output) INTEGER
164* The number of converged eigenvalues uncovered by this
165* subroutine.
166*
167* SR (global output) REAL array, dimension KBOT
168* SI (global output) REAL array, dimension KBOT
169* On output, the real and imaginary parts of approximate
170* eigenvalues that may be used for shifts are stored in
171* SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
172* SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
173* The real and imaginary parts of converged eigenvalues
174* are stored in SR(KBOT-ND+1) through SR(KBOT) and
175* SI(KBOT-ND+1) through SI(KBOT), respectively.
176*
177* V (global workspace) REAL array, dimension
178* (DESCV(LLD_),*)
179* An NW-by-NW distributed work array.
180*
181* DESCV (global and local input) INTEGER array of dimension DLEN_.
182* The array descriptor for the distributed matrix V.
183*
184* NH (input) INTEGER scalar
185* The number of columns of T. NH.GE.NW.
186*
187* T (global workspace) REAL array, dimension
188* (DESCV(LLD_),*)
189*
190* DESCT (global and local input) INTEGER array of dimension DLEN_.
191* The array descriptor for the distributed matrix T.
192*
193* NV (global input) INTEGER
194* The number of rows of work array WV available for
195* workspace. NV.GE.NW.
196*
197* WV (global workspace) REAL array, dimension
198* (DESCW(LLD_),*)
199*
200* DESCW (global and local input) INTEGER array of dimension DLEN_.
201* The array descriptor for the distributed matrix WV.
202*
203* WORK (local workspace) REAL array, dimension LWORK.
204* On exit, WORK(1) is set to an estimate of the optimal value
205* of LWORK for the given values of N, NW, KTOP and KBOT.
206*
207* LWORK (local input) INTEGER
208* The dimension of the work array WORK. LWORK = 2*NW
209* suffices, but greater efficiency may result from larger
210* values of LWORK.
211*
212* If LWORK = -1, then a workspace query is assumed; PSLAQR3
213* only estimates the optimal workspace size for the given
214* values of N, NW, KTOP and KBOT. The estimate is returned
215* in WORK(1). No error message related to LWORK is issued
216* by XERBLA. Neither H nor Z are accessed.
217*
218* IWORK (local workspace) INTEGER array, dimension (LIWORK)
219*
220* LIWORK (local input) INTEGER
221* The length of the workspace array IWORK
222*
223* ================================================================
224* Based on contributions by
225* Robert Granat and Meiyue Shao,
226* Department of Computing Science and HPC2N,
227* Umea University, Sweden
228*
229* ================================================================
230* .. Parameters ..
231 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
232 $ lld_, mb_, m_, nb_, n_, rsrc_
233 INTEGER recmax
234 LOGICAL sortgrad
235 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
236 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
237 $ rsrc_ = 7, csrc_ = 8, lld_ = 9, recmax = 3,
238 $ sortgrad = .false. )
239 REAL zero, one
240 PARAMETER ( zero = 0.0, one = 1.0 )
241* ..
242* .. Local Scalars ..
243 REAL aa, bb, beta, cc, cs, dd, evi, evk, foo, s,
244 $ safmax, safmin, smlnum, sn, tau, ulp,
245 $ elem, elem1, elem2, elem3, r1, anorm, rnorm,
246 $ resaed
247 INTEGER i, ifst, ilst, info, infqr, j, jw, k, kcol,
248 $ kend, kln, krow, kwtop, ltop, lwk1, lwk2, lwk3,
249 $ lwkopt, nmin, lldh, lldz, lldt, lldv, lldwv,
250 $ ictxt, nprow, nmax, npcol, myrow, mycol, nb,
251 $ iroffh, m, rcols, taurows, rrows, taucols,
252 $ itau, ir, ipw, nprocs, mloc, iroffhh,
253 $ icoffhh, hhrsrc, hhcsrc, hhrows, hhcols,
254 $ iroffzz, icoffzz, zzrsrc, zzcsrc, zzrows,
255 $ zzcols, ierr, tzrows0, tzcols0, ierr0, ipt0,
256 $ ipz0, ipw0, nb2, round, lilst, kk, lilst0,
257 $ iwrk1, rsrc, csrc, lwk4, lwk5, iwrk2, lwk6,
258 $ lwk7, lwk8, ilwkopt, tzrows, tzcols, nsel,
259 $ npmin, ictxt_new, myrow_new, mycol_new
260 LOGICAL bulge, sorted, lquery
261* ..
262* .. Local Arrays ..
263 INTEGER par( 6 ), descr( dlen_ ),
264 $ desctau( dlen_ ), deschh( dlen_ ),
265 $ desczz( dlen_ ), desctz0( dlen_ ),
266 $ pmap( 64*64 )
267 REAL ddum( 1 )
268* ..
269* .. External Functions ..
270 REAL slamch, pslange
271 INTEGER pilaenvx, numroc, indxg2p, iceil, blacs_pnum
272 EXTERNAL slamch, pilaenvx, numroc, indxg2p, pslange,
273 $ iceil, blacs_pnum
274* ..
275* .. External Subroutines ..
276 EXTERNAL pscopy, psgehrd, psgemm, slabad, pslacpy,
277 $ pslaqr1, slanv2, pslaqr0, pslarf, pslarfg,
279 $ pslamve, blacs_gridinfo, blacs_gridmap,
280 $ blacs_gridexit, psgemr2d
281* ..
282* .. Intrinsic Functions ..
283 INTRINSIC abs, float, int, max, min, sqrt
284* ..
285* .. Executable Statements ..
286 ictxt = desch( ctxt_ )
287 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
288 nprocs = nprow*npcol
289*
290* Extract local leading dimensions, blockfactors, offset for
291* keeping the alignment requirements and size of deflation window.
292*
293 lldh = desch( lld_ )
294 lldz = descz( lld_ )
295 lldt = desct( lld_ )
296 lldv = descv( lld_ )
297 lldwv = descw( lld_ )
298 nb = desch( mb_ )
299 iroffh = mod( ktop - 1, nb )
300 jw = min( nw, kbot-ktop+1 )
301 nsel = nb+jw
302*
303* Extract environment variables for parallel eigenvalue reordering.
304*
305 par(1) = pilaenvx(ictxt, 17, 'PSLAQR3', 'SV', jw, nb, -1, -1)
306 par(2) = pilaenvx(ictxt, 18, 'PSLAQR3', 'SV', jw, nb, -1, -1)
307 par(3) = pilaenvx(ictxt, 19, 'PSLAQR3', 'SV', jw, nb, -1, -1)
308 par(4) = pilaenvx(ictxt, 20, 'PSLAQR3', 'SV', jw, nb, -1, -1)
309 par(5) = pilaenvx(ictxt, 21, 'PSLAQR3', 'SV', jw, nb, -1, -1)
310 par(6) = pilaenvx(ictxt, 22, 'PSLAQR3', 'SV', jw, nb, -1, -1)
311*
312* Check if workspace query.
313*
314 lquery = lwork.EQ.-1 .OR. liwork.EQ.-1
315*
316* Estimate optimal workspace.
317*
318 IF( jw.LE.2 ) THEN
319 lwkopt = 1
320 ELSE
321*
322* Workspace query calls to PSGEHRD and PSORMHR.
323*
324 taurows = numroc( 1, 1, mycol, descv(rsrc_), nprow )
325 taucols = numroc( jw+iroffh, nb, mycol, descv(csrc_),
326 $ npcol )
327 CALL psgehrd( jw, 1, jw, t, 1, 1, desct, work, work, -1,
328 $ info )
329 lwk1 = int( work( 1 ) ) + taurows*taucols
330*
331* Workspace query call to PSORMHR.
332*
333 CALL psormhr( 'Right', 'No', jw, jw, 1, jw, t, 1, 1, desct,
334 $ work, v, 1, 1, descv, work, -1, info )
335 lwk2 = int( work( 1 ) )
336*
337* Workspace query call to PSLAQR0.
338*
339 nmin = pilaenvx( ictxt, 12, 'PSLAQR3', 'SV', jw, 1, jw, lwork )
340 nmax = ( n-1 ) / 3
341 IF( jw+iroffh.GT.nmin .AND. jw+iroffh.LE.nmax
342 $ .AND. reclevel.LT.recmax ) THEN
343 CALL pslaqr0( .true., .true., jw+iroffh, 1+iroffh,
344 $ jw+iroffh, t, desct, sr, si, 1, jw, v, descv,
345 $ work, -1, iwork, liwork-nsel, infqr,
346 $ reclevel+1 )
347 lwk3 = int( work( 1 ) )
348 iwrk1 = iwork( 1 )
349 ELSE
350 rsrc = desct( rsrc_ )
351 csrc = desct( csrc_ )
352 desct( rsrc_ ) = 0
353 desct( csrc_ ) = 0
354 CALL pslaqr1( .true., .true., jw+iroffh, 1, jw+iroffh, t,
355 $ desct, sr, si, 1, jw+iroffh, v, descv, work, -1,
356 $ iwork, liwork-nsel, infqr )
357 desct( rsrc_ ) = rsrc
358 desct( csrc_ ) = csrc
359 lwk3 = int( work( 1 ) )
360 iwrk1 = iwork( 1 )
361 END IF
362*
363* Workspace in case of alignment problems.
364*
365 tzrows0 = numroc( jw+iroffh, nb, myrow, 0, nprow )
366 tzcols0 = numroc( jw+iroffh, nb, mycol, 0, npcol )
367 lwk4 = 2 * tzrows0*tzcols0
368*
369* Workspace check for reordering.
370*
371 CALL pstrord( 'Vectors', iwork, par, jw+iroffh, t, 1, 1,
372 $ desct, v, 1, 1, descv, ddum, ddum, mloc, work, -1,
373 $ iwork, liwork-nsel, info )
374 lwk5 = int( work( 1 ) )
375 iwrk2 = iwork( 1 )
376*
377* Extra workspace for reflecting back spike
378* (workspace for PSLARF approximated for simplicity).
379*
380 rrows = numroc( n+iroffh, nb, myrow, descv(rsrc_), nprow )
381 rcols = numroc( 1, 1, mycol, descv(csrc_), npcol )
382 lwk6 = rrows*rcols + taurows*taucols +
383 $ 2*iceil(iceil(jw+iroffh,nb),nprow)*nb
384 $ *iceil(iceil(jw+iroffh,nb),npcol)*nb
385*
386* Extra workspace needed by PBLAS update calls
387* (also estimated for simplicity).
388*
389 lwk7 = max( iceil(iceil(jw,nb),nprow)*nb *
390 $ iceil(iceil(n-kbot,nb),npcol)*nb,
391 $ iceil(iceil(ihiz-iloz+1,nb),nprow)*nb *
392 $ iceil(iceil(jw,nb),npcol)*nb,
393 $ iceil(iceil(kbot-jw,nb),nprow)*nb *
394 $ iceil(iceil(jw,nb),npcol)*nb )
395*
396* Residual check workspace.
397*
398 tzrows = numroc( jw+iroffh, nb, myrow, desct(rsrc_), nprow )
399 tzcols = numroc( jw+iroffh, nb, mycol, desct(csrc_), npcol )
400 lwk8 = 2*tzrows*tzcols
401*
402* Optimal workspace.
403*
404 lwkopt = max( lwk1, lwk2, lwk3+lwk4, lwk5, lwk6, lwk7, lwk8 )
405 ilwkopt = max( iwrk1, iwrk2 )
406 END IF
407*
408* Quick return in case of workspace query.
409*
410 work( 1 ) = float( lwkopt )
411*
412* IWORK(1:NSEL) is used as the array SELECT for PSTRORD.
413*
414 iwork( 1 ) = ilwkopt + nsel
415 IF( lquery )
416 $ RETURN
417*
418* Nothing to do for an empty active block ...
419 ns = 0
420 nd = 0
421 IF( ktop.GT.kbot )
422 $ RETURN
423* ... nor for an empty deflation window.
424*
425 IF( nw.LT.1 )
426 $ RETURN
427*
428* Machine constants.
429*
430 safmin = slamch( 'SAFE MINIMUM' )
431 safmax = one / safmin
432 CALL slabad( safmin, safmax )
433 ulp = slamch( 'PRECISION' )
434 smlnum = safmin*( float( n ) / ulp )
435*
436* Setup deflation window.
437*
438 jw = min( nw, kbot-ktop+1 )
439 kwtop = kbot - jw + 1
440 IF( kwtop.EQ.ktop ) THEN
441 s = zero
442 ELSE
443 CALL pselget( 'All', '1-Tree', s, h, kwtop, kwtop-1, desch )
444 END IF
445*
446 IF( kbot.EQ.kwtop ) THEN
447*
448* 1-by-1 deflation window: not much to do.
449*
450 CALL pselget( 'All', '1-Tree', sr( kwtop ), h, kwtop, kwtop,
451 $ desch )
452 si( kwtop ) = zero
453 ns = 1
454 nd = 0
455 IF( abs( s ).LE.max( smlnum, ulp*abs( sr( kwtop ) ) ) )
456 $ THEN
457 ns = 0
458 nd = 1
459 IF( kwtop.GT.ktop )
460 $ CALL pselset( h, kwtop, kwtop-1 , desch, zero )
461 END IF
462 RETURN
463 END IF
464*
465 IF( kwtop.EQ.ktop .AND. kbot-kwtop.EQ.1 ) THEN
466*
467* 2-by-2 deflation window: a little more to do.
468*
469 CALL pselget( 'All', '1-Tree', aa, h, kwtop, kwtop, desch )
470 CALL pselget( 'All', '1-Tree', bb, h, kwtop, kwtop+1, desch )
471 CALL pselget( 'All', '1-Tree', cc, h, kwtop+1, kwtop, desch )
472 CALL pselget( 'All', '1-Tree', dd, h, kwtop+1, kwtop+1, desch )
473 CALL slanv2( aa, bb, cc, dd, sr(kwtop), si(kwtop),
474 $ sr(kwtop+1), si(kwtop+1), cs, sn )
475 ns = 0
476 nd = 2
477 IF( cc.EQ.zero ) THEN
478 i = kwtop
479 IF( i+2.LE.n .AND. wantt )
480 $ CALL psrot( n-i-1, h, i, i+2, desch, desch(m_), h, i+1,
481 $ i+2, desch, desch(m_), cs, sn, work, lwork, info )
482 IF( i.GT.1 )
483 $ CALL psrot( i-1, h, 1, i, desch, 1, h, 1, i+1, desch, 1,
484 $ cs, sn, work, lwork, info )
485 IF( wantz )
486 $ CALL psrot( ihiz-iloz+1, z, iloz, i, descz, 1, z, iloz,
487 $ i+1, descz, 1, cs, sn, work, lwork, info )
488 CALL pselset( h, i, i, desch, aa )
489 CALL pselset( h, i, i+1, desch, bb )
490 CALL pselset( h, i+1, i, desch, cc )
491 CALL pselset( h, i+1, i+1, desch, dd )
492 END IF
493 work( 1 ) = float( lwkopt )
494 RETURN
495 END IF
496*
497* Calculate new value for IROFFH in case deflation window
498* was adjusted.
499*
500 iroffh = mod( kwtop - 1, nb )
501*
502* Adjust number of rows and columns of T matrix descriptor
503* to prepare for call to PDBTRORD.
504*
505 desct( m_ ) = jw+iroffh
506 desct( n_ ) = jw+iroffh
507*
508* Convert to spike-triangular form. (In case of a rare QR failure,
509* this routine continues to do aggressive early deflation using that
510* part of the deflation window that converged using INFQR here and
511* there to keep track.)
512*
513* Copy the trailing submatrix to the working space.
514*
515 CALL pslaset( 'All', iroffh, jw+iroffh, zero, one, t, 1, 1,
516 $ desct )
517 CALL pslaset( 'All', jw, iroffh, zero, zero, t, 1+iroffh, 1,
518 $ desct )
519 CALL pslacpy( 'All', 1, jw, h, kwtop, kwtop, desch, t, 1+iroffh,
520 $ 1+iroffh, desct )
521 CALL pslacpy( 'Upper', jw-1, jw-1, h, kwtop+1, kwtop, desch, t,
522 $ 1+iroffh+1, 1+iroffh, desct )
523 IF( jw.GT.2 )
524 $ CALL pslaset( 'Lower', jw-2, jw-2, zero, zero, t, 1+iroffh+2,
525 $ 1+iroffh, desct )
526 CALL pslacpy( 'All', jw-1, 1, h, kwtop+1, kwtop+jw-1, desch, t,
527 $ 1+iroffh+1, 1+iroffh+jw-1, desct )
528*
529* Initialize the working orthogonal matrix.
530*
531 CALL pslaset( 'All', jw+iroffh, jw+iroffh, zero, one, v, 1, 1,
532 $ descv )
533*
534* Compute the Schur form of T.
535*
536 npmin = pilaenvx( ictxt, 23, 'PSLAQR3', 'SV', jw, nb, nprow,
537 $ npcol )
538 nmin = pilaenvx( ictxt, 12, 'PSLAQR3', 'SV', jw, 1, jw, lwork )
539 nmax = ( n-1 ) / 3
540 IF( min(nprow, npcol).LE.npmin+1 .OR. reclevel.GE.1 ) THEN
541*
542* The AED window is large enough.
543* Compute the Schur decomposition with all processors.
544*
545 IF( jw+iroffh.GT.nmin .AND. jw+iroffh.LE.nmax
546 $ .AND. reclevel.LT.recmax ) THEN
547 CALL pslaqr0( .true., .true., jw+iroffh, 1+iroffh,
548 $ jw+iroffh, t, desct, sr( kwtop-iroffh ),
549 $ si( kwtop-iroffh ), 1+iroffh, jw+iroffh, v, descv,
550 $ work, lwork, iwork(nsel+1), liwork-nsel, infqr,
551 $ reclevel+1 )
552 ELSE
553 IF( desct(rsrc_).EQ.0 .AND. desct(csrc_).EQ.0 ) THEN
554 IF( jw+iroffh.GT.desct( mb_ ) ) THEN
555 CALL pslaqr1( .true., .true., jw+iroffh, 1,
556 $ jw+iroffh, t, desct, sr( kwtop-iroffh ),
557 $ si( kwtop-iroffh ), 1, jw+iroffh, v,
558 $ descv, work, lwork, iwork(nsel+1), liwork-nsel,
559 $ infqr )
560 ELSE
561 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
562 CALL slahqr( .true., .true., jw+iroffh, 1+iroffh,
563 $ jw+iroffh, t, desct(lld_),
564 $ sr( kwtop-iroffh ), si( kwtop-iroffh ),
565 $ 1+iroffh, jw+iroffh, v, descv(lld_), infqr )
566 ELSE
567 infqr = 0
568 END IF
569 IF( nprocs.GT.1 )
570 $ CALL igamn2d( ictxt, 'All', '1-Tree', 1, 1, infqr,
571 $ 1, -1, -1, -1, -1, -1 )
572 END IF
573 ELSEIF( jw+iroffh.LE.desct( mb_ ) ) THEN
574 IF( myrow.EQ.desct(rsrc_) .AND. mycol.EQ.desct(csrc_) )
575 $ THEN
576 CALL slahqr( .true., .true., jw+iroffh, 1+iroffh,
577 $ jw+iroffh, t, desct(lld_),
578 $ sr( kwtop-iroffh ), si( kwtop-iroffh ),
579 $ 1+iroffh, jw+iroffh, v, descv(lld_), infqr )
580 ELSE
581 infqr = 0
582 END IF
583 IF( nprocs.GT.1 )
584 $ CALL igamn2d( ictxt, 'All', '1-Tree', 1, 1, infqr,
585 $ 1, -1, -1, -1, -1, -1 )
586 ELSE
587 tzrows0 = numroc( jw+iroffh, nb, myrow, 0, nprow )
588 tzcols0 = numroc( jw+iroffh, nb, mycol, 0, npcol )
589 CALL descinit( desctz0, jw+iroffh, jw+iroffh, nb, nb, 0,
590 $ 0, ictxt, max(1,tzrows0), ierr0 )
591 ipt0 = 1
592 ipz0 = ipt0 + max(1,tzrows0)*tzcols0
593 ipw0 = ipz0 + max(1,tzrows0)*tzcols0
594 CALL pslamve( 'All', jw+iroffh, jw+iroffh, t, 1, 1,
595 $ desct, work(ipt0), 1, 1, desctz0, work(ipw0) )
596 CALL pslaset( 'All', jw+iroffh, jw+iroffh, zero, one,
597 $ work(ipz0), 1, 1, desctz0 )
598 CALL pslaqr1( .true., .true., jw+iroffh, 1,
599 $ jw+iroffh, work(ipt0), desctz0,
600 $ sr( kwtop-iroffh ), si( kwtop-iroffh ),
601 $ 1, jw+iroffh, work(ipz0),
602 $ desctz0, work(ipw0), lwork-ipw0+1, iwork(nsel+1),
603 $ liwork-nsel, infqr )
604 CALL pslamve( 'All', jw+iroffh, jw+iroffh, work(ipt0), 1,
605 $ 1, desctz0, t, 1, 1, desct, work(ipw0) )
606 CALL pslamve( 'All', jw+iroffh, jw+iroffh, work(ipz0), 1,
607 $ 1, desctz0, v, 1, 1, descv, work(ipw0) )
608 END IF
609 END IF
610 ELSE
611*
612* The AED window is too small.
613* Redistribute the AED window to a subgrid
614* and do the computation on the subgrid.
615*
616 ictxt_new = ictxt
617 DO 20 i = 0, npmin-1
618 DO 10 j = 0, npmin-1
619 pmap( j+1+i*npmin ) = blacs_pnum( ictxt, i, j )
620 10 CONTINUE
621 20 CONTINUE
622 CALL blacs_gridmap( ictxt_new, pmap, npmin, npmin, npmin )
623 CALL blacs_gridinfo( ictxt_new, npmin, npmin, myrow_new,
624 $ mycol_new )
625 IF( myrow.GE.npmin .OR. mycol.GE.npmin ) ictxt_new = -1
626 IF( ictxt_new.GE.0 ) THEN
627 tzrows0 = numroc( jw, nb, myrow_new, 0, npmin )
628 tzcols0 = numroc( jw, nb, mycol_new, 0, npmin )
629 CALL descinit( desctz0, jw, jw, nb, nb, 0,
630 $ 0, ictxt_new, max(1,tzrows0), ierr0 )
631 ipt0 = 1
632 ipz0 = ipt0 + max(1,tzrows0)*max(1,tzcols0)
633 ipw0 = ipz0 + max(1,tzrows0)*max(1,tzcols0)
634 ELSE
635 ipt0 = 1
636 ipz0 = 2
637 ipw0 = 3
638 desctz0( ctxt_ ) = -1
639 infqr = 0
640 END IF
641 CALL psgemr2d( jw, jw, t, 1+iroffh, 1+iroffh, desct,
642 $ work(ipt0), 1, 1, desctz0, ictxt )
643 IF( ictxt_new.GE.0 ) THEN
644 CALL pslaset( 'All', jw, jw, zero, one, work(ipz0), 1, 1,
645 $ desctz0 )
646 nmin = pilaenvx( ictxt_new, 12, 'PSLAQR3', 'SV', jw, 1, jw,
647 $ lwork )
648 IF( jw.GT.nmin .AND. jw.LE.nmax .AND. reclevel.LT.1 ) THEN
649 CALL pslaqr0( .true., .true., jw, 1, jw, work(ipt0),
650 $ desctz0, sr( kwtop ), si( kwtop ), 1, jw,
651 $ work(ipz0), desctz0, work(ipw0), lwork-ipw0+1,
652 $ iwork(nsel+1), liwork-nsel, infqr,
653 $ reclevel+1 )
654 ELSE
655 CALL pslaqr1( .true., .true., jw, 1, jw, work(ipt0),
656 $ desctz0, sr( kwtop ), si( kwtop ), 1, jw,
657 $ work(ipz0), desctz0, work(ipw0), lwork-ipw0+1,
658 $ iwork(nsel+1), liwork-nsel, infqr )
659 END IF
660 END IF
661 CALL psgemr2d( jw, jw, work(ipt0), 1, 1, desctz0, t, 1+iroffh,
662 $ 1+iroffh, desct, ictxt )
663 CALL psgemr2d( jw, jw, work(ipz0), 1, 1, desctz0, v, 1+iroffh,
664 $ 1+iroffh, descv, ictxt )
665 IF( ictxt_new.GE.0 )
666 $ CALL blacs_gridexit( ictxt_new )
667 IF( myrow+mycol.GT.0 ) THEN
668 DO 40 j = 0, jw-1
669 sr( kwtop+j ) = zero
670 si( kwtop+j ) = zero
671 40 CONTINUE
672 END IF
673 CALL igamn2d( ictxt, 'All', '1-Tree', 1, 1, infqr, 1, -1, -1,
674 $ -1, -1, -1 )
675 CALL sgsum2d( ictxt, 'All', ' ', jw, 1, sr(kwtop), jw, -1, -1 )
676 CALL sgsum2d( ictxt, 'All', ' ', jw, 1, si(kwtop), jw, -1, -1 )
677 END IF
678*
679* Adjust INFQR for offset from block border in submatrices.
680*
681 IF( infqr.NE.0 )
682 $ infqr = infqr - iroffh
683*
684* PSTRORD needs a clean margin near the diagonal.
685*
686 DO 50 j = 1, jw - 3
687 CALL pselset( t, j+2, j, desct, zero )
688 CALL pselset( t, j+3, j, desct, zero )
689 50 CONTINUE
690 IF( jw.GT.2 )
691 $ CALL pselset( t, jw, jw-2, desct, zero )
692*
693* Check local residual for AED Schur decomposition.
694*
695 resaed = 0.0
696*
697* Clean up the array SELECT for PSTRORD.
698*
699 DO 60 j = 1, nsel
700 iwork( j ) = 0
701 60 CONTINUE
702*
703* Set local M counter to zero.
704*
705 mloc = 0
706*
707* Outer deflation detection loop (label 80).
708* In this loop a bunch of undeflatable eigenvalues
709* are moved simultaneously.
710*
711 DO 70 j = 1, iroffh + infqr
712 iwork( j ) = 1
713 70 CONTINUE
714*
715 ns = jw
716 ilst = infqr + 1 + iroffh
717 IF( ilst.GT.1 ) THEN
718 CALL pselget( 'All', '1-Tree', elem, t, ilst, ilst-1, desct )
719 bulge = elem.NE.zero
720 IF( bulge ) ilst = ilst+1
721 END IF
722*
723 80 CONTINUE
724 IF( ilst.LE.ns+iroffh ) THEN
725*
726* Find the top-left corner of the local window.
727*
728 lilst = max(ilst,ns+iroffh-nb+1)
729 IF( lilst.GT.1 ) THEN
730 CALL pselget( 'All', '1-Tree', elem, t, lilst, lilst-1,
731 $ desct )
732 bulge = elem.NE.zero
733 IF( bulge ) lilst = lilst+1
734 END IF
735*
736* Lock all eigenvalues outside the local window.
737*
738 DO 90 j = iroffh+1, lilst-1
739 iwork( j ) = 1
740 90 CONTINUE
741 lilst0 = lilst
742*
743* Inner deflation detection loop (label 100).
744* In this loop, the undeflatable eigenvalues are moved to the
745* top-left corner of the local window.
746*
747 100 CONTINUE
748 IF( lilst.LE.ns+iroffh ) THEN
749 IF( ns.EQ.1 ) THEN
750 bulge = .false.
751 ELSE
752 CALL pselget( 'All', '1-Tree', elem, t, ns+iroffh,
753 $ ns+iroffh-1, desct )
754 bulge = elem.NE.zero
755 END IF
756*
757* Small spike tip test for deflation.
758*
759 IF( .NOT.bulge ) THEN
760*
761* Real eigenvalue.
762*
763 CALL pselget( 'All', '1-Tree', elem, t, ns+iroffh,
764 $ ns+iroffh, desct )
765 foo = abs( elem )
766 IF( foo.EQ.zero )
767 $ foo = abs( s )
768 CALL pselget( 'All', '1-Tree', elem, v, 1+iroffh,
769 $ ns+iroffh, descv )
770 IF( abs( s*elem ).LE.max( smlnum, ulp*foo ) ) THEN
771*
772* Deflatable.
773*
774 ns = ns - 1
775 ELSE
776*
777* Undeflatable: move it up out of the way.
778*
779 ifst = ns
780 DO 110 j = lilst, jw+iroffh
781 iwork( j ) = 0
782 110 CONTINUE
783 iwork( ifst+iroffh ) = 1
784 CALL pstrord( 'Vectors', iwork, par, jw+iroffh, t, 1,
785 $ 1, desct, v, 1, 1, descv, work,
786 $ work(jw+iroffh+1), mloc,
787 $ work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
788 $ iwork(nsel+1), liwork-nsel, info )
789*
790* Adjust the array SELECT explicitly so that it does not
791* rely on the output of PSTRORD.
792*
793 iwork( ifst+iroffh ) = 0
794 iwork( lilst ) = 1
795 lilst = lilst + 1
796*
797* In case of a rare exchange failure, adjust the
798* pointers ILST and LILST to the current place to avoid
799* unexpected behaviors.
800*
801 IF( info.NE.0 ) THEN
802 lilst = max(info, lilst)
803 ilst = max(info, ilst)
804 END IF
805 END IF
806 ELSE
807*
808* Complex conjugate pair.
809*
810 CALL pselget( 'All', '1-Tree', elem1, t, ns+iroffh,
811 $ ns+iroffh, desct )
812 CALL pselget( 'All', '1-Tree', elem2, t, ns+iroffh,
813 $ ns+iroffh-1, desct )
814 CALL pselget( 'All', '1-Tree', elem3, t, ns+iroffh-1,
815 $ ns+iroffh, desct )
816 foo = abs( elem1 ) + sqrt( abs( elem2 ) )*
817 $ sqrt( abs( elem3 ) )
818 IF( foo.EQ.zero )
819 $ foo = abs( s )
820 CALL pselget( 'All', '1-Tree', elem1, v, 1+iroffh,
821 $ ns+iroffh, descv )
822 CALL pselget( 'All', '1-Tree', elem2, v, 1+iroffh,
823 $ ns+iroffh-1, descv )
824 IF( max( abs( s*elem1 ), abs( s*elem2 ) ).LE.
825 $ max( smlnum, ulp*foo ) ) THEN
826*
827* Deflatable.
828*
829 ns = ns - 2
830 ELSE
831*
832* Undeflatable: move them up out of the way.
833*
834 ifst = ns
835 DO 120 j = lilst, jw+iroffh
836 iwork( j ) = 0
837 120 CONTINUE
838 iwork( ifst+iroffh ) = 1
839 iwork( ifst+iroffh-1 ) = 1
840 CALL pstrord( 'Vectors', iwork, par, jw+iroffh, t, 1,
841 $ 1, desct, v, 1, 1, descv, work,
842 $ work(jw+iroffh+1), mloc,
843 $ work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
844 $ iwork(nsel+1), liwork-nsel, info )
845*
846* Adjust the array SELECT explicitly so that it does not
847* rely on the output of PSTRORD.
848*
849 iwork( ifst+iroffh ) = 0
850 iwork( ifst+iroffh-1 ) = 0
851 iwork( lilst ) = 1
852 iwork( lilst+1 ) = 1
853 lilst = lilst + 2
854*
855* In case of a rare exchange failure, adjust the
856* pointers ILST and LILST to the current place to avoid
857* unexpected behaviors.
858*
859 IF( info.NE.0 ) THEN
860 lilst = max(info, lilst)
861 ilst = max(info, ilst)
862 END IF
863 END IF
864 END IF
865*
866* End of inner deflation detection loop.
867*
868 GO TO 100
869 END IF
870*
871* Unlock the eigenvalues outside the local window.
872* Then undeflatable eigenvalues are moved to the proper position.
873*
874 DO 130 j = ilst, lilst0-1
875 iwork( j ) = 0
876 130 CONTINUE
877 CALL pstrord( 'Vectors', iwork, par, jw+iroffh, t, 1, 1,
878 $ desct, v, 1, 1, descv, work, work(jw+iroffh+1),
879 $ m, work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
880 $ iwork(nsel+1), liwork-nsel, info )
881 ilst = m + 1
882*
883* In case of a rare exchange failure, adjust the pointer ILST to
884* the current place to avoid unexpected behaviors.
885*
886 IF( info.NE.0 )
887 $ ilst = max(info, ilst)
888*
889* End of outer deflation detection loop.
890*
891 GO TO 80
892 END IF
893
894*
895* Post-reordering step: copy output eigenvalues to output.
896*
897 CALL scopy( jw, work(1+iroffh), 1, sr( kwtop ), 1 )
898 CALL scopy( jw, work(jw+2*iroffh+1), 1, si( kwtop ), 1 )
899*
900* Check local residual for reordered AED Schur decomposition.
901*
902 resaed = 0.0
903*
904* Return to Hessenberg form.
905*
906 IF( ns.EQ.0 )
907 $ s = zero
908*
909 IF( ns.LT.jw .AND. sortgrad ) THEN
910*
911* Sorting diagonal blocks of T improves accuracy for
912* graded matrices. Bubble sort deals well with exchange
913* failures. Eigenvalues/shifts from T are also restored.
914*
915 round = 0
916 sorted = .false.
917 i = ns + 1 + iroffh
918 140 CONTINUE
919 IF( sorted )
920 $ GO TO 180
921 sorted = .true.
922 round = round + 1
923*
924 kend = i - 1
925 i = infqr + 1 + iroffh
926 IF( i.EQ.ns+iroffh ) THEN
927 k = i + 1
928 ELSE IF( si( kwtop-iroffh + i-1 ).EQ.zero ) THEN
929 k = i + 1
930 ELSE
931 k = i + 2
932 END IF
933 150 CONTINUE
934 IF( k.LE.kend ) THEN
935 IF( k.EQ.i+1 ) THEN
936 evi = abs( sr( kwtop-iroffh+i-1 ) )
937 ELSE
938 evi = abs( sr( kwtop-iroffh+i-1 ) ) +
939 $ abs( si( kwtop-iroffh+i-1 ) )
940 END IF
941*
942 IF( k.EQ.kend ) THEN
943 evk = abs( sr( kwtop-iroffh+k-1 ) )
944 ELSEIF( si( kwtop-iroffh+k-1 ).EQ.zero ) THEN
945 evk = abs( sr( kwtop-iroffh+k-1 ) )
946 ELSE
947 evk = abs( sr( kwtop-iroffh+k-1 ) ) +
948 $ abs( si( kwtop-iroffh+k-1 ) )
949 END IF
950*
951 IF( evi.GE.evk ) THEN
952 i = k
953 ELSE
954 mloc = 0
955 sorted = .false.
956 ifst = i
957 ilst = k
958 DO 160 j = 1, i-1
959 iwork( j ) = 1
960 mloc = mloc + 1
961 160 CONTINUE
962 IF( k.EQ.i+2 ) THEN
963 iwork( i ) = 0
964 iwork(i+1) = 0
965 ELSE
966 iwork( i ) = 0
967 END IF
968 IF( k.NE.kend .AND. si( kwtop-iroffh+k-1 ).NE.zero ) THEN
969 iwork( k ) = 1
970 iwork(k+1) = 1
971 mloc = mloc + 2
972 ELSE
973 iwork( k ) = 1
974 IF( k.LT.kend ) iwork(k+1) = 0
975 mloc = mloc + 1
976 END IF
977 DO 170 j = k+2, jw+iroffh
978 iwork( j ) = 0
979 170 CONTINUE
980 CALL pstrord( 'Vectors', iwork, par, jw+iroffh, t, 1, 1,
981 $ desct, v, 1, 1, descv, work, work(jw+iroffh+1), m,
982 $ work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
983 $ iwork(nsel+1), liwork-nsel, ierr )
984 CALL scopy( jw, work(1+iroffh), 1, sr( kwtop ), 1 )
985 CALL scopy( jw, work(jw+2*iroffh+1), 1, si( kwtop ), 1 )
986 IF( ierr.EQ.0 ) THEN
987 i = ilst
988 ELSE
989 i = k
990 END IF
991 END IF
992 IF( i.EQ.kend ) THEN
993 k = i + 1
994 ELSE IF( si( kwtop-iroffh+i-1 ).EQ.zero ) THEN
995 k = i + 1
996 ELSE
997 k = i + 2
998 END IF
999 GO TO 150
1000 END IF
1001 GO TO 140
1002 180 CONTINUE
1003 END IF
1004*
1005* Restore number of rows and columns of T matrix descriptor.
1006*
1007 desct( m_ ) = nw+iroffh
1008 desct( n_ ) = nh+iroffh
1009*
1010 IF( ns.LT.jw .OR. s.EQ.zero ) THEN
1011 IF( ns.GT.1 .AND. s.NE.zero ) THEN
1012*
1013* Reflect spike back into lower triangle.
1014*
1015 rrows = numroc( ns+iroffh, nb, myrow, descv(rsrc_), nprow )
1016 rcols = numroc( 1, 1, mycol, descv(csrc_), npcol )
1017 CALL descinit( descr, ns+iroffh, 1, nb, 1, descv(rsrc_),
1018 $ descv(csrc_), ictxt, max(1, rrows), info )
1019 taurows = numroc( 1, 1, mycol, descv(rsrc_), nprow )
1020 taucols = numroc( jw+iroffh, nb, mycol, descv(csrc_),
1021 $ npcol )
1022 CALL descinit( desctau, 1, jw+iroffh, 1, nb, descv(rsrc_),
1023 $ descv(csrc_), ictxt, max(1, taurows), info )
1024*
1025 ir = 1
1026 itau = ir + descr( lld_ ) * rcols
1027 ipw = itau + desctau( lld_ ) * taucols
1028*
1029 CALL pslaset( 'All', ns+iroffh, 1, zero, zero, work(itau),
1030 $ 1, 1, desctau )
1031*
1032 CALL pscopy( ns, v, 1+iroffh, 1+iroffh, descv, descv(m_),
1033 $ work(ir), 1+iroffh, 1, descr, 1 )
1034 CALL pslarfg( ns, beta, 1+iroffh, 1, work(ir), 2+iroffh, 1,
1035 $ descr, 1, work(itau+iroffh) )
1036 CALL pselset( work(ir), 1+iroffh, 1, descr, one )
1037*
1038 CALL pslaset( 'Lower', jw-2, jw-2, zero, zero, t, 3+iroffh,
1039 $ 1+iroffh, desct )
1040*
1041 CALL pslarf( 'Left', ns, jw, work(ir), 1+iroffh, 1, descr,
1042 $ 1, work(itau+iroffh), t, 1+iroffh, 1+iroffh,
1043 $ desct, work( ipw ) )
1044 CALL pslarf( 'Right', ns, ns, work(ir), 1+iroffh, 1, descr,
1045 $ 1, work(itau+iroffh), t, 1+iroffh, 1+iroffh,
1046 $ desct, work( ipw ) )
1047 CALL pslarf( 'Right', jw, ns, work(ir), 1+iroffh, 1, descr,
1048 $ 1, work(itau+iroffh), v, 1+iroffh, 1+iroffh,
1049 $ descv, work( ipw ) )
1050*
1051 itau = 1
1052 ipw = itau + desctau( lld_ ) * taucols
1053 CALL psgehrd( jw+iroffh, 1+iroffh, ns+iroffh, t, 1, 1,
1054 $ desct, work(itau), work( ipw ), lwork-ipw+1, info )
1055 END IF
1056*
1057* Copy updated reduced window into place.
1058*
1059 IF( kwtop.GT.1 ) THEN
1060 CALL pselget( 'All', '1-Tree', elem, v, 1+iroffh,
1061 $ 1+iroffh, descv )
1062 CALL pselset( h, kwtop, kwtop-1, desch, s*elem )
1063 END IF
1064 CALL pslacpy( 'Upper', jw-1, jw-1, t, 1+iroffh+1, 1+iroffh,
1065 $ desct, h, kwtop+1, kwtop, desch )
1066 CALL pslacpy( 'All', 1, jw, t, 1+iroffh, 1+iroffh, desct, h,
1067 $ kwtop, kwtop, desch )
1068 CALL pslacpy( 'All', jw-1, 1, t, 1+iroffh+1, 1+iroffh+jw-1,
1069 $ desct, h, kwtop+1, kwtop+jw-1, desch )
1070*
1071* Accumulate orthogonal matrix in order to update
1072* H and Z, if requested.
1073*
1074 IF( ns.GT.1 .AND. s.NE.zero ) THEN
1075 CALL psormhr( 'Right', 'No', jw+iroffh, ns+iroffh, 1+iroffh,
1076 $ ns+iroffh, t, 1, 1, desct, work(itau), v, 1,
1077 $ 1, descv, work( ipw ), lwork-ipw+1, info )
1078 END IF
1079*
1080* Update vertical slab in H.
1081*
1082 IF( wantt ) THEN
1083 ltop = 1
1084 ELSE
1085 ltop = ktop
1086 END IF
1087 kln = max( 0, kwtop-ltop )
1088 iroffhh = mod( ltop-1, nb )
1089 icoffhh = mod( kwtop-1, nb )
1090 hhrsrc = indxg2p( ltop, nb, myrow, desch(rsrc_), nprow )
1091 hhcsrc = indxg2p( kwtop, nb, mycol, desch(csrc_), npcol )
1092 hhrows = numroc( kln+iroffhh, nb, myrow, hhrsrc, nprow )
1093 hhcols = numroc( jw+icoffhh, nb, mycol, hhcsrc, npcol )
1094 CALL descinit( deschh, kln+iroffhh, jw+icoffhh, nb, nb,
1095 $ hhrsrc, hhcsrc, ictxt, max(1, hhrows), ierr )
1096 CALL psgemm( 'No', 'No', kln, jw, jw, one, h, ltop,
1097 $ kwtop, desch, v, 1+iroffh, 1+iroffh, descv, zero,
1098 $ work, 1+iroffhh, 1+icoffhh, deschh )
1099 CALL pslacpy( 'All', kln, jw, work, 1+iroffhh, 1+icoffhh,
1100 $ deschh, h, ltop, kwtop, desch )
1101*
1102* Update horizontal slab in H.
1103*
1104 IF( wantt ) THEN
1105 kln = n-kbot
1106 iroffhh = mod( kwtop-1, nb )
1107 icoffhh = mod( kbot, nb )
1108 hhrsrc = indxg2p( kwtop, nb, myrow, desch(rsrc_), nprow )
1109 hhcsrc = indxg2p( kbot+1, nb, mycol, desch(csrc_), npcol )
1110 hhrows = numroc( jw+iroffhh, nb, myrow, hhrsrc, nprow )
1111 hhcols = numroc( kln+icoffhh, nb, mycol, hhcsrc, npcol )
1112 CALL descinit( deschh, jw+iroffhh, kln+icoffhh, nb, nb,
1113 $ hhrsrc, hhcsrc, ictxt, max(1, hhrows), ierr )
1114 CALL psgemm( 'Tr', 'No', jw, kln, jw, one, v,
1115 $ 1+iroffh, 1+iroffh, descv, h, kwtop, kbot+1,
1116 $ desch, zero, work, 1+iroffhh, 1+icoffhh, deschh )
1117 CALL pslacpy( 'All', jw, kln, work, 1+iroffhh, 1+icoffhh,
1118 $ deschh, h, kwtop, kbot+1, desch )
1119 END IF
1120*
1121* Update vertical slab in Z.
1122*
1123 IF( wantz ) THEN
1124 kln = ihiz-iloz+1
1125 iroffzz = mod( iloz-1, nb )
1126 icoffzz = mod( kwtop-1, nb )
1127 zzrsrc = indxg2p( iloz, nb, myrow, descz(rsrc_), nprow )
1128 zzcsrc = indxg2p( kwtop, nb, mycol, descz(csrc_), npcol )
1129 zzrows = numroc( kln+iroffzz, nb, myrow, zzrsrc, nprow )
1130 zzcols = numroc( jw+icoffzz, nb, mycol, zzcsrc, npcol )
1131 CALL descinit( desczz, kln+iroffzz, jw+icoffzz, nb, nb,
1132 $ zzrsrc, zzcsrc, ictxt, max(1, zzrows), ierr )
1133 CALL psgemm( 'No', 'No', kln, jw, jw, one, z, iloz,
1134 $ kwtop, descz, v, 1+iroffh, 1+iroffh, descv,
1135 $ zero, work, 1+iroffzz, 1+icoffzz, desczz )
1136 CALL pslacpy( 'All', kln, jw, work, 1+iroffzz, 1+icoffzz,
1137 $ desczz, z, iloz, kwtop, descz )
1138 END IF
1139 END IF
1140*
1141* Return the number of deflations (ND) and the number of shifts (NS).
1142* (Subtracting INFQR from the spike length takes care of the case of
1143* a rare QR failure while calculating eigenvalues of the deflation
1144* window.)
1145*
1146 nd = jw - ns
1147 ns = ns - infqr
1148*
1149* Return optimal workspace.
1150*
1151 work( 1 ) = float( lwkopt )
1152 iwork( 1 ) = ilwkopt + nsel
1153*
1154* End of PSLAQR3
1155*
1156 END
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
integer function iceil(inum, idenom)
Definition iceil.f:2
integer function indxg2p(indxglob, nb, iproc, isrcproc, nprocs)
Definition indxg2p.f:2
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
integer function pilaenvx(ictxt, ispec, name, opts, n1, n2, n3, n4)
Definition pilaenvx.f:3
subroutine pslaset(uplo, m, n, alpha, beta, a, ia, ja, desca)
Definition psblastst.f:6863
subroutine pselget(scope, top, alpha, a, ia, ja, desca)
Definition pselget.f:2
subroutine pselset(a, ia, ja, desca, alpha)
Definition pselset.f:2
subroutine psgehrd(n, ilo, ihi, a, ia, ja, desca, tau, work, lwork, info)
Definition psgehrd.f:3
subroutine pslacpy(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb)
Definition pslacpy.f:3
subroutine pslamve(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb, dwork)
Definition pslamve.f:3
real function pslange(norm, m, n, a, ia, ja, desca, work)
Definition pslange.f:3
recursive subroutine pslaqr0(wantt, wantz, n, ilo, ihi, h, desch, wr, wi, iloz, ihiz, z, descz, work, lwork, iwork, liwork, info, reclevel)
Definition pslaqr0.f:4
recursive subroutine pslaqr1(wantt, wantz, n, ilo, ihi, a, desca, wr, wi, iloz, ihiz, z, descz, work, lwork, iwork, ilwork, info)
Definition pslaqr1.f:5
recursive subroutine pslaqr3(wantt, wantz, n, ktop, kbot, nw, h, desch, iloz, ihiz, z, descz, ns, nd, sr, si, v, descv, nh, t, desct, nv, wv, descw, work, lwork, iwork, liwork, reclevel)
Definition pslaqr3.f:6
subroutine pslarf(side, m, n, v, iv, jv, descv, incv, tau, c, ic, jc, descc, work)
Definition pslarf.f:3
subroutine pslarfg(n, alpha, iax, jax, x, ix, jx, descx, incx, tau)
Definition pslarfg.f:3
subroutine psormhr(side, trans, m, n, ilo, ihi, a, ia, ja, desca, tau, c, ic, jc, descc, work, lwork, info)
Definition psormhr.f:3
subroutine psrot(n, x, ix, jx, descx, incx, y, iy, jy, descy, incy, cs, sn, work, lwork, info)
Definition psrot.f:3
subroutine pstrord(compq, select, para, n, t, it, jt, desct, q, iq, jq, descq, wr, wi, m, work, lwork, iwork, liwork, info)
Definition pstrord.f:4
real function slamch(cmach)
Definition tools.f:867