ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pslaqr3.f
Go to the documentation of this file.
1  RECURSIVE SUBROUTINE pslaqr3( WANTT, WANTZ, N, KTOP, KBOT, NW, H,
2  $ DESCH, ILOZ, IHIZ, Z, DESCZ, NS, ND,
3  $ SR, SI, V, DESCV, NH, T, DESCT, NV,
4  $ WV, DESCW, WORK, LWORK, IWORK,
5  $ LIWORK, RECLEVEL )
6 *
7 * Contribution from the Department of Computing Science and HPC2N,
8 * Umea University, Sweden
9 *
10 * -- ScaLAPACK auxiliary routine (version 2.0.1) --
11 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
12 * Univ. of Colorado Denver and University of California, Berkeley.
13 * January, 2012
14 *
15  IMPLICIT NONE
16 *
17 * .. Scalar Arguments ..
18  INTEGER ihiz, iloz, kbot, ktop, lwork, n, nd, nh, ns,
19  $ nv, nw, liwork, reclevel
20  LOGICAL wantt, wantz
21 * ..
22 * .. Array Arguments ..
23  INTEGER desch( * ), descz( * ), desct( * ), descv( * ),
24  $ descw( * ), iwork( * )
25  REAL h( * ), si( kbot ), sr( kbot ), t( * ),
26  $ v( * ), work( * ), wv( * ),
27  $ z( * )
28 * ..
29 *
30 * Purpose
31 * =======
32 *
33 * Aggressive early deflation:
34 *
35 * This subroutine accepts as input an upper Hessenberg matrix H and
36 * performs an orthogonal similarity transformation designed to detect
37 * and deflate fully converged eigenvalues from a trailing principal
38 * submatrix. On output H has been overwritten by a new Hessenberg
39 * matrix that is a perturbation of an orthogonal similarity
40 * transformation of H. It is to be hoped that the final version of H
41 * has many zero subdiagonal entries.
42 *
43 * Notes
44 * =====
45 *
46 * Each global data object is described by an associated description
47 * vector. This vector stores the information required to establish
48 * the mapping between an object element and its corresponding process
49 * and memory location.
50 *
51 * Let A be a generic term for any 2D block cyclicly distributed array.
52 * Such a global array has an associated description vector DESCA.
53 * In the following comments, the character _ should be read as
54 * "of the global array".
55 *
56 * NOTATION STORED IN EXPLANATION
57 * --------------- -------------- --------------------------------------
58 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
59 * DTYPE_A = 1.
60 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
61 * the BLACS process grid A is distribu-
62 * ted over. The context itself is glo-
63 * bal, but the handle (the integer
64 * value) may vary.
65 * M_A (global) DESCA( M_ ) The number of rows in the global
66 * array A.
67 * N_A (global) DESCA( N_ ) The number of columns in the global
68 * array A.
69 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
70 * the rows of the array.
71 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
72 * the columns of the array.
73 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
74 * row of the array A is distributed.
75 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
76 * first column of the array A is
77 * distributed.
78 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
79 * array. LLD_A >= MAX(1,LOCr(M_A)).
80 *
81 * Let K be the number of rows or columns of a distributed matrix,
82 * and assume that its process grid has dimension p x q.
83 * LOCr( K ) denotes the number of elements of K that a process
84 * would receive if K were distributed over the p processes of its
85 * process column.
86 * Similarly, LOCc( K ) denotes the number of elements of K that a
87 * process would receive if K were distributed over the q processes of
88 * its process row.
89 * The values of LOCr() and LOCc() may be determined via a call to the
90 * ScaLAPACK tool function, NUMROC:
91 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
92 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
93 * An upper bound for these quantities may be computed by:
94 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
95 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
96 *
97 * Arguments
98 * =========
99 *
100 * WANTT (global input) LOGICAL
101 * If .TRUE., then the Hessenberg matrix H is fully updated
102 * so that the quasi-triangular Schur factor may be
103 * computed (in cooperation with the calling subroutine).
104 * If .FALSE., then only enough of H is updated to preserve
105 * the eigenvalues.
106 *
107 * WANTZ (global input) LOGICAL
108 * If .TRUE., then the orthogonal matrix Z is updated so
109 * so that the orthogonal Schur factor may be computed
110 * (in cooperation with the calling subroutine).
111 * If .FALSE., then Z is not referenced.
112 *
113 * N (global input) INTEGER
114 * The order of the matrix H and (if WANTZ is .TRUE.) the
115 * order of the orthogonal matrix Z.
116 *
117 * KTOP (global input) INTEGER
118 * It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
119 * KBOT and KTOP together determine an isolated block
120 * along the diagonal of the Hessenberg matrix.
121 *
122 * KBOT (global input) INTEGER
123 * It is assumed without a check that either
124 * KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together
125 * determine an isolated block along the diagonal of the
126 * Hessenberg matrix.
127 *
128 * NW (global input) INTEGER
129 * Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1).
130 *
131 * H (local input/output) REAL array, dimension
132 * (DESCH(LLD_),*)
133 * On input the initial N-by-N section of H stores the
134 * Hessenberg matrix undergoing aggressive early deflation.
135 * On output H has been transformed by an orthogonal
136 * similarity transformation, perturbed, and the returned
137 * to Hessenberg form that (it is to be hoped) has some
138 * zero subdiagonal entries.
139 *
140 * DESCH (global and local input) INTEGER array of dimension DLEN_.
141 * The array descriptor for the distributed matrix H.
142 *
143 * ILOZ (global input) INTEGER
144 * IHIZ (global input) INTEGER
145 * Specify the rows of Z to which transformations must be
146 * applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
147 *
148 * Z (input/output) REAL array, dimension
149 * (DESCH(LLD_),*)
150 * IF WANTZ is .TRUE., then on output, the orthogonal
151 * similarity transformation mentioned above has been
152 * accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
153 * If WANTZ is .FALSE., then Z is unreferenced.
154 *
155 * DESCZ (global and local input) INTEGER array of dimension DLEN_.
156 * The array descriptor for the distributed matrix Z.
157 *
158 * NS (global output) INTEGER
159 * The number of unconverged (ie approximate) eigenvalues
160 * returned in SR and SI that may be used as shifts by the
161 * calling subroutine.
162 *
163 * ND (global output) INTEGER
164 * The number of converged eigenvalues uncovered by this
165 * subroutine.
166 *
167 * SR (global output) REAL array, dimension KBOT
168 * SI (global output) REAL array, dimension KBOT
169 * On output, the real and imaginary parts of approximate
170 * eigenvalues that may be used for shifts are stored in
171 * SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
172 * SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
173 * The real and imaginary parts of converged eigenvalues
174 * are stored in SR(KBOT-ND+1) through SR(KBOT) and
175 * SI(KBOT-ND+1) through SI(KBOT), respectively.
176 *
177 * V (global workspace) REAL array, dimension
178 * (DESCV(LLD_),*)
179 * An NW-by-NW distributed work array.
180 *
181 * DESCV (global and local input) INTEGER array of dimension DLEN_.
182 * The array descriptor for the distributed matrix V.
183 *
184 * NH (input) INTEGER scalar
185 * The number of columns of T. NH.GE.NW.
186 *
187 * T (global workspace) REAL array, dimension
188 * (DESCV(LLD_),*)
189 *
190 * DESCT (global and local input) INTEGER array of dimension DLEN_.
191 * The array descriptor for the distributed matrix T.
192 *
193 * NV (global input) INTEGER
194 * The number of rows of work array WV available for
195 * workspace. NV.GE.NW.
196 *
197 * WV (global workspace) REAL array, dimension
198 * (DESCW(LLD_),*)
199 *
200 * DESCW (global and local input) INTEGER array of dimension DLEN_.
201 * The array descriptor for the distributed matrix WV.
202 *
203 * WORK (local workspace) REAL array, dimension LWORK.
204 * On exit, WORK(1) is set to an estimate of the optimal value
205 * of LWORK for the given values of N, NW, KTOP and KBOT.
206 *
207 * LWORK (local input) INTEGER
208 * The dimension of the work array WORK. LWORK = 2*NW
209 * suffices, but greater efficiency may result from larger
210 * values of LWORK.
211 *
212 * If LWORK = -1, then a workspace query is assumed; PSLAQR3
213 * only estimates the optimal workspace size for the given
214 * values of N, NW, KTOP and KBOT. The estimate is returned
215 * in WORK(1). No error message related to LWORK is issued
216 * by XERBLA. Neither H nor Z are accessed.
217 *
218 * IWORK (local workspace) INTEGER array, dimension (LIWORK)
219 *
220 * LIWORK (local input) INTEGER
221 * The length of the workspace array IWORK
222 *
223 * ================================================================
224 * Based on contributions by
225 * Robert Granat and Meiyue Shao,
226 * Department of Computing Science and HPC2N,
227 * Umea University, Sweden
228 *
229 * ================================================================
230 * .. Parameters ..
231  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
232  $ lld_, mb_, m_, nb_, n_, rsrc_
233  INTEGER recmax
234  LOGICAL sortgrad
235  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
236  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
237  $ rsrc_ = 7, csrc_ = 8, lld_ = 9, recmax = 3,
238  $ sortgrad = .false. )
239  REAL zero, one
240  PARAMETER ( zero = 0.0, one = 1.0 )
241 * ..
242 * .. Local Scalars ..
243  REAL aa, bb, beta, cc, cs, dd, evi, evk, foo, s,
244  $ safmax, safmin, smlnum, sn, tau, ulp,
245  $ elem, elem1, elem2, elem3, r1, anorm, rnorm,
246  $ resaed
247  INTEGER i, ifst, ilst, info, infqr, j, jw, k, kcol,
248  $ kend, kln, krow, kwtop, ltop, lwk1, lwk2, lwk3,
249  $ lwkopt, nmin, lldh, lldz, lldt, lldv, lldwv,
250  $ ictxt, nprow, nmax, npcol, myrow, mycol, nb,
251  $ iroffh, m, rcols, taurows, rrows, taucols,
252  $ itau, ir, ipw, nprocs, mloc, iroffhh,
253  $ icoffhh, hhrsrc, hhcsrc, hhrows, hhcols,
254  $ iroffzz, icoffzz, zzrsrc, zzcsrc, zzrows,
255  $ zzcols, ierr, tzrows0, tzcols0, ierr0, ipt0,
256  $ ipz0, ipw0, nb2, round, lilst, kk, lilst0,
257  $ iwrk1, rsrc, csrc, lwk4, lwk5, iwrk2, lwk6,
258  $ lwk7, lwk8, ilwkopt, tzrows, tzcols, nsel,
259  $ npmin, ictxt_new, myrow_new, mycol_new
260  LOGICAL bulge, sorted, lquery
261 * ..
262 * .. Local Arrays ..
263  INTEGER par( 6 ), descr( dlen_ ),
264  $ desctau( dlen_ ), deschh( dlen_ ),
265  $ desczz( dlen_ ), desctz0( dlen_ ),
266  $ pmap( 64*64 )
267  REAL ddum( 1 )
268 * ..
269 * .. External Functions ..
270  REAL slamch, pslange
271  INTEGER pilaenvx, numroc, indxg2p, iceil, blacs_pnum
272  EXTERNAL slamch, pilaenvx, numroc, indxg2p, pslange,
273  $ iceil, blacs_pnum
274 * ..
275 * .. External Subroutines ..
276  EXTERNAL pscopy, psgehrd, psgemm, slabad, pslacpy,
277  $ pslaqr1, slanv2, pslaqr0, pslarf, pslarfg,
279  $ pslamve, blacs_gridinfo, blacs_gridmap,
280  $ blacs_gridexit, psgemr2d
281 * ..
282 * .. Intrinsic Functions ..
283  INTRINSIC abs, float, int, max, min, sqrt
284 * ..
285 * .. Executable Statements ..
286  ictxt = desch( ctxt_ )
287  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
288  nprocs = nprow*npcol
289 *
290 * Extract local leading dimensions, blockfactors, offset for
291 * keeping the alignment requirements and size of deflation window.
292 *
293  lldh = desch( lld_ )
294  lldz = descz( lld_ )
295  lldt = desct( lld_ )
296  lldv = descv( lld_ )
297  lldwv = descw( lld_ )
298  nb = desch( mb_ )
299  iroffh = mod( ktop - 1, nb )
300  jw = min( nw, kbot-ktop+1 )
301  nsel = nb+jw
302 *
303 * Extract environment variables for parallel eigenvalue reordering.
304 *
305  par(1) = pilaenvx(ictxt, 17, 'PSLAQR3', 'SV', jw, nb, -1, -1)
306  par(2) = pilaenvx(ictxt, 18, 'PSLAQR3', 'SV', jw, nb, -1, -1)
307  par(3) = pilaenvx(ictxt, 19, 'PSLAQR3', 'SV', jw, nb, -1, -1)
308  par(4) = pilaenvx(ictxt, 20, 'PSLAQR3', 'SV', jw, nb, -1, -1)
309  par(5) = pilaenvx(ictxt, 21, 'PSLAQR3', 'SV', jw, nb, -1, -1)
310  par(6) = pilaenvx(ictxt, 22, 'PSLAQR3', 'SV', jw, nb, -1, -1)
311 *
312 * Check if workspace query.
313 *
314  lquery = lwork.EQ.-1 .OR. liwork.EQ.-1
315 *
316 * Estimate optimal workspace.
317 *
318  IF( jw.LE.2 ) THEN
319  lwkopt = 1
320  ELSE
321 *
322 * Workspace query calls to PSGEHRD and PSORMHR.
323 *
324  taurows = numroc( 1, 1, mycol, descv(rsrc_), nprow )
325  taucols = numroc( jw+iroffh, nb, mycol, descv(csrc_),
326  $ npcol )
327  CALL psgehrd( jw, 1, jw, t, 1, 1, desct, work, work, -1,
328  $ info )
329  lwk1 = int( work( 1 ) ) + taurows*taucols
330 *
331 * Workspace query call to PSORMHR.
332 *
333  CALL psormhr( 'Right', 'No', jw, jw, 1, jw, t, 1, 1, desct,
334  $ work, v, 1, 1, descv, work, -1, info )
335  lwk2 = int( work( 1 ) )
336 *
337 * Workspace query call to PSLAQR0.
338 *
339  nmin = pilaenvx( ictxt, 12, 'PSLAQR3', 'SV', jw, 1, jw, lwork )
340  nmax = ( n-1 ) / 3
341  IF( jw+iroffh.GT.nmin .AND. jw+iroffh.LE.nmax
342  $ .AND. reclevel.LT.recmax ) THEN
343  CALL pslaqr0( .true., .true., jw+iroffh, 1+iroffh,
344  $ jw+iroffh, t, desct, sr, si, 1, jw, v, descv,
345  $ work, -1, iwork, liwork-nsel, infqr,
346  $ reclevel+1 )
347  lwk3 = int( work( 1 ) )
348  iwrk1 = iwork( 1 )
349  ELSE
350  rsrc = desct( rsrc_ )
351  csrc = desct( csrc_ )
352  desct( rsrc_ ) = 0
353  desct( csrc_ ) = 0
354  CALL pslaqr1( .true., .true., jw+iroffh, 1, jw+iroffh, t,
355  $ desct, sr, si, 1, jw+iroffh, v, descv, work, -1,
356  $ iwork, liwork-nsel, infqr )
357  desct( rsrc_ ) = rsrc
358  desct( csrc_ ) = csrc
359  lwk3 = int( work( 1 ) )
360  iwrk1 = iwork( 1 )
361  END IF
362 *
363 * Workspace in case of alignment problems.
364 *
365  tzrows0 = numroc( jw+iroffh, nb, myrow, 0, nprow )
366  tzcols0 = numroc( jw+iroffh, nb, mycol, 0, npcol )
367  lwk4 = 2 * tzrows0*tzcols0
368 *
369 * Workspace check for reordering.
370 *
371  CALL pstrord( 'Vectors', iwork, par, jw+iroffh, t, 1, 1,
372  $ desct, v, 1, 1, descv, ddum, ddum, mloc, work, -1,
373  $ iwork, liwork-nsel, info )
374  lwk5 = int( work( 1 ) )
375  iwrk2 = iwork( 1 )
376 *
377 * Extra workspace for reflecting back spike
378 * (workspace for PSLARF approximated for simplicity).
379 *
380  rrows = numroc( n+iroffh, nb, myrow, descv(rsrc_), nprow )
381  rcols = numroc( 1, 1, mycol, descv(csrc_), npcol )
382  lwk6 = rrows*rcols + taurows*taucols +
383  $ 2*iceil(iceil(jw+iroffh,nb),nprow)*nb
384  $ *iceil(iceil(jw+iroffh,nb),npcol)*nb
385 *
386 * Extra workspace needed by PBLAS update calls
387 * (also estimated for simplicity).
388 *
389  lwk7 = max( iceil(iceil(jw,nb),nprow)*nb *
390  $ iceil(iceil(n-kbot,nb),npcol)*nb,
391  $ iceil(iceil(ihiz-iloz+1,nb),nprow)*nb *
392  $ iceil(iceil(jw,nb),npcol)*nb,
393  $ iceil(iceil(kbot-jw,nb),nprow)*nb *
394  $ iceil(iceil(jw,nb),npcol)*nb )
395 *
396 * Residual check workspace.
397 *
398  tzrows = numroc( jw+iroffh, nb, myrow, desct(rsrc_), nprow )
399  tzcols = numroc( jw+iroffh, nb, mycol, desct(csrc_), npcol )
400  lwk8 = 2*tzrows*tzcols
401 *
402 * Optimal workspace.
403 *
404  lwkopt = max( lwk1, lwk2, lwk3+lwk4, lwk5, lwk6, lwk7, lwk8 )
405  ilwkopt = max( iwrk1, iwrk2 )
406  END IF
407 *
408 * Quick return in case of workspace query.
409 *
410  work( 1 ) = float( lwkopt )
411 *
412 * IWORK(1:NSEL) is used as the array SELECT for PSTRORD.
413 *
414  iwork( 1 ) = ilwkopt + nsel
415  IF( lquery )
416  $ RETURN
417 *
418 * Nothing to do for an empty active block ...
419  ns = 0
420  nd = 0
421  IF( ktop.GT.kbot )
422  $ RETURN
423 * ... nor for an empty deflation window.
424 *
425  IF( nw.LT.1 )
426  $ RETURN
427 *
428 * Machine constants.
429 *
430  safmin = slamch( 'SAFE MINIMUM' )
431  safmax = one / safmin
432  CALL slabad( safmin, safmax )
433  ulp = slamch( 'PRECISION' )
434  smlnum = safmin*( float( n ) / ulp )
435 *
436 * Setup deflation window.
437 *
438  jw = min( nw, kbot-ktop+1 )
439  kwtop = kbot - jw + 1
440  IF( kwtop.EQ.ktop ) THEN
441  s = zero
442  ELSE
443  CALL pselget( 'All', '1-Tree', s, h, kwtop, kwtop-1, desch )
444  END IF
445 *
446  IF( kbot.EQ.kwtop ) THEN
447 *
448 * 1-by-1 deflation window: not much to do.
449 *
450  CALL pselget( 'All', '1-Tree', sr( kwtop ), h, kwtop, kwtop,
451  $ desch )
452  si( kwtop ) = zero
453  ns = 1
454  nd = 0
455  IF( abs( s ).LE.max( smlnum, ulp*abs( sr( kwtop ) ) ) )
456  $ THEN
457  ns = 0
458  nd = 1
459  IF( kwtop.GT.ktop )
460  $ CALL pselset( h, kwtop, kwtop-1 , desch, zero )
461  END IF
462  RETURN
463  END IF
464 *
465  IF( kwtop.EQ.ktop .AND. kbot-kwtop.EQ.1 ) THEN
466 *
467 * 2-by-2 deflation window: a little more to do.
468 *
469  CALL pselget( 'All', '1-Tree', aa, h, kwtop, kwtop, desch )
470  CALL pselget( 'All', '1-Tree', bb, h, kwtop, kwtop+1, desch )
471  CALL pselget( 'All', '1-Tree', cc, h, kwtop+1, kwtop, desch )
472  CALL pselget( 'All', '1-Tree', dd, h, kwtop+1, kwtop+1, desch )
473  CALL slanv2( aa, bb, cc, dd, sr(kwtop), si(kwtop),
474  $ sr(kwtop+1), si(kwtop+1), cs, sn )
475  ns = 0
476  nd = 2
477  IF( cc.EQ.zero ) THEN
478  i = kwtop
479  IF( i+2.LE.n .AND. wantt )
480  $ CALL psrot( n-i-1, h, i, i+2, desch, desch(m_), h, i+1,
481  $ i+2, desch, desch(m_), cs, sn, work, lwork, info )
482  IF( i.GT.1 )
483  $ CALL psrot( i-1, h, 1, i, desch, 1, h, 1, i+1, desch, 1,
484  $ cs, sn, work, lwork, info )
485  IF( wantz )
486  $ CALL psrot( ihiz-iloz+1, z, iloz, i, descz, 1, z, iloz,
487  $ i+1, descz, 1, cs, sn, work, lwork, info )
488  CALL pselset( h, i, i, desch, aa )
489  CALL pselset( h, i, i+1, desch, bb )
490  CALL pselset( h, i+1, i, desch, cc )
491  CALL pselset( h, i+1, i+1, desch, dd )
492  END IF
493  work( 1 ) = float( lwkopt )
494  RETURN
495  END IF
496 *
497 * Calculate new value for IROFFH in case deflation window
498 * was adjusted.
499 *
500  iroffh = mod( kwtop - 1, nb )
501 *
502 * Adjust number of rows and columns of T matrix descriptor
503 * to prepare for call to PDBTRORD.
504 *
505  desct( m_ ) = jw+iroffh
506  desct( n_ ) = jw+iroffh
507 *
508 * Convert to spike-triangular form. (In case of a rare QR failure,
509 * this routine continues to do aggressive early deflation using that
510 * part of the deflation window that converged using INFQR here and
511 * there to keep track.)
512 *
513 * Copy the trailing submatrix to the working space.
514 *
515  CALL pslaset( 'All', iroffh, jw+iroffh, zero, one, t, 1, 1,
516  $ desct )
517  CALL pslaset( 'All', jw, iroffh, zero, zero, t, 1+iroffh, 1,
518  $ desct )
519  CALL pslacpy( 'All', 1, jw, h, kwtop, kwtop, desch, t, 1+iroffh,
520  $ 1+iroffh, desct )
521  CALL pslacpy( 'Upper', jw-1, jw-1, h, kwtop+1, kwtop, desch, t,
522  $ 1+iroffh+1, 1+iroffh, desct )
523  IF( jw.GT.2 )
524  $ CALL pslaset( 'Lower', jw-2, jw-2, zero, zero, t, 1+iroffh+2,
525  $ 1+iroffh, desct )
526  CALL pslacpy( 'All', jw-1, 1, h, kwtop+1, kwtop+jw-1, desch, t,
527  $ 1+iroffh+1, 1+iroffh+jw-1, desct )
528 *
529 * Initialize the working orthogonal matrix.
530 *
531  CALL pslaset( 'All', jw+iroffh, jw+iroffh, zero, one, v, 1, 1,
532  $ descv )
533 *
534 * Compute the Schur form of T.
535 *
536  npmin = pilaenvx( ictxt, 23, 'PSLAQR3', 'SV', jw, nb, nprow,
537  $ npcol )
538  nmin = pilaenvx( ictxt, 12, 'PSLAQR3', 'SV', jw, 1, jw, lwork )
539  nmax = ( n-1 ) / 3
540  IF( min(nprow, npcol).LE.npmin+1 .OR. reclevel.GE.1 ) THEN
541 *
542 * The AED window is large enough.
543 * Compute the Schur decomposition with all processors.
544 *
545  IF( jw+iroffh.GT.nmin .AND. jw+iroffh.LE.nmax
546  $ .AND. reclevel.LT.recmax ) THEN
547  CALL pslaqr0( .true., .true., jw+iroffh, 1+iroffh,
548  $ jw+iroffh, t, desct, sr( kwtop-iroffh ),
549  $ si( kwtop-iroffh ), 1+iroffh, jw+iroffh, v, descv,
550  $ work, lwork, iwork(nsel+1), liwork-nsel, infqr,
551  $ reclevel+1 )
552  ELSE
553  IF( desct(rsrc_).EQ.0 .AND. desct(csrc_).EQ.0 ) THEN
554  IF( jw+iroffh.GT.desct( mb_ ) ) THEN
555  CALL pslaqr1( .true., .true., jw+iroffh, 1,
556  $ jw+iroffh, t, desct, sr( kwtop-iroffh ),
557  $ si( kwtop-iroffh ), 1, jw+iroffh, v,
558  $ descv, work, lwork, iwork(nsel+1), liwork-nsel,
559  $ infqr )
560  ELSE
561  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
562  CALL slahqr( .true., .true., jw+iroffh, 1+iroffh,
563  $ jw+iroffh, t, desct(lld_),
564  $ sr( kwtop-iroffh ), si( kwtop-iroffh ),
565  $ 1+iroffh, jw+iroffh, v, descv(lld_), infqr )
566  ELSE
567  infqr = 0
568  END IF
569  IF( nprocs.GT.1 )
570  $ CALL igamn2d( ictxt, 'All', '1-Tree', 1, 1, infqr,
571  $ 1, -1, -1, -1, -1, -1 )
572  END IF
573  ELSEIF( jw+iroffh.LE.desct( mb_ ) ) THEN
574  IF( myrow.EQ.desct(rsrc_) .AND. mycol.EQ.desct(csrc_) )
575  $ THEN
576  CALL slahqr( .true., .true., jw+iroffh, 1+iroffh,
577  $ jw+iroffh, t, desct(lld_),
578  $ sr( kwtop-iroffh ), si( kwtop-iroffh ),
579  $ 1+iroffh, jw+iroffh, v, descv(lld_), infqr )
580  ELSE
581  infqr = 0
582  END IF
583  IF( nprocs.GT.1 )
584  $ CALL igamn2d( ictxt, 'All', '1-Tree', 1, 1, infqr,
585  $ 1, -1, -1, -1, -1, -1 )
586  ELSE
587  tzrows0 = numroc( jw+iroffh, nb, myrow, 0, nprow )
588  tzcols0 = numroc( jw+iroffh, nb, mycol, 0, npcol )
589  CALL descinit( desctz0, jw+iroffh, jw+iroffh, nb, nb, 0,
590  $ 0, ictxt, max(1,tzrows0), ierr0 )
591  ipt0 = 1
592  ipz0 = ipt0 + max(1,tzrows0)*tzcols0
593  ipw0 = ipz0 + max(1,tzrows0)*tzcols0
594  CALL pslamve( 'All', jw+iroffh, jw+iroffh, t, 1, 1,
595  $ desct, work(ipt0), 1, 1, desctz0, work(ipw0) )
596  CALL pslaset( 'All', jw+iroffh, jw+iroffh, zero, one,
597  $ work(ipz0), 1, 1, desctz0 )
598  CALL pslaqr1( .true., .true., jw+iroffh, 1,
599  $ jw+iroffh, work(ipt0), desctz0,
600  $ sr( kwtop-iroffh ), si( kwtop-iroffh ),
601  $ 1, jw+iroffh, work(ipz0),
602  $ desctz0, work(ipw0), lwork-ipw0+1, iwork(nsel+1),
603  $ liwork-nsel, infqr )
604  CALL pslamve( 'All', jw+iroffh, jw+iroffh, work(ipt0), 1,
605  $ 1, desctz0, t, 1, 1, desct, work(ipw0) )
606  CALL pslamve( 'All', jw+iroffh, jw+iroffh, work(ipz0), 1,
607  $ 1, desctz0, v, 1, 1, descv, work(ipw0) )
608  END IF
609  END IF
610  ELSE
611 *
612 * The AED window is too small.
613 * Redistribute the AED window to a subgrid
614 * and do the computation on the subgrid.
615 *
616  ictxt_new = ictxt
617  DO 20 i = 0, npmin-1
618  DO 10 j = 0, npmin-1
619  pmap( j+1+i*npmin ) = blacs_pnum( ictxt, i, j )
620  10 CONTINUE
621  20 CONTINUE
622  CALL blacs_gridmap( ictxt_new, pmap, npmin, npmin, npmin )
623  CALL blacs_gridinfo( ictxt_new, npmin, npmin, myrow_new,
624  $ mycol_new )
625  IF( myrow.GE.npmin .OR. mycol.GE.npmin ) ictxt_new = -1
626  IF( ictxt_new.GE.0 ) THEN
627  tzrows0 = numroc( jw, nb, myrow_new, 0, npmin )
628  tzcols0 = numroc( jw, nb, mycol_new, 0, npmin )
629  CALL descinit( desctz0, jw, jw, nb, nb, 0,
630  $ 0, ictxt_new, max(1,tzrows0), ierr0 )
631  ipt0 = 1
632  ipz0 = ipt0 + max(1,tzrows0)*max(1,tzcols0)
633  ipw0 = ipz0 + max(1,tzrows0)*max(1,tzcols0)
634  ELSE
635  ipt0 = 1
636  ipz0 = 2
637  ipw0 = 3
638  desctz0( ctxt_ ) = -1
639  infqr = 0
640  END IF
641  CALL psgemr2d( jw, jw, t, 1+iroffh, 1+iroffh, desct,
642  $ work(ipt0), 1, 1, desctz0, ictxt )
643  IF( ictxt_new.GE.0 ) THEN
644  CALL pslaset( 'All', jw, jw, zero, one, work(ipz0), 1, 1,
645  $ desctz0 )
646  nmin = pilaenvx( ictxt_new, 12, 'PSLAQR3', 'SV', jw, 1, jw,
647  $ lwork )
648  IF( jw.GT.nmin .AND. jw.LE.nmax .AND. reclevel.LT.1 ) THEN
649  CALL pslaqr0( .true., .true., jw, 1, jw, work(ipt0),
650  $ desctz0, sr( kwtop ), si( kwtop ), 1, jw,
651  $ work(ipz0), desctz0, work(ipw0), lwork-ipw0+1,
652  $ iwork(nsel+1), liwork-nsel, infqr,
653  $ reclevel+1 )
654  ELSE
655  CALL pslaqr1( .true., .true., jw, 1, jw, work(ipt0),
656  $ desctz0, sr( kwtop ), si( kwtop ), 1, jw,
657  $ work(ipz0), desctz0, work(ipw0), lwork-ipw0+1,
658  $ iwork(nsel+1), liwork-nsel, infqr )
659  END IF
660  END IF
661  CALL psgemr2d( jw, jw, work(ipt0), 1, 1, desctz0, t, 1+iroffh,
662  $ 1+iroffh, desct, ictxt )
663  CALL psgemr2d( jw, jw, work(ipz0), 1, 1, desctz0, v, 1+iroffh,
664  $ 1+iroffh, descv, ictxt )
665  IF( ictxt_new.GE.0 )
666  $ CALL blacs_gridexit( ictxt_new )
667  IF( myrow+mycol.GT.0 ) THEN
668  DO 40 j = 0, jw-1
669  sr( kwtop+j ) = zero
670  si( kwtop+j ) = zero
671  40 CONTINUE
672  END IF
673  CALL igamn2d( ictxt, 'All', '1-Tree', 1, 1, infqr, 1, -1, -1,
674  $ -1, -1, -1 )
675  CALL sgsum2d( ictxt, 'All', ' ', jw, 1, sr(kwtop), jw, -1, -1 )
676  CALL sgsum2d( ictxt, 'All', ' ', jw, 1, si(kwtop), jw, -1, -1 )
677  END IF
678 *
679 * Adjust INFQR for offset from block border in submatrices.
680 *
681  IF( infqr.NE.0 )
682  $ infqr = infqr - iroffh
683 *
684 * PSTRORD needs a clean margin near the diagonal.
685 *
686  DO 50 j = 1, jw - 3
687  CALL pselset( t, j+2, j, desct, zero )
688  CALL pselset( t, j+3, j, desct, zero )
689  50 CONTINUE
690  IF( jw.GT.2 )
691  $ CALL pselset( t, jw, jw-2, desct, zero )
692 *
693 * Check local residual for AED Schur decomposition.
694 *
695  resaed = 0.0
696 *
697 * Clean up the array SELECT for PSTRORD.
698 *
699  DO 60 j = 1, nsel
700  iwork( j ) = 0
701  60 CONTINUE
702 *
703 * Set local M counter to zero.
704 *
705  mloc = 0
706 *
707 * Outer deflation detection loop (label 80).
708 * In this loop a bunch of undeflatable eigenvalues
709 * are moved simultaneously.
710 *
711  DO 70 j = 1, iroffh + infqr
712  iwork( j ) = 1
713  70 CONTINUE
714 *
715  ns = jw
716  ilst = infqr + 1 + iroffh
717  IF( ilst.GT.1 ) THEN
718  CALL pselget( 'All', '1-Tree', elem, t, ilst, ilst-1, desct )
719  bulge = elem.NE.zero
720  IF( bulge ) ilst = ilst+1
721  END IF
722 *
723  80 CONTINUE
724  IF( ilst.LE.ns+iroffh ) THEN
725 *
726 * Find the top-left corner of the local window.
727 *
728  lilst = max(ilst,ns+iroffh-nb+1)
729  IF( lilst.GT.1 ) THEN
730  CALL pselget( 'All', '1-Tree', elem, t, lilst, lilst-1,
731  $ desct )
732  bulge = elem.NE.zero
733  IF( bulge ) lilst = lilst+1
734  END IF
735 *
736 * Lock all eigenvalues outside the local window.
737 *
738  DO 90 j = iroffh+1, lilst-1
739  iwork( j ) = 1
740  90 CONTINUE
741  lilst0 = lilst
742 *
743 * Inner deflation detection loop (label 100).
744 * In this loop, the undeflatable eigenvalues are moved to the
745 * top-left corner of the local window.
746 *
747  100 CONTINUE
748  IF( lilst.LE.ns+iroffh ) THEN
749  IF( ns.EQ.1 ) THEN
750  bulge = .false.
751  ELSE
752  CALL pselget( 'All', '1-Tree', elem, t, ns+iroffh,
753  $ ns+iroffh-1, desct )
754  bulge = elem.NE.zero
755  END IF
756 *
757 * Small spike tip test for deflation.
758 *
759  IF( .NOT.bulge ) THEN
760 *
761 * Real eigenvalue.
762 *
763  CALL pselget( 'All', '1-Tree', elem, t, ns+iroffh,
764  $ ns+iroffh, desct )
765  foo = abs( elem )
766  IF( foo.EQ.zero )
767  $ foo = abs( s )
768  CALL pselget( 'All', '1-Tree', elem, v, 1+iroffh,
769  $ ns+iroffh, descv )
770  IF( abs( s*elem ).LE.max( smlnum, ulp*foo ) ) THEN
771 *
772 * Deflatable.
773 *
774  ns = ns - 1
775  ELSE
776 *
777 * Undeflatable: move it up out of the way.
778 *
779  ifst = ns
780  DO 110 j = lilst, jw+iroffh
781  iwork( j ) = 0
782  110 CONTINUE
783  iwork( ifst+iroffh ) = 1
784  CALL pstrord( 'Vectors', iwork, par, jw+iroffh, t, 1,
785  $ 1, desct, v, 1, 1, descv, work,
786  $ work(jw+iroffh+1), mloc,
787  $ work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
788  $ iwork(nsel+1), liwork-nsel, info )
789 *
790 * Adjust the array SELECT explicitly so that it does not
791 * rely on the output of PSTRORD.
792 *
793  iwork( ifst+iroffh ) = 0
794  iwork( lilst ) = 1
795  lilst = lilst + 1
796 *
797 * In case of a rare exchange failure, adjust the
798 * pointers ILST and LILST to the current place to avoid
799 * unexpected behaviors.
800 *
801  IF( info.NE.0 ) THEN
802  lilst = max(info, lilst)
803  ilst = max(info, ilst)
804  END IF
805  END IF
806  ELSE
807 *
808 * Complex conjugate pair.
809 *
810  CALL pselget( 'All', '1-Tree', elem1, t, ns+iroffh,
811  $ ns+iroffh, desct )
812  CALL pselget( 'All', '1-Tree', elem2, t, ns+iroffh,
813  $ ns+iroffh-1, desct )
814  CALL pselget( 'All', '1-Tree', elem3, t, ns+iroffh-1,
815  $ ns+iroffh, desct )
816  foo = abs( elem1 ) + sqrt( abs( elem2 ) )*
817  $ sqrt( abs( elem3 ) )
818  IF( foo.EQ.zero )
819  $ foo = abs( s )
820  CALL pselget( 'All', '1-Tree', elem1, v, 1+iroffh,
821  $ ns+iroffh, descv )
822  CALL pselget( 'All', '1-Tree', elem2, v, 1+iroffh,
823  $ ns+iroffh-1, descv )
824  IF( max( abs( s*elem1 ), abs( s*elem2 ) ).LE.
825  $ max( smlnum, ulp*foo ) ) THEN
826 *
827 * Deflatable.
828 *
829  ns = ns - 2
830  ELSE
831 *
832 * Undeflatable: move them up out of the way.
833 *
834  ifst = ns
835  DO 120 j = lilst, jw+iroffh
836  iwork( j ) = 0
837  120 CONTINUE
838  iwork( ifst+iroffh ) = 1
839  iwork( ifst+iroffh-1 ) = 1
840  CALL pstrord( 'Vectors', iwork, par, jw+iroffh, t, 1,
841  $ 1, desct, v, 1, 1, descv, work,
842  $ work(jw+iroffh+1), mloc,
843  $ work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
844  $ iwork(nsel+1), liwork-nsel, info )
845 *
846 * Adjust the array SELECT explicitly so that it does not
847 * rely on the output of PSTRORD.
848 *
849  iwork( ifst+iroffh ) = 0
850  iwork( ifst+iroffh-1 ) = 0
851  iwork( lilst ) = 1
852  iwork( lilst+1 ) = 1
853  lilst = lilst + 2
854 *
855 * In case of a rare exchange failure, adjust the
856 * pointers ILST and LILST to the current place to avoid
857 * unexpected behaviors.
858 *
859  IF( info.NE.0 ) THEN
860  lilst = max(info, lilst)
861  ilst = max(info, ilst)
862  END IF
863  END IF
864  END IF
865 *
866 * End of inner deflation detection loop.
867 *
868  GO TO 100
869  END IF
870 *
871 * Unlock the eigenvalues outside the local window.
872 * Then undeflatable eigenvalues are moved to the proper position.
873 *
874  DO 130 j = ilst, lilst0-1
875  iwork( j ) = 0
876  130 CONTINUE
877  CALL pstrord( 'Vectors', iwork, par, jw+iroffh, t, 1, 1,
878  $ desct, v, 1, 1, descv, work, work(jw+iroffh+1),
879  $ m, work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
880  $ iwork(nsel+1), liwork-nsel, info )
881  ilst = m + 1
882 *
883 * In case of a rare exchange failure, adjust the pointer ILST to
884 * the current place to avoid unexpected behaviors.
885 *
886  IF( info.NE.0 )
887  $ ilst = max(info, ilst)
888 *
889 * End of outer deflation detection loop.
890 *
891  GO TO 80
892  END IF
893 
894 *
895 * Post-reordering step: copy output eigenvalues to output.
896 *
897  CALL scopy( jw, work(1+iroffh), 1, sr( kwtop ), 1 )
898  CALL scopy( jw, work(jw+2*iroffh+1), 1, si( kwtop ), 1 )
899 *
900 * Check local residual for reordered AED Schur decomposition.
901 *
902  resaed = 0.0
903 *
904 * Return to Hessenberg form.
905 *
906  IF( ns.EQ.0 )
907  $ s = zero
908 *
909  IF( ns.LT.jw .AND. sortgrad ) THEN
910 *
911 * Sorting diagonal blocks of T improves accuracy for
912 * graded matrices. Bubble sort deals well with exchange
913 * failures. Eigenvalues/shifts from T are also restored.
914 *
915  round = 0
916  sorted = .false.
917  i = ns + 1 + iroffh
918  140 CONTINUE
919  IF( sorted )
920  $ GO TO 180
921  sorted = .true.
922  round = round + 1
923 *
924  kend = i - 1
925  i = infqr + 1 + iroffh
926  IF( i.EQ.ns+iroffh ) THEN
927  k = i + 1
928  ELSE IF( si( kwtop-iroffh + i-1 ).EQ.zero ) THEN
929  k = i + 1
930  ELSE
931  k = i + 2
932  END IF
933  150 CONTINUE
934  IF( k.LE.kend ) THEN
935  IF( k.EQ.i+1 ) THEN
936  evi = abs( sr( kwtop-iroffh+i-1 ) )
937  ELSE
938  evi = abs( sr( kwtop-iroffh+i-1 ) ) +
939  $ abs( si( kwtop-iroffh+i-1 ) )
940  END IF
941 *
942  IF( k.EQ.kend ) THEN
943  evk = abs( sr( kwtop-iroffh+k-1 ) )
944  ELSEIF( si( kwtop-iroffh+k-1 ).EQ.zero ) THEN
945  evk = abs( sr( kwtop-iroffh+k-1 ) )
946  ELSE
947  evk = abs( sr( kwtop-iroffh+k-1 ) ) +
948  $ abs( si( kwtop-iroffh+k-1 ) )
949  END IF
950 *
951  IF( evi.GE.evk ) THEN
952  i = k
953  ELSE
954  mloc = 0
955  sorted = .false.
956  ifst = i
957  ilst = k
958  DO 160 j = 1, i-1
959  iwork( j ) = 1
960  mloc = mloc + 1
961  160 CONTINUE
962  IF( k.EQ.i+2 ) THEN
963  iwork( i ) = 0
964  iwork(i+1) = 0
965  ELSE
966  iwork( i ) = 0
967  END IF
968  IF( k.NE.kend .AND. si( kwtop-iroffh+k-1 ).NE.zero ) THEN
969  iwork( k ) = 1
970  iwork(k+1) = 1
971  mloc = mloc + 2
972  ELSE
973  iwork( k ) = 1
974  IF( k.LT.kend ) iwork(k+1) = 0
975  mloc = mloc + 1
976  END IF
977  DO 170 j = k+2, jw+iroffh
978  iwork( j ) = 0
979  170 CONTINUE
980  CALL pstrord( 'Vectors', iwork, par, jw+iroffh, t, 1, 1,
981  $ desct, v, 1, 1, descv, work, work(jw+iroffh+1), m,
982  $ work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
983  $ iwork(nsel+1), liwork-nsel, ierr )
984  CALL scopy( jw, work(1+iroffh), 1, sr( kwtop ), 1 )
985  CALL scopy( jw, work(jw+2*iroffh+1), 1, si( kwtop ), 1 )
986  IF( ierr.EQ.0 ) THEN
987  i = ilst
988  ELSE
989  i = k
990  END IF
991  END IF
992  IF( i.EQ.kend ) THEN
993  k = i + 1
994  ELSE IF( si( kwtop-iroffh+i-1 ).EQ.zero ) THEN
995  k = i + 1
996  ELSE
997  k = i + 2
998  END IF
999  GO TO 150
1000  END IF
1001  GO TO 140
1002  180 CONTINUE
1003  END IF
1004 *
1005 * Restore number of rows and columns of T matrix descriptor.
1006 *
1007  desct( m_ ) = nw+iroffh
1008  desct( n_ ) = nh+iroffh
1009 *
1010  IF( ns.LT.jw .OR. s.EQ.zero ) THEN
1011  IF( ns.GT.1 .AND. s.NE.zero ) THEN
1012 *
1013 * Reflect spike back into lower triangle.
1014 *
1015  rrows = numroc( ns+iroffh, nb, myrow, descv(rsrc_), nprow )
1016  rcols = numroc( 1, 1, mycol, descv(csrc_), npcol )
1017  CALL descinit( descr, ns+iroffh, 1, nb, 1, descv(rsrc_),
1018  $ descv(csrc_), ictxt, max(1, rrows), info )
1019  taurows = numroc( 1, 1, mycol, descv(rsrc_), nprow )
1020  taucols = numroc( jw+iroffh, nb, mycol, descv(csrc_),
1021  $ npcol )
1022  CALL descinit( desctau, 1, jw+iroffh, 1, nb, descv(rsrc_),
1023  $ descv(csrc_), ictxt, max(1, taurows), info )
1024 *
1025  ir = 1
1026  itau = ir + descr( lld_ ) * rcols
1027  ipw = itau + desctau( lld_ ) * taucols
1028 *
1029  CALL pslaset( 'All', ns+iroffh, 1, zero, zero, work(itau),
1030  $ 1, 1, desctau )
1031 *
1032  CALL pscopy( ns, v, 1+iroffh, 1+iroffh, descv, descv(m_),
1033  $ work(ir), 1+iroffh, 1, descr, 1 )
1034  CALL pslarfg( ns, beta, 1+iroffh, 1, work(ir), 2+iroffh, 1,
1035  $ descr, 1, work(itau+iroffh) )
1036  CALL pselset( work(ir), 1+iroffh, 1, descr, one )
1037 *
1038  CALL pslaset( 'Lower', jw-2, jw-2, zero, zero, t, 3+iroffh,
1039  $ 1+iroffh, desct )
1040 *
1041  CALL pslarf( 'Left', ns, jw, work(ir), 1+iroffh, 1, descr,
1042  $ 1, work(itau+iroffh), t, 1+iroffh, 1+iroffh,
1043  $ desct, work( ipw ) )
1044  CALL pslarf( 'Right', ns, ns, work(ir), 1+iroffh, 1, descr,
1045  $ 1, work(itau+iroffh), t, 1+iroffh, 1+iroffh,
1046  $ desct, work( ipw ) )
1047  CALL pslarf( 'Right', jw, ns, work(ir), 1+iroffh, 1, descr,
1048  $ 1, work(itau+iroffh), v, 1+iroffh, 1+iroffh,
1049  $ descv, work( ipw ) )
1050 *
1051  itau = 1
1052  ipw = itau + desctau( lld_ ) * taucols
1053  CALL psgehrd( jw+iroffh, 1+iroffh, ns+iroffh, t, 1, 1,
1054  $ desct, work(itau), work( ipw ), lwork-ipw+1, info )
1055  END IF
1056 *
1057 * Copy updated reduced window into place.
1058 *
1059  IF( kwtop.GT.1 ) THEN
1060  CALL pselget( 'All', '1-Tree', elem, v, 1+iroffh,
1061  $ 1+iroffh, descv )
1062  CALL pselset( h, kwtop, kwtop-1, desch, s*elem )
1063  END IF
1064  CALL pslacpy( 'Upper', jw-1, jw-1, t, 1+iroffh+1, 1+iroffh,
1065  $ desct, h, kwtop+1, kwtop, desch )
1066  CALL pslacpy( 'All', 1, jw, t, 1+iroffh, 1+iroffh, desct, h,
1067  $ kwtop, kwtop, desch )
1068  CALL pslacpy( 'All', jw-1, 1, t, 1+iroffh+1, 1+iroffh+jw-1,
1069  $ desct, h, kwtop+1, kwtop+jw-1, desch )
1070 *
1071 * Accumulate orthogonal matrix in order to update
1072 * H and Z, if requested.
1073 *
1074  IF( ns.GT.1 .AND. s.NE.zero ) THEN
1075  CALL psormhr( 'Right', 'No', jw+iroffh, ns+iroffh, 1+iroffh,
1076  $ ns+iroffh, t, 1, 1, desct, work(itau), v, 1,
1077  $ 1, descv, work( ipw ), lwork-ipw+1, info )
1078  END IF
1079 *
1080 * Update vertical slab in H.
1081 *
1082  IF( wantt ) THEN
1083  ltop = 1
1084  ELSE
1085  ltop = ktop
1086  END IF
1087  kln = max( 0, kwtop-ltop )
1088  iroffhh = mod( ltop-1, nb )
1089  icoffhh = mod( kwtop-1, nb )
1090  hhrsrc = indxg2p( ltop, nb, myrow, desch(rsrc_), nprow )
1091  hhcsrc = indxg2p( kwtop, nb, mycol, desch(csrc_), npcol )
1092  hhrows = numroc( kln+iroffhh, nb, myrow, hhrsrc, nprow )
1093  hhcols = numroc( jw+icoffhh, nb, mycol, hhcsrc, npcol )
1094  CALL descinit( deschh, kln+iroffhh, jw+icoffhh, nb, nb,
1095  $ hhrsrc, hhcsrc, ictxt, max(1, hhrows), ierr )
1096  CALL psgemm( 'No', 'No', kln, jw, jw, one, h, ltop,
1097  $ kwtop, desch, v, 1+iroffh, 1+iroffh, descv, zero,
1098  $ work, 1+iroffhh, 1+icoffhh, deschh )
1099  CALL pslacpy( 'All', kln, jw, work, 1+iroffhh, 1+icoffhh,
1100  $ deschh, h, ltop, kwtop, desch )
1101 *
1102 * Update horizontal slab in H.
1103 *
1104  IF( wantt ) THEN
1105  kln = n-kbot
1106  iroffhh = mod( kwtop-1, nb )
1107  icoffhh = mod( kbot, nb )
1108  hhrsrc = indxg2p( kwtop, nb, myrow, desch(rsrc_), nprow )
1109  hhcsrc = indxg2p( kbot+1, nb, mycol, desch(csrc_), npcol )
1110  hhrows = numroc( jw+iroffhh, nb, myrow, hhrsrc, nprow )
1111  hhcols = numroc( kln+icoffhh, nb, mycol, hhcsrc, npcol )
1112  CALL descinit( deschh, jw+iroffhh, kln+icoffhh, nb, nb,
1113  $ hhrsrc, hhcsrc, ictxt, max(1, hhrows), ierr )
1114  CALL psgemm( 'Tr', 'No', jw, kln, jw, one, v,
1115  $ 1+iroffh, 1+iroffh, descv, h, kwtop, kbot+1,
1116  $ desch, zero, work, 1+iroffhh, 1+icoffhh, deschh )
1117  CALL pslacpy( 'All', jw, kln, work, 1+iroffhh, 1+icoffhh,
1118  $ deschh, h, kwtop, kbot+1, desch )
1119  END IF
1120 *
1121 * Update vertical slab in Z.
1122 *
1123  IF( wantz ) THEN
1124  kln = ihiz-iloz+1
1125  iroffzz = mod( iloz-1, nb )
1126  icoffzz = mod( kwtop-1, nb )
1127  zzrsrc = indxg2p( iloz, nb, myrow, descz(rsrc_), nprow )
1128  zzcsrc = indxg2p( kwtop, nb, mycol, descz(csrc_), npcol )
1129  zzrows = numroc( kln+iroffzz, nb, myrow, zzrsrc, nprow )
1130  zzcols = numroc( jw+icoffzz, nb, mycol, zzcsrc, npcol )
1131  CALL descinit( desczz, kln+iroffzz, jw+icoffzz, nb, nb,
1132  $ zzrsrc, zzcsrc, ictxt, max(1, zzrows), ierr )
1133  CALL psgemm( 'No', 'No', kln, jw, jw, one, z, iloz,
1134  $ kwtop, descz, v, 1+iroffh, 1+iroffh, descv,
1135  $ zero, work, 1+iroffzz, 1+icoffzz, desczz )
1136  CALL pslacpy( 'All', kln, jw, work, 1+iroffzz, 1+icoffzz,
1137  $ desczz, z, iloz, kwtop, descz )
1138  END IF
1139  END IF
1140 *
1141 * Return the number of deflations (ND) and the number of shifts (NS).
1142 * (Subtracting INFQR from the spike length takes care of the case of
1143 * a rare QR failure while calculating eigenvalues of the deflation
1144 * window.)
1145 *
1146  nd = jw - ns
1147  ns = ns - infqr
1148 *
1149 * Return optimal workspace.
1150 *
1151  work( 1 ) = float( lwkopt )
1152  iwork( 1 ) = ilwkopt + nsel
1153 *
1154 * End of PSLAQR3
1155 *
1156  END
psgehrd
subroutine psgehrd(N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: psgehrd.f:3
pslaqr1
recursive subroutine pslaqr1(WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, ILOZ, IHIZ, Z, DESCZ, WORK, LWORK, IWORK, ILWORK, INFO)
Definition: pslaqr1.f:5
indxg2p
integer function indxg2p(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS)
Definition: indxg2p.f:2
max
#define max(A, B)
Definition: pcgemr.c:180
pslaqr0
recursive subroutine pslaqr0(WANTT, WANTZ, N, ILO, IHI, H, DESCH, WR, WI, ILOZ, IHIZ, Z, DESCZ, WORK, LWORK, IWORK, LIWORK, INFO, RECLEVEL)
Definition: pslaqr0.f:4
pslange
real function pslange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pslange.f:3
pselset
subroutine pselset(A, IA, JA, DESCA, ALPHA)
Definition: pselset.f:2
pilaenvx
integer function pilaenvx(ICTXT, ISPEC, NAME, OPTS, N1, N2, N3, N4)
Definition: pilaenvx.f:3
slamch
real function slamch(CMACH)
Definition: tools.f:867
pslarfg
subroutine pslarfg(N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, TAU)
Definition: pslarfg.f:3
pslacpy
subroutine pslacpy(UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, DESCB)
Definition: pslacpy.f:3
psormhr
subroutine psormhr(SIDE, TRANS, M, N, ILO, IHI, A, IA, JA, DESCA, TAU, C, IC, JC, DESCC, WORK, LWORK, INFO)
Definition: psormhr.f:3
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
psrot
subroutine psrot(N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY, CS, SN, WORK, LWORK, INFO)
Definition: psrot.f:3
pslarf
subroutine pslarf(SIDE, M, N, V, IV, JV, DESCV, INCV, TAU, C, IC, JC, DESCC, WORK)
Definition: pslarf.f:3
pselget
subroutine pselget(SCOPE, TOP, ALPHA, A, IA, JA, DESCA)
Definition: pselget.f:2
pstrord
subroutine pstrord(COMPQ, SELECT, PARA, N, T, IT, JT, DESCT, Q, IQ, JQ, DESCQ, WR, WI, M, WORK, LWORK, IWORK, LIWORK, INFO)
Definition: pstrord.f:4
pslamve
subroutine pslamve(UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, DESCB, DWORK)
Definition: pslamve.f:3
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
pslaset
subroutine pslaset(UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA)
Definition: psblastst.f:6863
min
#define min(A, B)
Definition: pcgemr.c:181
iceil
integer function iceil(INUM, IDENOM)
Definition: iceil.f:2
pslaqr3
recursive subroutine pslaqr3(WANTT, WANTZ, N, KTOP, KBOT, NW, H, DESCH, ILOZ, IHIZ, Z, DESCZ, NS, ND, SR, SI, V, DESCV, NH, T, DESCT, NV, WV, DESCW, WORK, LWORK, IWORK, LIWORK, RECLEVEL)
Definition: pslaqr3.f:6