ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdlaqr3.f
Go to the documentation of this file.
1  RECURSIVE SUBROUTINE pdlaqr3( WANTT, WANTZ, N, KTOP, KBOT, NW, H,
2  $ DESCH, ILOZ, IHIZ, Z, DESCZ, NS, ND,
3  $ SR, SI, V, DESCV, NH, T, DESCT, NV,
4  $ WV, DESCW, WORK, LWORK, IWORK,
5  $ LIWORK, RECLEVEL )
6 *
7 * Contribution from the Department of Computing Science and HPC2N,
8 * Umea University, Sweden
9 *
10 * -- ScaLAPACK auxiliary routine (version 2.0.1) --
11 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
12 * Univ. of Colorado Denver and University of California, Berkeley.
13 * January, 2012
14 *
15  IMPLICIT NONE
16 *
17 * .. Scalar Arguments ..
18  INTEGER ihiz, iloz, kbot, ktop, lwork, n, nd, nh, ns,
19  $ nv, nw, liwork, reclevel
20  LOGICAL wantt, wantz
21 * ..
22 * .. Array Arguments ..
23  INTEGER desch( * ), descz( * ), desct( * ), descv( * ),
24  $ descw( * ), iwork( * )
25  DOUBLE PRECISION h( * ), si( kbot ), sr( kbot ), t( * ),
26  $ v( * ), work( * ), wv( * ),
27  $ z( * )
28 * ..
29 *
30 * Purpose
31 * =======
32 *
33 * Aggressive early deflation:
34 *
35 * This subroutine accepts as input an upper Hessenberg matrix H and
36 * performs an orthogonal similarity transformation designed to detect
37 * and deflate fully converged eigenvalues from a trailing principal
38 * submatrix. On output H has been overwritten by a new Hessenberg
39 * matrix that is a perturbation of an orthogonal similarity
40 * transformation of H. It is to be hoped that the final version of H
41 * has many zero subdiagonal entries.
42 *
43 * Notes
44 * =====
45 *
46 * Each global data object is described by an associated description
47 * vector. This vector stores the information required to establish
48 * the mapping between an object element and its corresponding process
49 * and memory location.
50 *
51 * Let A be a generic term for any 2D block cyclicly distributed array.
52 * Such a global array has an associated description vector DESCA.
53 * In the following comments, the character _ should be read as
54 * "of the global array".
55 *
56 * NOTATION STORED IN EXPLANATION
57 * --------------- -------------- --------------------------------------
58 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
59 * DTYPE_A = 1.
60 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
61 * the BLACS process grid A is distribu-
62 * ted over. The context itself is glo-
63 * bal, but the handle (the integer
64 * value) may vary.
65 * M_A (global) DESCA( M_ ) The number of rows in the global
66 * array A.
67 * N_A (global) DESCA( N_ ) The number of columns in the global
68 * array A.
69 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
70 * the rows of the array.
71 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
72 * the columns of the array.
73 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
74 * row of the array A is distributed.
75 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
76 * first column of the array A is
77 * distributed.
78 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
79 * array. LLD_A >= MAX(1,LOCr(M_A)).
80 *
81 * Let K be the number of rows or columns of a distributed matrix,
82 * and assume that its process grid has dimension p x q.
83 * LOCr( K ) denotes the number of elements of K that a process
84 * would receive if K were distributed over the p processes of its
85 * process column.
86 * Similarly, LOCc( K ) denotes the number of elements of K that a
87 * process would receive if K were distributed over the q processes of
88 * its process row.
89 * The values of LOCr() and LOCc() may be determined via a call to the
90 * ScaLAPACK tool function, NUMROC:
91 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
92 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
93 * An upper bound for these quantities may be computed by:
94 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
95 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
96 *
97 * Arguments
98 * =========
99 *
100 * WANTT (global input) LOGICAL
101 * If .TRUE., then the Hessenberg matrix H is fully updated
102 * so that the quasi-triangular Schur factor may be
103 * computed (in cooperation with the calling subroutine).
104 * If .FALSE., then only enough of H is updated to preserve
105 * the eigenvalues.
106 *
107 * WANTZ (global input) LOGICAL
108 * If .TRUE., then the orthogonal matrix Z is updated so
109 * so that the orthogonal Schur factor may be computed
110 * (in cooperation with the calling subroutine).
111 * If .FALSE., then Z is not referenced.
112 *
113 * N (global input) INTEGER
114 * The order of the matrix H and (if WANTZ is .TRUE.) the
115 * order of the orthogonal matrix Z.
116 *
117 * KTOP (global input) INTEGER
118 * It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
119 * KBOT and KTOP together determine an isolated block
120 * along the diagonal of the Hessenberg matrix.
121 *
122 * KBOT (global input) INTEGER
123 * It is assumed without a check that either
124 * KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together
125 * determine an isolated block along the diagonal of the
126 * Hessenberg matrix.
127 *
128 * NW (global input) INTEGER
129 * Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1).
130 *
131 * H (local input/output) DOUBLE PRECISION array, dimension
132 * (DESCH(LLD_),*)
133 * On input the initial N-by-N section of H stores the
134 * Hessenberg matrix undergoing aggressive early deflation.
135 * On output H has been transformed by an orthogonal
136 * similarity transformation, perturbed, and the returned
137 * to Hessenberg form that (it is to be hoped) has some
138 * zero subdiagonal entries.
139 *
140 * DESCH (global and local input) INTEGER array of dimension DLEN_.
141 * The array descriptor for the distributed matrix H.
142 *
143 * ILOZ (global input) INTEGER
144 * IHIZ (global input) INTEGER
145 * Specify the rows of Z to which transformations must be
146 * applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
147 *
148 * Z (input/output) DOUBLE PRECISION array, dimension
149 * (DESCH(LLD_),*)
150 * IF WANTZ is .TRUE., then on output, the orthogonal
151 * similarity transformation mentioned above has been
152 * accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
153 * If WANTZ is .FALSE., then Z is unreferenced.
154 *
155 * DESCZ (global and local input) INTEGER array of dimension DLEN_.
156 * The array descriptor for the distributed matrix Z.
157 *
158 * NS (global output) INTEGER
159 * The number of unconverged (ie approximate) eigenvalues
160 * returned in SR and SI that may be used as shifts by the
161 * calling subroutine.
162 *
163 * ND (global output) INTEGER
164 * The number of converged eigenvalues uncovered by this
165 * subroutine.
166 *
167 * SR (global output) DOUBLE PRECISION array, dimension KBOT
168 * SI (global output) DOUBLE PRECISION array, dimension KBOT
169 * On output, the real and imaginary parts of approximate
170 * eigenvalues that may be used for shifts are stored in
171 * SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
172 * SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
173 * The real and imaginary parts of converged eigenvalues
174 * are stored in SR(KBOT-ND+1) through SR(KBOT) and
175 * SI(KBOT-ND+1) through SI(KBOT), respectively.
176 *
177 * V (global workspace) DOUBLE PRECISION array, dimension
178 * (DESCV(LLD_),*)
179 * An NW-by-NW distributed work array.
180 *
181 * DESCV (global and local input) INTEGER array of dimension DLEN_.
182 * The array descriptor for the distributed matrix V.
183 *
184 * NH (input) INTEGER scalar
185 * The number of columns of T. NH.GE.NW.
186 *
187 * T (global workspace) DOUBLE PRECISION array, dimension
188 * (DESCV(LLD_),*)
189 *
190 * DESCT (global and local input) INTEGER array of dimension DLEN_.
191 * The array descriptor for the distributed matrix T.
192 *
193 * NV (global input) INTEGER
194 * The number of rows of work array WV available for
195 * workspace. NV.GE.NW.
196 *
197 * WV (global workspace) DOUBLE PRECISION array, dimension
198 * (DESCW(LLD_),*)
199 *
200 * DESCW (global and local input) INTEGER array of dimension DLEN_.
201 * The array descriptor for the distributed matrix WV.
202 *
203 * WORK (local workspace) DOUBLE PRECISION array, dimension LWORK.
204 * On exit, WORK(1) is set to an estimate of the optimal value
205 * of LWORK for the given values of N, NW, KTOP and KBOT.
206 *
207 * LWORK (local input) INTEGER
208 * The dimension of the work array WORK. LWORK = 2*NW
209 * suffices, but greater efficiency may result from larger
210 * values of LWORK.
211 *
212 * If LWORK = -1, then a workspace query is assumed; PDLAQR3
213 * only estimates the optimal workspace size for the given
214 * values of N, NW, KTOP and KBOT. The estimate is returned
215 * in WORK(1). No error message related to LWORK is issued
216 * by XERBLA. Neither H nor Z are accessed.
217 *
218 * IWORK (local workspace) INTEGER array, dimension (LIWORK)
219 *
220 * LIWORK (local input) INTEGER
221 * The length of the workspace array IWORK
222 *
223 * ================================================================
224 * Based on contributions by
225 * Robert Granat and Meiyue Shao,
226 * Department of Computing Science and HPC2N,
227 * Umea University, Sweden
228 *
229 * ================================================================
230 * .. Parameters ..
231  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
232  $ lld_, mb_, m_, nb_, n_, rsrc_
233  INTEGER recmax
234  LOGICAL sortgrad
235  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
236  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
237  $ rsrc_ = 7, csrc_ = 8, lld_ = 9, recmax = 3,
238  $ sortgrad = .false. )
239  DOUBLE PRECISION zero, one
240  PARAMETER ( zero = 0.0d0, one = 1.0d0 )
241 * ..
242 * .. Local Scalars ..
243  DOUBLE PRECISION aa, bb, beta, cc, cs, dd, evi, evk, foo, s,
244  $ safmax, safmin, smlnum, sn, tau, ulp,
245  $ elem, elem1, elem2, elem3, r1, anorm, rnorm,
246  $ resaed
247  INTEGER i, ifst, ilst, info, infqr, j, jw, k, kcol,
248  $ kend, kln, krow, kwtop, ltop, lwk1, lwk2, lwk3,
249  $ lwkopt, nmin, lldh, lldz, lldt, lldv, lldwv,
250  $ ictxt, nprow, nmax, npcol, myrow, mycol, nb,
251  $ iroffh, m, rcols, taurows, rrows, taucols,
252  $ itau, ir, ipw, nprocs, mloc, iroffhh,
253  $ icoffhh, hhrsrc, hhcsrc, hhrows, hhcols,
254  $ iroffzz, icoffzz, zzrsrc, zzcsrc, zzrows,
255  $ zzcols, ierr, tzrows0, tzcols0, ierr0, ipt0,
256  $ ipz0, ipw0, nb2, round, lilst, kk, lilst0,
257  $ iwrk1, rsrc, csrc, lwk4, lwk5, iwrk2, lwk6,
258  $ lwk7, lwk8, ilwkopt, tzrows, tzcols, nsel,
259  $ npmin, ictxt_new, myrow_new, mycol_new
260  LOGICAL bulge, sorted, lquery
261 * ..
262 * .. Local Arrays ..
263  INTEGER par( 6 ), descr( dlen_ ),
264  $ desctau( dlen_ ), deschh( dlen_ ),
265  $ desczz( dlen_ ), desctz0( dlen_ ),
266  $ pmap( 64*64 )
267  DOUBLE PRECISION ddum( 1 )
268 * ..
269 * .. External Functions ..
270  DOUBLE PRECISION dlamch, pdlange
271  INTEGER pilaenvx, numroc, indxg2p, iceil, blacs_pnum
272  EXTERNAL dlamch, pilaenvx, numroc, indxg2p, pdlange,
273  $ mpi_wtime, iceil, blacs_pnum
274 * ..
275 * .. External Subroutines ..
276  EXTERNAL pdcopy, pdgehrd, pdgemm, dlabad, pdlacpy,
277  $ pdlaqr1, dlanv2, pdlaqr0, pdlarf, pdlarfg,
279  $ pdlamve, blacs_gridinfo, blacs_gridmap,
280  $ blacs_gridexit, pdgemr2d
281 * ..
282 * .. Intrinsic Functions ..
283  INTRINSIC abs, dble, int, max, min, sqrt
284 * ..
285 * .. Executable Statements ..
286  ictxt = desch( ctxt_ )
287  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
288  nprocs = nprow*npcol
289 *
290 * Extract local leading dimensions, blockfactors, offset for
291 * keeping the alignment requirements and size of deflation window.
292 *
293  lldh = desch( lld_ )
294  lldz = descz( lld_ )
295  lldt = desct( lld_ )
296  lldv = descv( lld_ )
297  lldwv = descw( lld_ )
298  nb = desch( mb_ )
299  iroffh = mod( ktop - 1, nb )
300  jw = min( nw, kbot-ktop+1 )
301  nsel = nb+jw
302 *
303 * Extract environment variables for parallel eigenvalue reordering.
304 *
305  par(1) = pilaenvx(ictxt, 17, 'PDLAQR3', 'SV', jw, nb, -1, -1)
306  par(2) = pilaenvx(ictxt, 18, 'PDLAQR3', 'SV', jw, nb, -1, -1)
307  par(3) = pilaenvx(ictxt, 19, 'PDLAQR3', 'SV', jw, nb, -1, -1)
308  par(4) = pilaenvx(ictxt, 20, 'PDLAQR3', 'SV', jw, nb, -1, -1)
309  par(5) = pilaenvx(ictxt, 21, 'PDLAQR3', 'SV', jw, nb, -1, -1)
310  par(6) = pilaenvx(ictxt, 22, 'PDLAQR3', 'SV', jw, nb, -1, -1)
311 *
312 * Check if workspace query.
313 *
314  lquery = lwork.EQ.-1 .OR. liwork.EQ.-1
315 *
316 * Estimate optimal workspace.
317 *
318  IF( jw.LE.2 ) THEN
319  lwkopt = 1
320  ELSE
321 *
322 * Workspace query calls to PDGEHRD and PDORMHR.
323 *
324  taurows = numroc( 1, 1, mycol, descv(rsrc_), nprow )
325  taucols = numroc( jw+iroffh, nb, mycol, descv(csrc_),
326  $ npcol )
327  CALL pdgehrd( jw, 1, jw, t, 1, 1, desct, work, work, -1,
328  $ info )
329  lwk1 = int( work( 1 ) ) + taurows*taucols
330 *
331 * Workspace query call to PDORMHR.
332 *
333  CALL pdormhr( 'Right', 'No', jw, jw, 1, jw, t, 1, 1, desct,
334  $ work, v, 1, 1, descv, work, -1, info )
335  lwk2 = int( work( 1 ) )
336 *
337 * Workspace query call to PDLAQR0.
338 *
339  nmin = pilaenvx( ictxt, 12, 'PDLAQR3', 'SV', jw, 1, jw, lwork )
340  nmax = ( n-1 ) / 3
341  IF( jw+iroffh.GT.nmin .AND. jw+iroffh.LE.nmax
342  $ .AND. reclevel.LT.recmax ) THEN
343  CALL pdlaqr0( .true., .true., jw+iroffh, 1+iroffh,
344  $ jw+iroffh, t, desct, sr, si, 1, jw, v, descv,
345  $ work, -1, iwork, liwork-nsel, infqr,
346  $ reclevel+1 )
347  lwk3 = int( work( 1 ) )
348  iwrk1 = iwork( 1 )
349  ELSE
350  rsrc = desct( rsrc_ )
351  csrc = desct( csrc_ )
352  desct( rsrc_ ) = 0
353  desct( csrc_ ) = 0
354  CALL pdlaqr1( .true., .true., jw+iroffh, 1, jw+iroffh, t,
355  $ desct, sr, si, 1, jw+iroffh, v, descv, work, -1,
356  $ iwork, liwork-nsel, infqr )
357  desct( rsrc_ ) = rsrc
358  desct( csrc_ ) = csrc
359  lwk3 = int( work( 1 ) )
360  iwrk1 = iwork( 1 )
361  END IF
362 *
363 * Workspace in case of alignment problems.
364 *
365  tzrows0 = numroc( jw+iroffh, nb, myrow, 0, nprow )
366  tzcols0 = numroc( jw+iroffh, nb, mycol, 0, npcol )
367  lwk4 = 2 * tzrows0*tzcols0
368 *
369 * Workspace check for reordering.
370 *
371  CALL pdtrord( 'Vectors', iwork, par, jw+iroffh, t, 1, 1,
372  $ desct, v, 1, 1, descv, ddum, ddum, mloc, work, -1,
373  $ iwork, liwork-nsel, info )
374  lwk5 = int( work( 1 ) )
375  iwrk2 = iwork( 1 )
376 *
377 * Extra workspace for reflecting back spike
378 * (workspace for PDLARF approximated for simplicity).
379 *
380  rrows = numroc( n+iroffh, nb, myrow, descv(rsrc_), nprow )
381  rcols = numroc( 1, 1, mycol, descv(csrc_), npcol )
382  lwk6 = rrows*rcols + taurows*taucols +
383  $ 2*iceil(iceil(jw+iroffh,nb),nprow)*nb
384  $ *iceil(iceil(jw+iroffh,nb),npcol)*nb
385 *
386 * Extra workspace needed by PBLAS update calls
387 * (also estimated for simplicity).
388 *
389  lwk7 = max( iceil(iceil(jw,nb),nprow)*nb *
390  $ iceil(iceil(n-kbot,nb),npcol)*nb,
391  $ iceil(iceil(ihiz-iloz+1,nb),nprow)*nb *
392  $ iceil(iceil(jw,nb),npcol)*nb,
393  $ iceil(iceil(kbot-jw,nb),nprow)*nb *
394  $ iceil(iceil(jw,nb),npcol)*nb )
395 *
396 * Residual check workspace.
397 *
398  lwk8 = 0
399 *
400 * Optimal workspace.
401 *
402  lwkopt = max( lwk1, lwk2, lwk3+lwk4, lwk5, lwk6, lwk7, lwk8 )
403  ilwkopt = max( iwrk1, iwrk2 )
404  END IF
405 *
406 * Quick return in case of workspace query.
407 *
408  work( 1 ) = dble( lwkopt )
409 *
410 * IWORK(1:NSEL) is used as the array SELECT for PDTRORD.
411 *
412  iwork( 1 ) = ilwkopt + nsel
413  IF( lquery )
414  $ RETURN
415 *
416 * Nothing to do for an empty active block ...
417  ns = 0
418  nd = 0
419  IF( ktop.GT.kbot )
420  $ RETURN
421 * ... nor for an empty deflation window.
422 *
423  IF( nw.LT.1 )
424  $ RETURN
425 *
426 * Machine constants.
427 *
428  safmin = dlamch( 'SAFE MINIMUM' )
429  safmax = one / safmin
430  CALL dlabad( safmin, safmax )
431  ulp = dlamch( 'PRECISION' )
432  smlnum = safmin*( dble( n ) / ulp )
433 *
434 * Setup deflation window.
435 *
436  jw = min( nw, kbot-ktop+1 )
437  kwtop = kbot - jw + 1
438  IF( kwtop.EQ.ktop ) THEN
439  s = zero
440  ELSE
441  CALL pdelget( 'All', '1-Tree', s, h, kwtop, kwtop-1, desch )
442  END IF
443 *
444  IF( kbot.EQ.kwtop ) THEN
445 *
446 * 1-by-1 deflation window: not much to do.
447 *
448  CALL pdelget( 'All', '1-Tree', sr( kwtop ), h, kwtop, kwtop,
449  $ desch )
450  si( kwtop ) = zero
451  ns = 1
452  nd = 0
453  IF( abs( s ).LE.max( smlnum, ulp*abs( sr( kwtop ) ) ) )
454  $ THEN
455  ns = 0
456  nd = 1
457  IF( kwtop.GT.ktop )
458  $ CALL pdelset( h, kwtop, kwtop-1 , desch, zero )
459  END IF
460  RETURN
461  END IF
462 *
463  IF( kwtop.EQ.ktop .AND. kbot-kwtop.EQ.1 ) THEN
464 *
465 * 2-by-2 deflation window: a little more to do.
466 *
467  CALL pdelget( 'All', '1-Tree', aa, h, kwtop, kwtop, desch )
468  CALL pdelget( 'All', '1-Tree', bb, h, kwtop, kwtop+1, desch )
469  CALL pdelget( 'All', '1-Tree', cc, h, kwtop+1, kwtop, desch )
470  CALL pdelget( 'All', '1-Tree', dd, h, kwtop+1, kwtop+1, desch )
471  CALL dlanv2( aa, bb, cc, dd, sr(kwtop), si(kwtop),
472  $ sr(kwtop+1), si(kwtop+1), cs, sn )
473  ns = 0
474  nd = 2
475  IF( cc.EQ.zero ) THEN
476  i = kwtop
477  IF( i+2.LE.n .AND. wantt )
478  $ CALL pdrot( n-i-1, h, i, i+2, desch, desch(m_), h, i+1,
479  $ i+2, desch, desch(m_), cs, sn, work, lwork, info )
480  IF( i.GT.1 )
481  $ CALL pdrot( i-1, h, 1, i, desch, 1, h, 1, i+1, desch, 1,
482  $ cs, sn, work, lwork, info )
483  IF( wantz )
484  $ CALL pdrot( ihiz-iloz+1, z, iloz, i, descz, 1, z, iloz,
485  $ i+1, descz, 1, cs, sn, work, lwork, info )
486  CALL pdelset( h, i, i, desch, aa )
487  CALL pdelset( h, i, i+1, desch, bb )
488  CALL pdelset( h, i+1, i, desch, cc )
489  CALL pdelset( h, i+1, i+1, desch, dd )
490  END IF
491  work( 1 ) = dble( lwkopt )
492  RETURN
493  END IF
494 *
495 * Calculate new value for IROFFH in case deflation window
496 * was adjusted.
497 *
498  iroffh = mod( kwtop - 1, nb )
499 *
500 * Adjust number of rows and columns of T matrix descriptor
501 * to prepare for call to PDBTRORD.
502 *
503  desct( m_ ) = jw+iroffh
504  desct( n_ ) = jw+iroffh
505 *
506 * Convert to spike-triangular form. (In case of a rare QR failure,
507 * this routine continues to do aggressive early deflation using that
508 * part of the deflation window that converged using INFQR here and
509 * there to keep track.)
510 *
511 * Copy the trailing submatrix to the working space.
512 *
513  CALL pdlaset( 'All', iroffh, jw+iroffh, zero, one, t, 1, 1,
514  $ desct )
515  CALL pdlaset( 'All', jw, iroffh, zero, zero, t, 1+iroffh, 1,
516  $ desct )
517  CALL pdlacpy( 'All', 1, jw, h, kwtop, kwtop, desch, t, 1+iroffh,
518  $ 1+iroffh, desct )
519  CALL pdlacpy( 'Upper', jw-1, jw-1, h, kwtop+1, kwtop, desch, t,
520  $ 1+iroffh+1, 1+iroffh, desct )
521  IF( jw.GT.2 )
522  $ CALL pdlaset( 'Lower', jw-2, jw-2, zero, zero, t, 1+iroffh+2,
523  $ 1+iroffh, desct )
524  CALL pdlacpy( 'All', jw-1, 1, h, kwtop+1, kwtop+jw-1, desch, t,
525  $ 1+iroffh+1, 1+iroffh+jw-1, desct )
526 *
527 * Initialize the working orthogonal matrix.
528 *
529  CALL pdlaset( 'All', jw+iroffh, jw+iroffh, zero, one, v, 1, 1,
530  $ descv )
531 *
532 * Compute the Schur form of T.
533 *
534  npmin = pilaenvx( ictxt, 23, 'PDLAQR3', 'SV', jw, nb, nprow,
535  $ npcol )
536  nmin = pilaenvx( ictxt, 12, 'PDLAQR3', 'SV', jw, 1, jw, lwork )
537  nmax = ( n-1 ) / 3
538  IF( min(nprow, npcol).LE.npmin+1 .OR. reclevel.GE.1 ) THEN
539 *
540 * The AED window is large enough.
541 * Compute the Schur decomposition with all processors.
542 *
543  IF( jw+iroffh.GT.nmin .AND. jw+iroffh.LE.nmax
544  $ .AND. reclevel.LT.recmax ) THEN
545  CALL pdlaqr0( .true., .true., jw+iroffh, 1+iroffh,
546  $ jw+iroffh, t, desct, sr( kwtop-iroffh ),
547  $ si( kwtop-iroffh ), 1+iroffh, jw+iroffh, v, descv,
548  $ work, lwork, iwork(nsel+1), liwork-nsel, infqr,
549  $ reclevel+1 )
550  ELSE
551  IF( desct(rsrc_).EQ.0 .AND. desct(csrc_).EQ.0 ) THEN
552  IF( jw+iroffh.GT.desct( mb_ ) ) THEN
553  CALL pdlaqr1( .true., .true., jw+iroffh, 1,
554  $ jw+iroffh, t, desct, sr( kwtop-iroffh ),
555  $ si( kwtop-iroffh ), 1, jw+iroffh, v,
556  $ descv, work, lwork, iwork(nsel+1), liwork-nsel,
557  $ infqr )
558  ELSE
559  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
560  CALL dlahqr( .true., .true., jw+iroffh, 1+iroffh,
561  $ jw+iroffh, t, desct(lld_),
562  $ sr( kwtop-iroffh ), si( kwtop-iroffh ),
563  $ 1+iroffh, jw+iroffh, v, descv(lld_), infqr )
564  ELSE
565  infqr = 0
566  END IF
567  IF( nprocs.GT.1 )
568  $ CALL igamn2d( ictxt, 'All', '1-Tree', 1, 1, infqr,
569  $ 1, -1, -1, -1, -1, -1 )
570  END IF
571  ELSEIF( jw+iroffh.LE.desct( mb_ ) ) THEN
572  IF( myrow.EQ.desct(rsrc_) .AND. mycol.EQ.desct(csrc_) )
573  $ THEN
574  CALL dlahqr( .true., .true., jw+iroffh, 1+iroffh,
575  $ jw+iroffh, t, desct(lld_),
576  $ sr( kwtop-iroffh ), si( kwtop-iroffh ),
577  $ 1+iroffh, jw+iroffh, v, descv(lld_), infqr )
578  ELSE
579  infqr = 0
580  END IF
581  IF( nprocs.GT.1 )
582  $ CALL igamn2d( ictxt, 'All', '1-Tree', 1, 1, infqr,
583  $ 1, -1, -1, -1, -1, -1 )
584  ELSE
585  tzrows0 = numroc( jw+iroffh, nb, myrow, 0, nprow )
586  tzcols0 = numroc( jw+iroffh, nb, mycol, 0, npcol )
587  CALL descinit( desctz0, jw+iroffh, jw+iroffh, nb, nb, 0,
588  $ 0, ictxt, max(1,tzrows0), ierr0 )
589  ipt0 = 1
590  ipz0 = ipt0 + max(1,tzrows0)*tzcols0
591  ipw0 = ipz0 + max(1,tzrows0)*tzcols0
592  CALL pdlamve( 'All', jw+iroffh, jw+iroffh, t, 1, 1,
593  $ desct, work(ipt0), 1, 1, desctz0, work(ipw0) )
594  CALL pdlaset( 'All', jw+iroffh, jw+iroffh, zero, one,
595  $ work(ipz0), 1, 1, desctz0 )
596  CALL pdlaqr1( .true., .true., jw+iroffh, 1,
597  $ jw+iroffh, work(ipt0), desctz0,
598  $ sr( kwtop-iroffh ), si( kwtop-iroffh ),
599  $ 1, jw+iroffh, work(ipz0),
600  $ desctz0, work(ipw0), lwork-ipw0+1, iwork(nsel+1),
601  $ liwork-nsel, infqr )
602  CALL pdlamve( 'All', jw+iroffh, jw+iroffh, work(ipt0), 1,
603  $ 1, desctz0, t, 1, 1, desct, work(ipw0) )
604  CALL pdlamve( 'All', jw+iroffh, jw+iroffh, work(ipz0), 1,
605  $ 1, desctz0, v, 1, 1, descv, work(ipw0) )
606  END IF
607  END IF
608  ELSE
609 *
610 * The AED window is too small.
611 * Redistribute the AED window to a subgrid
612 * and do the computation on the subgrid.
613 *
614  ictxt_new = ictxt
615  DO 20 i = 0, npmin-1
616  DO 10 j = 0, npmin-1
617  pmap( j+1+i*npmin ) = blacs_pnum( ictxt, i, j )
618  10 CONTINUE
619  20 CONTINUE
620  CALL blacs_gridmap( ictxt_new, pmap, npmin, npmin, npmin )
621  CALL blacs_gridinfo( ictxt_new, npmin, npmin, myrow_new,
622  $ mycol_new )
623  IF( myrow.GE.npmin .OR. mycol.GE.npmin ) ictxt_new = -1
624  IF( ictxt_new.GE.0 ) THEN
625  tzrows0 = numroc( jw, nb, myrow_new, 0, npmin )
626  tzcols0 = numroc( jw, nb, mycol_new, 0, npmin )
627  CALL descinit( desctz0, jw, jw, nb, nb, 0,
628  $ 0, ictxt_new, max(1,tzrows0), ierr0 )
629  ipt0 = 1
630  ipz0 = ipt0 + max(1,tzrows0)*max(1,tzcols0)
631  ipw0 = ipz0 + max(1,tzrows0)*max(1,tzcols0)
632  ELSE
633  ipt0 = 1
634  ipz0 = 2
635  ipw0 = 3
636  desctz0( ctxt_ ) = -1
637  infqr = 0
638  END IF
639  CALL pdgemr2d( jw, jw, t, 1+iroffh, 1+iroffh, desct,
640  $ work(ipt0), 1, 1, desctz0, ictxt )
641  IF( ictxt_new.GE.0 ) THEN
642  CALL pdlaset( 'All', jw, jw, zero, one, work(ipz0), 1, 1,
643  $ desctz0 )
644  nmin = pilaenvx( ictxt_new, 12, 'PDLAQR3', 'SV', jw, 1, jw,
645  $ lwork )
646  IF( jw.GT.nmin .AND. jw.LE.nmax .AND. reclevel.LT.1 ) THEN
647  CALL pdlaqr0( .true., .true., jw, 1, jw, work(ipt0),
648  $ desctz0, sr( kwtop ), si( kwtop ), 1, jw,
649  $ work(ipz0), desctz0, work(ipw0), lwork-ipw0+1,
650  $ iwork(nsel+1), liwork-nsel, infqr,
651  $ reclevel+1 )
652  ELSE
653  CALL pdlaqr1( .true., .true., jw, 1, jw, work(ipt0),
654  $ desctz0, sr( kwtop ), si( kwtop ), 1, jw,
655  $ work(ipz0), desctz0, work(ipw0), lwork-ipw0+1,
656  $ iwork(nsel+1), liwork-nsel, infqr )
657  END IF
658  END IF
659  CALL pdgemr2d( jw, jw, work(ipt0), 1, 1, desctz0, t, 1+iroffh,
660  $ 1+iroffh, desct, ictxt )
661  CALL pdgemr2d( jw, jw, work(ipz0), 1, 1, desctz0, v, 1+iroffh,
662  $ 1+iroffh, descv, ictxt )
663  IF( ictxt_new.GE.0 )
664  $ CALL blacs_gridexit( ictxt_new )
665  IF( myrow+mycol.GT.0 ) THEN
666  DO 40 j = 0, jw-1
667  sr( kwtop+j ) = zero
668  si( kwtop+j ) = zero
669  40 CONTINUE
670  END IF
671  CALL igamn2d( ictxt, 'All', '1-Tree', 1, 1, infqr, 1, -1, -1,
672  $ -1, -1, -1 )
673  CALL dgsum2d( ictxt, 'All', ' ', jw, 1, sr(kwtop), jw, -1, -1 )
674  CALL dgsum2d( ictxt, 'All', ' ', jw, 1, si(kwtop), jw, -1, -1 )
675  END IF
676 *
677 * Adjust INFQR for offset from block border in submatrices.
678 *
679  IF( infqr.NE.0 )
680  $ infqr = infqr - iroffh
681 *
682 * PDTRORD needs a clean margin near the diagonal.
683 *
684  DO 50 j = 1, jw - 3
685  CALL pdelset( t, j+2, j, desct, zero )
686  CALL pdelset( t, j+3, j, desct, zero )
687  50 CONTINUE
688  IF( jw.GT.2 )
689  $ CALL pdelset( t, jw, jw-2, desct, zero )
690 *
691 * Check local residual for AED Schur decomposition.
692 *
693  resaed = 0.0d+00
694 *
695 * Clean up the array SELECT for PDTRORD.
696 *
697  DO 60 j = 1, nsel
698  iwork( j ) = 0
699  60 CONTINUE
700 *
701 * Set local M counter to zero.
702 *
703  mloc = 0
704 *
705 * Outer deflation detection loop (label 80).
706 * In this loop a bunch of undeflatable eigenvalues
707 * are moved simultaneously.
708 *
709  DO 70 j = 1, iroffh + infqr
710  iwork( j ) = 1
711  70 CONTINUE
712 *
713  ns = jw
714  ilst = infqr + 1 + iroffh
715  IF( ilst.GT.1 ) THEN
716  CALL pdelget( 'All', '1-Tree', elem, t, ilst, ilst-1, desct )
717  bulge = elem.NE.zero
718  IF( bulge ) ilst = ilst+1
719  END IF
720 *
721  80 CONTINUE
722  IF( ilst.LE.ns+iroffh ) THEN
723 *
724 * Find the top-left corner of the local window.
725 *
726  lilst = max(ilst,ns+iroffh-nb+1)
727  IF( lilst.GT.1 ) THEN
728  CALL pdelget( 'All', '1-Tree', elem, t, lilst, lilst-1,
729  $ desct )
730  bulge = elem.NE.zero
731  IF( bulge ) lilst = lilst+1
732  END IF
733 *
734 * Lock all eigenvalues outside the local window.
735 *
736  DO 90 j = iroffh+1, lilst-1
737  iwork( j ) = 1
738  90 CONTINUE
739  lilst0 = lilst
740 *
741 * Inner deflation detection loop (label 100).
742 * In this loop, the undeflatable eigenvalues are moved to the
743 * top-left corner of the local window.
744 *
745  100 CONTINUE
746  IF( lilst.LE.ns+iroffh ) THEN
747  IF( ns.EQ.1 ) THEN
748  bulge = .false.
749  ELSE
750  CALL pdelget( 'All', '1-Tree', elem, t, ns+iroffh,
751  $ ns+iroffh-1, desct )
752  bulge = elem.NE.zero
753  END IF
754 *
755 * Small spike tip test for deflation.
756 *
757  IF( .NOT.bulge ) THEN
758 *
759 * Real eigenvalue.
760 *
761  CALL pdelget( 'All', '1-Tree', elem, t, ns+iroffh,
762  $ ns+iroffh, desct )
763  foo = abs( elem )
764  IF( foo.EQ.zero )
765  $ foo = abs( s )
766  CALL pdelget( 'All', '1-Tree', elem, v, 1+iroffh,
767  $ ns+iroffh, descv )
768  IF( abs( s*elem ).LE.max( smlnum, ulp*foo ) ) THEN
769 *
770 * Deflatable.
771 *
772  ns = ns - 1
773  ELSE
774 *
775 * Undeflatable: move it up out of the way.
776 *
777  ifst = ns
778  DO 110 j = lilst, jw+iroffh
779  iwork( j ) = 0
780  110 CONTINUE
781  iwork( ifst+iroffh ) = 1
782  CALL pdtrord( 'Vectors', iwork, par, jw+iroffh, t, 1,
783  $ 1, desct, v, 1, 1, descv, work,
784  $ work(jw+iroffh+1), mloc,
785  $ work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
786  $ iwork(nsel+1), liwork-nsel, info )
787 *
788 * Adjust the array SELECT explicitly so that it does not
789 * rely on the output of PDTRORD.
790 *
791  iwork( ifst+iroffh ) = 0
792  iwork( lilst ) = 1
793  lilst = lilst + 1
794 *
795 * In case of a rare exchange failure, adjust the
796 * pointers ILST and LILST to the current place to avoid
797 * unexpected behaviors.
798 *
799  IF( info.NE.0 ) THEN
800  lilst = max(info, lilst)
801  ilst = max(info, ilst)
802  END IF
803  END IF
804  ELSE
805 *
806 * Complex conjugate pair.
807 *
808  CALL pdelget( 'All', '1-Tree', elem1, t, ns+iroffh,
809  $ ns+iroffh, desct )
810  CALL pdelget( 'All', '1-Tree', elem2, t, ns+iroffh,
811  $ ns+iroffh-1, desct )
812  CALL pdelget( 'All', '1-Tree', elem3, t, ns+iroffh-1,
813  $ ns+iroffh, desct )
814  foo = abs( elem1 ) + sqrt( abs( elem2 ) )*
815  $ sqrt( abs( elem3 ) )
816  IF( foo.EQ.zero )
817  $ foo = abs( s )
818  CALL pdelget( 'All', '1-Tree', elem1, v, 1+iroffh,
819  $ ns+iroffh, descv )
820  CALL pdelget( 'All', '1-Tree', elem2, v, 1+iroffh,
821  $ ns+iroffh-1, descv )
822  IF( max( abs( s*elem1 ), abs( s*elem2 ) ).LE.
823  $ max( smlnum, ulp*foo ) ) THEN
824 *
825 * Deflatable.
826 *
827  ns = ns - 2
828  ELSE
829 *
830 * Undeflatable: move them up out of the way.
831 *
832  ifst = ns
833  DO 120 j = lilst, jw+iroffh
834  iwork( j ) = 0
835  120 CONTINUE
836  iwork( ifst+iroffh ) = 1
837  iwork( ifst+iroffh-1 ) = 1
838  CALL pdtrord( 'Vectors', iwork, par, jw+iroffh, t, 1,
839  $ 1, desct, v, 1, 1, descv, work,
840  $ work(jw+iroffh+1), mloc,
841  $ work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
842  $ iwork(nsel+1), liwork-nsel, info )
843 *
844 * Adjust the array SELECT explicitly so that it does not
845 * rely on the output of PDTRORD.
846 *
847  iwork( ifst+iroffh ) = 0
848  iwork( ifst+iroffh-1 ) = 0
849  iwork( lilst ) = 1
850  iwork( lilst+1 ) = 1
851  lilst = lilst + 2
852 *
853 * In case of a rare exchange failure, adjust the
854 * pointers ILST and LILST to the current place to avoid
855 * unexpected behaviors.
856 *
857  IF( info.NE.0 ) THEN
858  lilst = max(info, lilst)
859  ilst = max(info, ilst)
860  END IF
861  END IF
862  END IF
863 *
864 * End of inner deflation detection loop.
865 *
866  GO TO 100
867  END IF
868 *
869 * Unlock the eigenvalues outside the local window.
870 * Then undeflatable eigenvalues are moved to the proper position.
871 *
872  DO 130 j = ilst, lilst0-1
873  iwork( j ) = 0
874  130 CONTINUE
875  CALL pdtrord( 'Vectors', iwork, par, jw+iroffh, t, 1, 1,
876  $ desct, v, 1, 1, descv, work, work(jw+iroffh+1),
877  $ m, work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
878  $ iwork(nsel+1), liwork-nsel, info )
879  ilst = m + 1
880 *
881 * In case of a rare exchange failure, adjust the pointer ILST to
882 * the current place to avoid unexpected behaviors.
883 *
884  IF( info.NE.0 )
885  $ ilst = max(info, ilst)
886 *
887 * End of outer deflation detection loop.
888 *
889  GO TO 80
890  END IF
891 
892 *
893 * Post-reordering step: copy output eigenvalues to output.
894 *
895  CALL dcopy( jw, work(1+iroffh), 1, sr( kwtop ), 1 )
896  CALL dcopy( jw, work(jw+2*iroffh+1), 1, si( kwtop ), 1 )
897 *
898 * Check local residual for reordered AED Schur decomposition.
899 *
900  resaed = 0.0d+00
901 *
902 * Return to Hessenberg form.
903 *
904  IF( ns.EQ.0 )
905  $ s = zero
906 *
907  IF( ns.LT.jw .AND. sortgrad ) THEN
908 *
909 * Sorting diagonal blocks of T improves accuracy for
910 * graded matrices. Bubble sort deals well with exchange
911 * failures. Eigenvalues/shifts from T are also restored.
912 *
913  round = 0
914  sorted = .false.
915  i = ns + 1 + iroffh
916  140 CONTINUE
917  IF( sorted )
918  $ GO TO 180
919  sorted = .true.
920  round = round + 1
921 *
922  kend = i - 1
923  i = infqr + 1 + iroffh
924  IF( i.EQ.ns+iroffh ) THEN
925  k = i + 1
926  ELSE IF( si( kwtop-iroffh + i-1 ).EQ.zero ) THEN
927  k = i + 1
928  ELSE
929  k = i + 2
930  END IF
931  150 CONTINUE
932  IF( k.LE.kend ) THEN
933  IF( k.EQ.i+1 ) THEN
934  evi = abs( sr( kwtop-iroffh+i-1 ) )
935  ELSE
936  evi = abs( sr( kwtop-iroffh+i-1 ) ) +
937  $ abs( si( kwtop-iroffh+i-1 ) )
938  END IF
939 *
940  IF( k.EQ.kend ) THEN
941  evk = abs( sr( kwtop-iroffh+k-1 ) )
942  ELSEIF( si( kwtop-iroffh+k-1 ).EQ.zero ) THEN
943  evk = abs( sr( kwtop-iroffh+k-1 ) )
944  ELSE
945  evk = abs( sr( kwtop-iroffh+k-1 ) ) +
946  $ abs( si( kwtop-iroffh+k-1 ) )
947  END IF
948 *
949  IF( evi.GE.evk ) THEN
950  i = k
951  ELSE
952  mloc = 0
953  sorted = .false.
954  ifst = i
955  ilst = k
956  DO 160 j = 1, i-1
957  iwork( j ) = 1
958  mloc = mloc + 1
959  160 CONTINUE
960  IF( k.EQ.i+2 ) THEN
961  iwork( i ) = 0
962  iwork(i+1) = 0
963  ELSE
964  iwork( i ) = 0
965  END IF
966  IF( k.NE.kend .AND. si( kwtop-iroffh+k-1 ).NE.zero ) THEN
967  iwork( k ) = 1
968  iwork(k+1) = 1
969  mloc = mloc + 2
970  ELSE
971  iwork( k ) = 1
972  IF( k.LT.kend ) iwork(k+1) = 0
973  mloc = mloc + 1
974  END IF
975  DO 170 j = k+2, jw+iroffh
976  iwork( j ) = 0
977  170 CONTINUE
978  CALL pdtrord( 'Vectors', iwork, par, jw+iroffh, t, 1, 1,
979  $ desct, v, 1, 1, descv, work, work(jw+iroffh+1), m,
980  $ work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
981  $ iwork(nsel+1), liwork-nsel, ierr )
982  CALL dcopy( jw, work(1+iroffh), 1, sr( kwtop ), 1 )
983  CALL dcopy( jw, work(jw+2*iroffh+1), 1, si( kwtop ), 1 )
984  IF( ierr.EQ.0 ) THEN
985  i = ilst
986  ELSE
987  i = k
988  END IF
989  END IF
990  IF( i.EQ.kend ) THEN
991  k = i + 1
992  ELSE IF( si( kwtop-iroffh+i-1 ).EQ.zero ) THEN
993  k = i + 1
994  ELSE
995  k = i + 2
996  END IF
997  GO TO 150
998  END IF
999  GO TO 140
1000  180 CONTINUE
1001  END IF
1002 *
1003 * Restore number of rows and columns of T matrix descriptor.
1004 *
1005  desct( m_ ) = nw+iroffh
1006  desct( n_ ) = nh+iroffh
1007 *
1008  IF( ns.LT.jw .OR. s.EQ.zero ) THEN
1009  IF( ns.GT.1 .AND. s.NE.zero ) THEN
1010 *
1011 * Reflect spike back into lower triangle.
1012 *
1013  rrows = numroc( ns+iroffh, nb, myrow, descv(rsrc_), nprow )
1014  rcols = numroc( 1, 1, mycol, descv(csrc_), npcol )
1015  CALL descinit( descr, ns+iroffh, 1, nb, 1, descv(rsrc_),
1016  $ descv(csrc_), ictxt, max(1, rrows), info )
1017  taurows = numroc( 1, 1, mycol, descv(rsrc_), nprow )
1018  taucols = numroc( jw+iroffh, nb, mycol, descv(csrc_),
1019  $ npcol )
1020  CALL descinit( desctau, 1, jw+iroffh, 1, nb, descv(rsrc_),
1021  $ descv(csrc_), ictxt, max(1, taurows), info )
1022 *
1023  ir = 1
1024  itau = ir + descr( lld_ ) * rcols
1025  ipw = itau + desctau( lld_ ) * taucols
1026 *
1027  CALL pdlaset( 'All', ns+iroffh, 1, zero, zero, work(itau),
1028  $ 1, 1, desctau )
1029 *
1030  CALL pdcopy( ns, v, 1+iroffh, 1+iroffh, descv, descv(m_),
1031  $ work(ir), 1+iroffh, 1, descr, 1 )
1032  CALL pdlarfg( ns, beta, 1+iroffh, 1, work(ir), 2+iroffh, 1,
1033  $ descr, 1, work(itau+iroffh) )
1034  CALL pdelset( work(ir), 1+iroffh, 1, descr, one )
1035 *
1036  CALL pdlaset( 'Lower', jw-2, jw-2, zero, zero, t, 3+iroffh,
1037  $ 1+iroffh, desct )
1038 *
1039  CALL pdlarf( 'Left', ns, jw, work(ir), 1+iroffh, 1, descr,
1040  $ 1, work(itau+iroffh), t, 1+iroffh, 1+iroffh,
1041  $ desct, work( ipw ) )
1042  CALL pdlarf( 'Right', ns, ns, work(ir), 1+iroffh, 1, descr,
1043  $ 1, work(itau+iroffh), t, 1+iroffh, 1+iroffh,
1044  $ desct, work( ipw ) )
1045  CALL pdlarf( 'Right', jw, ns, work(ir), 1+iroffh, 1, descr,
1046  $ 1, work(itau+iroffh), v, 1+iroffh, 1+iroffh,
1047  $ descv, work( ipw ) )
1048 *
1049  itau = 1
1050  ipw = itau + desctau( lld_ ) * taucols
1051  CALL pdgehrd( jw+iroffh, 1+iroffh, ns+iroffh, t, 1, 1,
1052  $ desct, work(itau), work( ipw ), lwork-ipw+1, info )
1053  END IF
1054 *
1055 * Copy updated reduced window into place.
1056 *
1057  IF( kwtop.GT.1 ) THEN
1058  CALL pdelget( 'All', '1-Tree', elem, v, 1+iroffh,
1059  $ 1+iroffh, descv )
1060  CALL pdelset( h, kwtop, kwtop-1, desch, s*elem )
1061  END IF
1062  CALL pdlacpy( 'Upper', jw-1, jw-1, t, 1+iroffh+1, 1+iroffh,
1063  $ desct, h, kwtop+1, kwtop, desch )
1064  CALL pdlacpy( 'All', 1, jw, t, 1+iroffh, 1+iroffh, desct, h,
1065  $ kwtop, kwtop, desch )
1066  CALL pdlacpy( 'All', jw-1, 1, t, 1+iroffh+1, 1+iroffh+jw-1,
1067  $ desct, h, kwtop+1, kwtop+jw-1, desch )
1068 *
1069 * Accumulate orthogonal matrix in order to update
1070 * H and Z, if requested.
1071 *
1072  IF( ns.GT.1 .AND. s.NE.zero ) THEN
1073  CALL pdormhr( 'Right', 'No', jw+iroffh, ns+iroffh, 1+iroffh,
1074  $ ns+iroffh, t, 1, 1, desct, work(itau), v, 1,
1075  $ 1, descv, work( ipw ), lwork-ipw+1, info )
1076  END IF
1077 *
1078 * Update vertical slab in H.
1079 *
1080  IF( wantt ) THEN
1081  ltop = 1
1082  ELSE
1083  ltop = ktop
1084  END IF
1085  kln = max( 0, kwtop-ltop )
1086  iroffhh = mod( ltop-1, nb )
1087  icoffhh = mod( kwtop-1, nb )
1088  hhrsrc = indxg2p( ltop, nb, myrow, desch(rsrc_), nprow )
1089  hhcsrc = indxg2p( kwtop, nb, mycol, desch(csrc_), npcol )
1090  hhrows = numroc( kln+iroffhh, nb, myrow, hhrsrc, nprow )
1091  hhcols = numroc( jw+icoffhh, nb, mycol, hhcsrc, npcol )
1092  CALL descinit( deschh, kln+iroffhh, jw+icoffhh, nb, nb,
1093  $ hhrsrc, hhcsrc, ictxt, max(1, hhrows), ierr )
1094  CALL pdgemm( 'No', 'No', kln, jw, jw, one, h, ltop,
1095  $ kwtop, desch, v, 1+iroffh, 1+iroffh, descv, zero,
1096  $ work, 1+iroffhh, 1+icoffhh, deschh )
1097  CALL pdlacpy( 'All', kln, jw, work, 1+iroffhh, 1+icoffhh,
1098  $ deschh, h, ltop, kwtop, desch )
1099 *
1100 * Update horizontal slab in H.
1101 *
1102  IF( wantt ) THEN
1103  kln = n-kbot
1104  iroffhh = mod( kwtop-1, nb )
1105  icoffhh = mod( kbot, nb )
1106  hhrsrc = indxg2p( kwtop, nb, myrow, desch(rsrc_), nprow )
1107  hhcsrc = indxg2p( kbot+1, nb, mycol, desch(csrc_), npcol )
1108  hhrows = numroc( jw+iroffhh, nb, myrow, hhrsrc, nprow )
1109  hhcols = numroc( kln+icoffhh, nb, mycol, hhcsrc, npcol )
1110  CALL descinit( deschh, jw+iroffhh, kln+icoffhh, nb, nb,
1111  $ hhrsrc, hhcsrc, ictxt, max(1, hhrows), ierr )
1112  CALL pdgemm( 'Tr', 'No', jw, kln, jw, one, v,
1113  $ 1+iroffh, 1+iroffh, descv, h, kwtop, kbot+1,
1114  $ desch, zero, work, 1+iroffhh, 1+icoffhh, deschh )
1115  CALL pdlacpy( 'All', jw, kln, work, 1+iroffhh, 1+icoffhh,
1116  $ deschh, h, kwtop, kbot+1, desch )
1117  END IF
1118 *
1119 * Update vertical slab in Z.
1120 *
1121  IF( wantz ) THEN
1122  kln = ihiz-iloz+1
1123  iroffzz = mod( iloz-1, nb )
1124  icoffzz = mod( kwtop-1, nb )
1125  zzrsrc = indxg2p( iloz, nb, myrow, descz(rsrc_), nprow )
1126  zzcsrc = indxg2p( kwtop, nb, mycol, descz(csrc_), npcol )
1127  zzrows = numroc( kln+iroffzz, nb, myrow, zzrsrc, nprow )
1128  zzcols = numroc( jw+icoffzz, nb, mycol, zzcsrc, npcol )
1129  CALL descinit( desczz, kln+iroffzz, jw+icoffzz, nb, nb,
1130  $ zzrsrc, zzcsrc, ictxt, max(1, zzrows), ierr )
1131  CALL pdgemm( 'No', 'No', kln, jw, jw, one, z, iloz,
1132  $ kwtop, descz, v, 1+iroffh, 1+iroffh, descv,
1133  $ zero, work, 1+iroffzz, 1+icoffzz, desczz )
1134  CALL pdlacpy( 'All', kln, jw, work, 1+iroffzz, 1+icoffzz,
1135  $ desczz, z, iloz, kwtop, descz )
1136  END IF
1137  END IF
1138 *
1139 * Return the number of deflations (ND) and the number of shifts (NS).
1140 * (Subtracting INFQR from the spike length takes care of the case of
1141 * a rare QR failure while calculating eigenvalues of the deflation
1142 * window.)
1143 *
1144  nd = jw - ns
1145  ns = ns - infqr
1146 *
1147 * Return optimal workspace.
1148 *
1149  work( 1 ) = dble( lwkopt )
1150  iwork( 1 ) = ilwkopt + nsel
1151 *
1152 * End of PDLAQR3
1153 *
1154  END
pdlarf
subroutine pdlarf(SIDE, M, N, V, IV, JV, DESCV, INCV, TAU, C, IC, JC, DESCC, WORK)
Definition: pdlarf.f:3
indxg2p
integer function indxg2p(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS)
Definition: indxg2p.f:2
max
#define max(A, B)
Definition: pcgemr.c:180
pdormhr
subroutine pdormhr(SIDE, TRANS, M, N, ILO, IHI, A, IA, JA, DESCA, TAU, C, IC, JC, DESCC, WORK, LWORK, INFO)
Definition: pdormhr.f:3
pilaenvx
integer function pilaenvx(ICTXT, ISPEC, NAME, OPTS, N1, N2, N3, N4)
Definition: pilaenvx.f:3
dlamch
double precision function dlamch(CMACH)
Definition: tools.f:10
pdelget
subroutine pdelget(SCOPE, TOP, ALPHA, A, IA, JA, DESCA)
Definition: pdelget.f:2
pdlaqr1
recursive subroutine pdlaqr1(WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, ILOZ, IHIZ, Z, DESCZ, WORK, LWORK, IWORK, ILWORK, INFO)
Definition: pdlaqr1.f:5
pdtrord
subroutine pdtrord(COMPQ, SELECT, PARA, N, T, IT, JT, DESCT, Q, IQ, JQ, DESCQ, WR, WI, M, WORK, LWORK, IWORK, LIWORK, INFO)
Definition: pdtrord.f:4
pdgehrd
subroutine pdgehrd(N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: pdgehrd.f:3
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
pdlaqr3
recursive subroutine pdlaqr3(WANTT, WANTZ, N, KTOP, KBOT, NW, H, DESCH, ILOZ, IHIZ, Z, DESCZ, NS, ND, SR, SI, V, DESCV, NH, T, DESCT, NV, WV, DESCW, WORK, LWORK, IWORK, LIWORK, RECLEVEL)
Definition: pdlaqr3.f:6
pdlange
double precision function pdlange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pdlange.f:3
pdlaqr0
recursive subroutine pdlaqr0(WANTT, WANTZ, N, ILO, IHI, H, DESCH, WR, WI, ILOZ, IHIZ, Z, DESCZ, WORK, LWORK, IWORK, LIWORK, INFO, RECLEVEL)
Definition: pdlaqr0.f:4
pdlaset
subroutine pdlaset(UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA)
Definition: pdblastst.f:6862
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
pdrot
subroutine pdrot(N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY, CS, SN, WORK, LWORK, INFO)
Definition: pdrot.f:3
pdlamve
subroutine pdlamve(UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, DESCB, DWORK)
Definition: pdlamve.f:3
pdlacpy
subroutine pdlacpy(UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, DESCB)
Definition: pdlacpy.f:3
pdlarfg
subroutine pdlarfg(N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, TAU)
Definition: pdlarfg.f:3
pdelset
subroutine pdelset(A, IA, JA, DESCA, ALPHA)
Definition: pdelset.f:2
min
#define min(A, B)
Definition: pcgemr.c:181
iceil
integer function iceil(INUM, IDENOM)
Definition: iceil.f:2