LAPACK 3.12.1
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
dlaqr5.f
Go to the documentation of this file.
1*> \brief \b DLAQR5 performs a single small-bulge multi-shift QR sweep.
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> Download DLAQR5 + dependencies
9*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaqr5.f">
10*> [TGZ]</a>
11*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaqr5.f">
12*> [ZIP]</a>
13*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaqr5.f">
14*> [TXT]</a>
15*
16* Definition:
17* ===========
18*
19* SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
20* SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
21* LDU, NV, WV, LDWV, NH, WH, LDWH )
22*
23* .. Scalar Arguments ..
24* INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
25* $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
26* LOGICAL WANTT, WANTZ
27* ..
28* .. Array Arguments ..
29* DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
30* $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
31* $ Z( LDZ, * )
32* ..
33*
34*
35*> \par Purpose:
36* =============
37*>
38*> \verbatim
39*>
40*> DLAQR5, called by DLAQR0, performs a
41*> single small-bulge multi-shift QR sweep.
42*> \endverbatim
43*
44* Arguments:
45* ==========
46*
47*> \param[in] WANTT
48*> \verbatim
49*> WANTT is LOGICAL
50*> WANTT = .true. if the quasi-triangular Schur factor
51*> is being computed. WANTT is set to .false. otherwise.
52*> \endverbatim
53*>
54*> \param[in] WANTZ
55*> \verbatim
56*> WANTZ is LOGICAL
57*> WANTZ = .true. if the orthogonal Schur factor is being
58*> computed. WANTZ is set to .false. otherwise.
59*> \endverbatim
60*>
61*> \param[in] KACC22
62*> \verbatim
63*> KACC22 is INTEGER with value 0, 1, or 2.
64*> Specifies the computation mode of far-from-diagonal
65*> orthogonal updates.
66*> = 0: DLAQR5 does not accumulate reflections and does not
67*> use matrix-matrix multiply to update far-from-diagonal
68*> matrix entries.
69*> = 1: DLAQR5 accumulates reflections and uses matrix-matrix
70*> multiply to update the far-from-diagonal matrix entries.
71*> = 2: Same as KACC22 = 1. This option used to enable exploiting
72*> the 2-by-2 structure during matrix multiplications, but
73*> this is no longer supported.
74*> \endverbatim
75*>
76*> \param[in] N
77*> \verbatim
78*> N is INTEGER
79*> N is the order of the Hessenberg matrix H upon which this
80*> subroutine operates.
81*> \endverbatim
82*>
83*> \param[in] KTOP
84*> \verbatim
85*> KTOP is INTEGER
86*> \endverbatim
87*>
88*> \param[in] KBOT
89*> \verbatim
90*> KBOT is INTEGER
91*> These are the first and last rows and columns of an
92*> isolated diagonal block upon which the QR sweep is to be
93*> applied. It is assumed without a check that
94*> either KTOP = 1 or H(KTOP,KTOP-1) = 0
95*> and
96*> either KBOT = N or H(KBOT+1,KBOT) = 0.
97*> \endverbatim
98*>
99*> \param[in] NSHFTS
100*> \verbatim
101*> NSHFTS is INTEGER
102*> NSHFTS gives the number of simultaneous shifts. NSHFTS
103*> must be positive and even.
104*> \endverbatim
105*>
106*> \param[in,out] SR
107*> \verbatim
108*> SR is DOUBLE PRECISION array, dimension (NSHFTS)
109*> \endverbatim
110*>
111*> \param[in,out] SI
112*> \verbatim
113*> SI is DOUBLE PRECISION array, dimension (NSHFTS)
114*> SR contains the real parts and SI contains the imaginary
115*> parts of the NSHFTS shifts of origin that define the
116*> multi-shift QR sweep. On output SR and SI may be
117*> reordered.
118*> \endverbatim
119*>
120*> \param[in,out] H
121*> \verbatim
122*> H is DOUBLE PRECISION array, dimension (LDH,N)
123*> On input H contains a Hessenberg matrix. On output a
124*> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
125*> to the isolated diagonal block in rows and columns KTOP
126*> through KBOT.
127*> \endverbatim
128*>
129*> \param[in] LDH
130*> \verbatim
131*> LDH is INTEGER
132*> LDH is the leading dimension of H just as declared in the
133*> calling procedure. LDH >= MAX(1,N).
134*> \endverbatim
135*>
136*> \param[in] ILOZ
137*> \verbatim
138*> ILOZ is INTEGER
139*> \endverbatim
140*>
141*> \param[in] IHIZ
142*> \verbatim
143*> IHIZ is INTEGER
144*> Specify the rows of Z to which transformations must be
145*> applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N
146*> \endverbatim
147*>
148*> \param[in,out] Z
149*> \verbatim
150*> Z is DOUBLE PRECISION array, dimension (LDZ,IHIZ)
151*> If WANTZ = .TRUE., then the QR Sweep orthogonal
152*> similarity transformation is accumulated into
153*> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
154*> If WANTZ = .FALSE., then Z is unreferenced.
155*> \endverbatim
156*>
157*> \param[in] LDZ
158*> \verbatim
159*> LDZ is INTEGER
160*> LDA is the leading dimension of Z just as declared in
161*> the calling procedure. LDZ >= N.
162*> \endverbatim
163*>
164*> \param[out] V
165*> \verbatim
166*> V is DOUBLE PRECISION array, dimension (LDV,NSHFTS/2)
167*> \endverbatim
168*>
169*> \param[in] LDV
170*> \verbatim
171*> LDV is INTEGER
172*> LDV is the leading dimension of V as declared in the
173*> calling procedure. LDV >= 3.
174*> \endverbatim
175*>
176*> \param[out] U
177*> \verbatim
178*> U is DOUBLE PRECISION array, dimension (LDU,2*NSHFTS)
179*> \endverbatim
180*>
181*> \param[in] LDU
182*> \verbatim
183*> LDU is INTEGER
184*> LDU is the leading dimension of U just as declared in the
185*> in the calling subroutine. LDU >= 2*NSHFTS.
186*> \endverbatim
187*>
188*> \param[in] NV
189*> \verbatim
190*> NV is INTEGER
191*> NV is the number of rows in WV agailable for workspace.
192*> NV >= 1.
193*> \endverbatim
194*>
195*> \param[out] WV
196*> \verbatim
197*> WV is DOUBLE PRECISION array, dimension (LDWV,2*NSHFTS)
198*> \endverbatim
199*>
200*> \param[in] LDWV
201*> \verbatim
202*> LDWV is INTEGER
203*> LDWV is the leading dimension of WV as declared in the
204*> in the calling subroutine. LDWV >= NV.
205*> \endverbatim
206*
207*> \param[in] NH
208*> \verbatim
209*> NH is INTEGER
210*> NH is the number of columns in array WH available for
211*> workspace. NH >= 1.
212*> \endverbatim
213*>
214*> \param[out] WH
215*> \verbatim
216*> WH is DOUBLE PRECISION array, dimension (LDWH,NH)
217*> \endverbatim
218*>
219*> \param[in] LDWH
220*> \verbatim
221*> LDWH is INTEGER
222*> Leading dimension of WH just as declared in the
223*> calling procedure. LDWH >= 2*NSHFTS.
224*> \endverbatim
225*>
226* Authors:
227* ========
228*
229*> \author Univ. of Tennessee
230*> \author Univ. of California Berkeley
231*> \author Univ. of Colorado Denver
232*> \author NAG Ltd.
233*
234*> \ingroup laqr5
235*
236*> \par Contributors:
237* ==================
238*>
239*> Karen Braman and Ralph Byers, Department of Mathematics,
240*> University of Kansas, USA
241*>
242*> Lars Karlsson, Daniel Kressner, and Bruno Lang
243*>
244*> Thijs Steel, Department of Computer science,
245*> KU Leuven, Belgium
246*
247*> \par References:
248* ================
249*>
250*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
251*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3
252*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages
253*> 929--947, 2002.
254*>
255*> Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed
256*> chains of bulges in multishift QR algorithms.
257*> ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014).
258*>
259* =====================================================================
260 SUBROUTINE dlaqr5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
261 $ SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
262 $ LDU, NV, WV, LDWV, NH, WH, LDWH )
263 IMPLICIT NONE
264*
265* -- LAPACK auxiliary routine --
266* -- LAPACK is a software package provided by Univ. of Tennessee, --
267* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
268*
269* .. Scalar Arguments ..
270 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
271 $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
272 LOGICAL WANTT, WANTZ
273* ..
274* .. Array Arguments ..
275 DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
276 $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
277 $ z( ldz, * )
278* ..
279*
280* ================================================================
281* .. Parameters ..
282 DOUBLE PRECISION ZERO, ONE
283 PARAMETER ( ZERO = 0.0d0, one = 1.0d0 )
284* ..
285* .. Local Scalars ..
286 DOUBLE PRECISION ALPHA, BETA, H11, H12, H21, H22, REFSUM,
287 $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, T1, T2,
288 $ t3, tst1, tst2, ulp
289 INTEGER I, I2, I4, INCOL, J, JBOT, JCOL, JLEN,
290 $ JROW, JTOP, K, K1, KDU, KMS, KRCOL,
291 $ m, m22, mbot, mtop, nbmps, ndcol,
292 $ ns, nu
293 LOGICAL ACCUM, BMP22
294* ..
295* .. External Functions ..
296 DOUBLE PRECISION DLAMCH
297 EXTERNAL DLAMCH
298* ..
299* .. Intrinsic Functions ..
300*
301 INTRINSIC abs, dble, max, min, mod
302* ..
303* .. Local Arrays ..
304 DOUBLE PRECISION VT( 3 )
305* ..
306* .. External Subroutines ..
307 EXTERNAL dgemm, dlacpy, dlaqr1, dlarfg, dlaset,
308 $ dtrmm
309* ..
310* .. Executable Statements ..
311*
312* ==== If there are no shifts, then there is nothing to do. ====
313*
314 IF( nshfts.LT.2 )
315 $ RETURN
316*
317* ==== If the active block is empty or 1-by-1, then there
318* . is nothing to do. ====
319*
320 IF( ktop.GE.kbot )
321 $ RETURN
322*
323* ==== Shuffle shifts into pairs of real shifts and pairs
324* . of complex conjugate shifts assuming complex
325* . conjugate shifts are already adjacent to one
326* . another. ====
327*
328 DO 10 i = 1, nshfts - 2, 2
329 IF( si( i ).NE.-si( i+1 ) ) THEN
330*
331 swap = sr( i )
332 sr( i ) = sr( i+1 )
333 sr( i+1 ) = sr( i+2 )
334 sr( i+2 ) = swap
335*
336 swap = si( i )
337 si( i ) = si( i+1 )
338 si( i+1 ) = si( i+2 )
339 si( i+2 ) = swap
340 END IF
341 10 CONTINUE
342*
343* ==== NSHFTS is supposed to be even, but if it is odd,
344* . then simply reduce it by one. The shuffle above
345* . ensures that the dropped shift is real and that
346* . the remaining shifts are paired. ====
347*
348 ns = nshfts - mod( nshfts, 2 )
349*
350* ==== Machine constants for deflation ====
351*
352 safmin = dlamch( 'SAFE MINIMUM' )
353 safmax = one / safmin
354 ulp = dlamch( 'PRECISION' )
355 smlnum = safmin*( dble( n ) / ulp )
356*
357* ==== Use accumulated reflections to update far-from-diagonal
358* . entries ? ====
359*
360 accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
361*
362* ==== clear trash ====
363*
364 IF( ktop+2.LE.kbot )
365 $ h( ktop+2, ktop ) = zero
366*
367* ==== NBMPS = number of 2-shift bulges in the chain ====
368*
369 nbmps = ns / 2
370*
371* ==== KDU = width of slab ====
372*
373 kdu = 4*nbmps
374*
375* ==== Create and chase chains of NBMPS bulges ====
376*
377 DO 180 incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
378*
379* JTOP = Index from which updates from the right start.
380*
381 IF( accum ) THEN
382 jtop = max( ktop, incol )
383 ELSE IF( wantt ) THEN
384 jtop = 1
385 ELSE
386 jtop = ktop
387 END IF
388*
389 ndcol = incol + kdu
390 IF( accum )
391 $ CALL dlaset( 'ALL', kdu, kdu, zero, one, u, ldu )
392*
393* ==== Near-the-diagonal bulge chase. The following loop
394* . performs the near-the-diagonal part of a small bulge
395* . multi-shift QR sweep. Each 4*NBMPS column diagonal
396* . chunk extends from column INCOL to column NDCOL
397* . (including both column INCOL and column NDCOL). The
398* . following loop chases a 2*NBMPS+1 column long chain of
399* . NBMPS bulges 2*NBMPS columns to the right. (INCOL
400* . may be less than KTOP and and NDCOL may be greater than
401* . KBOT indicating phantom columns from which to chase
402* . bulges before they are actually introduced or to which
403* . to chase bulges beyond column KBOT.) ====
404*
405 DO 145 krcol = incol, min( incol+2*nbmps-1, kbot-2 )
406*
407* ==== Bulges number MTOP to MBOT are active double implicit
408* . shift bulges. There may or may not also be small
409* . 2-by-2 bulge, if there is room. The inactive bulges
410* . (if any) must wait until the active bulges have moved
411* . down the diagonal to make room. The phantom matrix
412* . paradigm described above helps keep track. ====
413*
414 mtop = max( 1, ( ktop-krcol ) / 2+1 )
415 mbot = min( nbmps, ( kbot-krcol-1 ) / 2 )
416 m22 = mbot + 1
417 bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+2*( m22-1 ) ).EQ.
418 $ ( kbot-2 )
419*
420* ==== Generate reflections to chase the chain right
421* . one column. (The minimum value of K is KTOP-1.) ====
422*
423 IF ( bmp22 ) THEN
424*
425* ==== Special case: 2-by-2 reflection at bottom treated
426* . separately ====
427*
428 k = krcol + 2*( m22-1 )
429 IF( k.EQ.ktop-1 ) THEN
430 CALL dlaqr1( 2, h( k+1, k+1 ), ldh, sr( 2*m22-1 ),
431 $ si( 2*m22-1 ), sr( 2*m22 ), si( 2*m22 ),
432 $ v( 1, m22 ) )
433 beta = v( 1, m22 )
434 CALL dlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
435 ELSE
436 beta = h( k+1, k )
437 v( 2, m22 ) = h( k+2, k )
438 CALL dlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
439 h( k+1, k ) = beta
440 h( k+2, k ) = zero
441 END IF
442
443*
444* ==== Perform update from right within
445* . computational window. ====
446*
447 t1 = v( 1, m22 )
448 t2 = t1*v( 2, m22 )
449 DO 30 j = jtop, min( kbot, k+3 )
450 refsum = h( j, k+1 ) + v( 2, m22 )*h( j, k+2 )
451 h( j, k+1 ) = h( j, k+1 ) - refsum*t1
452 h( j, k+2 ) = h( j, k+2 ) - refsum*t2
453 30 CONTINUE
454*
455* ==== Perform update from left within
456* . computational window. ====
457*
458 IF( accum ) THEN
459 jbot = min( ndcol, kbot )
460 ELSE IF( wantt ) THEN
461 jbot = n
462 ELSE
463 jbot = kbot
464 END IF
465 t1 = v( 1, m22 )
466 t2 = t1*v( 2, m22 )
467 DO 40 j = k+1, jbot
468 refsum = h( k+1, j ) + v( 2, m22 )*h( k+2, j )
469 h( k+1, j ) = h( k+1, j ) - refsum*t1
470 h( k+2, j ) = h( k+2, j ) - refsum*t2
471 40 CONTINUE
472*
473* ==== The following convergence test requires that
474* . the tradition small-compared-to-nearby-diagonals
475* . criterion and the Ahues & Tisseur (LAWN 122, 1997)
476* . criteria both be satisfied. The latter improves
477* . accuracy in some examples. Falling back on an
478* . alternate convergence criterion when TST1 or TST2
479* . is zero (as done here) is traditional but probably
480* . unnecessary. ====
481*
482 IF( k.GE.ktop ) THEN
483 IF( h( k+1, k ).NE.zero ) THEN
484 tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
485 IF( tst1.EQ.zero ) THEN
486 IF( k.GE.ktop+1 )
487 $ tst1 = tst1 + abs( h( k, k-1 ) )
488 IF( k.GE.ktop+2 )
489 $ tst1 = tst1 + abs( h( k, k-2 ) )
490 IF( k.GE.ktop+3 )
491 $ tst1 = tst1 + abs( h( k, k-3 ) )
492 IF( k.LE.kbot-2 )
493 $ tst1 = tst1 + abs( h( k+2, k+1 ) )
494 IF( k.LE.kbot-3 )
495 $ tst1 = tst1 + abs( h( k+3, k+1 ) )
496 IF( k.LE.kbot-4 )
497 $ tst1 = tst1 + abs( h( k+4, k+1 ) )
498 END IF
499 IF( abs( h( k+1, k ) )
500 $ .LE.max( smlnum, ulp*tst1 ) ) THEN
501 h12 = max( abs( h( k+1, k ) ),
502 $ abs( h( k, k+1 ) ) )
503 h21 = min( abs( h( k+1, k ) ),
504 $ abs( h( k, k+1 ) ) )
505 h11 = max( abs( h( k+1, k+1 ) ),
506 $ abs( h( k, k )-h( k+1, k+1 ) ) )
507 h22 = min( abs( h( k+1, k+1 ) ),
508 $ abs( h( k, k )-h( k+1, k+1 ) ) )
509 scl = h11 + h12
510 tst2 = h22*( h11 / scl )
511*
512 IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
513 $ max( smlnum, ulp*tst2 ) ) THEN
514 h( k+1, k ) = zero
515 END IF
516 END IF
517 END IF
518 END IF
519*
520* ==== Accumulate orthogonal transformations. ====
521*
522 IF( accum ) THEN
523 kms = k - incol
524 t1 = v( 1, m22 )
525 t2 = t1*v( 2, m22 )
526 DO 50 j = max( 1, ktop-incol ), kdu
527 refsum = u( j, kms+1 ) + v( 2, m22 )*u( j, kms+2 )
528 u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
529 u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
530 50 CONTINUE
531 ELSE IF( wantz ) THEN
532 t1 = v( 1, m22 )
533 t2 = t1*v( 2, m22 )
534 DO 60 j = iloz, ihiz
535 refsum = z( j, k+1 )+v( 2, m22 )*z( j, k+2 )
536 z( j, k+1 ) = z( j, k+1 ) - refsum*t1
537 z( j, k+2 ) = z( j, k+2 ) - refsum*t2
538 60 CONTINUE
539 END IF
540 END IF
541*
542* ==== Normal case: Chain of 3-by-3 reflections ====
543*
544 DO 80 m = mbot, mtop, -1
545 k = krcol + 2*( m-1 )
546 IF( k.EQ.ktop-1 ) THEN
547 CALL dlaqr1( 3, h( ktop, ktop ), ldh, sr( 2*m-1 ),
548 $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
549 $ v( 1, m ) )
550 alpha = v( 1, m )
551 CALL dlarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
552 ELSE
553*
554* ==== Perform delayed transformation of row below
555* . Mth bulge. Exploit fact that first two elements
556* . of row are actually zero. ====
557*
558 t1 = v( 1, m )
559 t2 = t1*v( 2, m )
560 t3 = t1*v( 3, m )
561 refsum = v( 3, m )*h( k+3, k+2 )
562 h( k+3, k ) = -refsum*t1
563 h( k+3, k+1 ) = -refsum*t2
564 h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*t3
565*
566* ==== Calculate reflection to move
567* . Mth bulge one step. ====
568*
569 beta = h( k+1, k )
570 v( 2, m ) = h( k+2, k )
571 v( 3, m ) = h( k+3, k )
572 CALL dlarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
573*
574* ==== A Bulge may collapse because of vigilant
575* . deflation or destructive underflow. In the
576* . underflow case, try the two-small-subdiagonals
577* . trick to try to reinflate the bulge. ====
578*
579 IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
580 $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
581*
582* ==== Typical case: not collapsed (yet). ====
583*
584 h( k+1, k ) = beta
585 h( k+2, k ) = zero
586 h( k+3, k ) = zero
587 ELSE
588*
589* ==== Atypical case: collapsed. Attempt to
590* . reintroduce ignoring H(K+1,K) and H(K+2,K).
591* . If the fill resulting from the new
592* . reflector is too large, then abandon it.
593* . Otherwise, use the new one. ====
594*
595 CALL dlaqr1( 3, h( k+1, k+1 ), ldh, sr( 2*m-1 ),
596 $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
597 $ vt )
598 alpha = vt( 1 )
599 CALL dlarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
600 t1 = vt( 1 )
601 t2 = t1*vt( 2 )
602 t3 = t1*vt( 3 )
603 refsum = h( k+1, k ) + vt( 2 )*h( k+2, k )
604*
605 IF( abs( h( k+2, k )-refsum*t2 )+
606 $ abs( refsum*t3 ).GT.ulp*
607 $ ( abs( h( k, k ) )+abs( h( k+1,
608 $ k+1 ) )+abs( h( k+2, k+2 ) ) ) ) THEN
609*
610* ==== Starting a new bulge here would
611* . create non-negligible fill. Use
612* . the old one with trepidation. ====
613*
614 h( k+1, k ) = beta
615 h( k+2, k ) = zero
616 h( k+3, k ) = zero
617 ELSE
618*
619* ==== Starting a new bulge here would
620* . create only negligible fill.
621* . Replace the old reflector with
622* . the new one. ====
623*
624 h( k+1, k ) = h( k+1, k ) - refsum*t1
625 h( k+2, k ) = zero
626 h( k+3, k ) = zero
627 v( 1, m ) = vt( 1 )
628 v( 2, m ) = vt( 2 )
629 v( 3, m ) = vt( 3 )
630 END IF
631 END IF
632 END IF
633*
634* ==== Apply reflection from the right and
635* . the first column of update from the left.
636* . These updates are required for the vigilant
637* . deflation check. We still delay most of the
638* . updates from the left for efficiency. ====
639*
640 t1 = v( 1, m )
641 t2 = t1*v( 2, m )
642 t3 = t1*v( 3, m )
643 DO 70 j = jtop, min( kbot, k+3 )
644 refsum = h( j, k+1 ) + v( 2, m )*h( j, k+2 )
645 $ + v( 3, m )*h( j, k+3 )
646 h( j, k+1 ) = h( j, k+1 ) - refsum*t1
647 h( j, k+2 ) = h( j, k+2 ) - refsum*t2
648 h( j, k+3 ) = h( j, k+3 ) - refsum*t3
649 70 CONTINUE
650*
651* ==== Perform update from left for subsequent
652* . column. ====
653*
654 refsum = h( k+1, k+1 ) + v( 2, m )*h( k+2, k+1 )
655 $ + v( 3, m )*h( k+3, k+1 )
656 h( k+1, k+1 ) = h( k+1, k+1 ) - refsum*t1
657 h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*t2
658 h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*t3
659*
660* ==== The following convergence test requires that
661* . the tradition small-compared-to-nearby-diagonals
662* . criterion and the Ahues & Tisseur (LAWN 122, 1997)
663* . criteria both be satisfied. The latter improves
664* . accuracy in some examples. Falling back on an
665* . alternate convergence criterion when TST1 or TST2
666* . is zero (as done here) is traditional but probably
667* . unnecessary. ====
668*
669 IF( k.LT.ktop)
670 $ cycle
671 IF( h( k+1, k ).NE.zero ) THEN
672 tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
673 IF( tst1.EQ.zero ) THEN
674 IF( k.GE.ktop+1 )
675 $ tst1 = tst1 + abs( h( k, k-1 ) )
676 IF( k.GE.ktop+2 )
677 $ tst1 = tst1 + abs( h( k, k-2 ) )
678 IF( k.GE.ktop+3 )
679 $ tst1 = tst1 + abs( h( k, k-3 ) )
680 IF( k.LE.kbot-2 )
681 $ tst1 = tst1 + abs( h( k+2, k+1 ) )
682 IF( k.LE.kbot-3 )
683 $ tst1 = tst1 + abs( h( k+3, k+1 ) )
684 IF( k.LE.kbot-4 )
685 $ tst1 = tst1 + abs( h( k+4, k+1 ) )
686 END IF
687 IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
688 $ THEN
689 h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
690 h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
691 h11 = max( abs( h( k+1, k+1 ) ),
692 $ abs( h( k, k )-h( k+1, k+1 ) ) )
693 h22 = min( abs( h( k+1, k+1 ) ),
694 $ abs( h( k, k )-h( k+1, k+1 ) ) )
695 scl = h11 + h12
696 tst2 = h22*( h11 / scl )
697*
698 IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
699 $ max( smlnum, ulp*tst2 ) ) THEN
700 h( k+1, k ) = zero
701 END IF
702 END IF
703 END IF
704 80 CONTINUE
705*
706* ==== Multiply H by reflections from the left ====
707*
708 IF( accum ) THEN
709 jbot = min( ndcol, kbot )
710 ELSE IF( wantt ) THEN
711 jbot = n
712 ELSE
713 jbot = kbot
714 END IF
715*
716 DO 100 m = mbot, mtop, -1
717 k = krcol + 2*( m-1 )
718 t1 = v( 1, m )
719 t2 = t1*v( 2, m )
720 t3 = t1*v( 3, m )
721 DO 90 j = max( ktop, krcol + 2*m ), jbot
722 refsum = h( k+1, j ) + v( 2, m )*h( k+2, j )
723 $ + v( 3, m )*h( k+3, j )
724 h( k+1, j ) = h( k+1, j ) - refsum*t1
725 h( k+2, j ) = h( k+2, j ) - refsum*t2
726 h( k+3, j ) = h( k+3, j ) - refsum*t3
727 90 CONTINUE
728 100 CONTINUE
729*
730* ==== Accumulate orthogonal transformations. ====
731*
732 IF( accum ) THEN
733*
734* ==== Accumulate U. (If needed, update Z later
735* . with an efficient matrix-matrix
736* . multiply.) ====
737*
738 DO 120 m = mbot, mtop, -1
739 k = krcol + 2*( m-1 )
740 kms = k - incol
741 i2 = max( 1, ktop-incol )
742 i2 = max( i2, kms-(krcol-incol)+1 )
743 i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 )
744 t1 = v( 1, m )
745 t2 = t1*v( 2, m )
746 t3 = t1*v( 3, m )
747 DO 110 j = i2, i4
748 refsum = u( j, kms+1 ) + v( 2, m )*u( j, kms+2 )
749 $ + v( 3, m )*u( j, kms+3 )
750 u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
751 u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
752 u( j, kms+3 ) = u( j, kms+3 ) - refsum*t3
753 110 CONTINUE
754 120 CONTINUE
755 ELSE IF( wantz ) THEN
756*
757* ==== U is not accumulated, so update Z
758* . now by multiplying by reflections
759* . from the right. ====
760*
761 DO 140 m = mbot, mtop, -1
762 k = krcol + 2*( m-1 )
763 t1 = v( 1, m )
764 t2 = t1*v( 2, m )
765 t3 = t1*v( 3, m )
766 DO 130 j = iloz, ihiz
767 refsum = z( j, k+1 ) + v( 2, m )*z( j, k+2 )
768 $ + v( 3, m )*z( j, k+3 )
769 z( j, k+1 ) = z( j, k+1 ) - refsum*t1
770 z( j, k+2 ) = z( j, k+2 ) - refsum*t2
771 z( j, k+3 ) = z( j, k+3 ) - refsum*t3
772 130 CONTINUE
773 140 CONTINUE
774 END IF
775*
776* ==== End of near-the-diagonal bulge chase. ====
777*
778 145 CONTINUE
779*
780* ==== Use U (if accumulated) to update far-from-diagonal
781* . entries in H. If required, use U to update Z as
782* . well. ====
783*
784 IF( accum ) THEN
785 IF( wantt ) THEN
786 jtop = 1
787 jbot = n
788 ELSE
789 jtop = ktop
790 jbot = kbot
791 END IF
792 k1 = max( 1, ktop-incol )
793 nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
794*
795* ==== Horizontal Multiply ====
796*
797 DO 150 jcol = min( ndcol, kbot ) + 1, jbot, nh
798 jlen = min( nh, jbot-jcol+1 )
799 CALL dgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),
800 $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
801 $ ldwh )
802 CALL dlacpy( 'ALL', nu, jlen, wh, ldwh,
803 $ h( incol+k1, jcol ), ldh )
804 150 CONTINUE
805*
806* ==== Vertical multiply ====
807*
808 DO 160 jrow = jtop, max( ktop, incol ) - 1, nv
809 jlen = min( nv, max( ktop, incol )-jrow )
810 CALL dgemm( 'N', 'N', jlen, nu, nu, one,
811 $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
812 $ ldu, zero, wv, ldwv )
813 CALL dlacpy( 'ALL', jlen, nu, wv, ldwv,
814 $ h( jrow, incol+k1 ), ldh )
815 160 CONTINUE
816*
817* ==== Z multiply (also vertical) ====
818*
819 IF( wantz ) THEN
820 DO 170 jrow = iloz, ihiz, nv
821 jlen = min( nv, ihiz-jrow+1 )
822 CALL dgemm( 'N', 'N', jlen, nu, nu, one,
823 $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
824 $ ldu, zero, wv, ldwv )
825 CALL dlacpy( 'ALL', jlen, nu, wv, ldwv,
826 $ z( jrow, incol+k1 ), ldz )
827 170 CONTINUE
828 END IF
829 END IF
830 180 CONTINUE
831*
832* ==== End of DLAQR5 ====
833*
834 END
subroutine dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
DGEMM
Definition dgemm.f:188
subroutine dlacpy(uplo, m, n, a, lda, b, ldb)
DLACPY copies all or part of one two-dimensional array to another.
Definition dlacpy.f:101
subroutine dlaqr1(n, h, ldh, sr1, si1, sr2, si2, v)
DLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and spe...
Definition dlaqr1.f:119
subroutine dlaqr5(wantt, wantz, kacc22, n, ktop, kbot, nshfts, sr, si, h, ldh, iloz, ihiz, z, ldz, v, ldv, u, ldu, nv, wv, ldwv, nh, wh, ldwh)
DLAQR5 performs a single small-bulge multi-shift QR sweep.
Definition dlaqr5.f:263
subroutine dlarfg(n, alpha, x, incx, tau)
DLARFG generates an elementary reflector (Householder matrix).
Definition dlarfg.f:104
subroutine dlaset(uplo, m, n, alpha, beta, a, lda)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition dlaset.f:108
subroutine dtrmm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
DTRMM
Definition dtrmm.f:177