LAPACK 3.11.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
slaqr5.f
Go to the documentation of this file.
1*> \brief \b SLAQR5 performs a single small-bulge multi-shift QR sweep.
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download SLAQR5 + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slaqr5.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slaqr5.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slaqr5.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18* Definition:
19* ===========
20*
21* SUBROUTINE SLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
22* SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
23* LDU, NV, WV, LDWV, NH, WH, LDWH )
24*
25* .. Scalar Arguments ..
26* INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
27* $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
28* LOGICAL WANTT, WANTZ
29* ..
30* .. Array Arguments ..
31* REAL H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
32* $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
33* $ Z( LDZ, * )
34* ..
35*
36*
37*> \par Purpose:
38* =============
39*>
40*> \verbatim
41*>
42*> SLAQR5, called by SLAQR0, performs a
43*> single small-bulge multi-shift QR sweep.
44*> \endverbatim
45*
46* Arguments:
47* ==========
48*
49*> \param[in] WANTT
50*> \verbatim
51*> WANTT is LOGICAL
52*> WANTT = .true. if the quasi-triangular Schur factor
53*> is being computed. WANTT is set to .false. otherwise.
54*> \endverbatim
55*>
56*> \param[in] WANTZ
57*> \verbatim
58*> WANTZ is LOGICAL
59*> WANTZ = .true. if the orthogonal Schur factor is being
60*> computed. WANTZ is set to .false. otherwise.
61*> \endverbatim
62*>
63*> \param[in] KACC22
64*> \verbatim
65*> KACC22 is INTEGER with value 0, 1, or 2.
66*> Specifies the computation mode of far-from-diagonal
67*> orthogonal updates.
68*> = 0: SLAQR5 does not accumulate reflections and does not
69*> use matrix-matrix multiply to update far-from-diagonal
70*> matrix entries.
71*> = 1: SLAQR5 accumulates reflections and uses matrix-matrix
72*> multiply to update the far-from-diagonal matrix entries.
73*> = 2: Same as KACC22 = 1. This option used to enable exploiting
74*> the 2-by-2 structure during matrix multiplications, but
75*> this is no longer supported.
76*> \endverbatim
77*>
78*> \param[in] N
79*> \verbatim
80*> N is INTEGER
81*> N is the order of the Hessenberg matrix H upon which this
82*> subroutine operates.
83*> \endverbatim
84*>
85*> \param[in] KTOP
86*> \verbatim
87*> KTOP is INTEGER
88*> \endverbatim
89*>
90*> \param[in] KBOT
91*> \verbatim
92*> KBOT is INTEGER
93*> These are the first and last rows and columns of an
94*> isolated diagonal block upon which the QR sweep is to be
95*> applied. It is assumed without a check that
96*> either KTOP = 1 or H(KTOP,KTOP-1) = 0
97*> and
98*> either KBOT = N or H(KBOT+1,KBOT) = 0.
99*> \endverbatim
100*>
101*> \param[in] NSHFTS
102*> \verbatim
103*> NSHFTS is INTEGER
104*> NSHFTS gives the number of simultaneous shifts. NSHFTS
105*> must be positive and even.
106*> \endverbatim
107*>
108*> \param[in,out] SR
109*> \verbatim
110*> SR is REAL array, dimension (NSHFTS)
111*> \endverbatim
112*>
113*> \param[in,out] SI
114*> \verbatim
115*> SI is REAL array, dimension (NSHFTS)
116*> SR contains the real parts and SI contains the imaginary
117*> parts of the NSHFTS shifts of origin that define the
118*> multi-shift QR sweep. On output SR and SI may be
119*> reordered.
120*> \endverbatim
121*>
122*> \param[in,out] H
123*> \verbatim
124*> H is REAL array, dimension (LDH,N)
125*> On input H contains a Hessenberg matrix. On output a
126*> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
127*> to the isolated diagonal block in rows and columns KTOP
128*> through KBOT.
129*> \endverbatim
130*>
131*> \param[in] LDH
132*> \verbatim
133*> LDH is INTEGER
134*> LDH is the leading dimension of H just as declared in the
135*> calling procedure. LDH >= MAX(1,N).
136*> \endverbatim
137*>
138*> \param[in] ILOZ
139*> \verbatim
140*> ILOZ is INTEGER
141*> \endverbatim
142*>
143*> \param[in] IHIZ
144*> \verbatim
145*> IHIZ is INTEGER
146*> Specify the rows of Z to which transformations must be
147*> applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N
148*> \endverbatim
149*>
150*> \param[in,out] Z
151*> \verbatim
152*> Z is REAL array, dimension (LDZ,IHIZ)
153*> If WANTZ = .TRUE., then the QR Sweep orthogonal
154*> similarity transformation is accumulated into
155*> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
156*> If WANTZ = .FALSE., then Z is unreferenced.
157*> \endverbatim
158*>
159*> \param[in] LDZ
160*> \verbatim
161*> LDZ is INTEGER
162*> LDA is the leading dimension of Z just as declared in
163*> the calling procedure. LDZ >= N.
164*> \endverbatim
165*>
166*> \param[out] V
167*> \verbatim
168*> V is REAL array, dimension (LDV,NSHFTS/2)
169*> \endverbatim
170*>
171*> \param[in] LDV
172*> \verbatim
173*> LDV is INTEGER
174*> LDV is the leading dimension of V as declared in the
175*> calling procedure. LDV >= 3.
176*> \endverbatim
177*>
178*> \param[out] U
179*> \verbatim
180*> U is REAL array, dimension (LDU,2*NSHFTS)
181*> \endverbatim
182*>
183*> \param[in] LDU
184*> \verbatim
185*> LDU is INTEGER
186*> LDU is the leading dimension of U just as declared in the
187*> in the calling subroutine. LDU >= 2*NSHFTS.
188*> \endverbatim
189*>
190*> \param[in] NV
191*> \verbatim
192*> NV is INTEGER
193*> NV is the number of rows in WV agailable for workspace.
194*> NV >= 1.
195*> \endverbatim
196*>
197*> \param[out] WV
198*> \verbatim
199*> WV is REAL array, dimension (LDWV,2*NSHFTS)
200*> \endverbatim
201*>
202*> \param[in] LDWV
203*> \verbatim
204*> LDWV is INTEGER
205*> LDWV is the leading dimension of WV as declared in the
206*> in the calling subroutine. LDWV >= NV.
207*> \endverbatim
208*
209*> \param[in] NH
210*> \verbatim
211*> NH is INTEGER
212*> NH is the number of columns in array WH available for
213*> workspace. NH >= 1.
214*> \endverbatim
215*>
216*> \param[out] WH
217*> \verbatim
218*> WH is REAL array, dimension (LDWH,NH)
219*> \endverbatim
220*>
221*> \param[in] LDWH
222*> \verbatim
223*> LDWH is INTEGER
224*> Leading dimension of WH just as declared in the
225*> calling procedure. LDWH >= 2*NSHFTS.
226*> \endverbatim
227*>
228* Authors:
229* ========
230*
231*> \author Univ. of Tennessee
232*> \author Univ. of California Berkeley
233*> \author Univ. of Colorado Denver
234*> \author NAG Ltd.
235*
236*> \ingroup realOTHERauxiliary
237*
238*> \par Contributors:
239* ==================
240*>
241*> Karen Braman and Ralph Byers, Department of Mathematics,
242*> University of Kansas, USA
243*>
244*> Lars Karlsson, Daniel Kressner, and Bruno Lang
245*>
246*> Thijs Steel, Department of Computer science,
247*> KU Leuven, Belgium
248*
249*> \par References:
250* ================
251*>
252*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
253*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3
254*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages
255*> 929--947, 2002.
256*>
257*> Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed
258*> chains of bulges in multishift QR algorithms.
259*> ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014).
260*>
261* =====================================================================
262 SUBROUTINE slaqr5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
263 $ SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
264 $ LDU, NV, WV, LDWV, NH, WH, LDWH )
265 IMPLICIT NONE
266*
267* -- LAPACK auxiliary routine --
268* -- LAPACK is a software package provided by Univ. of Tennessee, --
269* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
270*
271* .. Scalar Arguments ..
272 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
273 $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
274 LOGICAL WANTT, WANTZ
275* ..
276* .. Array Arguments ..
277 REAL H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
278 $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
279 $ z( ldz, * )
280* ..
281*
282* ================================================================
283* .. Parameters ..
284 REAL ZERO, ONE
285 PARAMETER ( ZERO = 0.0e0, one = 1.0e0 )
286* ..
287* .. Local Scalars ..
288 REAL ALPHA, BETA, H11, H12, H21, H22, REFSUM,
289 $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, T1, T2,
290 $ t3, tst1, tst2, ulp
291 INTEGER I, I2, I4, INCOL, J, JBOT, JCOL, JLEN,
292 $ JROW, JTOP, K, K1, KDU, KMS, KRCOL,
293 $ m, m22, mbot, mtop, nbmps, ndcol,
294 $ ns, nu
295 LOGICAL ACCUM, BMP22
296* ..
297* .. External Functions ..
298 REAL SLAMCH
299 EXTERNAL SLAMCH
300* ..
301* .. Intrinsic Functions ..
302*
303 INTRINSIC abs, max, min, mod, real
304* ..
305* .. Local Arrays ..
306 REAL VT( 3 )
307* ..
308* .. External Subroutines ..
309 EXTERNAL sgemm, slabad, slacpy, slaqr1, slarfg, slaset,
310 $ strmm
311* ..
312* .. Executable Statements ..
313*
314* ==== If there are no shifts, then there is nothing to do. ====
315*
316 IF( nshfts.LT.2 )
317 $ RETURN
318*
319* ==== If the active block is empty or 1-by-1, then there
320* . is nothing to do. ====
321*
322 IF( ktop.GE.kbot )
323 $ RETURN
324*
325* ==== Shuffle shifts into pairs of real shifts and pairs
326* . of complex conjugate shifts assuming complex
327* . conjugate shifts are already adjacent to one
328* . another. ====
329*
330 DO 10 i = 1, nshfts - 2, 2
331 IF( si( i ).NE.-si( i+1 ) ) THEN
332*
333 swap = sr( i )
334 sr( i ) = sr( i+1 )
335 sr( i+1 ) = sr( i+2 )
336 sr( i+2 ) = swap
337*
338 swap = si( i )
339 si( i ) = si( i+1 )
340 si( i+1 ) = si( i+2 )
341 si( i+2 ) = swap
342 END IF
343 10 CONTINUE
344*
345* ==== NSHFTS is supposed to be even, but if it is odd,
346* . then simply reduce it by one. The shuffle above
347* . ensures that the dropped shift is real and that
348* . the remaining shifts are paired. ====
349*
350 ns = nshfts - mod( nshfts, 2 )
351*
352* ==== Machine constants for deflation ====
353*
354 safmin = slamch( 'SAFE MINIMUM' )
355 safmax = one / safmin
356 CALL slabad( safmin, safmax )
357 ulp = slamch( 'PRECISION' )
358 smlnum = safmin*( real( n ) / ulp )
359*
360* ==== Use accumulated reflections to update far-from-diagonal
361* . entries ? ====
362*
363 accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
364*
365* ==== clear trash ====
366*
367 IF( ktop+2.LE.kbot )
368 $ h( ktop+2, ktop ) = zero
369*
370* ==== NBMPS = number of 2-shift bulges in the chain ====
371*
372 nbmps = ns / 2
373*
374* ==== KDU = width of slab ====
375*
376 kdu = 4*nbmps
377*
378* ==== Create and chase chains of NBMPS bulges ====
379*
380 DO 180 incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
381*
382* JTOP = Index from which updates from the right start.
383*
384 IF( accum ) THEN
385 jtop = max( ktop, incol )
386 ELSE IF( wantt ) THEN
387 jtop = 1
388 ELSE
389 jtop = ktop
390 END IF
391*
392 ndcol = incol + kdu
393 IF( accum )
394 $ CALL slaset( 'ALL', kdu, kdu, zero, one, u, ldu )
395*
396* ==== Near-the-diagonal bulge chase. The following loop
397* . performs the near-the-diagonal part of a small bulge
398* . multi-shift QR sweep. Each 4*NBMPS column diagonal
399* . chunk extends from column INCOL to column NDCOL
400* . (including both column INCOL and column NDCOL). The
401* . following loop chases a 2*NBMPS+1 column long chain of
402* . NBMPS bulges 2*NBMPS-1 columns to the right. (INCOL
403* . may be less than KTOP and and NDCOL may be greater than
404* . KBOT indicating phantom columns from which to chase
405* . bulges before they are actually introduced or to which
406* . to chase bulges beyond column KBOT.) ====
407*
408 DO 145 krcol = incol, min( incol+2*nbmps-1, kbot-2 )
409*
410* ==== Bulges number MTOP to MBOT are active double implicit
411* . shift bulges. There may or may not also be small
412* . 2-by-2 bulge, if there is room. The inactive bulges
413* . (if any) must wait until the active bulges have moved
414* . down the diagonal to make room. The phantom matrix
415* . paradigm described above helps keep track. ====
416*
417 mtop = max( 1, ( ktop-krcol ) / 2+1 )
418 mbot = min( nbmps, ( kbot-krcol-1 ) / 2 )
419 m22 = mbot + 1
420 bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+2*( m22-1 ) ).EQ.
421 $ ( kbot-2 )
422*
423* ==== Generate reflections to chase the chain right
424* . one column. (The minimum value of K is KTOP-1.) ====
425*
426 IF ( bmp22 ) THEN
427*
428* ==== Special case: 2-by-2 reflection at bottom treated
429* . separately ====
430*
431 k = krcol + 2*( m22-1 )
432 IF( k.EQ.ktop-1 ) THEN
433 CALL slaqr1( 2, h( k+1, k+1 ), ldh, sr( 2*m22-1 ),
434 $ si( 2*m22-1 ), sr( 2*m22 ), si( 2*m22 ),
435 $ v( 1, m22 ) )
436 beta = v( 1, m22 )
437 CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
438 ELSE
439 beta = h( k+1, k )
440 v( 2, m22 ) = h( k+2, k )
441 CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
442 h( k+1, k ) = beta
443 h( k+2, k ) = zero
444 END IF
445
446*
447* ==== Perform update from right within
448* . computational window. ====
449*
450 t1 = v( 1, m22 )
451 t2 = t1*v( 2, m22 )
452 DO 30 j = jtop, min( kbot, k+3 )
453 refsum = h( j, k+1 ) + v( 2, m22 )*h( j, k+2 )
454 h( j, k+1 ) = h( j, k+1 ) - refsum*t1
455 h( j, k+2 ) = h( j, k+2 ) - refsum*t2
456 30 CONTINUE
457*
458* ==== Perform update from left within
459* . computational window. ====
460*
461 IF( accum ) THEN
462 jbot = min( ndcol, kbot )
463 ELSE IF( wantt ) THEN
464 jbot = n
465 ELSE
466 jbot = kbot
467 END IF
468 t1 = v( 1, m22 )
469 t2 = t1*v( 2, m22 )
470 DO 40 j = k+1, jbot
471 refsum = h( k+1, j ) + v( 2, m22 )*h( k+2, j )
472 h( k+1, j ) = h( k+1, j ) - refsum*t1
473 h( k+2, j ) = h( k+2, j ) - refsum*t2
474 40 CONTINUE
475*
476* ==== The following convergence test requires that
477* . the tradition small-compared-to-nearby-diagonals
478* . criterion and the Ahues & Tisseur (LAWN 122, 1997)
479* . criteria both be satisfied. The latter improves
480* . accuracy in some examples. Falling back on an
481* . alternate convergence criterion when TST1 or TST2
482* . is zero (as done here) is traditional but probably
483* . unnecessary. ====
484*
485 IF( k.GE.ktop ) THEN
486 IF( h( k+1, k ).NE.zero ) THEN
487 tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
488 IF( tst1.EQ.zero ) THEN
489 IF( k.GE.ktop+1 )
490 $ tst1 = tst1 + abs( h( k, k-1 ) )
491 IF( k.GE.ktop+2 )
492 $ tst1 = tst1 + abs( h( k, k-2 ) )
493 IF( k.GE.ktop+3 )
494 $ tst1 = tst1 + abs( h( k, k-3 ) )
495 IF( k.LE.kbot-2 )
496 $ tst1 = tst1 + abs( h( k+2, k+1 ) )
497 IF( k.LE.kbot-3 )
498 $ tst1 = tst1 + abs( h( k+3, k+1 ) )
499 IF( k.LE.kbot-4 )
500 $ tst1 = tst1 + abs( h( k+4, k+1 ) )
501 END IF
502 IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
503 $ THEN
504 h12 = max( abs( h( k+1, k ) ),
505 $ abs( h( k, k+1 ) ) )
506 h21 = min( abs( h( k+1, k ) ),
507 $ abs( h( k, k+1 ) ) )
508 h11 = max( abs( h( k+1, k+1 ) ),
509 $ abs( h( k, k )-h( k+1, k+1 ) ) )
510 h22 = min( abs( h( k+1, k+1 ) ),
511 $ abs( h( k, k )-h( k+1, k+1 ) ) )
512 scl = h11 + h12
513 tst2 = h22*( h11 / scl )
514*
515 IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
516 $ max( smlnum, ulp*tst2 ) ) THEN
517 h( k+1, k ) = zero
518 END IF
519 END IF
520 END IF
521 END IF
522*
523* ==== Accumulate orthogonal transformations. ====
524*
525 IF( accum ) THEN
526 kms = k - incol
527 t1 = v( 1, m22 )
528 t2 = t1*v( 2, m22 )
529 DO 50 j = max( 1, ktop-incol ), kdu
530 refsum = u( j, kms+1 ) + v( 2, m22 )*u( j, kms+2 )
531 u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
532 u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
533 50 CONTINUE
534 ELSE IF( wantz ) THEN
535 t1 = v( 1, m22 )
536 t2 = t1*v( 2, m22 )
537 DO 60 j = iloz, ihiz
538 refsum = z( j, k+1 )+v( 2, m22 )*z( j, k+2 )
539 z( j, k+1 ) = z( j, k+1 ) - refsum*t1
540 z( j, k+2 ) = z( j, k+2 ) - refsum*t2
541 60 CONTINUE
542 END IF
543 END IF
544*
545* ==== Normal case: Chain of 3-by-3 reflections ====
546*
547 DO 80 m = mbot, mtop, -1
548 k = krcol + 2*( m-1 )
549 IF( k.EQ.ktop-1 ) THEN
550 CALL slaqr1( 3, h( ktop, ktop ), ldh, sr( 2*m-1 ),
551 $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
552 $ v( 1, m ) )
553 alpha = v( 1, m )
554 CALL slarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
555 ELSE
556*
557* ==== Perform delayed transformation of row below
558* . Mth bulge. Exploit fact that first two elements
559* . of row are actually zero. ====
560*
561 refsum = v( 1, m )*v( 3, m )*h( k+3, k+2 )
562 h( k+3, k ) = -refsum
563 h( k+3, k+1 ) = -refsum*v( 2, m )
564 h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*v( 3, m )
565*
566* ==== Calculate reflection to move
567* . Mth bulge one step. ====
568*
569 beta = h( k+1, k )
570 v( 2, m ) = h( k+2, k )
571 v( 3, m ) = h( k+3, k )
572 CALL slarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
573*
574* ==== A Bulge may collapse because of vigilant
575* . deflation or destructive underflow. In the
576* . underflow case, try the two-small-subdiagonals
577* . trick to try to reinflate the bulge. ====
578*
579 IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
580 $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
581*
582* ==== Typical case: not collapsed (yet). ====
583*
584 h( k+1, k ) = beta
585 h( k+2, k ) = zero
586 h( k+3, k ) = zero
587 ELSE
588*
589* ==== Atypical case: collapsed. Attempt to
590* . reintroduce ignoring H(K+1,K) and H(K+2,K).
591* . If the fill resulting from the new
592* . reflector is too large, then abandon it.
593* . Otherwise, use the new one. ====
594*
595 CALL slaqr1( 3, h( k+1, k+1 ), ldh, sr( 2*m-1 ),
596 $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
597 $ vt )
598 alpha = vt( 1 )
599 CALL slarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
600 refsum = vt( 1 )*( h( k+1, k )+vt( 2 )*
601 $ h( k+2, k ) )
602*
603 IF( abs( h( k+2, k )-refsum*vt( 2 ) )+
604 $ abs( refsum*vt( 3 ) ).GT.ulp*
605 $ ( abs( h( k, k ) )+abs( h( k+1,
606 $ k+1 ) )+abs( h( k+2, k+2 ) ) ) ) THEN
607*
608* ==== Starting a new bulge here would
609* . create non-negligible fill. Use
610* . the old one with trepidation. ====
611*
612 h( k+1, k ) = beta
613 h( k+2, k ) = zero
614 h( k+3, k ) = zero
615 ELSE
616*
617* ==== Starting a new bulge here would
618* . create only negligible fill.
619* . Replace the old reflector with
620* . the new one. ====
621*
622 h( k+1, k ) = h( k+1, k ) - refsum
623 h( k+2, k ) = zero
624 h( k+3, k ) = zero
625 v( 1, m ) = vt( 1 )
626 v( 2, m ) = vt( 2 )
627 v( 3, m ) = vt( 3 )
628 END IF
629 END IF
630 END IF
631*
632* ==== Apply reflection from the right and
633* . the first column of update from the left.
634* . These updates are required for the vigilant
635* . deflation check. We still delay most of the
636* . updates from the left for efficiency. ====
637*
638 t1 = v( 1, m )
639 t2 = t1*v( 2, m )
640 t3 = t1*v( 3, m )
641 DO 70 j = jtop, min( kbot, k+3 )
642 refsum = h( j, k+1 ) + v( 2, m )*h( j, k+2 )
643 $ + v( 3, m )*h( j, k+3 )
644 h( j, k+1 ) = h( j, k+1 ) - refsum*t1
645 h( j, k+2 ) = h( j, k+2 ) - refsum*t2
646 h( j, k+3 ) = h( j, k+3 ) - refsum*t3
647 70 CONTINUE
648*
649* ==== Perform update from left for subsequent
650* . column. ====
651*
652 refsum = h( k+1, k+1 ) + v( 2, m )*h( k+2, k+1 )
653 $ + v( 3, m )*h( k+3, k+1 )
654 h( k+1, k+1 ) = h( k+1, k+1 ) - refsum*t1
655 h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*t2
656 h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*t3
657*
658* ==== The following convergence test requires that
659* . the tradition small-compared-to-nearby-diagonals
660* . criterion and the Ahues & Tisseur (LAWN 122, 1997)
661* . criteria both be satisfied. The latter improves
662* . accuracy in some examples. Falling back on an
663* . alternate convergence criterion when TST1 or TST2
664* . is zero (as done here) is traditional but probably
665* . unnecessary. ====
666*
667 IF( k.LT.ktop)
668 $ cycle
669 IF( h( k+1, k ).NE.zero ) THEN
670 tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
671 IF( tst1.EQ.zero ) THEN
672 IF( k.GE.ktop+1 )
673 $ tst1 = tst1 + abs( h( k, k-1 ) )
674 IF( k.GE.ktop+2 )
675 $ tst1 = tst1 + abs( h( k, k-2 ) )
676 IF( k.GE.ktop+3 )
677 $ tst1 = tst1 + abs( h( k, k-3 ) )
678 IF( k.LE.kbot-2 )
679 $ tst1 = tst1 + abs( h( k+2, k+1 ) )
680 IF( k.LE.kbot-3 )
681 $ tst1 = tst1 + abs( h( k+3, k+1 ) )
682 IF( k.LE.kbot-4 )
683 $ tst1 = tst1 + abs( h( k+4, k+1 ) )
684 END IF
685 IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
686 $ THEN
687 h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
688 h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
689 h11 = max( abs( h( k+1, k+1 ) ),
690 $ abs( h( k, k )-h( k+1, k+1 ) ) )
691 h22 = min( abs( h( k+1, k+1 ) ),
692 $ abs( h( k, k )-h( k+1, k+1 ) ) )
693 scl = h11 + h12
694 tst2 = h22*( h11 / scl )
695*
696 IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
697 $ max( smlnum, ulp*tst2 ) ) THEN
698 h( k+1, k ) = zero
699 END IF
700 END IF
701 END IF
702 80 CONTINUE
703*
704* ==== Multiply H by reflections from the left ====
705*
706 IF( accum ) THEN
707 jbot = min( ndcol, kbot )
708 ELSE IF( wantt ) THEN
709 jbot = n
710 ELSE
711 jbot = kbot
712 END IF
713*
714 DO 100 m = mbot, mtop, -1
715 k = krcol + 2*( m-1 )
716 t1 = v( 1, m )
717 t2 = t1*v( 2, m )
718 t3 = t1*v( 3, m )
719 DO 90 j = max( ktop, krcol + 2*m ), jbot
720 refsum = h( k+1, j ) + v( 2, m )*h( k+2, j )
721 $ + v( 3, m )*h( k+3, j )
722 h( k+1, j ) = h( k+1, j ) - refsum*t1
723 h( k+2, j ) = h( k+2, j ) - refsum*t2
724 h( k+3, j ) = h( k+3, j ) - refsum*t3
725 90 CONTINUE
726 100 CONTINUE
727*
728* ==== Accumulate orthogonal transformations. ====
729*
730 IF( accum ) THEN
731*
732* ==== Accumulate U. (If needed, update Z later
733* . with an efficient matrix-matrix
734* . multiply.) ====
735*
736 DO 120 m = mbot, mtop, -1
737 k = krcol + 2*( m-1 )
738 kms = k - incol
739 i2 = max( 1, ktop-incol )
740 i2 = max( i2, kms-(krcol-incol)+1 )
741 i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 )
742 t1 = v( 1, m )
743 t2 = t1*v( 2, m )
744 t3 = t1*v( 3, m )
745 DO 110 j = i2, i4
746 refsum = u( j, kms+1 ) + v( 2, m )*u( j, kms+2 )
747 $ + v( 3, m )*u( j, kms+3 )
748 u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
749 u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
750 u( j, kms+3 ) = u( j, kms+3 ) - refsum*t3
751 110 CONTINUE
752 120 CONTINUE
753 ELSE IF( wantz ) THEN
754*
755* ==== U is not accumulated, so update Z
756* . now by multiplying by reflections
757* . from the right. ====
758*
759 DO 140 m = mbot, mtop, -1
760 k = krcol + 2*( m-1 )
761 t1 = v( 1, m )
762 t2 = t1*v( 2, m )
763 t3 = t1*v( 3, m )
764 DO 130 j = iloz, ihiz
765 refsum = z( j, k+1 ) + v( 2, m )*z( j, k+2 )
766 $ + v( 3, m )*z( j, k+3 )
767 z( j, k+1 ) = z( j, k+1 ) - refsum*t1
768 z( j, k+2 ) = z( j, k+2 ) - refsum*t2
769 z( j, k+3 ) = z( j, k+3 ) - refsum*t3
770 130 CONTINUE
771 140 CONTINUE
772 END IF
773*
774* ==== End of near-the-diagonal bulge chase. ====
775*
776 145 CONTINUE
777*
778* ==== Use U (if accumulated) to update far-from-diagonal
779* . entries in H. If required, use U to update Z as
780* . well. ====
781*
782 IF( accum ) THEN
783 IF( wantt ) THEN
784 jtop = 1
785 jbot = n
786 ELSE
787 jtop = ktop
788 jbot = kbot
789 END IF
790 k1 = max( 1, ktop-incol )
791 nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
792*
793* ==== Horizontal Multiply ====
794*
795 DO 150 jcol = min( ndcol, kbot ) + 1, jbot, nh
796 jlen = min( nh, jbot-jcol+1 )
797 CALL sgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),
798 $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
799 $ ldwh )
800 CALL slacpy( 'ALL', nu, jlen, wh, ldwh,
801 $ h( incol+k1, jcol ), ldh )
802 150 CONTINUE
803*
804* ==== Vertical multiply ====
805*
806 DO 160 jrow = jtop, max( ktop, incol ) - 1, nv
807 jlen = min( nv, max( ktop, incol )-jrow )
808 CALL sgemm( 'N', 'N', jlen, nu, nu, one,
809 $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
810 $ ldu, zero, wv, ldwv )
811 CALL slacpy( 'ALL', jlen, nu, wv, ldwv,
812 $ h( jrow, incol+k1 ), ldh )
813 160 CONTINUE
814*
815* ==== Z multiply (also vertical) ====
816*
817 IF( wantz ) THEN
818 DO 170 jrow = iloz, ihiz, nv
819 jlen = min( nv, ihiz-jrow+1 )
820 CALL sgemm( 'N', 'N', jlen, nu, nu, one,
821 $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
822 $ ldu, zero, wv, ldwv )
823 CALL slacpy( 'ALL', jlen, nu, wv, ldwv,
824 $ z( jrow, incol+k1 ), ldz )
825 170 CONTINUE
826 END IF
827 END IF
828 180 CONTINUE
829*
830* ==== End of SLAQR5 ====
831*
832 END
subroutine slabad(SMALL, LARGE)
SLABAD
Definition: slabad.f:74
subroutine slaset(UPLO, M, N, ALPHA, BETA, A, LDA)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition: slaset.f:110
subroutine slacpy(UPLO, M, N, A, LDA, B, LDB)
SLACPY copies all or part of one two-dimensional array to another.
Definition: slacpy.f:103
subroutine slarfg(N, ALPHA, X, INCX, TAU)
SLARFG generates an elementary reflector (Householder matrix).
Definition: slarfg.f:106
subroutine slaqr1(N, H, LDH, SR1, SI1, SR2, SI2, V)
SLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and spe...
Definition: slaqr1.f:121
subroutine slaqr5(WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV, WV, LDWV, NH, WH, LDWH)
SLAQR5 performs a single small-bulge multi-shift QR sweep.
Definition: slaqr5.f:265
subroutine strmm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
STRMM
Definition: strmm.f:177
subroutine sgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
SGEMM
Definition: sgemm.f:187