LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
slaqr5.f
Go to the documentation of this file.
1*> \brief \b SLAQR5 performs a single small-bulge multi-shift QR sweep.
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download SLAQR5 + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slaqr5.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slaqr5.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slaqr5.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18* Definition:
19* ===========
20*
21* SUBROUTINE SLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
22* SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
23* LDU, NV, WV, LDWV, NH, WH, LDWH )
24*
25* .. Scalar Arguments ..
26* INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
27* $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
28* LOGICAL WANTT, WANTZ
29* ..
30* .. Array Arguments ..
31* REAL H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
32* $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
33* $ Z( LDZ, * )
34* ..
35*
36*
37*> \par Purpose:
38* =============
39*>
40*> \verbatim
41*>
42*> SLAQR5, called by SLAQR0, performs a
43*> single small-bulge multi-shift QR sweep.
44*> \endverbatim
45*
46* Arguments:
47* ==========
48*
49*> \param[in] WANTT
50*> \verbatim
51*> WANTT is LOGICAL
52*> WANTT = .true. if the quasi-triangular Schur factor
53*> is being computed. WANTT is set to .false. otherwise.
54*> \endverbatim
55*>
56*> \param[in] WANTZ
57*> \verbatim
58*> WANTZ is LOGICAL
59*> WANTZ = .true. if the orthogonal Schur factor is being
60*> computed. WANTZ is set to .false. otherwise.
61*> \endverbatim
62*>
63*> \param[in] KACC22
64*> \verbatim
65*> KACC22 is INTEGER with value 0, 1, or 2.
66*> Specifies the computation mode of far-from-diagonal
67*> orthogonal updates.
68*> = 0: SLAQR5 does not accumulate reflections and does not
69*> use matrix-matrix multiply to update far-from-diagonal
70*> matrix entries.
71*> = 1: SLAQR5 accumulates reflections and uses matrix-matrix
72*> multiply to update the far-from-diagonal matrix entries.
73*> = 2: Same as KACC22 = 1. This option used to enable exploiting
74*> the 2-by-2 structure during matrix multiplications, but
75*> this is no longer supported.
76*> \endverbatim
77*>
78*> \param[in] N
79*> \verbatim
80*> N is INTEGER
81*> N is the order of the Hessenberg matrix H upon which this
82*> subroutine operates.
83*> \endverbatim
84*>
85*> \param[in] KTOP
86*> \verbatim
87*> KTOP is INTEGER
88*> \endverbatim
89*>
90*> \param[in] KBOT
91*> \verbatim
92*> KBOT is INTEGER
93*> These are the first and last rows and columns of an
94*> isolated diagonal block upon which the QR sweep is to be
95*> applied. It is assumed without a check that
96*> either KTOP = 1 or H(KTOP,KTOP-1) = 0
97*> and
98*> either KBOT = N or H(KBOT+1,KBOT) = 0.
99*> \endverbatim
100*>
101*> \param[in] NSHFTS
102*> \verbatim
103*> NSHFTS is INTEGER
104*> NSHFTS gives the number of simultaneous shifts. NSHFTS
105*> must be positive and even.
106*> \endverbatim
107*>
108*> \param[in,out] SR
109*> \verbatim
110*> SR is REAL array, dimension (NSHFTS)
111*> \endverbatim
112*>
113*> \param[in,out] SI
114*> \verbatim
115*> SI is REAL array, dimension (NSHFTS)
116*> SR contains the real parts and SI contains the imaginary
117*> parts of the NSHFTS shifts of origin that define the
118*> multi-shift QR sweep. On output SR and SI may be
119*> reordered.
120*> \endverbatim
121*>
122*> \param[in,out] H
123*> \verbatim
124*> H is REAL array, dimension (LDH,N)
125*> On input H contains a Hessenberg matrix. On output a
126*> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
127*> to the isolated diagonal block in rows and columns KTOP
128*> through KBOT.
129*> \endverbatim
130*>
131*> \param[in] LDH
132*> \verbatim
133*> LDH is INTEGER
134*> LDH is the leading dimension of H just as declared in the
135*> calling procedure. LDH >= MAX(1,N).
136*> \endverbatim
137*>
138*> \param[in] ILOZ
139*> \verbatim
140*> ILOZ is INTEGER
141*> \endverbatim
142*>
143*> \param[in] IHIZ
144*> \verbatim
145*> IHIZ is INTEGER
146*> Specify the rows of Z to which transformations must be
147*> applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N
148*> \endverbatim
149*>
150*> \param[in,out] Z
151*> \verbatim
152*> Z is REAL array, dimension (LDZ,IHIZ)
153*> If WANTZ = .TRUE., then the QR Sweep orthogonal
154*> similarity transformation is accumulated into
155*> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
156*> If WANTZ = .FALSE., then Z is unreferenced.
157*> \endverbatim
158*>
159*> \param[in] LDZ
160*> \verbatim
161*> LDZ is INTEGER
162*> LDA is the leading dimension of Z just as declared in
163*> the calling procedure. LDZ >= N.
164*> \endverbatim
165*>
166*> \param[out] V
167*> \verbatim
168*> V is REAL array, dimension (LDV,NSHFTS/2)
169*> \endverbatim
170*>
171*> \param[in] LDV
172*> \verbatim
173*> LDV is INTEGER
174*> LDV is the leading dimension of V as declared in the
175*> calling procedure. LDV >= 3.
176*> \endverbatim
177*>
178*> \param[out] U
179*> \verbatim
180*> U is REAL array, dimension (LDU,2*NSHFTS)
181*> \endverbatim
182*>
183*> \param[in] LDU
184*> \verbatim
185*> LDU is INTEGER
186*> LDU is the leading dimension of U just as declared in the
187*> in the calling subroutine. LDU >= 2*NSHFTS.
188*> \endverbatim
189*>
190*> \param[in] NV
191*> \verbatim
192*> NV is INTEGER
193*> NV is the number of rows in WV agailable for workspace.
194*> NV >= 1.
195*> \endverbatim
196*>
197*> \param[out] WV
198*> \verbatim
199*> WV is REAL array, dimension (LDWV,2*NSHFTS)
200*> \endverbatim
201*>
202*> \param[in] LDWV
203*> \verbatim
204*> LDWV is INTEGER
205*> LDWV is the leading dimension of WV as declared in the
206*> in the calling subroutine. LDWV >= NV.
207*> \endverbatim
208*
209*> \param[in] NH
210*> \verbatim
211*> NH is INTEGER
212*> NH is the number of columns in array WH available for
213*> workspace. NH >= 1.
214*> \endverbatim
215*>
216*> \param[out] WH
217*> \verbatim
218*> WH is REAL array, dimension (LDWH,NH)
219*> \endverbatim
220*>
221*> \param[in] LDWH
222*> \verbatim
223*> LDWH is INTEGER
224*> Leading dimension of WH just as declared in the
225*> calling procedure. LDWH >= 2*NSHFTS.
226*> \endverbatim
227*>
228* Authors:
229* ========
230*
231*> \author Univ. of Tennessee
232*> \author Univ. of California Berkeley
233*> \author Univ. of Colorado Denver
234*> \author NAG Ltd.
235*
236*> \ingroup laqr5
237*
238*> \par Contributors:
239* ==================
240*>
241*> Karen Braman and Ralph Byers, Department of Mathematics,
242*> University of Kansas, USA
243*>
244*> Lars Karlsson, Daniel Kressner, and Bruno Lang
245*>
246*> Thijs Steel, Department of Computer science,
247*> KU Leuven, Belgium
248*
249*> \par References:
250* ================
251*>
252*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
253*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3
254*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages
255*> 929--947, 2002.
256*>
257*> Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed
258*> chains of bulges in multishift QR algorithms.
259*> ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014).
260*>
261* =====================================================================
262 SUBROUTINE slaqr5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
263 $ SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
264 $ LDU, NV, WV, LDWV, NH, WH, LDWH )
265 IMPLICIT NONE
266*
267* -- LAPACK auxiliary routine --
268* -- LAPACK is a software package provided by Univ. of Tennessee, --
269* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
270*
271* .. Scalar Arguments ..
272 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
273 $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
274 LOGICAL WANTT, WANTZ
275* ..
276* .. Array Arguments ..
277 REAL H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
278 $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
279 $ z( ldz, * )
280* ..
281*
282* ================================================================
283* .. Parameters ..
284 REAL ZERO, ONE
285 PARAMETER ( ZERO = 0.0e0, one = 1.0e0 )
286* ..
287* .. Local Scalars ..
288 REAL ALPHA, BETA, H11, H12, H21, H22, REFSUM,
289 $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, T1, T2,
290 $ t3, tst1, tst2, ulp
291 INTEGER I, I2, I4, INCOL, J, JBOT, JCOL, JLEN,
292 $ JROW, JTOP, K, K1, KDU, KMS, KRCOL,
293 $ m, m22, mbot, mtop, nbmps, ndcol,
294 $ ns, nu
295 LOGICAL ACCUM, BMP22
296* ..
297* .. External Functions ..
298 REAL SLAMCH
299 EXTERNAL SLAMCH
300* ..
301* .. Intrinsic Functions ..
302*
303 INTRINSIC abs, max, min, mod, real
304* ..
305* .. Local Arrays ..
306 REAL VT( 3 )
307* ..
308* .. External Subroutines ..
309 EXTERNAL sgemm, slacpy, slaqr1, slarfg, slaset, strmm
310* ..
311* .. Executable Statements ..
312*
313* ==== If there are no shifts, then there is nothing to do. ====
314*
315 IF( nshfts.LT.2 )
316 $ RETURN
317*
318* ==== If the active block is empty or 1-by-1, then there
319* . is nothing to do. ====
320*
321 IF( ktop.GE.kbot )
322 $ RETURN
323*
324* ==== Shuffle shifts into pairs of real shifts and pairs
325* . of complex conjugate shifts assuming complex
326* . conjugate shifts are already adjacent to one
327* . another. ====
328*
329 DO 10 i = 1, nshfts - 2, 2
330 IF( si( i ).NE.-si( i+1 ) ) THEN
331*
332 swap = sr( i )
333 sr( i ) = sr( i+1 )
334 sr( i+1 ) = sr( i+2 )
335 sr( i+2 ) = swap
336*
337 swap = si( i )
338 si( i ) = si( i+1 )
339 si( i+1 ) = si( i+2 )
340 si( i+2 ) = swap
341 END IF
342 10 CONTINUE
343*
344* ==== NSHFTS is supposed to be even, but if it is odd,
345* . then simply reduce it by one. The shuffle above
346* . ensures that the dropped shift is real and that
347* . the remaining shifts are paired. ====
348*
349 ns = nshfts - mod( nshfts, 2 )
350*
351* ==== Machine constants for deflation ====
352*
353 safmin = slamch( 'SAFE MINIMUM' )
354 safmax = one / safmin
355 ulp = slamch( 'PRECISION' )
356 smlnum = safmin*( real( n ) / ulp )
357*
358* ==== Use accumulated reflections to update far-from-diagonal
359* . entries ? ====
360*
361 accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
362*
363* ==== clear trash ====
364*
365 IF( ktop+2.LE.kbot )
366 $ h( ktop+2, ktop ) = zero
367*
368* ==== NBMPS = number of 2-shift bulges in the chain ====
369*
370 nbmps = ns / 2
371*
372* ==== KDU = width of slab ====
373*
374 kdu = 4*nbmps
375*
376* ==== Create and chase chains of NBMPS bulges ====
377*
378 DO 180 incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
379*
380* JTOP = Index from which updates from the right start.
381*
382 IF( accum ) THEN
383 jtop = max( ktop, incol )
384 ELSE IF( wantt ) THEN
385 jtop = 1
386 ELSE
387 jtop = ktop
388 END IF
389*
390 ndcol = incol + kdu
391 IF( accum )
392 $ CALL slaset( 'ALL', kdu, kdu, zero, one, u, ldu )
393*
394* ==== Near-the-diagonal bulge chase. The following loop
395* . performs the near-the-diagonal part of a small bulge
396* . multi-shift QR sweep. Each 4*NBMPS column diagonal
397* . chunk extends from column INCOL to column NDCOL
398* . (including both column INCOL and column NDCOL). The
399* . following loop chases a 2*NBMPS+1 column long chain of
400* . NBMPS bulges 2*NBMPS-1 columns to the right. (INCOL
401* . may be less than KTOP and and NDCOL may be greater than
402* . KBOT indicating phantom columns from which to chase
403* . bulges before they are actually introduced or to which
404* . to chase bulges beyond column KBOT.) ====
405*
406 DO 145 krcol = incol, min( incol+2*nbmps-1, kbot-2 )
407*
408* ==== Bulges number MTOP to MBOT are active double implicit
409* . shift bulges. There may or may not also be small
410* . 2-by-2 bulge, if there is room. The inactive bulges
411* . (if any) must wait until the active bulges have moved
412* . down the diagonal to make room. The phantom matrix
413* . paradigm described above helps keep track. ====
414*
415 mtop = max( 1, ( ktop-krcol ) / 2+1 )
416 mbot = min( nbmps, ( kbot-krcol-1 ) / 2 )
417 m22 = mbot + 1
418 bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+2*( m22-1 ) ).EQ.
419 $ ( kbot-2 )
420*
421* ==== Generate reflections to chase the chain right
422* . one column. (The minimum value of K is KTOP-1.) ====
423*
424 IF ( bmp22 ) THEN
425*
426* ==== Special case: 2-by-2 reflection at bottom treated
427* . separately ====
428*
429 k = krcol + 2*( m22-1 )
430 IF( k.EQ.ktop-1 ) THEN
431 CALL slaqr1( 2, h( k+1, k+1 ), ldh, sr( 2*m22-1 ),
432 $ si( 2*m22-1 ), sr( 2*m22 ), si( 2*m22 ),
433 $ v( 1, m22 ) )
434 beta = v( 1, m22 )
435 CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
436 ELSE
437 beta = h( k+1, k )
438 v( 2, m22 ) = h( k+2, k )
439 CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
440 h( k+1, k ) = beta
441 h( k+2, k ) = zero
442 END IF
443
444*
445* ==== Perform update from right within
446* . computational window. ====
447*
448 t1 = v( 1, m22 )
449 t2 = t1*v( 2, m22 )
450 DO 30 j = jtop, min( kbot, k+3 )
451 refsum = h( j, k+1 ) + v( 2, m22 )*h( j, k+2 )
452 h( j, k+1 ) = h( j, k+1 ) - refsum*t1
453 h( j, k+2 ) = h( j, k+2 ) - refsum*t2
454 30 CONTINUE
455*
456* ==== Perform update from left within
457* . computational window. ====
458*
459 IF( accum ) THEN
460 jbot = min( ndcol, kbot )
461 ELSE IF( wantt ) THEN
462 jbot = n
463 ELSE
464 jbot = kbot
465 END IF
466 t1 = v( 1, m22 )
467 t2 = t1*v( 2, m22 )
468 DO 40 j = k+1, jbot
469 refsum = h( k+1, j ) + v( 2, m22 )*h( k+2, j )
470 h( k+1, j ) = h( k+1, j ) - refsum*t1
471 h( k+2, j ) = h( k+2, j ) - refsum*t2
472 40 CONTINUE
473*
474* ==== The following convergence test requires that
475* . the tradition small-compared-to-nearby-diagonals
476* . criterion and the Ahues & Tisseur (LAWN 122, 1997)
477* . criteria both be satisfied. The latter improves
478* . accuracy in some examples. Falling back on an
479* . alternate convergence criterion when TST1 or TST2
480* . is zero (as done here) is traditional but probably
481* . unnecessary. ====
482*
483 IF( k.GE.ktop ) THEN
484 IF( h( k+1, k ).NE.zero ) THEN
485 tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
486 IF( tst1.EQ.zero ) THEN
487 IF( k.GE.ktop+1 )
488 $ tst1 = tst1 + abs( h( k, k-1 ) )
489 IF( k.GE.ktop+2 )
490 $ tst1 = tst1 + abs( h( k, k-2 ) )
491 IF( k.GE.ktop+3 )
492 $ tst1 = tst1 + abs( h( k, k-3 ) )
493 IF( k.LE.kbot-2 )
494 $ tst1 = tst1 + abs( h( k+2, k+1 ) )
495 IF( k.LE.kbot-3 )
496 $ tst1 = tst1 + abs( h( k+3, k+1 ) )
497 IF( k.LE.kbot-4 )
498 $ tst1 = tst1 + abs( h( k+4, k+1 ) )
499 END IF
500 IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
501 $ THEN
502 h12 = max( abs( h( k+1, k ) ),
503 $ abs( h( k, k+1 ) ) )
504 h21 = min( abs( h( k+1, k ) ),
505 $ abs( h( k, k+1 ) ) )
506 h11 = max( abs( h( k+1, k+1 ) ),
507 $ abs( h( k, k )-h( k+1, k+1 ) ) )
508 h22 = min( abs( h( k+1, k+1 ) ),
509 $ abs( h( k, k )-h( k+1, k+1 ) ) )
510 scl = h11 + h12
511 tst2 = h22*( h11 / scl )
512*
513 IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
514 $ max( smlnum, ulp*tst2 ) ) THEN
515 h( k+1, k ) = zero
516 END IF
517 END IF
518 END IF
519 END IF
520*
521* ==== Accumulate orthogonal transformations. ====
522*
523 IF( accum ) THEN
524 kms = k - incol
525 t1 = v( 1, m22 )
526 t2 = t1*v( 2, m22 )
527 DO 50 j = max( 1, ktop-incol ), kdu
528 refsum = u( j, kms+1 ) + v( 2, m22 )*u( j, kms+2 )
529 u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
530 u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
531 50 CONTINUE
532 ELSE IF( wantz ) THEN
533 t1 = v( 1, m22 )
534 t2 = t1*v( 2, m22 )
535 DO 60 j = iloz, ihiz
536 refsum = z( j, k+1 )+v( 2, m22 )*z( j, k+2 )
537 z( j, k+1 ) = z( j, k+1 ) - refsum*t1
538 z( j, k+2 ) = z( j, k+2 ) - refsum*t2
539 60 CONTINUE
540 END IF
541 END IF
542*
543* ==== Normal case: Chain of 3-by-3 reflections ====
544*
545 DO 80 m = mbot, mtop, -1
546 k = krcol + 2*( m-1 )
547 IF( k.EQ.ktop-1 ) THEN
548 CALL slaqr1( 3, h( ktop, ktop ), ldh, sr( 2*m-1 ),
549 $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
550 $ v( 1, m ) )
551 alpha = v( 1, m )
552 CALL slarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
553 ELSE
554*
555* ==== Perform delayed transformation of row below
556* . Mth bulge. Exploit fact that first two elements
557* . of row are actually zero. ====
558*
559 t1 = v( 1, m )
560 t2 = t1*v( 2, m )
561 t3 = t1*v( 3, m )
562 refsum = v( 3, m )*h( k+3, k+2 )
563 h( k+3, k ) = -refsum*t1
564 h( k+3, k+1 ) = -refsum*t2
565 h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*t3
566*
567* ==== Calculate reflection to move
568* . Mth bulge one step. ====
569*
570 beta = h( k+1, k )
571 v( 2, m ) = h( k+2, k )
572 v( 3, m ) = h( k+3, k )
573 CALL slarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
574*
575* ==== A Bulge may collapse because of vigilant
576* . deflation or destructive underflow. In the
577* . underflow case, try the two-small-subdiagonals
578* . trick to try to reinflate the bulge. ====
579*
580 IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
581 $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
582*
583* ==== Typical case: not collapsed (yet). ====
584*
585 h( k+1, k ) = beta
586 h( k+2, k ) = zero
587 h( k+3, k ) = zero
588 ELSE
589*
590* ==== Atypical case: collapsed. Attempt to
591* . reintroduce ignoring H(K+1,K) and H(K+2,K).
592* . If the fill resulting from the new
593* . reflector is too large, then abandon it.
594* . Otherwise, use the new one. ====
595*
596 CALL slaqr1( 3, h( k+1, k+1 ), ldh, sr( 2*m-1 ),
597 $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
598 $ vt )
599 alpha = vt( 1 )
600 CALL slarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
601 t1 = vt( 1 )
602 t2 = t1*vt( 2 )
603 t3 = t2*vt( 3 )
604 refsum = h( k+1, k )+vt( 2 )*h( k+2, k )
605*
606 IF( abs( h( k+2, k )-refsum*t2 )+
607 $ abs( refsum*t3 ).GT.ulp*
608 $ ( abs( h( k, k ) )+abs( h( k+1,
609 $ k+1 ) )+abs( h( k+2, k+2 ) ) ) ) THEN
610*
611* ==== Starting a new bulge here would
612* . create non-negligible fill. Use
613* . the old one with trepidation. ====
614*
615 h( k+1, k ) = beta
616 h( k+2, k ) = zero
617 h( k+3, k ) = zero
618 ELSE
619*
620* ==== Starting a new bulge here would
621* . create only negligible fill.
622* . Replace the old reflector with
623* . the new one. ====
624*
625 h( k+1, k ) = h( k+1, k ) - refsum*t1
626 h( k+2, k ) = zero
627 h( k+3, k ) = zero
628 v( 1, m ) = vt( 1 )
629 v( 2, m ) = vt( 2 )
630 v( 3, m ) = vt( 3 )
631 END IF
632 END IF
633 END IF
634*
635* ==== Apply reflection from the right and
636* . the first column of update from the left.
637* . These updates are required for the vigilant
638* . deflation check. We still delay most of the
639* . updates from the left for efficiency. ====
640*
641 t1 = v( 1, m )
642 t2 = t1*v( 2, m )
643 t3 = t1*v( 3, m )
644 DO 70 j = jtop, min( kbot, k+3 )
645 refsum = h( j, k+1 ) + v( 2, m )*h( j, k+2 )
646 $ + v( 3, m )*h( j, k+3 )
647 h( j, k+1 ) = h( j, k+1 ) - refsum*t1
648 h( j, k+2 ) = h( j, k+2 ) - refsum*t2
649 h( j, k+3 ) = h( j, k+3 ) - refsum*t3
650 70 CONTINUE
651*
652* ==== Perform update from left for subsequent
653* . column. ====
654*
655 refsum = h( k+1, k+1 ) + v( 2, m )*h( k+2, k+1 )
656 $ + v( 3, m )*h( k+3, k+1 )
657 h( k+1, k+1 ) = h( k+1, k+1 ) - refsum*t1
658 h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*t2
659 h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*t3
660*
661* ==== The following convergence test requires that
662* . the tradition small-compared-to-nearby-diagonals
663* . criterion and the Ahues & Tisseur (LAWN 122, 1997)
664* . criteria both be satisfied. The latter improves
665* . accuracy in some examples. Falling back on an
666* . alternate convergence criterion when TST1 or TST2
667* . is zero (as done here) is traditional but probably
668* . unnecessary. ====
669*
670 IF( k.LT.ktop)
671 $ cycle
672 IF( h( k+1, k ).NE.zero ) THEN
673 tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
674 IF( tst1.EQ.zero ) THEN
675 IF( k.GE.ktop+1 )
676 $ tst1 = tst1 + abs( h( k, k-1 ) )
677 IF( k.GE.ktop+2 )
678 $ tst1 = tst1 + abs( h( k, k-2 ) )
679 IF( k.GE.ktop+3 )
680 $ tst1 = tst1 + abs( h( k, k-3 ) )
681 IF( k.LE.kbot-2 )
682 $ tst1 = tst1 + abs( h( k+2, k+1 ) )
683 IF( k.LE.kbot-3 )
684 $ tst1 = tst1 + abs( h( k+3, k+1 ) )
685 IF( k.LE.kbot-4 )
686 $ tst1 = tst1 + abs( h( k+4, k+1 ) )
687 END IF
688 IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
689 $ THEN
690 h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
691 h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
692 h11 = max( abs( h( k+1, k+1 ) ),
693 $ abs( h( k, k )-h( k+1, k+1 ) ) )
694 h22 = min( abs( h( k+1, k+1 ) ),
695 $ abs( h( k, k )-h( k+1, k+1 ) ) )
696 scl = h11 + h12
697 tst2 = h22*( h11 / scl )
698*
699 IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
700 $ max( smlnum, ulp*tst2 ) ) THEN
701 h( k+1, k ) = zero
702 END IF
703 END IF
704 END IF
705 80 CONTINUE
706*
707* ==== Multiply H by reflections from the left ====
708*
709 IF( accum ) THEN
710 jbot = min( ndcol, kbot )
711 ELSE IF( wantt ) THEN
712 jbot = n
713 ELSE
714 jbot = kbot
715 END IF
716*
717 DO 100 m = mbot, mtop, -1
718 k = krcol + 2*( m-1 )
719 t1 = v( 1, m )
720 t2 = t1*v( 2, m )
721 t3 = t1*v( 3, m )
722 DO 90 j = max( ktop, krcol + 2*m ), jbot
723 refsum = h( k+1, j ) + v( 2, m )*h( k+2, j )
724 $ + v( 3, m )*h( k+3, j )
725 h( k+1, j ) = h( k+1, j ) - refsum*t1
726 h( k+2, j ) = h( k+2, j ) - refsum*t2
727 h( k+3, j ) = h( k+3, j ) - refsum*t3
728 90 CONTINUE
729 100 CONTINUE
730*
731* ==== Accumulate orthogonal transformations. ====
732*
733 IF( accum ) THEN
734*
735* ==== Accumulate U. (If needed, update Z later
736* . with an efficient matrix-matrix
737* . multiply.) ====
738*
739 DO 120 m = mbot, mtop, -1
740 k = krcol + 2*( m-1 )
741 kms = k - incol
742 i2 = max( 1, ktop-incol )
743 i2 = max( i2, kms-(krcol-incol)+1 )
744 i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 )
745 t1 = v( 1, m )
746 t2 = t1*v( 2, m )
747 t3 = t1*v( 3, m )
748 DO 110 j = i2, i4
749 refsum = u( j, kms+1 ) + v( 2, m )*u( j, kms+2 )
750 $ + v( 3, m )*u( j, kms+3 )
751 u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
752 u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
753 u( j, kms+3 ) = u( j, kms+3 ) - refsum*t3
754 110 CONTINUE
755 120 CONTINUE
756 ELSE IF( wantz ) THEN
757*
758* ==== U is not accumulated, so update Z
759* . now by multiplying by reflections
760* . from the right. ====
761*
762 DO 140 m = mbot, mtop, -1
763 k = krcol + 2*( m-1 )
764 t1 = v( 1, m )
765 t2 = t1*v( 2, m )
766 t3 = t1*v( 3, m )
767 DO 130 j = iloz, ihiz
768 refsum = z( j, k+1 ) + v( 2, m )*z( j, k+2 )
769 $ + v( 3, m )*z( j, k+3 )
770 z( j, k+1 ) = z( j, k+1 ) - refsum*t1
771 z( j, k+2 ) = z( j, k+2 ) - refsum*t2
772 z( j, k+3 ) = z( j, k+3 ) - refsum*t3
773 130 CONTINUE
774 140 CONTINUE
775 END IF
776*
777* ==== End of near-the-diagonal bulge chase. ====
778*
779 145 CONTINUE
780*
781* ==== Use U (if accumulated) to update far-from-diagonal
782* . entries in H. If required, use U to update Z as
783* . well. ====
784*
785 IF( accum ) THEN
786 IF( wantt ) THEN
787 jtop = 1
788 jbot = n
789 ELSE
790 jtop = ktop
791 jbot = kbot
792 END IF
793 k1 = max( 1, ktop-incol )
794 nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
795*
796* ==== Horizontal Multiply ====
797*
798 DO 150 jcol = min( ndcol, kbot ) + 1, jbot, nh
799 jlen = min( nh, jbot-jcol+1 )
800 CALL sgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),
801 $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
802 $ ldwh )
803 CALL slacpy( 'ALL', nu, jlen, wh, ldwh,
804 $ h( incol+k1, jcol ), ldh )
805 150 CONTINUE
806*
807* ==== Vertical multiply ====
808*
809 DO 160 jrow = jtop, max( ktop, incol ) - 1, nv
810 jlen = min( nv, max( ktop, incol )-jrow )
811 CALL sgemm( 'N', 'N', jlen, nu, nu, one,
812 $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
813 $ ldu, zero, wv, ldwv )
814 CALL slacpy( 'ALL', jlen, nu, wv, ldwv,
815 $ h( jrow, incol+k1 ), ldh )
816 160 CONTINUE
817*
818* ==== Z multiply (also vertical) ====
819*
820 IF( wantz ) THEN
821 DO 170 jrow = iloz, ihiz, nv
822 jlen = min( nv, ihiz-jrow+1 )
823 CALL sgemm( 'N', 'N', jlen, nu, nu, one,
824 $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
825 $ ldu, zero, wv, ldwv )
826 CALL slacpy( 'ALL', jlen, nu, wv, ldwv,
827 $ z( jrow, incol+k1 ), ldz )
828 170 CONTINUE
829 END IF
830 END IF
831 180 CONTINUE
832*
833* ==== End of SLAQR5 ====
834*
835 END
subroutine sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
SGEMM
Definition sgemm.f:188
subroutine slacpy(uplo, m, n, a, lda, b, ldb)
SLACPY copies all or part of one two-dimensional array to another.
Definition slacpy.f:103
subroutine slaqr1(n, h, ldh, sr1, si1, sr2, si2, v)
SLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and spe...
Definition slaqr1.f:121
subroutine slaqr5(wantt, wantz, kacc22, n, ktop, kbot, nshfts, sr, si, h, ldh, iloz, ihiz, z, ldz, v, ldv, u, ldu, nv, wv, ldwv, nh, wh, ldwh)
SLAQR5 performs a single small-bulge multi-shift QR sweep.
Definition slaqr5.f:265
subroutine slarfg(n, alpha, x, incx, tau)
SLARFG generates an elementary reflector (Householder matrix).
Definition slarfg.f:106
subroutine slaset(uplo, m, n, alpha, beta, a, lda)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition slaset.f:110
subroutine strmm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
STRMM
Definition strmm.f:177