LAPACK 3.12.1
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ slaqr5()

subroutine slaqr5 ( logical wantt,
logical wantz,
integer kacc22,
integer n,
integer ktop,
integer kbot,
integer nshfts,
real, dimension( * ) sr,
real, dimension( * ) si,
real, dimension( ldh, * ) h,
integer ldh,
integer iloz,
integer ihiz,
real, dimension( ldz, * ) z,
integer ldz,
real, dimension( ldv, * ) v,
integer ldv,
real, dimension( ldu, * ) u,
integer ldu,
integer nv,
real, dimension( ldwv, * ) wv,
integer ldwv,
integer nh,
real, dimension( ldwh, * ) wh,
integer ldwh )

SLAQR5 performs a single small-bulge multi-shift QR sweep.

Download SLAQR5 + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!>    SLAQR5, called by SLAQR0, performs a
!>    single small-bulge multi-shift QR sweep.
!> 
Parameters
[in]WANTT
!>          WANTT is LOGICAL
!>             WANTT = .true. if the quasi-triangular Schur factor
!>             is being computed.  WANTT is set to .false. otherwise.
!> 
[in]WANTZ
!>          WANTZ is LOGICAL
!>             WANTZ = .true. if the orthogonal Schur factor is being
!>             computed.  WANTZ is set to .false. otherwise.
!> 
[in]KACC22
!>          KACC22 is INTEGER with value 0, 1, or 2.
!>             Specifies the computation mode of far-from-diagonal
!>             orthogonal updates.
!>        = 0: SLAQR5 does not accumulate reflections and does not
!>             use matrix-matrix multiply to update far-from-diagonal
!>             matrix entries.
!>        = 1: SLAQR5 accumulates reflections and uses matrix-matrix
!>             multiply to update the far-from-diagonal matrix entries.
!>        = 2: Same as KACC22 = 1. This option used to enable exploiting
!>             the 2-by-2 structure during matrix multiplications, but
!>             this is no longer supported.
!> 
[in]N
!>          N is INTEGER
!>             N is the order of the Hessenberg matrix H upon which this
!>             subroutine operates.
!> 
[in]KTOP
!>          KTOP is INTEGER
!> 
[in]KBOT
!>          KBOT is INTEGER
!>             These are the first and last rows and columns of an
!>             isolated diagonal block upon which the QR sweep is to be
!>             applied. It is assumed without a check that
!>                       either KTOP = 1  or   H(KTOP,KTOP-1) = 0
!>             and
!>                       either KBOT = N  or   H(KBOT+1,KBOT) = 0.
!> 
[in]NSHFTS
!>          NSHFTS is INTEGER
!>             NSHFTS gives the number of simultaneous shifts.  NSHFTS
!>             must be positive and even.
!> 
[in,out]SR
!>          SR is REAL array, dimension (NSHFTS)
!> 
[in,out]SI
!>          SI is REAL array, dimension (NSHFTS)
!>             SR contains the real parts and SI contains the imaginary
!>             parts of the NSHFTS shifts of origin that define the
!>             multi-shift QR sweep.  On output SR and SI may be
!>             reordered.
!> 
[in,out]H
!>          H is REAL array, dimension (LDH,N)
!>             On input H contains a Hessenberg matrix.  On output a
!>             multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
!>             to the isolated diagonal block in rows and columns KTOP
!>             through KBOT.
!> 
[in]LDH
!>          LDH is INTEGER
!>             LDH is the leading dimension of H just as declared in the
!>             calling procedure.  LDH >= MAX(1,N).
!> 
[in]ILOZ
!>          ILOZ is INTEGER
!> 
[in]IHIZ
!>          IHIZ is INTEGER
!>             Specify the rows of Z to which transformations must be
!>             applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N
!> 
[in,out]Z
!>          Z is REAL array, dimension (LDZ,IHIZ)
!>             If WANTZ = .TRUE., then the QR Sweep orthogonal
!>             similarity transformation is accumulated into
!>             Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
!>             If WANTZ = .FALSE., then Z is unreferenced.
!> 
[in]LDZ
!>          LDZ is INTEGER
!>             LDA is the leading dimension of Z just as declared in
!>             the calling procedure. LDZ >= N.
!> 
[out]V
!>          V is REAL array, dimension (LDV,NSHFTS/2)
!> 
[in]LDV
!>          LDV is INTEGER
!>             LDV is the leading dimension of V as declared in the
!>             calling procedure.  LDV >= 3.
!> 
[out]U
!>          U is REAL array, dimension (LDU,2*NSHFTS)
!> 
[in]LDU
!>          LDU is INTEGER
!>             LDU is the leading dimension of U just as declared in the
!>             in the calling subroutine.  LDU >= 2*NSHFTS.
!> 
[in]NV
!>          NV is INTEGER
!>             NV is the number of rows in WV agailable for workspace.
!>             NV >= 1.
!> 
[out]WV
!>          WV is REAL array, dimension (LDWV,2*NSHFTS)
!> 
[in]LDWV
!>          LDWV is INTEGER
!>             LDWV is the leading dimension of WV as declared in the
!>             in the calling subroutine.  LDWV >= NV.
!> 
[in]NH
!>          NH is INTEGER
!>             NH is the number of columns in array WH available for
!>             workspace. NH >= 1.
!> 
[out]WH
!>          WH is REAL array, dimension (LDWH,NH)
!> 
[in]LDWH
!>          LDWH is INTEGER
!>             Leading dimension of WH just as declared in the
!>             calling procedure.  LDWH >= 2*NSHFTS.
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Contributors:
Karen Braman and Ralph Byers, Department of Mathematics, University of Kansas, USA

Lars Karlsson, Daniel Kressner, and Bruno Lang

Thijs Steel, Department of Computer science, KU Leuven, Belgium

References:
K. Braman, R. Byers and R. Mathias, The Multi-Shift QR Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 Performance, SIAM Journal of Matrix Analysis, volume 23, pages 929–947, 2002.

Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed chains of bulges in multishift QR algorithms. ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014).

Definition at line 260 of file slaqr5.f.

263 IMPLICIT NONE
264*
265* -- LAPACK auxiliary routine --
266* -- LAPACK is a software package provided by Univ. of Tennessee, --
267* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
268*
269* .. Scalar Arguments ..
270 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
271 $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
272 LOGICAL WANTT, WANTZ
273* ..
274* .. Array Arguments ..
275 REAL H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
276 $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
277 $ Z( LDZ, * )
278* ..
279*
280* ================================================================
281* .. Parameters ..
282 REAL ZERO, ONE
283 parameter( zero = 0.0e0, one = 1.0e0 )
284* ..
285* .. Local Scalars ..
286 REAL ALPHA, BETA, H11, H12, H21, H22, REFSUM,
287 $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, T1, T2,
288 $ T3, TST1, TST2, ULP
289 INTEGER I, I2, I4, INCOL, J, JBOT, JCOL, JLEN,
290 $ JROW, JTOP, K, K1, KDU, KMS, KRCOL,
291 $ M, M22, MBOT, MTOP, NBMPS, NDCOL,
292 $ NS, NU
293 LOGICAL ACCUM, BMP22
294* ..
295* .. External Functions ..
296 REAL SLAMCH
297 EXTERNAL slamch
298* ..
299* .. Intrinsic Functions ..
300*
301 INTRINSIC abs, max, min, mod, real
302* ..
303* .. Local Arrays ..
304 REAL VT( 3 )
305* ..
306* .. External Subroutines ..
307 EXTERNAL sgemm, slacpy, slaqr1, slarfg, slaset,
308 $ strmm
309* ..
310* .. Executable Statements ..
311*
312* ==== If there are no shifts, then there is nothing to do. ====
313*
314 IF( nshfts.LT.2 )
315 $ RETURN
316*
317* ==== If the active block is empty or 1-by-1, then there
318* . is nothing to do. ====
319*
320 IF( ktop.GE.kbot )
321 $ RETURN
322*
323* ==== Shuffle shifts into pairs of real shifts and pairs
324* . of complex conjugate shifts assuming complex
325* . conjugate shifts are already adjacent to one
326* . another. ====
327*
328 DO 10 i = 1, nshfts - 2, 2
329 IF( si( i ).NE.-si( i+1 ) ) THEN
330*
331 swap = sr( i )
332 sr( i ) = sr( i+1 )
333 sr( i+1 ) = sr( i+2 )
334 sr( i+2 ) = swap
335*
336 swap = si( i )
337 si( i ) = si( i+1 )
338 si( i+1 ) = si( i+2 )
339 si( i+2 ) = swap
340 END IF
341 10 CONTINUE
342*
343* ==== NSHFTS is supposed to be even, but if it is odd,
344* . then simply reduce it by one. The shuffle above
345* . ensures that the dropped shift is real and that
346* . the remaining shifts are paired. ====
347*
348 ns = nshfts - mod( nshfts, 2 )
349*
350* ==== Machine constants for deflation ====
351*
352 safmin = slamch( 'SAFE MINIMUM' )
353 safmax = one / safmin
354 ulp = slamch( 'PRECISION' )
355 smlnum = safmin*( real( n ) / ulp )
356*
357* ==== Use accumulated reflections to update far-from-diagonal
358* . entries ? ====
359*
360 accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
361*
362* ==== clear trash ====
363*
364 IF( ktop+2.LE.kbot )
365 $ h( ktop+2, ktop ) = zero
366*
367* ==== NBMPS = number of 2-shift bulges in the chain ====
368*
369 nbmps = ns / 2
370*
371* ==== KDU = width of slab ====
372*
373 kdu = 4*nbmps
374*
375* ==== Create and chase chains of NBMPS bulges ====
376*
377 DO 180 incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
378*
379* JTOP = Index from which updates from the right start.
380*
381 IF( accum ) THEN
382 jtop = max( ktop, incol )
383 ELSE IF( wantt ) THEN
384 jtop = 1
385 ELSE
386 jtop = ktop
387 END IF
388*
389 ndcol = incol + kdu
390 IF( accum )
391 $ CALL slaset( 'ALL', kdu, kdu, zero, one, u, ldu )
392*
393* ==== Near-the-diagonal bulge chase. The following loop
394* . performs the near-the-diagonal part of a small bulge
395* . multi-shift QR sweep. Each 4*NBMPS column diagonal
396* . chunk extends from column INCOL to column NDCOL
397* . (including both column INCOL and column NDCOL). The
398* . following loop chases a 2*NBMPS+1 column long chain of
399* . NBMPS bulges 2*NBMPS-1 columns to the right. (INCOL
400* . may be less than KTOP and and NDCOL may be greater than
401* . KBOT indicating phantom columns from which to chase
402* . bulges before they are actually introduced or to which
403* . to chase bulges beyond column KBOT.) ====
404*
405 DO 145 krcol = incol, min( incol+2*nbmps-1, kbot-2 )
406*
407* ==== Bulges number MTOP to MBOT are active double implicit
408* . shift bulges. There may or may not also be small
409* . 2-by-2 bulge, if there is room. The inactive bulges
410* . (if any) must wait until the active bulges have moved
411* . down the diagonal to make room. The phantom matrix
412* . paradigm described above helps keep track. ====
413*
414 mtop = max( 1, ( ktop-krcol ) / 2+1 )
415 mbot = min( nbmps, ( kbot-krcol-1 ) / 2 )
416 m22 = mbot + 1
417 bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+2*( m22-1 ) ).EQ.
418 $ ( kbot-2 )
419*
420* ==== Generate reflections to chase the chain right
421* . one column. (The minimum value of K is KTOP-1.) ====
422*
423 IF ( bmp22 ) THEN
424*
425* ==== Special case: 2-by-2 reflection at bottom treated
426* . separately ====
427*
428 k = krcol + 2*( m22-1 )
429 IF( k.EQ.ktop-1 ) THEN
430 CALL slaqr1( 2, h( k+1, k+1 ), ldh, sr( 2*m22-1 ),
431 $ si( 2*m22-1 ), sr( 2*m22 ), si( 2*m22 ),
432 $ v( 1, m22 ) )
433 beta = v( 1, m22 )
434 CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
435 ELSE
436 beta = h( k+1, k )
437 v( 2, m22 ) = h( k+2, k )
438 CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
439 h( k+1, k ) = beta
440 h( k+2, k ) = zero
441 END IF
442
443*
444* ==== Perform update from right within
445* . computational window. ====
446*
447 t1 = v( 1, m22 )
448 t2 = t1*v( 2, m22 )
449 DO 30 j = jtop, min( kbot, k+3 )
450 refsum = h( j, k+1 ) + v( 2, m22 )*h( j, k+2 )
451 h( j, k+1 ) = h( j, k+1 ) - refsum*t1
452 h( j, k+2 ) = h( j, k+2 ) - refsum*t2
453 30 CONTINUE
454*
455* ==== Perform update from left within
456* . computational window. ====
457*
458 IF( accum ) THEN
459 jbot = min( ndcol, kbot )
460 ELSE IF( wantt ) THEN
461 jbot = n
462 ELSE
463 jbot = kbot
464 END IF
465 t1 = v( 1, m22 )
466 t2 = t1*v( 2, m22 )
467 DO 40 j = k+1, jbot
468 refsum = h( k+1, j ) + v( 2, m22 )*h( k+2, j )
469 h( k+1, j ) = h( k+1, j ) - refsum*t1
470 h( k+2, j ) = h( k+2, j ) - refsum*t2
471 40 CONTINUE
472*
473* ==== The following convergence test requires that
474* . the tradition small-compared-to-nearby-diagonals
475* . criterion and the Ahues & Tisseur (LAWN 122, 1997)
476* . criteria both be satisfied. The latter improves
477* . accuracy in some examples. Falling back on an
478* . alternate convergence criterion when TST1 or TST2
479* . is zero (as done here) is traditional but probably
480* . unnecessary. ====
481*
482 IF( k.GE.ktop ) THEN
483 IF( h( k+1, k ).NE.zero ) THEN
484 tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
485 IF( tst1.EQ.zero ) THEN
486 IF( k.GE.ktop+1 )
487 $ tst1 = tst1 + abs( h( k, k-1 ) )
488 IF( k.GE.ktop+2 )
489 $ tst1 = tst1 + abs( h( k, k-2 ) )
490 IF( k.GE.ktop+3 )
491 $ tst1 = tst1 + abs( h( k, k-3 ) )
492 IF( k.LE.kbot-2 )
493 $ tst1 = tst1 + abs( h( k+2, k+1 ) )
494 IF( k.LE.kbot-3 )
495 $ tst1 = tst1 + abs( h( k+3, k+1 ) )
496 IF( k.LE.kbot-4 )
497 $ tst1 = tst1 + abs( h( k+4, k+1 ) )
498 END IF
499 IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
500 $ THEN
501 h12 = max( abs( h( k+1, k ) ),
502 $ abs( h( k, k+1 ) ) )
503 h21 = min( abs( h( k+1, k ) ),
504 $ abs( h( k, k+1 ) ) )
505 h11 = max( abs( h( k+1, k+1 ) ),
506 $ abs( h( k, k )-h( k+1, k+1 ) ) )
507 h22 = min( abs( h( k+1, k+1 ) ),
508 $ abs( h( k, k )-h( k+1, k+1 ) ) )
509 scl = h11 + h12
510 tst2 = h22*( h11 / scl )
511*
512 IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
513 $ max( smlnum, ulp*tst2 ) ) THEN
514 h( k+1, k ) = zero
515 END IF
516 END IF
517 END IF
518 END IF
519*
520* ==== Accumulate orthogonal transformations. ====
521*
522 IF( accum ) THEN
523 kms = k - incol
524 t1 = v( 1, m22 )
525 t2 = t1*v( 2, m22 )
526 DO 50 j = max( 1, ktop-incol ), kdu
527 refsum = u( j, kms+1 ) + v( 2, m22 )*u( j, kms+2 )
528 u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
529 u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
530 50 CONTINUE
531 ELSE IF( wantz ) THEN
532 t1 = v( 1, m22 )
533 t2 = t1*v( 2, m22 )
534 DO 60 j = iloz, ihiz
535 refsum = z( j, k+1 )+v( 2, m22 )*z( j, k+2 )
536 z( j, k+1 ) = z( j, k+1 ) - refsum*t1
537 z( j, k+2 ) = z( j, k+2 ) - refsum*t2
538 60 CONTINUE
539 END IF
540 END IF
541*
542* ==== Normal case: Chain of 3-by-3 reflections ====
543*
544 DO 80 m = mbot, mtop, -1
545 k = krcol + 2*( m-1 )
546 IF( k.EQ.ktop-1 ) THEN
547 CALL slaqr1( 3, h( ktop, ktop ), ldh, sr( 2*m-1 ),
548 $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
549 $ v( 1, m ) )
550 alpha = v( 1, m )
551 CALL slarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
552 ELSE
553*
554* ==== Perform delayed transformation of row below
555* . Mth bulge. Exploit fact that first two elements
556* . of row are actually zero. ====
557*
558 t1 = v( 1, m )
559 t2 = t1*v( 2, m )
560 t3 = t1*v( 3, m )
561 refsum = v( 3, m )*h( k+3, k+2 )
562 h( k+3, k ) = -refsum*t1
563 h( k+3, k+1 ) = -refsum*t2
564 h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*t3
565*
566* ==== Calculate reflection to move
567* . Mth bulge one step. ====
568*
569 beta = h( k+1, k )
570 v( 2, m ) = h( k+2, k )
571 v( 3, m ) = h( k+3, k )
572 CALL slarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
573*
574* ==== A Bulge may collapse because of vigilant
575* . deflation or destructive underflow. In the
576* . underflow case, try the two-small-subdiagonals
577* . trick to try to reinflate the bulge. ====
578*
579 IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
580 $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
581*
582* ==== Typical case: not collapsed (yet). ====
583*
584 h( k+1, k ) = beta
585 h( k+2, k ) = zero
586 h( k+3, k ) = zero
587 ELSE
588*
589* ==== Atypical case: collapsed. Attempt to
590* . reintroduce ignoring H(K+1,K) and H(K+2,K).
591* . If the fill resulting from the new
592* . reflector is too large, then abandon it.
593* . Otherwise, use the new one. ====
594*
595 CALL slaqr1( 3, h( k+1, k+1 ), ldh, sr( 2*m-1 ),
596 $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
597 $ vt )
598 alpha = vt( 1 )
599 CALL slarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
600 t1 = vt( 1 )
601 t2 = t1*vt( 2 )
602 t3 = t2*vt( 3 )
603 refsum = h( k+1, k )+vt( 2 )*h( k+2, k )
604*
605 IF( abs( h( k+2, k )-refsum*t2 )+
606 $ abs( refsum*t3 ).GT.ulp*
607 $ ( abs( h( k, k ) )+abs( h( k+1,
608 $ k+1 ) )+abs( h( k+2, k+2 ) ) ) ) THEN
609*
610* ==== Starting a new bulge here would
611* . create non-negligible fill. Use
612* . the old one with trepidation. ====
613*
614 h( k+1, k ) = beta
615 h( k+2, k ) = zero
616 h( k+3, k ) = zero
617 ELSE
618*
619* ==== Starting a new bulge here would
620* . create only negligible fill.
621* . Replace the old reflector with
622* . the new one. ====
623*
624 h( k+1, k ) = h( k+1, k ) - refsum*t1
625 h( k+2, k ) = zero
626 h( k+3, k ) = zero
627 v( 1, m ) = vt( 1 )
628 v( 2, m ) = vt( 2 )
629 v( 3, m ) = vt( 3 )
630 END IF
631 END IF
632 END IF
633*
634* ==== Apply reflection from the right and
635* . the first column of update from the left.
636* . These updates are required for the vigilant
637* . deflation check. We still delay most of the
638* . updates from the left for efficiency. ====
639*
640 t1 = v( 1, m )
641 t2 = t1*v( 2, m )
642 t3 = t1*v( 3, m )
643 DO 70 j = jtop, min( kbot, k+3 )
644 refsum = h( j, k+1 ) + v( 2, m )*h( j, k+2 )
645 $ + v( 3, m )*h( j, k+3 )
646 h( j, k+1 ) = h( j, k+1 ) - refsum*t1
647 h( j, k+2 ) = h( j, k+2 ) - refsum*t2
648 h( j, k+3 ) = h( j, k+3 ) - refsum*t3
649 70 CONTINUE
650*
651* ==== Perform update from left for subsequent
652* . column. ====
653*
654 refsum = h( k+1, k+1 ) + v( 2, m )*h( k+2, k+1 )
655 $ + v( 3, m )*h( k+3, k+1 )
656 h( k+1, k+1 ) = h( k+1, k+1 ) - refsum*t1
657 h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*t2
658 h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*t3
659*
660* ==== The following convergence test requires that
661* . the tradition small-compared-to-nearby-diagonals
662* . criterion and the Ahues & Tisseur (LAWN 122, 1997)
663* . criteria both be satisfied. The latter improves
664* . accuracy in some examples. Falling back on an
665* . alternate convergence criterion when TST1 or TST2
666* . is zero (as done here) is traditional but probably
667* . unnecessary. ====
668*
669 IF( k.LT.ktop)
670 $ cycle
671 IF( h( k+1, k ).NE.zero ) THEN
672 tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
673 IF( tst1.EQ.zero ) THEN
674 IF( k.GE.ktop+1 )
675 $ tst1 = tst1 + abs( h( k, k-1 ) )
676 IF( k.GE.ktop+2 )
677 $ tst1 = tst1 + abs( h( k, k-2 ) )
678 IF( k.GE.ktop+3 )
679 $ tst1 = tst1 + abs( h( k, k-3 ) )
680 IF( k.LE.kbot-2 )
681 $ tst1 = tst1 + abs( h( k+2, k+1 ) )
682 IF( k.LE.kbot-3 )
683 $ tst1 = tst1 + abs( h( k+3, k+1 ) )
684 IF( k.LE.kbot-4 )
685 $ tst1 = tst1 + abs( h( k+4, k+1 ) )
686 END IF
687 IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
688 $ THEN
689 h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
690 h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
691 h11 = max( abs( h( k+1, k+1 ) ),
692 $ abs( h( k, k )-h( k+1, k+1 ) ) )
693 h22 = min( abs( h( k+1, k+1 ) ),
694 $ abs( h( k, k )-h( k+1, k+1 ) ) )
695 scl = h11 + h12
696 tst2 = h22*( h11 / scl )
697*
698 IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
699 $ max( smlnum, ulp*tst2 ) ) THEN
700 h( k+1, k ) = zero
701 END IF
702 END IF
703 END IF
704 80 CONTINUE
705*
706* ==== Multiply H by reflections from the left ====
707*
708 IF( accum ) THEN
709 jbot = min( ndcol, kbot )
710 ELSE IF( wantt ) THEN
711 jbot = n
712 ELSE
713 jbot = kbot
714 END IF
715*
716 DO 100 m = mbot, mtop, -1
717 k = krcol + 2*( m-1 )
718 t1 = v( 1, m )
719 t2 = t1*v( 2, m )
720 t3 = t1*v( 3, m )
721 DO 90 j = max( ktop, krcol + 2*m ), jbot
722 refsum = h( k+1, j ) + v( 2, m )*h( k+2, j )
723 $ + v( 3, m )*h( k+3, j )
724 h( k+1, j ) = h( k+1, j ) - refsum*t1
725 h( k+2, j ) = h( k+2, j ) - refsum*t2
726 h( k+3, j ) = h( k+3, j ) - refsum*t3
727 90 CONTINUE
728 100 CONTINUE
729*
730* ==== Accumulate orthogonal transformations. ====
731*
732 IF( accum ) THEN
733*
734* ==== Accumulate U. (If needed, update Z later
735* . with an efficient matrix-matrix
736* . multiply.) ====
737*
738 DO 120 m = mbot, mtop, -1
739 k = krcol + 2*( m-1 )
740 kms = k - incol
741 i2 = max( 1, ktop-incol )
742 i2 = max( i2, kms-(krcol-incol)+1 )
743 i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 )
744 t1 = v( 1, m )
745 t2 = t1*v( 2, m )
746 t3 = t1*v( 3, m )
747 DO 110 j = i2, i4
748 refsum = u( j, kms+1 ) + v( 2, m )*u( j, kms+2 )
749 $ + v( 3, m )*u( j, kms+3 )
750 u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
751 u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
752 u( j, kms+3 ) = u( j, kms+3 ) - refsum*t3
753 110 CONTINUE
754 120 CONTINUE
755 ELSE IF( wantz ) THEN
756*
757* ==== U is not accumulated, so update Z
758* . now by multiplying by reflections
759* . from the right. ====
760*
761 DO 140 m = mbot, mtop, -1
762 k = krcol + 2*( m-1 )
763 t1 = v( 1, m )
764 t2 = t1*v( 2, m )
765 t3 = t1*v( 3, m )
766 DO 130 j = iloz, ihiz
767 refsum = z( j, k+1 ) + v( 2, m )*z( j, k+2 )
768 $ + v( 3, m )*z( j, k+3 )
769 z( j, k+1 ) = z( j, k+1 ) - refsum*t1
770 z( j, k+2 ) = z( j, k+2 ) - refsum*t2
771 z( j, k+3 ) = z( j, k+3 ) - refsum*t3
772 130 CONTINUE
773 140 CONTINUE
774 END IF
775*
776* ==== End of near-the-diagonal bulge chase. ====
777*
778 145 CONTINUE
779*
780* ==== Use U (if accumulated) to update far-from-diagonal
781* . entries in H. If required, use U to update Z as
782* . well. ====
783*
784 IF( accum ) THEN
785 IF( wantt ) THEN
786 jtop = 1
787 jbot = n
788 ELSE
789 jtop = ktop
790 jbot = kbot
791 END IF
792 k1 = max( 1, ktop-incol )
793 nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
794*
795* ==== Horizontal Multiply ====
796*
797 DO 150 jcol = min( ndcol, kbot ) + 1, jbot, nh
798 jlen = min( nh, jbot-jcol+1 )
799 CALL sgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),
800 $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
801 $ ldwh )
802 CALL slacpy( 'ALL', nu, jlen, wh, ldwh,
803 $ h( incol+k1, jcol ), ldh )
804 150 CONTINUE
805*
806* ==== Vertical multiply ====
807*
808 DO 160 jrow = jtop, max( ktop, incol ) - 1, nv
809 jlen = min( nv, max( ktop, incol )-jrow )
810 CALL sgemm( 'N', 'N', jlen, nu, nu, one,
811 $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
812 $ ldu, zero, wv, ldwv )
813 CALL slacpy( 'ALL', jlen, nu, wv, ldwv,
814 $ h( jrow, incol+k1 ), ldh )
815 160 CONTINUE
816*
817* ==== Z multiply (also vertical) ====
818*
819 IF( wantz ) THEN
820 DO 170 jrow = iloz, ihiz, nv
821 jlen = min( nv, ihiz-jrow+1 )
822 CALL sgemm( 'N', 'N', jlen, nu, nu, one,
823 $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
824 $ ldu, zero, wv, ldwv )
825 CALL slacpy( 'ALL', jlen, nu, wv, ldwv,
826 $ z( jrow, incol+k1 ), ldz )
827 170 CONTINUE
828 END IF
829 END IF
830 180 CONTINUE
831*
832* ==== End of SLAQR5 ====
833*
subroutine sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
SGEMM
Definition sgemm.f:188
subroutine slacpy(uplo, m, n, a, lda, b, ldb)
SLACPY copies all or part of one two-dimensional array to another.
Definition slacpy.f:101
real function slamch(cmach)
SLAMCH
Definition slamch.f:68
subroutine slaqr1(n, h, ldh, sr1, si1, sr2, si2, v)
SLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and spe...
Definition slaqr1.f:119
subroutine slarfg(n, alpha, x, incx, tau)
SLARFG generates an elementary reflector (Householder matrix).
Definition slarfg.f:104
subroutine slaset(uplo, m, n, alpha, beta, a, lda)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition slaset.f:108
subroutine strmm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
STRMM
Definition strmm.f:177
Here is the call graph for this function:
Here is the caller graph for this function: