LAPACK 3.11.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ slaqr5()

subroutine slaqr5 ( logical  WANTT,
logical  WANTZ,
integer  KACC22,
integer  N,
integer  KTOP,
integer  KBOT,
integer  NSHFTS,
real, dimension( * )  SR,
real, dimension( * )  SI,
real, dimension( ldh, * )  H,
integer  LDH,
integer  ILOZ,
integer  IHIZ,
real, dimension( ldz, * )  Z,
integer  LDZ,
real, dimension( ldv, * )  V,
integer  LDV,
real, dimension( ldu, * )  U,
integer  LDU,
integer  NV,
real, dimension( ldwv, * )  WV,
integer  LDWV,
integer  NH,
real, dimension( ldwh, * )  WH,
integer  LDWH 
)

SLAQR5 performs a single small-bulge multi-shift QR sweep.

Download SLAQR5 + dependencies [TGZ] [ZIP] [TXT]

Purpose:
    SLAQR5, called by SLAQR0, performs a
    single small-bulge multi-shift QR sweep.
Parameters
[in]WANTT
          WANTT is LOGICAL
             WANTT = .true. if the quasi-triangular Schur factor
             is being computed.  WANTT is set to .false. otherwise.
[in]WANTZ
          WANTZ is LOGICAL
             WANTZ = .true. if the orthogonal Schur factor is being
             computed.  WANTZ is set to .false. otherwise.
[in]KACC22
          KACC22 is INTEGER with value 0, 1, or 2.
             Specifies the computation mode of far-from-diagonal
             orthogonal updates.
        = 0: SLAQR5 does not accumulate reflections and does not
             use matrix-matrix multiply to update far-from-diagonal
             matrix entries.
        = 1: SLAQR5 accumulates reflections and uses matrix-matrix
             multiply to update the far-from-diagonal matrix entries.
        = 2: Same as KACC22 = 1. This option used to enable exploiting
             the 2-by-2 structure during matrix multiplications, but
             this is no longer supported.
[in]N
          N is INTEGER
             N is the order of the Hessenberg matrix H upon which this
             subroutine operates.
[in]KTOP
          KTOP is INTEGER
[in]KBOT
          KBOT is INTEGER
             These are the first and last rows and columns of an
             isolated diagonal block upon which the QR sweep is to be
             applied. It is assumed without a check that
                       either KTOP = 1  or   H(KTOP,KTOP-1) = 0
             and
                       either KBOT = N  or   H(KBOT+1,KBOT) = 0.
[in]NSHFTS
          NSHFTS is INTEGER
             NSHFTS gives the number of simultaneous shifts.  NSHFTS
             must be positive and even.
[in,out]SR
          SR is REAL array, dimension (NSHFTS)
[in,out]SI
          SI is REAL array, dimension (NSHFTS)
             SR contains the real parts and SI contains the imaginary
             parts of the NSHFTS shifts of origin that define the
             multi-shift QR sweep.  On output SR and SI may be
             reordered.
[in,out]H
          H is REAL array, dimension (LDH,N)
             On input H contains a Hessenberg matrix.  On output a
             multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
             to the isolated diagonal block in rows and columns KTOP
             through KBOT.
[in]LDH
          LDH is INTEGER
             LDH is the leading dimension of H just as declared in the
             calling procedure.  LDH >= MAX(1,N).
[in]ILOZ
          ILOZ is INTEGER
[in]IHIZ
          IHIZ is INTEGER
             Specify the rows of Z to which transformations must be
             applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N
[in,out]Z
          Z is REAL array, dimension (LDZ,IHIZ)
             If WANTZ = .TRUE., then the QR Sweep orthogonal
             similarity transformation is accumulated into
             Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
             If WANTZ = .FALSE., then Z is unreferenced.
[in]LDZ
          LDZ is INTEGER
             LDA is the leading dimension of Z just as declared in
             the calling procedure. LDZ >= N.
[out]V
          V is REAL array, dimension (LDV,NSHFTS/2)
[in]LDV
          LDV is INTEGER
             LDV is the leading dimension of V as declared in the
             calling procedure.  LDV >= 3.
[out]U
          U is REAL array, dimension (LDU,2*NSHFTS)
[in]LDU
          LDU is INTEGER
             LDU is the leading dimension of U just as declared in the
             in the calling subroutine.  LDU >= 2*NSHFTS.
[in]NV
          NV is INTEGER
             NV is the number of rows in WV agailable for workspace.
             NV >= 1.
[out]WV
          WV is REAL array, dimension (LDWV,2*NSHFTS)
[in]LDWV
          LDWV is INTEGER
             LDWV is the leading dimension of WV as declared in the
             in the calling subroutine.  LDWV >= NV.
[in]NH
          NH is INTEGER
             NH is the number of columns in array WH available for
             workspace. NH >= 1.
[out]WH
          WH is REAL array, dimension (LDWH,NH)
[in]LDWH
          LDWH is INTEGER
             Leading dimension of WH just as declared in the
             calling procedure.  LDWH >= 2*NSHFTS.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Contributors:
Karen Braman and Ralph Byers, Department of Mathematics, University of Kansas, USA

Lars Karlsson, Daniel Kressner, and Bruno Lang

Thijs Steel, Department of Computer science, KU Leuven, Belgium

References:
K. Braman, R. Byers and R. Mathias, The Multi-Shift QR Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 Performance, SIAM Journal of Matrix Analysis, volume 23, pages 929–947, 2002.

Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed chains of bulges in multishift QR algorithms. ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014).

Definition at line 262 of file slaqr5.f.

265 IMPLICIT NONE
266*
267* -- LAPACK auxiliary routine --
268* -- LAPACK is a software package provided by Univ. of Tennessee, --
269* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
270*
271* .. Scalar Arguments ..
272 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
273 $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
274 LOGICAL WANTT, WANTZ
275* ..
276* .. Array Arguments ..
277 REAL H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
278 $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
279 $ Z( LDZ, * )
280* ..
281*
282* ================================================================
283* .. Parameters ..
284 REAL ZERO, ONE
285 parameter( zero = 0.0e0, one = 1.0e0 )
286* ..
287* .. Local Scalars ..
288 REAL ALPHA, BETA, H11, H12, H21, H22, REFSUM,
289 $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, T1, T2,
290 $ T3, TST1, TST2, ULP
291 INTEGER I, I2, I4, INCOL, J, JBOT, JCOL, JLEN,
292 $ JROW, JTOP, K, K1, KDU, KMS, KRCOL,
293 $ M, M22, MBOT, MTOP, NBMPS, NDCOL,
294 $ NS, NU
295 LOGICAL ACCUM, BMP22
296* ..
297* .. External Functions ..
298 REAL SLAMCH
299 EXTERNAL slamch
300* ..
301* .. Intrinsic Functions ..
302*
303 INTRINSIC abs, max, min, mod, real
304* ..
305* .. Local Arrays ..
306 REAL VT( 3 )
307* ..
308* .. External Subroutines ..
309 EXTERNAL sgemm, slabad, slacpy, slaqr1, slarfg, slaset,
310 $ strmm
311* ..
312* .. Executable Statements ..
313*
314* ==== If there are no shifts, then there is nothing to do. ====
315*
316 IF( nshfts.LT.2 )
317 $ RETURN
318*
319* ==== If the active block is empty or 1-by-1, then there
320* . is nothing to do. ====
321*
322 IF( ktop.GE.kbot )
323 $ RETURN
324*
325* ==== Shuffle shifts into pairs of real shifts and pairs
326* . of complex conjugate shifts assuming complex
327* . conjugate shifts are already adjacent to one
328* . another. ====
329*
330 DO 10 i = 1, nshfts - 2, 2
331 IF( si( i ).NE.-si( i+1 ) ) THEN
332*
333 swap = sr( i )
334 sr( i ) = sr( i+1 )
335 sr( i+1 ) = sr( i+2 )
336 sr( i+2 ) = swap
337*
338 swap = si( i )
339 si( i ) = si( i+1 )
340 si( i+1 ) = si( i+2 )
341 si( i+2 ) = swap
342 END IF
343 10 CONTINUE
344*
345* ==== NSHFTS is supposed to be even, but if it is odd,
346* . then simply reduce it by one. The shuffle above
347* . ensures that the dropped shift is real and that
348* . the remaining shifts are paired. ====
349*
350 ns = nshfts - mod( nshfts, 2 )
351*
352* ==== Machine constants for deflation ====
353*
354 safmin = slamch( 'SAFE MINIMUM' )
355 safmax = one / safmin
356 CALL slabad( safmin, safmax )
357 ulp = slamch( 'PRECISION' )
358 smlnum = safmin*( real( n ) / ulp )
359*
360* ==== Use accumulated reflections to update far-from-diagonal
361* . entries ? ====
362*
363 accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
364*
365* ==== clear trash ====
366*
367 IF( ktop+2.LE.kbot )
368 $ h( ktop+2, ktop ) = zero
369*
370* ==== NBMPS = number of 2-shift bulges in the chain ====
371*
372 nbmps = ns / 2
373*
374* ==== KDU = width of slab ====
375*
376 kdu = 4*nbmps
377*
378* ==== Create and chase chains of NBMPS bulges ====
379*
380 DO 180 incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
381*
382* JTOP = Index from which updates from the right start.
383*
384 IF( accum ) THEN
385 jtop = max( ktop, incol )
386 ELSE IF( wantt ) THEN
387 jtop = 1
388 ELSE
389 jtop = ktop
390 END IF
391*
392 ndcol = incol + kdu
393 IF( accum )
394 $ CALL slaset( 'ALL', kdu, kdu, zero, one, u, ldu )
395*
396* ==== Near-the-diagonal bulge chase. The following loop
397* . performs the near-the-diagonal part of a small bulge
398* . multi-shift QR sweep. Each 4*NBMPS column diagonal
399* . chunk extends from column INCOL to column NDCOL
400* . (including both column INCOL and column NDCOL). The
401* . following loop chases a 2*NBMPS+1 column long chain of
402* . NBMPS bulges 2*NBMPS-1 columns to the right. (INCOL
403* . may be less than KTOP and and NDCOL may be greater than
404* . KBOT indicating phantom columns from which to chase
405* . bulges before they are actually introduced or to which
406* . to chase bulges beyond column KBOT.) ====
407*
408 DO 145 krcol = incol, min( incol+2*nbmps-1, kbot-2 )
409*
410* ==== Bulges number MTOP to MBOT are active double implicit
411* . shift bulges. There may or may not also be small
412* . 2-by-2 bulge, if there is room. The inactive bulges
413* . (if any) must wait until the active bulges have moved
414* . down the diagonal to make room. The phantom matrix
415* . paradigm described above helps keep track. ====
416*
417 mtop = max( 1, ( ktop-krcol ) / 2+1 )
418 mbot = min( nbmps, ( kbot-krcol-1 ) / 2 )
419 m22 = mbot + 1
420 bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+2*( m22-1 ) ).EQ.
421 $ ( kbot-2 )
422*
423* ==== Generate reflections to chase the chain right
424* . one column. (The minimum value of K is KTOP-1.) ====
425*
426 IF ( bmp22 ) THEN
427*
428* ==== Special case: 2-by-2 reflection at bottom treated
429* . separately ====
430*
431 k = krcol + 2*( m22-1 )
432 IF( k.EQ.ktop-1 ) THEN
433 CALL slaqr1( 2, h( k+1, k+1 ), ldh, sr( 2*m22-1 ),
434 $ si( 2*m22-1 ), sr( 2*m22 ), si( 2*m22 ),
435 $ v( 1, m22 ) )
436 beta = v( 1, m22 )
437 CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
438 ELSE
439 beta = h( k+1, k )
440 v( 2, m22 ) = h( k+2, k )
441 CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
442 h( k+1, k ) = beta
443 h( k+2, k ) = zero
444 END IF
445
446*
447* ==== Perform update from right within
448* . computational window. ====
449*
450 t1 = v( 1, m22 )
451 t2 = t1*v( 2, m22 )
452 DO 30 j = jtop, min( kbot, k+3 )
453 refsum = h( j, k+1 ) + v( 2, m22 )*h( j, k+2 )
454 h( j, k+1 ) = h( j, k+1 ) - refsum*t1
455 h( j, k+2 ) = h( j, k+2 ) - refsum*t2
456 30 CONTINUE
457*
458* ==== Perform update from left within
459* . computational window. ====
460*
461 IF( accum ) THEN
462 jbot = min( ndcol, kbot )
463 ELSE IF( wantt ) THEN
464 jbot = n
465 ELSE
466 jbot = kbot
467 END IF
468 t1 = v( 1, m22 )
469 t2 = t1*v( 2, m22 )
470 DO 40 j = k+1, jbot
471 refsum = h( k+1, j ) + v( 2, m22 )*h( k+2, j )
472 h( k+1, j ) = h( k+1, j ) - refsum*t1
473 h( k+2, j ) = h( k+2, j ) - refsum*t2
474 40 CONTINUE
475*
476* ==== The following convergence test requires that
477* . the tradition small-compared-to-nearby-diagonals
478* . criterion and the Ahues & Tisseur (LAWN 122, 1997)
479* . criteria both be satisfied. The latter improves
480* . accuracy in some examples. Falling back on an
481* . alternate convergence criterion when TST1 or TST2
482* . is zero (as done here) is traditional but probably
483* . unnecessary. ====
484*
485 IF( k.GE.ktop ) THEN
486 IF( h( k+1, k ).NE.zero ) THEN
487 tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
488 IF( tst1.EQ.zero ) THEN
489 IF( k.GE.ktop+1 )
490 $ tst1 = tst1 + abs( h( k, k-1 ) )
491 IF( k.GE.ktop+2 )
492 $ tst1 = tst1 + abs( h( k, k-2 ) )
493 IF( k.GE.ktop+3 )
494 $ tst1 = tst1 + abs( h( k, k-3 ) )
495 IF( k.LE.kbot-2 )
496 $ tst1 = tst1 + abs( h( k+2, k+1 ) )
497 IF( k.LE.kbot-3 )
498 $ tst1 = tst1 + abs( h( k+3, k+1 ) )
499 IF( k.LE.kbot-4 )
500 $ tst1 = tst1 + abs( h( k+4, k+1 ) )
501 END IF
502 IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
503 $ THEN
504 h12 = max( abs( h( k+1, k ) ),
505 $ abs( h( k, k+1 ) ) )
506 h21 = min( abs( h( k+1, k ) ),
507 $ abs( h( k, k+1 ) ) )
508 h11 = max( abs( h( k+1, k+1 ) ),
509 $ abs( h( k, k )-h( k+1, k+1 ) ) )
510 h22 = min( abs( h( k+1, k+1 ) ),
511 $ abs( h( k, k )-h( k+1, k+1 ) ) )
512 scl = h11 + h12
513 tst2 = h22*( h11 / scl )
514*
515 IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
516 $ max( smlnum, ulp*tst2 ) ) THEN
517 h( k+1, k ) = zero
518 END IF
519 END IF
520 END IF
521 END IF
522*
523* ==== Accumulate orthogonal transformations. ====
524*
525 IF( accum ) THEN
526 kms = k - incol
527 t1 = v( 1, m22 )
528 t2 = t1*v( 2, m22 )
529 DO 50 j = max( 1, ktop-incol ), kdu
530 refsum = u( j, kms+1 ) + v( 2, m22 )*u( j, kms+2 )
531 u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
532 u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
533 50 CONTINUE
534 ELSE IF( wantz ) THEN
535 t1 = v( 1, m22 )
536 t2 = t1*v( 2, m22 )
537 DO 60 j = iloz, ihiz
538 refsum = z( j, k+1 )+v( 2, m22 )*z( j, k+2 )
539 z( j, k+1 ) = z( j, k+1 ) - refsum*t1
540 z( j, k+2 ) = z( j, k+2 ) - refsum*t2
541 60 CONTINUE
542 END IF
543 END IF
544*
545* ==== Normal case: Chain of 3-by-3 reflections ====
546*
547 DO 80 m = mbot, mtop, -1
548 k = krcol + 2*( m-1 )
549 IF( k.EQ.ktop-1 ) THEN
550 CALL slaqr1( 3, h( ktop, ktop ), ldh, sr( 2*m-1 ),
551 $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
552 $ v( 1, m ) )
553 alpha = v( 1, m )
554 CALL slarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
555 ELSE
556*
557* ==== Perform delayed transformation of row below
558* . Mth bulge. Exploit fact that first two elements
559* . of row are actually zero. ====
560*
561 refsum = v( 1, m )*v( 3, m )*h( k+3, k+2 )
562 h( k+3, k ) = -refsum
563 h( k+3, k+1 ) = -refsum*v( 2, m )
564 h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*v( 3, m )
565*
566* ==== Calculate reflection to move
567* . Mth bulge one step. ====
568*
569 beta = h( k+1, k )
570 v( 2, m ) = h( k+2, k )
571 v( 3, m ) = h( k+3, k )
572 CALL slarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
573*
574* ==== A Bulge may collapse because of vigilant
575* . deflation or destructive underflow. In the
576* . underflow case, try the two-small-subdiagonals
577* . trick to try to reinflate the bulge. ====
578*
579 IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
580 $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
581*
582* ==== Typical case: not collapsed (yet). ====
583*
584 h( k+1, k ) = beta
585 h( k+2, k ) = zero
586 h( k+3, k ) = zero
587 ELSE
588*
589* ==== Atypical case: collapsed. Attempt to
590* . reintroduce ignoring H(K+1,K) and H(K+2,K).
591* . If the fill resulting from the new
592* . reflector is too large, then abandon it.
593* . Otherwise, use the new one. ====
594*
595 CALL slaqr1( 3, h( k+1, k+1 ), ldh, sr( 2*m-1 ),
596 $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
597 $ vt )
598 alpha = vt( 1 )
599 CALL slarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
600 refsum = vt( 1 )*( h( k+1, k )+vt( 2 )*
601 $ h( k+2, k ) )
602*
603 IF( abs( h( k+2, k )-refsum*vt( 2 ) )+
604 $ abs( refsum*vt( 3 ) ).GT.ulp*
605 $ ( abs( h( k, k ) )+abs( h( k+1,
606 $ k+1 ) )+abs( h( k+2, k+2 ) ) ) ) THEN
607*
608* ==== Starting a new bulge here would
609* . create non-negligible fill. Use
610* . the old one with trepidation. ====
611*
612 h( k+1, k ) = beta
613 h( k+2, k ) = zero
614 h( k+3, k ) = zero
615 ELSE
616*
617* ==== Starting a new bulge here would
618* . create only negligible fill.
619* . Replace the old reflector with
620* . the new one. ====
621*
622 h( k+1, k ) = h( k+1, k ) - refsum
623 h( k+2, k ) = zero
624 h( k+3, k ) = zero
625 v( 1, m ) = vt( 1 )
626 v( 2, m ) = vt( 2 )
627 v( 3, m ) = vt( 3 )
628 END IF
629 END IF
630 END IF
631*
632* ==== Apply reflection from the right and
633* . the first column of update from the left.
634* . These updates are required for the vigilant
635* . deflation check. We still delay most of the
636* . updates from the left for efficiency. ====
637*
638 t1 = v( 1, m )
639 t2 = t1*v( 2, m )
640 t3 = t1*v( 3, m )
641 DO 70 j = jtop, min( kbot, k+3 )
642 refsum = h( j, k+1 ) + v( 2, m )*h( j, k+2 )
643 $ + v( 3, m )*h( j, k+3 )
644 h( j, k+1 ) = h( j, k+1 ) - refsum*t1
645 h( j, k+2 ) = h( j, k+2 ) - refsum*t2
646 h( j, k+3 ) = h( j, k+3 ) - refsum*t3
647 70 CONTINUE
648*
649* ==== Perform update from left for subsequent
650* . column. ====
651*
652 refsum = h( k+1, k+1 ) + v( 2, m )*h( k+2, k+1 )
653 $ + v( 3, m )*h( k+3, k+1 )
654 h( k+1, k+1 ) = h( k+1, k+1 ) - refsum*t1
655 h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*t2
656 h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*t3
657*
658* ==== The following convergence test requires that
659* . the tradition small-compared-to-nearby-diagonals
660* . criterion and the Ahues & Tisseur (LAWN 122, 1997)
661* . criteria both be satisfied. The latter improves
662* . accuracy in some examples. Falling back on an
663* . alternate convergence criterion when TST1 or TST2
664* . is zero (as done here) is traditional but probably
665* . unnecessary. ====
666*
667 IF( k.LT.ktop)
668 $ cycle
669 IF( h( k+1, k ).NE.zero ) THEN
670 tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
671 IF( tst1.EQ.zero ) THEN
672 IF( k.GE.ktop+1 )
673 $ tst1 = tst1 + abs( h( k, k-1 ) )
674 IF( k.GE.ktop+2 )
675 $ tst1 = tst1 + abs( h( k, k-2 ) )
676 IF( k.GE.ktop+3 )
677 $ tst1 = tst1 + abs( h( k, k-3 ) )
678 IF( k.LE.kbot-2 )
679 $ tst1 = tst1 + abs( h( k+2, k+1 ) )
680 IF( k.LE.kbot-3 )
681 $ tst1 = tst1 + abs( h( k+3, k+1 ) )
682 IF( k.LE.kbot-4 )
683 $ tst1 = tst1 + abs( h( k+4, k+1 ) )
684 END IF
685 IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
686 $ THEN
687 h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
688 h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
689 h11 = max( abs( h( k+1, k+1 ) ),
690 $ abs( h( k, k )-h( k+1, k+1 ) ) )
691 h22 = min( abs( h( k+1, k+1 ) ),
692 $ abs( h( k, k )-h( k+1, k+1 ) ) )
693 scl = h11 + h12
694 tst2 = h22*( h11 / scl )
695*
696 IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
697 $ max( smlnum, ulp*tst2 ) ) THEN
698 h( k+1, k ) = zero
699 END IF
700 END IF
701 END IF
702 80 CONTINUE
703*
704* ==== Multiply H by reflections from the left ====
705*
706 IF( accum ) THEN
707 jbot = min( ndcol, kbot )
708 ELSE IF( wantt ) THEN
709 jbot = n
710 ELSE
711 jbot = kbot
712 END IF
713*
714 DO 100 m = mbot, mtop, -1
715 k = krcol + 2*( m-1 )
716 t1 = v( 1, m )
717 t2 = t1*v( 2, m )
718 t3 = t1*v( 3, m )
719 DO 90 j = max( ktop, krcol + 2*m ), jbot
720 refsum = h( k+1, j ) + v( 2, m )*h( k+2, j )
721 $ + v( 3, m )*h( k+3, j )
722 h( k+1, j ) = h( k+1, j ) - refsum*t1
723 h( k+2, j ) = h( k+2, j ) - refsum*t2
724 h( k+3, j ) = h( k+3, j ) - refsum*t3
725 90 CONTINUE
726 100 CONTINUE
727*
728* ==== Accumulate orthogonal transformations. ====
729*
730 IF( accum ) THEN
731*
732* ==== Accumulate U. (If needed, update Z later
733* . with an efficient matrix-matrix
734* . multiply.) ====
735*
736 DO 120 m = mbot, mtop, -1
737 k = krcol + 2*( m-1 )
738 kms = k - incol
739 i2 = max( 1, ktop-incol )
740 i2 = max( i2, kms-(krcol-incol)+1 )
741 i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 )
742 t1 = v( 1, m )
743 t2 = t1*v( 2, m )
744 t3 = t1*v( 3, m )
745 DO 110 j = i2, i4
746 refsum = u( j, kms+1 ) + v( 2, m )*u( j, kms+2 )
747 $ + v( 3, m )*u( j, kms+3 )
748 u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
749 u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
750 u( j, kms+3 ) = u( j, kms+3 ) - refsum*t3
751 110 CONTINUE
752 120 CONTINUE
753 ELSE IF( wantz ) THEN
754*
755* ==== U is not accumulated, so update Z
756* . now by multiplying by reflections
757* . from the right. ====
758*
759 DO 140 m = mbot, mtop, -1
760 k = krcol + 2*( m-1 )
761 t1 = v( 1, m )
762 t2 = t1*v( 2, m )
763 t3 = t1*v( 3, m )
764 DO 130 j = iloz, ihiz
765 refsum = z( j, k+1 ) + v( 2, m )*z( j, k+2 )
766 $ + v( 3, m )*z( j, k+3 )
767 z( j, k+1 ) = z( j, k+1 ) - refsum*t1
768 z( j, k+2 ) = z( j, k+2 ) - refsum*t2
769 z( j, k+3 ) = z( j, k+3 ) - refsum*t3
770 130 CONTINUE
771 140 CONTINUE
772 END IF
773*
774* ==== End of near-the-diagonal bulge chase. ====
775*
776 145 CONTINUE
777*
778* ==== Use U (if accumulated) to update far-from-diagonal
779* . entries in H. If required, use U to update Z as
780* . well. ====
781*
782 IF( accum ) THEN
783 IF( wantt ) THEN
784 jtop = 1
785 jbot = n
786 ELSE
787 jtop = ktop
788 jbot = kbot
789 END IF
790 k1 = max( 1, ktop-incol )
791 nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
792*
793* ==== Horizontal Multiply ====
794*
795 DO 150 jcol = min( ndcol, kbot ) + 1, jbot, nh
796 jlen = min( nh, jbot-jcol+1 )
797 CALL sgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),
798 $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
799 $ ldwh )
800 CALL slacpy( 'ALL', nu, jlen, wh, ldwh,
801 $ h( incol+k1, jcol ), ldh )
802 150 CONTINUE
803*
804* ==== Vertical multiply ====
805*
806 DO 160 jrow = jtop, max( ktop, incol ) - 1, nv
807 jlen = min( nv, max( ktop, incol )-jrow )
808 CALL sgemm( 'N', 'N', jlen, nu, nu, one,
809 $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
810 $ ldu, zero, wv, ldwv )
811 CALL slacpy( 'ALL', jlen, nu, wv, ldwv,
812 $ h( jrow, incol+k1 ), ldh )
813 160 CONTINUE
814*
815* ==== Z multiply (also vertical) ====
816*
817 IF( wantz ) THEN
818 DO 170 jrow = iloz, ihiz, nv
819 jlen = min( nv, ihiz-jrow+1 )
820 CALL sgemm( 'N', 'N', jlen, nu, nu, one,
821 $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
822 $ ldu, zero, wv, ldwv )
823 CALL slacpy( 'ALL', jlen, nu, wv, ldwv,
824 $ z( jrow, incol+k1 ), ldz )
825 170 CONTINUE
826 END IF
827 END IF
828 180 CONTINUE
829*
830* ==== End of SLAQR5 ====
831*
subroutine slabad(SMALL, LARGE)
SLABAD
Definition: slabad.f:74
subroutine slaset(UPLO, M, N, ALPHA, BETA, A, LDA)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition: slaset.f:110
subroutine slacpy(UPLO, M, N, A, LDA, B, LDB)
SLACPY copies all or part of one two-dimensional array to another.
Definition: slacpy.f:103
subroutine slarfg(N, ALPHA, X, INCX, TAU)
SLARFG generates an elementary reflector (Householder matrix).
Definition: slarfg.f:106
subroutine slaqr1(N, H, LDH, SR1, SI1, SR2, SI2, V)
SLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and spe...
Definition: slaqr1.f:121
subroutine strmm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
STRMM
Definition: strmm.f:177
subroutine sgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
SGEMM
Definition: sgemm.f:187
real function slamch(CMACH)
SLAMCH
Definition: slamch.f:68
Here is the call graph for this function:
Here is the caller graph for this function: