LAPACK  3.6.1
LAPACK: Linear Algebra PACKage
subroutine slaqr5 ( logical  WANTT,
logical  WANTZ,
integer  KACC22,
integer  N,
integer  KTOP,
integer  KBOT,
integer  NSHFTS,
real, dimension( * )  SR,
real, dimension( * )  SI,
real, dimension( ldh, * )  H,
integer  LDH,
integer  ILOZ,
integer  IHIZ,
real, dimension( ldz, * )  Z,
integer  LDZ,
real, dimension( ldv, * )  V,
integer  LDV,
real, dimension( ldu, * )  U,
integer  LDU,
integer  NV,
real, dimension( ldwv, * )  WV,
integer  LDWV,
integer  NH,
real, dimension( ldwh, * )  WH,
integer  LDWH 
)

SLAQR5 performs a single small-bulge multi-shift QR sweep.


Purpose:
    SLAQR5, called by SLAQR0, performs a
    single small-bulge multi-shift QR sweep.
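    SLAQR5 is an internal kernel and is not normally called directly;
    user code typically reaches it through the driver SHSEQR, which
    calls SLAQR0. The following minimal sketch shows that usual entry
    point on a small Hessenberg matrix. It is an illustration only:
    the matrix data, program name, and workspace size are arbitrary
    choices and not part of this routine's documentation.

*     Illustration only: the usual entry point is SHSEQR, which calls
*     SLAQR0, which in turn calls SLAQR5 for the QR sweeps.
      PROGRAM EXAMPLE1
      INTEGER            N, LDH, LDZ, LWORK
      PARAMETER          ( N = 4, LDH = N, LDZ = N, LWORK = 11*N )
      INTEGER            I, J, INFO
      REAL               H( LDH, N ), Z( LDZ, N ), WR( N ), WI( N ),
     $                   WORK( LWORK )
*     Build an arbitrary small upper Hessenberg matrix.
      DO 20 J = 1, N
         DO 10 I = 1, N
            IF( I.LE.J+1 ) THEN
               H( I, J ) = REAL( I+J )
            ELSE
               H( I, J ) = 0.0E0
            END IF
   10    CONTINUE
   20 CONTINUE
*     Schur form T (overwrites H) and Schur vectors Z ('I' means Z
*     is initialized to the identity inside SHSEQR).
      CALL SHSEQR( 'S', 'I', N, 1, N, H, LDH, WR, WI, Z, LDZ, WORK,
     $             LWORK, INFO )
      WRITE( *, * ) 'SHSEQR INFO =', INFO
      END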
Parameters
[in]WANTT
          WANTT is logical scalar
             WANTT = .true. if the quasi-triangular Schur factor
             is being computed.  WANTT is set to .false. otherwise.
[in]WANTZ
          WANTZ is logical scalar
             WANTZ = .true. if the orthogonal Schur factor is being
             computed.  WANTZ is set to .false. otherwise.
[in]KACC22
          KACC22 is integer with value 0, 1, or 2.
             Specifies the computation mode of far-from-diagonal
             orthogonal updates.
        = 0: SLAQR5 does not accumulate reflections and does not
             use matrix-matrix multiply to update far-from-diagonal
             matrix entries.
        = 1: SLAQR5 accumulates reflections and uses matrix-matrix
             multiply to update the far-from-diagonal matrix entries.
        = 2: SLAQR5 accumulates reflections, uses matrix-matrix
             multiply to update the far-from-diagonal matrix entries,
             and takes advantage of 2-by-2 block structure during
             matrix multiplies.
[in]N
          N is integer scalar
             N is the order of the Hessenberg matrix H upon which this
             subroutine operates.
[in]KTOP
          KTOP is integer scalar
[in]KBOT
          KBOT is integer scalar
             These are the first and last rows and columns of an
             isolated diagonal block upon which the QR sweep is to be
             applied. It is assumed without a check that
                       either KTOP = 1  or   H(KTOP,KTOP-1) = 0
             and
                       either KBOT = N  or   H(KBOT+1,KBOT) = 0.
[in]NSHFTS
          NSHFTS is integer scalar
             NSHFTS gives the number of simultaneous shifts.  NSHFTS
             must be positive and even.
[in,out]SR
          SR is REAL array of size (NSHFTS)
[in,out]SI
          SI is REAL array of size (NSHFTS)
             SR contains the real parts and SI contains the imaginary
             parts of the NSHFTS shifts of origin that define the
             multi-shift QR sweep.  On output SR and SI may be
             reordered.
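    For example (hypothetical values, not from the LAPACK source),
    with NSHFTS = 4 a complex conjugate pair is stored in adjacent
    positions, as the shift shuffle in the source below assumes,
    followed here by two real shifts:

*     Hypothetical layout for NSHFTS = 4: one complex conjugate pair
*     stored adjacently, then two real shifts (SI = 0).
      SR( 1 ) =  1.5E0
      SI( 1 ) =  0.5E0
      SR( 2 ) =  1.5E0
      SI( 2 ) = -0.5E0
      SR( 3 ) =  2.0E0
      SI( 3 ) =  0.0E0
      SR( 4 ) =  3.0E0
      SI( 4 ) =  0.0E0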
[in,out]H
          H is REAL array of size (LDH,N)
             On input H contains a Hessenberg matrix.  On output a
             multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
             to the isolated diagonal block in rows and columns KTOP
             through KBOT.
[in]LDH
          LDH is integer scalar
             LDH is the leading dimension of H just as declared in the
             calling procedure.  LDH.GE.MAX(1,N).
[in]ILOZ
          ILOZ is INTEGER
[in]IHIZ
          IHIZ is INTEGER
             Specify the rows of Z to which transformations must be
             applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
[in,out]Z
          Z is REAL array of size (LDZ,IHIZ)
             If WANTZ = .TRUE., then the QR Sweep orthogonal
             similarity transformation is accumulated into
             Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
             If WANTZ = .FALSE., then Z is unreferenced.
[in]LDZ
          LDZ is integer scalar
             LDZ is the leading dimension of Z just as declared in
             the calling procedure. LDZ.GE.N.
[out]V
          V is REAL array of size (LDV,NSHFTS/2)
[in]LDV
          LDV is integer scalar
             LDV is the leading dimension of V as declared in the
             calling procedure.  LDV.GE.3.
[out]U
          U is REAL array of size
             (LDU,3*NSHFTS-3)
[in]LDU
          LDU is integer scalar
             LDU is the leading dimension of U just as declared in the
             calling subroutine.  LDU.GE.3*NSHFTS-3.
[in]NV
          NV is integer scalar
             NV is the number of rows in WV available for workspace.
             NV.GE.1.
[out]WV
          WV is REAL array of size
             (LDWV,3*NSHFTS-3)
[in]LDWV
          LDWV is integer scalar
             LDWV is the leading dimension of WV just as declared in the
             calling subroutine.  LDWV.GE.NV.
[in]NH
          NH is integer scalar
             NH is the number of columns in array WH available for
             workspace. NH.GE.1.
[out]WH
          WH is REAL array of size (LDWH,NH)
[in]LDWH
          LDWH is integer scalar
             LDWH is the leading dimension of WH just as declared in the
             calling procedure.  LDWH.GE.3*NSHFTS-3.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Date
June 2016
Contributors:
Karen Braman and Ralph Byers, Department of Mathematics, University of Kansas, USA
References:
K. Braman, R. Byers and R. Mathias, The Multi-Shift QR Algorithm Part I: Maintaining Well-Focused Shifts and Level 3 Performance, SIAM Journal on Matrix Analysis and Applications, volume 23, pages 929–947, 2002.

Definition at line 261 of file slaqr5.f.

261 *
262 * -- LAPACK auxiliary routine (version 3.6.1) --
263 * -- LAPACK is a software package provided by Univ. of Tennessee, --
264 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
265 * June 2016
266 *
267 * .. Scalar Arguments ..
268  INTEGER ihiz, iloz, kacc22, kbot, ktop, ldh, ldu, ldv,
269  $ ldwh, ldwv, ldz, n, nh, nshfts, nv
270  LOGICAL wantt, wantz
271 * ..
272 * .. Array Arguments ..
273  REAL h( ldh, * ), si( * ), sr( * ), u( ldu, * ),
274  $ v( ldv, * ), wh( ldwh, * ), wv( ldwv, * ),
275  $ z( ldz, * )
276 * ..
277 *
278 * ================================================================
279 * .. Parameters ..
280  REAL zero, one
281  parameter ( zero = 0.0e0, one = 1.0e0 )
282 * ..
283 * .. Local Scalars ..
284  REAL alpha, beta, h11, h12, h21, h22, refsum,
285  $ safmax, safmin, scl, smlnum, swap, tst1, tst2,
286  $ ulp
287  INTEGER i, i2, i4, incol, j, j2, j4, jbot, jcol, jlen,
288  $ jrow, jtop, k, k1, kdu, kms, knz, krcol, kzs,
289  $ m, m22, mbot, mend, mstart, mtop, nbmps, ndcol,
290  $ ns, nu
291  LOGICAL accum, blk22, bmp22
292 * ..
293 * .. External Functions ..
294  REAL slamch
295  EXTERNAL slamch
296 * ..
297 * .. Intrinsic Functions ..
298 *
299  INTRINSIC abs, max, min, mod, real
300 * ..
301 * .. Local Arrays ..
302  REAL vt( 3 )
303 * ..
304 * .. External Subroutines ..
305  EXTERNAL sgemm, slabad, slacpy, slaqr1, slarfg, slaset,
306  $ strmm
307 * ..
308 * .. Executable Statements ..
309 *
310 * ==== If there are no shifts, then there is nothing to do. ====
311 *
312  IF( nshfts.LT.2 )
313  $ RETURN
314 *
315 * ==== If the active block is empty or 1-by-1, then there
316 * . is nothing to do. ====
317 *
318  IF( ktop.GE.kbot )
319  $ RETURN
320 *
321 * ==== Shuffle shifts into pairs of real shifts and pairs
322 * . of complex conjugate shifts assuming complex
323 * . conjugate shifts are already adjacent to one
324 * . another. ====
325 *
326  DO 10 i = 1, nshfts - 2, 2
327  IF( si( i ).NE.-si( i+1 ) ) THEN
328 *
329  swap = sr( i )
330  sr( i ) = sr( i+1 )
331  sr( i+1 ) = sr( i+2 )
332  sr( i+2 ) = swap
333 *
334  swap = si( i )
335  si( i ) = si( i+1 )
336  si( i+1 ) = si( i+2 )
337  si( i+2 ) = swap
338  END IF
339  10 CONTINUE
340 *
341 * ==== NSHFTS is supposed to be even, but if it is odd,
342 * . then simply reduce it by one. The shuffle above
343 * . ensures that the dropped shift is real and that
344 * . the remaining shifts are paired. ====
345 *
346  ns = nshfts - mod( nshfts, 2 )
347 *
348 * ==== Machine constants for deflation ====
349 *
350  safmin = slamch( 'SAFE MINIMUM' )
351  safmax = one / safmin
352  CALL slabad( safmin, safmax )
353  ulp = slamch( 'PRECISION' )
354  smlnum = safmin*( REAL( N ) / ulp )
355 *
356 * ==== Use accumulated reflections to update far-from-diagonal
357 * . entries ? ====
358 *
359  accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
360 *
361 * ==== If so, exploit the 2-by-2 block structure? ====
362 *
363  blk22 = ( ns.GT.2 ) .AND. ( kacc22.EQ.2 )
364 *
365 * ==== clear trash ====
366 *
367  IF( ktop+2.LE.kbot )
368  $ h( ktop+2, ktop ) = zero
369 *
370 * ==== NBMPS = number of 2-shift bulges in the chain ====
371 *
372  nbmps = ns / 2
373 *
374 * ==== KDU = width of slab ====
375 *
376  kdu = 6*nbmps - 3
377 *
378 * ==== Create and chase chains of NBMPS bulges ====
379 *
380  DO 220 incol = 3*( 1-nbmps ) + ktop - 1, kbot - 2, 3*nbmps - 2
381  ndcol = incol + kdu
382  IF( accum )
383  $ CALL slaset( 'ALL', kdu, kdu, zero, one, u, ldu )
384 *
385 * ==== Near-the-diagonal bulge chase. The following loop
386 * . performs the near-the-diagonal part of a small bulge
387 * . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal
388 * . chunk extends from column INCOL to column NDCOL
389 * . (including both column INCOL and column NDCOL). The
390 * . following loop chases a 3*NBMPS column long chain of
391 * . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL
392 * . may be less than KTOP and NDCOL may be greater than
393 * . KBOT indicating phantom columns from which to chase
394 * . bulges before they are actually introduced or to which
395 * . to chase bulges beyond column KBOT.) ====
396 *
397  DO 150 krcol = incol, min( incol+3*nbmps-3, kbot-2 )
398 *
399 * ==== Bulges number MTOP to MBOT are active double implicit
400 * . shift bulges. There may or may not also be small
401 * . 2-by-2 bulge, if there is room. The inactive bulges
402 * . (if any) must wait until the active bulges have moved
403 * . down the diagonal to make room. The phantom matrix
404 * . paradigm described above helps keep track. ====
405 *
406  mtop = max( 1, ( ( ktop-1 )-krcol+2 ) / 3+1 )
407  mbot = min( nbmps, ( kbot-krcol ) / 3 )
408  m22 = mbot + 1
409  bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+3*( m22-1 ) ).EQ.
410  $ ( kbot-2 )
411 *
412 * ==== Generate reflections to chase the chain right
413 * . one column. (The minimum value of K is KTOP-1.) ====
414 *
415  DO 20 m = mtop, mbot
416  k = krcol + 3*( m-1 )
417  IF( k.EQ.ktop-1 ) THEN
418  CALL slaqr1( 3, h( ktop, ktop ), ldh, sr( 2*m-1 ),
419  $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
420  $ v( 1, m ) )
421  alpha = v( 1, m )
422  CALL slarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
423  ELSE
424  beta = h( k+1, k )
425  v( 2, m ) = h( k+2, k )
426  v( 3, m ) = h( k+3, k )
427  CALL slarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
428 *
429 * ==== A Bulge may collapse because of vigilant
430 * . deflation or destructive underflow. In the
431 * . underflow case, try the two-small-subdiagonals
432 * . trick to try to reinflate the bulge. ====
433 *
434  IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
435  $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
436 *
437 * ==== Typical case: not collapsed (yet). ====
438 *
439  h( k+1, k ) = beta
440  h( k+2, k ) = zero
441  h( k+3, k ) = zero
442  ELSE
443 *
444 * ==== Atypical case: collapsed. Attempt to
445 * . reintroduce ignoring H(K+1,K) and H(K+2,K).
446 * . If the fill resulting from the new
447 * . reflector is too large, then abandon it.
448 * . Otherwise, use the new one. ====
449 *
450  CALL slaqr1( 3, h( k+1, k+1 ), ldh, sr( 2*m-1 ),
451  $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
452  $ vt )
453  alpha = vt( 1 )
454  CALL slarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
455  refsum = vt( 1 )*( h( k+1, k )+vt( 2 )*
456  $ h( k+2, k ) )
457 *
458  IF( abs( h( k+2, k )-refsum*vt( 2 ) )+
459  $ abs( refsum*vt( 3 ) ).GT.ulp*
460  $ ( abs( h( k, k ) )+abs( h( k+1,
461  $ k+1 ) )+abs( h( k+2, k+2 ) ) ) ) THEN
462 *
463 * ==== Starting a new bulge here would
464 * . create non-negligible fill. Use
465 * . the old one with trepidation. ====
466 *
467  h( k+1, k ) = beta
468  h( k+2, k ) = zero
469  h( k+3, k ) = zero
470  ELSE
471 *
472 * ==== Starting a new bulge here would
473 * . create only negligible fill.
474 * . Replace the old reflector with
475 * . the new one. ====
476 *
477  h( k+1, k ) = h( k+1, k ) - refsum
478  h( k+2, k ) = zero
479  h( k+3, k ) = zero
480  v( 1, m ) = vt( 1 )
481  v( 2, m ) = vt( 2 )
482  v( 3, m ) = vt( 3 )
483  END IF
484  END IF
485  END IF
486  20 CONTINUE
487 *
488 * ==== Generate a 2-by-2 reflection, if needed. ====
489 *
490  k = krcol + 3*( m22-1 )
491  IF( bmp22 ) THEN
492  IF( k.EQ.ktop-1 ) THEN
493  CALL slaqr1( 2, h( k+1, k+1 ), ldh, sr( 2*m22-1 ),
494  $ si( 2*m22-1 ), sr( 2*m22 ), si( 2*m22 ),
495  $ v( 1, m22 ) )
496  beta = v( 1, m22 )
497  CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
498  ELSE
499  beta = h( k+1, k )
500  v( 2, m22 ) = h( k+2, k )
501  CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
502  h( k+1, k ) = beta
503  h( k+2, k ) = zero
504  END IF
505  END IF
506 *
507 * ==== Multiply H by reflections from the left ====
508 *
509  IF( accum ) THEN
510  jbot = min( ndcol, kbot )
511  ELSE IF( wantt ) THEN
512  jbot = n
513  ELSE
514  jbot = kbot
515  END IF
516  DO 40 j = max( ktop, krcol ), jbot
517  mend = min( mbot, ( j-krcol+2 ) / 3 )
518  DO 30 m = mtop, mend
519  k = krcol + 3*( m-1 )
520  refsum = v( 1, m )*( h( k+1, j )+v( 2, m )*
521  $ h( k+2, j )+v( 3, m )*h( k+3, j ) )
522  h( k+1, j ) = h( k+1, j ) - refsum
523  h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m )
524  h( k+3, j ) = h( k+3, j ) - refsum*v( 3, m )
525  30 CONTINUE
526  40 CONTINUE
527  IF( bmp22 ) THEN
528  k = krcol + 3*( m22-1 )
529  DO 50 j = max( k+1, ktop ), jbot
530  refsum = v( 1, m22 )*( h( k+1, j )+v( 2, m22 )*
531  $ h( k+2, j ) )
532  h( k+1, j ) = h( k+1, j ) - refsum
533  h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m22 )
534  50 CONTINUE
535  END IF
536 *
537 * ==== Multiply H by reflections from the right.
538 * . Delay filling in the last row until the
539 * . vigilant deflation check is complete. ====
540 *
541  IF( accum ) THEN
542  jtop = max( ktop, incol )
543  ELSE IF( wantt ) THEN
544  jtop = 1
545  ELSE
546  jtop = ktop
547  END IF
548  DO 90 m = mtop, mbot
549  IF( v( 1, m ).NE.zero ) THEN
550  k = krcol + 3*( m-1 )
551  DO 60 j = jtop, min( kbot, k+3 )
552  refsum = v( 1, m )*( h( j, k+1 )+v( 2, m )*
553  $ h( j, k+2 )+v( 3, m )*h( j, k+3 ) )
554  h( j, k+1 ) = h( j, k+1 ) - refsum
555  h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2, m )
556  h( j, k+3 ) = h( j, k+3 ) - refsum*v( 3, m )
557  60 CONTINUE
558 *
559  IF( accum ) THEN
560 *
561 * ==== Accumulate U. (If necessary, update Z later
562 * . with an efficient matrix-matrix
563 * . multiply.) ====
564 *
565  kms = k - incol
566  DO 70 j = max( 1, ktop-incol ), kdu
567  refsum = v( 1, m )*( u( j, kms+1 )+v( 2, m )*
568  $ u( j, kms+2 )+v( 3, m )*u( j, kms+3 ) )
569  u( j, kms+1 ) = u( j, kms+1 ) - refsum
570  u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2, m )
571  u( j, kms+3 ) = u( j, kms+3 ) - refsum*v( 3, m )
572  70 CONTINUE
573  ELSE IF( wantz ) THEN
574 *
575 * ==== U is not accumulated, so update Z
576 * . now by multiplying by reflections
577 * . from the right. ====
578 *
579  DO 80 j = iloz, ihiz
580  refsum = v( 1, m )*( z( j, k+1 )+v( 2, m )*
581  $ z( j, k+2 )+v( 3, m )*z( j, k+3 ) )
582  z( j, k+1 ) = z( j, k+1 ) - refsum
583  z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m )
584  z( j, k+3 ) = z( j, k+3 ) - refsum*v( 3, m )
585  80 CONTINUE
586  END IF
587  END IF
588  90 CONTINUE
589 *
590 * ==== Special case: 2-by-2 reflection (if needed) ====
591 *
592  k = krcol + 3*( m22-1 )
593  IF( bmp22 ) THEN
594  IF ( v( 1, m22 ).NE.zero ) THEN
595  DO 100 j = jtop, min( kbot, k+3 )
596  refsum = v( 1, m22 )*( h( j, k+1 )+v( 2, m22 )*
597  $ h( j, k+2 ) )
598  h( j, k+1 ) = h( j, k+1 ) - refsum
599  h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2, m22 )
600  100 CONTINUE
601 *
602  IF( accum ) THEN
603  kms = k - incol
604  DO 110 j = max( 1, ktop-incol ), kdu
605  refsum = v( 1, m22 )*( u( j, kms+1 )+
606  $ v( 2, m22 )*u( j, kms+2 ) )
607  u( j, kms+1 ) = u( j, kms+1 ) - refsum
608  u( j, kms+2 ) = u( j, kms+2 ) - refsum*
609  $ v( 2, m22 )
610  110 CONTINUE
611  ELSE IF( wantz ) THEN
612  DO 120 j = iloz, ihiz
613  refsum = v( 1, m22 )*( z( j, k+1 )+v( 2, m22 )*
614  $ z( j, k+2 ) )
615  z( j, k+1 ) = z( j, k+1 ) - refsum
616  z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m22 )
617  120 CONTINUE
618  END IF
619  END IF
620  END IF
621 *
622 * ==== Vigilant deflation check ====
623 *
624  mstart = mtop
625  IF( krcol+3*( mstart-1 ).LT.ktop )
626  $ mstart = mstart + 1
627  mend = mbot
628  IF( bmp22 )
629  $ mend = mend + 1
630  IF( krcol.EQ.kbot-2 )
631  $ mend = mend + 1
632  DO 130 m = mstart, mend
633  k = min( kbot-1, krcol+3*( m-1 ) )
634 *
635 * ==== The following convergence test requires that
636 * . the traditional small-compared-to-nearby-diagonals
637 * . criterion and the Ahues & Tisseur (LAWN 122, 1997)
638 * . criteria both be satisfied. The latter improves
639 * . accuracy in some examples. Falling back on an
640 * . alternate convergence criterion when TST1 or TST2
641 * . is zero (as done here) is traditional but probably
642 * . unnecessary. ====
643 *
644  IF( h( k+1, k ).NE.zero ) THEN
645  tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
646  IF( tst1.EQ.zero ) THEN
647  IF( k.GE.ktop+1 )
648  $ tst1 = tst1 + abs( h( k, k-1 ) )
649  IF( k.GE.ktop+2 )
650  $ tst1 = tst1 + abs( h( k, k-2 ) )
651  IF( k.GE.ktop+3 )
652  $ tst1 = tst1 + abs( h( k, k-3 ) )
653  IF( k.LE.kbot-2 )
654  $ tst1 = tst1 + abs( h( k+2, k+1 ) )
655  IF( k.LE.kbot-3 )
656  $ tst1 = tst1 + abs( h( k+3, k+1 ) )
657  IF( k.LE.kbot-4 )
658  $ tst1 = tst1 + abs( h( k+4, k+1 ) )
659  END IF
660  IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
661  $ THEN
662  h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
663  h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
664  h11 = max( abs( h( k+1, k+1 ) ),
665  $ abs( h( k, k )-h( k+1, k+1 ) ) )
666  h22 = min( abs( h( k+1, k+1 ) ),
667  $ abs( h( k, k )-h( k+1, k+1 ) ) )
668  scl = h11 + h12
669  tst2 = h22*( h11 / scl )
670 *
671  IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
672  $ max( smlnum, ulp*tst2 ) )h( k+1, k ) = zero
673  END IF
674  END IF
675  130 CONTINUE
676 *
677 * ==== Fill in the last row of each bulge. ====
678 *
679  mend = min( nbmps, ( kbot-krcol-1 ) / 3 )
680  DO 140 m = mtop, mend
681  k = krcol + 3*( m-1 )
682  refsum = v( 1, m )*v( 3, m )*h( k+4, k+3 )
683  h( k+4, k+1 ) = -refsum
684  h( k+4, k+2 ) = -refsum*v( 2, m )
685  h( k+4, k+3 ) = h( k+4, k+3 ) - refsum*v( 3, m )
686  140 CONTINUE
687 *
688 * ==== End of near-the-diagonal bulge chase. ====
689 *
690  150 CONTINUE
691 *
692 * ==== Use U (if accumulated) to update far-from-diagonal
693 * . entries in H. If required, use U to update Z as
694 * . well. ====
695 *
696  IF( accum ) THEN
697  IF( wantt ) THEN
698  jtop = 1
699  jbot = n
700  ELSE
701  jtop = ktop
702  jbot = kbot
703  END IF
704  IF( ( .NOT.blk22 ) .OR. ( incol.LT.ktop ) .OR.
705  $ ( ndcol.GT.kbot ) .OR. ( ns.LE.2 ) ) THEN
706 *
707 * ==== Updates not exploiting the 2-by-2 block
708 * . structure of U. K1 and NU keep track of
709 * . the location and size of U in the special
710 * . cases of introducing bulges and chasing
711 * . bulges off the bottom. In these special
712 * . cases and in case the number of shifts
713 * . is NS = 2, there is no 2-by-2 block
714 * . structure to exploit. ====
715 *
716  k1 = max( 1, ktop-incol )
717  nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
718 *
719 * ==== Horizontal Multiply ====
720 *
721  DO 160 jcol = min( ndcol, kbot ) + 1, jbot, nh
722  jlen = min( nh, jbot-jcol+1 )
723  CALL sgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),
724  $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
725  $ ldwh )
726  CALL slacpy( 'ALL', nu, jlen, wh, ldwh,
727  $ h( incol+k1, jcol ), ldh )
728  160 CONTINUE
729 *
730 * ==== Vertical multiply ====
731 *
732  DO 170 jrow = jtop, max( ktop, incol ) - 1, nv
733  jlen = min( nv, max( ktop, incol )-jrow )
734  CALL sgemm( 'N', 'N', jlen, nu, nu, one,
735  $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
736  $ ldu, zero, wv, ldwv )
737  CALL slacpy( 'ALL', jlen, nu, wv, ldwv,
738  $ h( jrow, incol+k1 ), ldh )
739  170 CONTINUE
740 *
741 * ==== Z multiply (also vertical) ====
742 *
743  IF( wantz ) THEN
744  DO 180 jrow = iloz, ihiz, nv
745  jlen = min( nv, ihiz-jrow+1 )
746  CALL sgemm( 'N', 'N', jlen, nu, nu, one,
747  $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
748  $ ldu, zero, wv, ldwv )
749  CALL slacpy( 'ALL', jlen, nu, wv, ldwv,
750  $ z( jrow, incol+k1 ), ldz )
751  180 CONTINUE
752  END IF
753  ELSE
754 *
755 * ==== Updates exploiting U's 2-by-2 block structure.
756 * . (I2, I4, J2, J4 are the last rows and columns
757 * . of the blocks.) ====
758 *
759  i2 = ( kdu+1 ) / 2
760  i4 = kdu
761  j2 = i4 - i2
762  j4 = kdu
763 *
764 * ==== KZS and KNZ deal with the band of zeros
765 * . along the diagonal of one of the triangular
766 * . blocks. ====
767 *
768  kzs = ( j4-j2 ) - ( ns+1 )
769  knz = ns + 1
770 *
771 * ==== Horizontal multiply ====
772 *
773  DO 190 jcol = min( ndcol, kbot ) + 1, jbot, nh
774  jlen = min( nh, jbot-jcol+1 )
775 *
776 * ==== Copy bottom of H to top+KZS of scratch ====
777 * . (The first KZS rows get multiplied by zero.) ====
778 *
779  CALL slacpy( 'ALL', knz, jlen, h( incol+1+j2, jcol ),
780  $ ldh, wh( kzs+1, 1 ), ldwh )
781 *
782 * ==== Multiply by U21**T ====
783 *
784  CALL slaset( 'ALL', kzs, jlen, zero, zero, wh, ldwh )
785  CALL strmm( 'L', 'U', 'C', 'N', knz, jlen, one,
786  $ u( j2+1, 1+kzs ), ldu, wh( kzs+1, 1 ),
787  $ ldwh )
788 *
789 * ==== Multiply top of H by U11**T ====
790 *
791  CALL sgemm( 'C', 'N', i2, jlen, j2, one, u, ldu,
792  $ h( incol+1, jcol ), ldh, one, wh, ldwh )
793 *
794 * ==== Copy top of H to bottom of WH ====
795 *
796  CALL slacpy( 'ALL', j2, jlen, h( incol+1, jcol ), ldh,
797  $ wh( i2+1, 1 ), ldwh )
798 *
799 * ==== Multiply by U21**T ====
800 *
801  CALL strmm( 'L', 'L', 'C', 'N', j2, jlen, one,
802  $ u( 1, i2+1 ), ldu, wh( i2+1, 1 ), ldwh )
803 *
804 * ==== Multiply by U22 ====
805 *
806  CALL sgemm( 'C', 'N', i4-i2, jlen, j4-j2, one,
807  $ u( j2+1, i2+1 ), ldu,
808  $ h( incol+1+j2, jcol ), ldh, one,
809  $ wh( i2+1, 1 ), ldwh )
810 *
811 * ==== Copy it back ====
812 *
813  CALL slacpy( 'ALL', kdu, jlen, wh, ldwh,
814  $ h( incol+1, jcol ), ldh )
815  190 CONTINUE
816 *
817 * ==== Vertical multiply ====
818 *
819  DO 200 jrow = jtop, max( incol, ktop ) - 1, nv
820  jlen = min( nv, max( incol, ktop )-jrow )
821 *
822 * ==== Copy right of H to scratch (the first KZS
823 * . columns get multiplied by zero) ====
824 *
825  CALL slacpy( 'ALL', jlen, knz, h( jrow, incol+1+j2 ),
826  $ ldh, wv( 1, 1+kzs ), ldwv )
827 *
828 * ==== Multiply by U21 ====
829 *
830  CALL slaset( 'ALL', jlen, kzs, zero, zero, wv, ldwv )
831  CALL strmm( 'R', 'U', 'N', 'N', jlen, knz, one,
832  $ u( j2+1, 1+kzs ), ldu, wv( 1, 1+kzs ),
833  $ ldwv )
834 *
835 * ==== Multiply by U11 ====
836 *
837  CALL sgemm( 'N', 'N', jlen, i2, j2, one,
838  $ h( jrow, incol+1 ), ldh, u, ldu, one, wv,
839  $ ldwv )
840 *
841 * ==== Copy left of H to right of scratch ====
842 *
843  CALL slacpy( 'ALL', jlen, j2, h( jrow, incol+1 ), ldh,
844  $ wv( 1, 1+i2 ), ldwv )
845 *
846 * ==== Multiply by U21 ====
847 *
848  CALL strmm( 'R', 'L', 'N', 'N', jlen, i4-i2, one,
849  $ u( 1, i2+1 ), ldu, wv( 1, 1+i2 ), ldwv )
850 *
851 * ==== Multiply by U22 ====
852 *
853  CALL sgemm( 'N', 'N', jlen, i4-i2, j4-j2, one,
854  $ h( jrow, incol+1+j2 ), ldh,
855  $ u( j2+1, i2+1 ), ldu, one, wv( 1, 1+i2 ),
856  $ ldwv )
857 *
858 * ==== Copy it back ====
859 *
860  CALL slacpy( 'ALL', jlen, kdu, wv, ldwv,
861  $ h( jrow, incol+1 ), ldh )
862  200 CONTINUE
863 *
864 * ==== Multiply Z (also vertical) ====
865 *
866  IF( wantz ) THEN
867  DO 210 jrow = iloz, ihiz, nv
868  jlen = min( nv, ihiz-jrow+1 )
869 *
870 * ==== Copy right of Z to left of scratch (first
871 * . KZS columns get multiplied by zero) ====
872 *
873  CALL slacpy( 'ALL', jlen, knz,
874  $ z( jrow, incol+1+j2 ), ldz,
875  $ wv( 1, 1+kzs ), ldwv )
876 *
877 * ==== Multiply by U12 ====
878 *
879  CALL slaset( 'ALL', jlen, kzs, zero, zero, wv,
880  $ ldwv )
881  CALL strmm( 'R', 'U', 'N', 'N', jlen, knz, one,
882  $ u( j2+1, 1+kzs ), ldu, wv( 1, 1+kzs ),
883  $ ldwv )
884 *
885 * ==== Multiply by U11 ====
886 *
887  CALL sgemm( 'N', 'N', jlen, i2, j2, one,
888  $ z( jrow, incol+1 ), ldz, u, ldu, one,
889  $ wv, ldwv )
890 *
891 * ==== Copy left of Z to right of scratch ====
892 *
893  CALL slacpy( 'ALL', jlen, j2, z( jrow, incol+1 ),
894  $ ldz, wv( 1, 1+i2 ), ldwv )
895 *
896 * ==== Multiply by U21 ====
897 *
898  CALL strmm( 'R', 'L', 'N', 'N', jlen, i4-i2, one,
899  $ u( 1, i2+1 ), ldu, wv( 1, 1+i2 ),
900  $ ldwv )
901 *
902 * ==== Multiply by U22 ====
903 *
904  CALL sgemm( 'N', 'N', jlen, i4-i2, j4-j2, one,
905  $ z( jrow, incol+1+j2 ), ldz,
906  $ u( j2+1, i2+1 ), ldu, one,
907  $ wv( 1, 1+i2 ), ldwv )
908 *
909 * ==== Copy the result back to Z ====
910 *
911  CALL slacpy( 'ALL', jlen, kdu, wv, ldwv,
912  $ z( jrow, incol+1 ), ldz )
913  210 CONTINUE
914  END IF
915  END IF
916  END IF
917  220 CONTINUE
918 *
919 * ==== End of SLAQR5 ====
920 *
