Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed chains of bulges in multishift QR algorithms. ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014). 
  263      IMPLICIT NONE
  264
  265
  266
  267
  268
  269
  270      INTEGER            IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
  271     $                   LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
  272      LOGICAL            WANTT, WANTZ
  273
  274
  275      REAL               H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
  276     $                   V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
  277     $                   Z( LDZ, * )
  278
  279
  280
  281
  282      REAL               ZERO, ONE
  283      parameter( zero = 0.0e0, one = 1.0e0 )
  284
  285
  286      REAL               ALPHA, BETA, H11, H12, H21, H22, REFSUM,
  287     $                   SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, T1, T2,
  288     $                   T3, TST1, TST2, ULP
  289      INTEGER            I, I2, I4, INCOL, J, JBOT, JCOL, JLEN,
  290     $                   JROW, JTOP, K, K1, KDU, KMS, KRCOL,
  291     $                   M, M22, MBOT, MTOP, NBMPS, NDCOL,
  292     $                   NS, NU
  293      LOGICAL            ACCUM, BMP22
  294
  295
  296      REAL               SLAMCH
  298
  299
  300
  301      INTRINSIC          abs, max, min, mod, real
  302
  303
  304      REAL               VT( 3 )
  305
  306
  309
  310
  311
  312
  313
  314      IF( nshfts.LT.2 )
  315     $   RETURN
  316
  317
  318
  319
  320      IF( ktop.GE.kbot )
  321     $   RETURN
  322
  323
  324
  325
  326
  327
  328      DO 10 i = 1, nshfts - 2, 2
  329         IF( si( i ).NE.-si( i+1 ) ) THEN
  330
  331            swap = sr( i )
  332            sr( i ) = sr( i+1 )
  333            sr( i+1 ) = sr( i+2 )
  334            sr( i+2 ) = swap
  335
  336            swap = si( i )
  337            si( i ) = si( i+1 )
  338            si( i+1 ) = si( i+2 )
  339            si( i+2 ) = swap
  340         END IF
  341   10 CONTINUE
  342
  343
  344
  345
  346
  347
  348      ns = nshfts - mod( nshfts, 2 )
  349
  350
  351
  352      safmin = 
slamch( 
'SAFE MINIMUM' )
 
  353      safmax = one / safmin
  354      ulp = 
slamch( 
'PRECISION' )
 
  355      smlnum = safmin*( real( n ) / ulp )
  356
  357
  358
  359
  360      accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
  361
  362
  363
  364      IF( ktop+2.LE.kbot )
  365     $   h( ktop+2, ktop ) = zero
  366
  367
  368
  369      nbmps = ns / 2
  370
  371
  372
  373      kdu = 4*nbmps
  374
  375
  376
  377      DO 180 incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
  378
  379
  380
  381         IF( accum ) THEN
  382            jtop = max( ktop, incol )
  383         ELSE IF( wantt ) THEN
  384            jtop = 1
  385         ELSE
  386            jtop = ktop
  387         END IF
  388
  389         ndcol = incol + kdu
  390         IF( accum )
  391     $      
CALL slaset( 
'ALL', kdu, kdu, zero, one, u, ldu )
 
  392
  393
  394
  395
  396
  397
  398
  399
  400
  401
  402
  403
  404
  405         DO 145 krcol = incol, min( incol+2*nbmps-1, kbot-2 )
  406
  407
  408
  409
  410
  411
  412
  413
  414            mtop = max( 1, ( ktop-krcol ) / 2+1 )
  415            mbot = min( nbmps, ( kbot-krcol-1 ) / 2 )
  416            m22 = mbot + 1
  417            bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+2*( m22-1 ) ).EQ.
  418     $              ( kbot-2 )
  419
  420
  421
  422
  423            IF ( bmp22 ) THEN
  424
  425
  426
  427
  428               k = krcol + 2*( m22-1 )
  429               IF( k.EQ.ktop-1 ) THEN
  430                  CALL slaqr1( 2, h( k+1, k+1 ), ldh, sr( 2*m22-1 ),
 
  431     $                         si( 2*m22-1 ), sr( 2*m22 ), si( 2*m22 ),
  432     $                         v( 1, m22 ) )
  433                  beta = v( 1, m22 )
  434                  CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
 
  435               ELSE
  436                  beta = h( k+1, k )
  437                  v( 2, m22 ) = h( k+2, k )
  438                  CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
 
  439                  h( k+1, k ) = beta
  440                  h( k+2, k ) = zero
  441               END IF
  442 
  443
  444
  445
  446
  447               t1 = v( 1, m22 )
  448               t2 = t1*v( 2, m22 )
  449               DO 30 j = jtop, min( kbot, k+3 )
  450                  refsum = h( j, k+1 ) + v( 2, m22 )*h( j, k+2 )
  451                  h( j, k+1 ) = h( j, k+1 ) - refsum*t1
  452                  h( j, k+2 ) = h( j, k+2 ) - refsum*t2
  453   30          CONTINUE
  454
  455
  456
  457
  458               IF( accum ) THEN
  459                  jbot = min( ndcol, kbot )
  460               ELSE IF( wantt ) THEN
  461                  jbot = n
  462               ELSE
  463                  jbot = kbot
  464               END IF
  465               t1 = v( 1, m22 )
  466               t2 = t1*v( 2, m22 )
  467               DO 40 j = k+1, jbot
  468                  refsum = h( k+1, j ) + v( 2, m22 )*h( k+2, j )
  469                  h( k+1, j ) = h( k+1, j ) - refsum*t1
  470                  h( k+2, j ) = h( k+2, j ) - refsum*t2
  471   40          CONTINUE
  472
  473
  474
  475
  476
  477
  478
  479
  480
  481
  482               IF( k.GE.ktop ) THEN
  483                  IF( h( k+1, k ).NE.zero ) THEN
  484                     tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
  485                     IF( tst1.EQ.zero ) THEN
  486                        IF( k.GE.ktop+1 )
  487     $                     tst1 = tst1 + abs( h( k, k-1 ) )
  488                        IF( k.GE.ktop+2 )
  489     $                     tst1 = tst1 + abs( h( k, k-2 ) )
  490                        IF( k.GE.ktop+3 )
  491     $                     tst1 = tst1 + abs( h( k, k-3 ) )
  492                        IF( k.LE.kbot-2 )
  493     $                     tst1 = tst1 + abs( h( k+2, k+1 ) )
  494                        IF( k.LE.kbot-3 )
  495     $                     tst1 = tst1 + abs( h( k+3, k+1 ) )
  496                        IF( k.LE.kbot-4 )
  497     $                     tst1 = tst1 + abs( h( k+4, k+1 ) )
  498                     END IF
  499                     IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
  500     $                    THEN
  501                        h12 = max( abs( h( k+1, k ) ),
  502     $                             abs( h( k, k+1 ) ) )
  503                        h21 = min( abs( h( k+1, k ) ),
  504     $                             abs( h( k, k+1 ) ) )
  505                        h11 = max( abs( h( k+1, k+1 ) ),
  506     $                        abs( h( k, k )-h( k+1, k+1 ) ) )
  507                        h22 = min( abs( h( k+1, k+1 ) ),
  508     $                        abs( h( k, k )-h( k+1, k+1 ) ) )
  509                        scl = h11 + h12
  510                        tst2 = h22*( h11 / scl )
  511
  512                        IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
  513     $                      max( smlnum, ulp*tst2 ) ) THEN
  514                           h( k+1, k ) = zero
  515                        END IF
  516                     END IF
  517                  END IF
  518               END IF
  519
  520
  521
  522               IF( accum ) THEN
  523                  kms = k - incol
  524                  t1 = v( 1, m22 )
  525                  t2 = t1*v( 2, m22 )
  526                  DO 50 j = max( 1, ktop-incol ), kdu
  527                     refsum = u( j, kms+1 ) + v( 2, m22 )*u( j, kms+2 )
  528                     u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
  529                     u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
  530  50                 CONTINUE
  531               ELSE IF( wantz ) THEN
  532                  t1 = v( 1, m22 )
  533                  t2 = t1*v( 2, m22 )
  534                  DO 60 j = iloz, ihiz
  535                     refsum = z( j, k+1 )+v( 2, m22 )*z( j, k+2 )
  536                     z( j, k+1 ) = z( j, k+1 ) - refsum*t1
  537                     z( j, k+2 ) = z( j, k+2 ) - refsum*t2
  538  60              CONTINUE
  539               END IF
  540            END IF
  541
  542
  543
  544            DO 80 m = mbot, mtop, -1
  545               k = krcol + 2*( m-1 )
  546               IF( k.EQ.ktop-1 ) THEN
  547                  CALL slaqr1( 3, h( ktop, ktop ), ldh, sr( 2*m-1 ),
 
  548     $                         si( 2*m-1 ), sr( 2*m ), si( 2*m ),
  549     $                         v( 1, m ) )
  550                  alpha = v( 1, m )
  551                  CALL slarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
 
  552               ELSE
  553
  554
  555
  556
  557
  558                  t1 = v( 1, m )
  559                  t2 = t1*v( 2, m )
  560                  t3 = t1*v( 3, m )
  561                  refsum = v( 3, m )*h( k+3, k+2 )
  562                  h( k+3, k   ) = -refsum*t1
  563                  h( k+3, k+1 ) = -refsum*t2
  564                  h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*t3
  565
  566
  567
  568
  569                  beta      = h( k+1, k )
  570                  v( 2, m ) = h( k+2, k )
  571                  v( 3, m ) = h( k+3, k )
  572                  CALL slarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
 
  573
  574
  575
  576
  577
  578
  579                  IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
  580     $                zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
  581
  582
  583
  584                     h( k+1, k ) = beta
  585                     h( k+2, k ) = zero
  586                     h( k+3, k ) = zero
  587                  ELSE
  588
  589
  590
  591
  592
  593
  594
  595                     CALL slaqr1( 3, h( k+1, k+1 ), ldh, sr( 2*m-1 ),
 
  596     $                            si( 2*m-1 ), sr( 2*m ), si( 2*m ),
  597     $                            vt )
  598                     alpha = vt( 1 )
  599                     CALL slarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
 
  600                     t1 = vt( 1 )
  601                     t2 = t1*vt( 2 )
  602                     t3 = t2*vt( 3 )
  603                     refsum = h( k+1, k )+vt( 2 )*h( k+2, k )
  604
  605                     IF( abs( h( k+2, k )-refsum*t2 )+
  606     $                   abs( refsum*t3 ).GT.ulp*
  607     $                   ( abs( h( k, k ) )+abs( h( k+1,
  608     $                   k+1 ) )+abs( h( k+2, k+2 ) ) ) ) THEN
  609
  610
  611
  612
  613
  614                        h( k+1, k ) = beta
  615                        h( k+2, k ) = zero
  616                        h( k+3, k ) = zero
  617                     ELSE
  618
  619
  620
  621
  622
  623
  624                        h( k+1, k ) = h( k+1, k ) - refsum*t1
  625                        h( k+2, k ) = zero
  626                        h( k+3, k ) = zero
  627                        v( 1, m ) = vt( 1 )
  628                        v( 2, m ) = vt( 2 )
  629                        v( 3, m ) = vt( 3 )
  630                     END IF
  631                  END IF
  632               END IF
  633
  634
  635
  636
  637
  638
  639
  640               t1 = v( 1, m )
  641               t2 = t1*v( 2, m )
  642               t3 = t1*v( 3, m )
  643               DO 70 j = jtop, min( kbot, k+3 )
  644                  refsum = h( j, k+1 ) + v( 2, m )*h( j, k+2 )
  645     $                     + v( 3, m )*h( j, k+3 )
  646                  h( j, k+1 ) = h( j, k+1 ) - refsum*t1
  647                  h( j, k+2 ) = h( j, k+2 ) - refsum*t2
  648                  h( j, k+3 ) = h( j, k+3 ) - refsum*t3
  649   70          CONTINUE
  650
  651
  652
  653
  654               refsum = h( k+1, k+1 ) + v( 2, m )*h( k+2, k+1 )
  655     $                  + v( 3, m )*h( k+3, k+1 )
  656               h( k+1, k+1 ) = h( k+1, k+1 ) - refsum*t1
  657               h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*t2
  658               h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*t3
  659
  660
  661
  662
  663
  664
  665
  666
  667
  668
  669               IF( k.LT.ktop)
  670     $              cycle
  671               IF( h( k+1, k ).NE.zero ) THEN
  672                  tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
  673                  IF( tst1.EQ.zero ) THEN
  674                     IF( k.GE.ktop+1 )
  675     $                  tst1 = tst1 + abs( h( k, k-1 ) )
  676                     IF( k.GE.ktop+2 )
  677     $                  tst1 = tst1 + abs( h( k, k-2 ) )
  678                     IF( k.GE.ktop+3 )
  679     $                  tst1 = tst1 + abs( h( k, k-3 ) )
  680                     IF( k.LE.kbot-2 )
  681     $                  tst1 = tst1 + abs( h( k+2, k+1 ) )
  682                     IF( k.LE.kbot-3 )
  683     $                  tst1 = tst1 + abs( h( k+3, k+1 ) )
  684                     IF( k.LE.kbot-4 )
  685     $                  tst1 = tst1 + abs( h( k+4, k+1 ) )
  686                  END IF
  687                  IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
  688     $                 THEN
  689                     h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
  690                     h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
  691                     h11 = max( abs( h( k+1, k+1 ) ),
  692     $                     abs( h( k, k )-h( k+1, k+1 ) ) )
  693                     h22 = min( abs( h( k+1, k+1 ) ),
  694     $                     abs( h( k, k )-h( k+1, k+1 ) ) )
  695                     scl = h11 + h12
  696                     tst2 = h22*( h11 / scl )
  697
  698                     IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
  699     $                   max( smlnum, ulp*tst2 ) ) THEN
  700                        h( k+1, k ) = zero
  701                     END IF
  702                  END IF
  703               END IF
  704   80       CONTINUE
  705
  706
  707
  708            IF( accum ) THEN
  709               jbot = min( ndcol, kbot )
  710            ELSE IF( wantt ) THEN
  711               jbot = n
  712            ELSE
  713               jbot = kbot
  714            END IF
  715
  716            DO 100 m = mbot, mtop, -1
  717               k = krcol + 2*( m-1 )
  718               t1 = v( 1, m )
  719               t2 = t1*v( 2, m )
  720               t3 = t1*v( 3, m )
  721               DO 90 j = max( ktop, krcol + 2*m ), jbot
  722                  refsum = h( k+1, j ) + v( 2, m )*h( k+2, j )
  723     $                     + v( 3, m )*h( k+3, j )
  724                  h( k+1, j ) = h( k+1, j ) - refsum*t1
  725                  h( k+2, j ) = h( k+2, j ) - refsum*t2
  726                  h( k+3, j ) = h( k+3, j ) - refsum*t3
  727   90          CONTINUE
  728  100       CONTINUE
  729
  730
  731
  732            IF( accum ) THEN
  733
  734
  735
  736
  737
  738               DO 120 m = mbot, mtop, -1
  739                  k = krcol + 2*( m-1 )
  740                  kms = k - incol
  741                  i2 = max( 1, ktop-incol )
  742                  i2 = max( i2, kms-(krcol-incol)+1 )
  743                  i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 )
  744                  t1 = v( 1, m )
  745                  t2 = t1*v( 2, m )
  746                  t3 = t1*v( 3, m )
  747                  DO 110 j = i2, i4
  748                     refsum = u( j, kms+1 ) + v( 2, m )*u( j, kms+2 )
  749     $                        + v( 3, m )*u( j, kms+3 )
  750                     u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
  751                     u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
  752                     u( j, kms+3 ) = u( j, kms+3 ) - refsum*t3
  753  110             CONTINUE
  754  120          CONTINUE
  755            ELSE IF( wantz ) THEN
  756
  757
  758
  759
  760
  761               DO 140 m = mbot, mtop, -1
  762                  k = krcol + 2*( m-1 )
  763                  t1 = v( 1, m )
  764                  t2 = t1*v( 2, m )
  765                  t3 = t1*v( 3, m )
  766                  DO 130 j = iloz, ihiz
  767                     refsum = z( j, k+1 ) + v( 2, m )*z( j, k+2 )
  768     $                        + v( 3, m )*z( j, k+3 )
  769                     z( j, k+1 ) = z( j, k+1 ) - refsum*t1
  770                     z( j, k+2 ) = z( j, k+2 ) - refsum*t2
  771                     z( j, k+3 ) = z( j, k+3 ) - refsum*t3
  772  130             CONTINUE
  773  140          CONTINUE
  774            END IF
  775
  776
  777
  778  145    CONTINUE
  779
  780
  781
  782
  783
  784         IF( accum ) THEN
  785            IF( wantt ) THEN
  786               jtop = 1
  787               jbot = n
  788            ELSE
  789               jtop = ktop
  790               jbot = kbot
  791            END IF
  792            k1 = max( 1, ktop-incol )
  793            nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
  794
  795
  796
  797            DO 150 jcol = min( ndcol, kbot ) + 1, jbot, nh
  798               jlen = min( nh, jbot-jcol+1 )
  799               CALL sgemm( 
'C', 
'N', nu, jlen, nu, one, u( k1, k1 ),
 
  800     $                     ldu, h( incol+k1, jcol ), ldh, zero, wh,
  801     $                     ldwh )
  802               CALL slacpy( 
'ALL', nu, jlen, wh, ldwh,
 
  803     $                      h( incol+k1, jcol ), ldh )
  804  150       CONTINUE
  805
  806
  807
  808            DO 160 jrow = jtop, max( ktop, incol ) - 1, nv
  809               jlen = min( nv, max( ktop, incol )-jrow )
  810               CALL sgemm( 
'N', 
'N', jlen, nu, nu, one,
 
  811     $                     h( jrow, incol+k1 ), ldh, u( k1, k1 ),
  812     $                     ldu, zero, wv, ldwv )
  813               CALL slacpy( 
'ALL', jlen, nu, wv, ldwv,
 
  814     $                      h( jrow, incol+k1 ), ldh )
  815  160       CONTINUE
  816
  817
  818
  819            IF( wantz ) THEN
  820               DO 170 jrow = iloz, ihiz, nv
  821                  jlen = min( nv, ihiz-jrow+1 )
  822                  CALL sgemm( 
'N', 
'N', jlen, nu, nu, one,
 
  823     $                        z( jrow, incol+k1 ), ldz, u( k1, k1 ),
  824     $                        ldu, zero, wv, ldwv )
  825                  CALL slacpy( 
'ALL', jlen, nu, wv, ldwv,
 
  826     $                         z( jrow, incol+k1 ), ldz )
  827  170          CONTINUE
  828            END IF
  829         END IF
  830  180 CONTINUE
  831
  832
  833
subroutine sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
SGEMM
 
subroutine slacpy(uplo, m, n, a, lda, b, ldb)
SLACPY copies all or part of one two-dimensional array to another.
 
real function slamch(cmach)
SLAMCH
 
subroutine slaqr1(n, h, ldh, sr1, si1, sr2, si2, v)
SLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and spe...
 
subroutine slarfg(n, alpha, x, incx, tau)
SLARFG generates an elementary reflector (Householder matrix).
 
subroutine slaset(uplo, m, n, alpha, beta, a, lda)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
 
subroutine strmm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
STRMM