Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed chains of bulges in multishift QR algorithms. ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014). 
  256      IMPLICIT NONE
  257
  258
  259
  260
  261
  262
  263      INTEGER            IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
  264     $                   LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
  265      LOGICAL            WANTT, WANTZ
  266
  267
  268      COMPLEX            H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ),
  269     $                   WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * )
  270
  271
  272
  273
  274      COMPLEX            ZERO, ONE
  275      parameter( zero = ( 0.0e0, 0.0e0 ),
  276     $                   one = ( 1.0e0, 0.0e0 ) )
  277      REAL               RZERO, RONE
  278      parameter( rzero = 0.0e0, rone = 1.0e0 )
  279
  280
  281      COMPLEX            ALPHA, BETA, CDUM, REFSUM, T1, T2, T3
  282      REAL               H11, H12, H21, H22, SAFMAX, SAFMIN, SCL,
  283     $                   SMLNUM, TST1, TST2, ULP
  284      INTEGER            I2, I4, INCOL, J, JBOT, JCOL, JLEN,
  285     $                   JROW, JTOP, K, K1, KDU, KMS, KRCOL,
  286     $                   M, M22, MBOT, MTOP, NBMPS, NDCOL,
  287     $                   NS, NU
  288      LOGICAL            ACCUM, BMP22
  289
  290
  291      REAL               SLAMCH
  293
  294
  295
  296      INTRINSIC          abs, aimag, conjg, max, min, mod, real
  297
  298
  299      COMPLEX            VT( 3 )
  300
  301
  304
  305
  306      REAL               CABS1
  307
  308
  309      cabs1( cdum ) = abs( real( cdum ) ) + abs( aimag( cdum ) )
  310
  311
  312
  313
  314
  315      IF( nshfts.LT.2 )
  316     $   RETURN
  317
  318
  319
  320
  321      IF( ktop.GE.kbot )
  322     $   RETURN
  323
  324
  325
  326
  327      ns = nshfts - mod( nshfts, 2 )
  328
  329
  330
  331      safmin = 
slamch( 
'SAFE MINIMUM' )
 
  332      safmax = rone / safmin
  333      ulp = 
slamch( 
'PRECISION' )
 
  334      smlnum = safmin*( real( n ) / ulp )
  335
  336
  337
  338
  339      accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
  340
  341
  342
  343      IF( ktop+2.LE.kbot )
  344     $   h( ktop+2, ktop ) = zero
  345
  346
  347
  348      nbmps = ns / 2
  349
  350
  351
  352      kdu = 4*nbmps
  353
  354
  355
  356      DO 180 incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
  357
  358
  359
  360         IF( accum ) THEN
  361            jtop = max( ktop, incol )
  362         ELSE IF( wantt ) THEN
  363            jtop = 1
  364         ELSE
  365            jtop = ktop
  366         END IF
  367
  368         ndcol = incol + kdu
  369         IF( accum )
  370     $      
CALL claset( 
'ALL', kdu, kdu, zero, one, u, ldu )
 
  371
  372
  373
  374
  375
  376
  377
  378
  379
  380
  381
  382
  383
  384         DO 145 krcol = incol, min( incol+2*nbmps-1, kbot-2 )
  385
  386
  387
  388
  389
  390
  391
  392
  393            mtop = max( 1, ( ktop-krcol ) / 2+1 )
  394            mbot = min( nbmps, ( kbot-krcol-1 ) / 2 )
  395            m22 = mbot + 1
  396            bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+2*( m22-1 ) ).EQ.
  397     $              ( kbot-2 )
  398
  399
  400
  401
  402            IF ( bmp22 ) THEN
  403
  404
  405
  406
  407               k = krcol + 2*( m22-1 )
  408               IF( k.EQ.ktop-1 ) THEN
  409                  CALL claqr1( 2, h( k+1, k+1 ), ldh, s( 2*m22-1 ),
 
  410     $                         s( 2*m22 ), v( 1, m22 ) )
  411                  beta = v( 1, m22 )
  412                  CALL clarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
 
  413               ELSE
  414                  beta = h( k+1, k )
  415                  v( 2, m22 ) = h( k+2, k )
  416                  CALL clarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
 
  417                  h( k+1, k ) = beta
  418                  h( k+2, k ) = zero
  419               END IF
  420 
  421
  422
  423
  424
  425               t1 = v( 1, m22 )
  426               t2 = t1*conjg( v( 2, m22 ) )
  427               DO 30 j = jtop, min( kbot, k+3 )
  428                  refsum = h( j, k+1 ) + v( 2, m22 )*h( j, k+2 )
  429                  h( j, k+1 ) = h( j, k+1 ) - refsum*t1
  430                  h( j, k+2 ) = h( j, k+2 ) - refsum*t2
  431   30          CONTINUE
  432
  433
  434
  435
  436               IF( accum ) THEN
  437                  jbot = min( ndcol, kbot )
  438               ELSE IF( wantt ) THEN
  439                  jbot = n
  440               ELSE
  441                  jbot = kbot
  442               END IF
  443               t1 = conjg( v( 1, m22 ) )
  444               t2 = t1*v( 2, m22 )
  445               DO 40 j = k+1, jbot
  446                  refsum = h( k+1, j ) +
  447     $                     conjg( v( 2, m22 ) )*h( k+2, j )
  448                  h( k+1, j ) = h( k+1, j ) - refsum*t1
  449                  h( k+2, j ) = h( k+2, j ) - refsum*t2
  450   40          CONTINUE
  451
  452
  453
  454
  455
  456
  457
  458
  459
  460
  461               IF( k.GE.ktop) THEN
  462                  IF( h( k+1, k ).NE.zero ) THEN
  463                     tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) )
  464                     IF( tst1.EQ.rzero ) THEN
  465                        IF( k.GE.ktop+1 )
  466     $                     tst1 = tst1 + cabs1( h( k, k-1 ) )
  467                        IF( k.GE.ktop+2 )
  468     $                     tst1 = tst1 + cabs1( h( k, k-2 ) )
  469                        IF( k.GE.ktop+3 )
  470     $                     tst1 = tst1 + cabs1( h( k, k-3 ) )
  471                        IF( k.LE.kbot-2 )
  472     $                     tst1 = tst1 + cabs1( h( k+2, k+1 ) )
  473                        IF( k.LE.kbot-3 )
  474     $                     tst1 = tst1 + cabs1( h( k+3, k+1 ) )
  475                        IF( k.LE.kbot-4 )
  476     $                     tst1 = tst1 + cabs1( h( k+4, k+1 ) )
  477                     END IF
  478                     IF( cabs1( h( k+1, k ) )
  479     $                   .LE.max( smlnum, ulp*tst1 ) ) THEN
  480                        h12 = max( cabs1( h( k+1, k ) ),
  481     $                        cabs1( h( k, k+1 ) ) )
  482                        h21 = min( cabs1( h( k+1, k ) ),
  483     $                        cabs1( h( k, k+1 ) ) )
  484                        h11 = max( cabs1( h( k+1, k+1 ) ),
  485     $                        cabs1( h( k, k )-h( k+1, k+1 ) ) )
  486                        h22 = min( cabs1( h( k+1, k+1 ) ),
  487     $                        cabs1( h( k, k )-h( k+1, k+1 ) ) )
  488                        scl = h11 + h12
  489                        tst2 = h22*( h11 / scl )
  490
  491                        IF( tst2.EQ.rzero .OR. h21*( h12 / scl ).LE.
  492     $                      max( smlnum, ulp*tst2 ) )h( k+1, k ) = zero
  493                     END IF
  494                  END IF
  495               END IF
  496
  497
  498
  499               IF( accum ) THEN
  500                  kms = k - incol
  501                  DO 50 j = max( 1, ktop-incol ), kdu
  502                     refsum = v( 1, m22 )*( u( j, kms+1 )+
  503     $                        v( 2, m22 )*u( j, kms+2 ) )
  504                     u( j, kms+1 ) = u( j, kms+1 ) - refsum
  505                     u( j, kms+2 ) = u( j, kms+2 ) -
  506     $                               refsum*conjg( v( 2, m22 ) )
  507  50                 CONTINUE
  508               ELSE IF( wantz ) THEN
  509                  DO 60 j = iloz, ihiz
  510                     refsum = v( 1, m22 )*( z( j, k+1 )+v( 2, m22 )*
  511     $                        z( j, k+2 ) )
  512                     z( j, k+1 ) = z( j, k+1 ) - refsum
  513                     z( j, k+2 ) = z( j, k+2 ) -
  514     $                             refsum*conjg( v( 2, m22 ) )
  515  60              CONTINUE
  516               END IF
  517            END IF
  518
  519
  520
  521            DO 80 m = mbot, mtop, -1
  522               k = krcol + 2*( m-1 )
  523               IF( k.EQ.ktop-1 ) THEN
  524                  CALL claqr1( 3, h( ktop, ktop ), ldh, s( 2*m-1 ),
 
  525     $                         s( 2*m ), v( 1, m ) )
  526                  alpha = v( 1, m )
  527                  CALL clarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
 
  528               ELSE
  529
  530
  531
  532
  533
  534                  t1 = v( 1, m )
  535                  t2 = t1*conjg( v( 2, m ) )
  536                  t3 = t1*conjg( v( 3, m ) )
  537                  refsum = v( 3, m )*h( k+3, k+2 )
  538                  h( k+3, k   ) = -refsum*t1
  539                  h( k+3, k+1 ) = -refsum*t2
  540                  h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*t3
  541
  542
  543
  544
  545                  beta      = h( k+1, k )
  546                  v( 2, m ) = h( k+2, k )
  547                  v( 3, m ) = h( k+3, k )
  548                  CALL clarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
 
  549
  550
  551
  552
  553
  554
  555                  IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
  556     $                zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
  557
  558
  559
  560                     h( k+1, k ) = beta
  561                     h( k+2, k ) = zero
  562                     h( k+3, k ) = zero
  563                  ELSE
  564
  565
  566
  567
  568
  569
  570
  571                     CALL claqr1( 3, h( k+1, k+1 ), ldh, s( 2*m-1 ),
 
  572     $                            s( 2*m ), vt )
  573                     alpha = vt( 1 )
  574                     CALL clarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
 
  575                     t1 = conjg( vt( 1 ) )
  576                     t2 = t1*vt( 2 )
  577                     t3 = t1*vt( 3 )
  578                     refsum = h( k+1, k )+conjg( vt( 2 ) )*h( k+2, k )
  579
  580                     IF( cabs1( h( k+2, k )-refsum*t2 )+
  581     $                   cabs1( refsum*t3 ).GT.ulp*
  582     $                   ( cabs1( h( k, k ) )+cabs1( h( k+1,
  583     $                   k+1 ) )+cabs1( h( k+2, k+2 ) ) ) ) THEN
  584
  585
  586
  587
  588
  589                        h( k+1, k ) = beta
  590                        h( k+2, k ) = zero
  591                        h( k+3, k ) = zero
  592                     ELSE
  593
  594
  595
  596
  597
  598
  599                        h( k+1, k ) = h( k+1, k ) - refsum*t1
  600                        h( k+2, k ) = zero
  601                        h( k+3, k ) = zero
  602                        v( 1, m ) = vt( 1 )
  603                        v( 2, m ) = vt( 2 )
  604                        v( 3, m ) = vt( 3 )
  605                     END IF
  606                  END IF
  607               END IF
  608
  609
  610
  611
  612
  613
  614
  615               t1 = v( 1, m )
  616               t2 = t1*conjg( v( 2, m ) )
  617               t3 = t1*conjg( v( 3, m ) )
  618               DO 70 j = jtop, min( kbot, k+3 )
  619                  refsum = h( j, k+1 ) + v( 2, m )*h( j, k+2 )
  620     $                     + v( 3, m )*h( j, k+3 )
  621                  h( j, k+1 ) = h( j, k+1 ) - refsum*t1
  622                  h( j, k+2 ) = h( j, k+2 ) - refsum*t2
  623                  h( j, k+3 ) = h( j, k+3 ) - refsum*t3
  624   70          CONTINUE
  625
  626
  627
  628
  629               t1 = conjg( v( 1, m ) )
  630               t2 = t1*v( 2, m )
  631               t3 = t1*v( 3, m )
  632               refsum = h( k+1, k+1 ) + conjg( v( 2, m ) )*h( k+2, k+1 )
  633     $                  + conjg( v( 3, m ) )*h( k+3, k+1 )
  634               h( k+1, k+1 ) = h( k+1, k+1 ) - refsum*t1
  635               h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*t2
  636               h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*t3
  637
  638
  639
  640
  641
  642
  643
  644
  645
  646
  647               IF( k.LT.ktop)
  648     $              cycle
  649               IF( h( k+1, k ).NE.zero ) THEN
  650                  tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) )
  651                  IF( tst1.EQ.rzero ) THEN
  652                     IF( k.GE.ktop+1 )
  653     $                  tst1 = tst1 + cabs1( h( k, k-1 ) )
  654                     IF( k.GE.ktop+2 )
  655     $                  tst1 = tst1 + cabs1( h( k, k-2 ) )
  656                     IF( k.GE.ktop+3 )
  657     $                  tst1 = tst1 + cabs1( h( k, k-3 ) )
  658                     IF( k.LE.kbot-2 )
  659     $                  tst1 = tst1 + cabs1( h( k+2, k+1 ) )
  660                     IF( k.LE.kbot-3 )
  661     $                  tst1 = tst1 + cabs1( h( k+3, k+1 ) )
  662                     IF( k.LE.kbot-4 )
  663     $                  tst1 = tst1 + cabs1( h( k+4, k+1 ) )
  664                  END IF
  665                  IF( cabs1( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
  666     $                 THEN
  667                     h12 = max( cabs1( h( k+1, k ) ),
  668     $                     cabs1( h( k, k+1 ) ) )
  669                     h21 = min( cabs1( h( k+1, k ) ),
  670     $                     cabs1( h( k, k+1 ) ) )
  671                     h11 = max( cabs1( h( k+1, k+1 ) ),
  672     $                     cabs1( h( k, k )-h( k+1, k+1 ) ) )
  673                     h22 = min( cabs1( h( k+1, k+1 ) ),
  674     $                     cabs1( h( k, k )-h( k+1, k+1 ) ) )
  675                     scl = h11 + h12
  676                     tst2 = h22*( h11 / scl )
  677
  678                     IF( tst2.EQ.rzero .OR. h21*( h12 / scl ).LE.
  679     $                   max( smlnum, ulp*tst2 ) )h( k+1, k ) = zero
  680                  END IF
  681               END IF
  682   80       CONTINUE
  683
  684
  685
  686            IF( accum ) THEN
  687               jbot = min( ndcol, kbot )
  688            ELSE IF( wantt ) THEN
  689               jbot = n
  690            ELSE
  691               jbot = kbot
  692            END IF
  693
  694            DO 100 m = mbot, mtop, -1
  695               k = krcol + 2*( m-1 )
  696               t1 = conjg( v( 1, m ) )
  697               t2 = t1*v( 2, m )
  698               t3 = t1*v( 3, m )
  699               DO 90 j = max( ktop, krcol + 2*m ), jbot
  700                  refsum = h( k+1, j ) + conjg( v( 2, m ) )*
  701     $                     h( k+2, j ) + conjg( v( 3, m ) )*h( k+3, j )
  702                  h( k+1, j ) = h( k+1, j ) - refsum*t1
  703                  h( k+2, j ) = h( k+2, j ) - refsum*t2
  704                  h( k+3, j ) = h( k+3, j ) - refsum*t3
  705   90          CONTINUE
  706  100       CONTINUE
  707
  708
  709
  710            IF( accum ) THEN
  711
  712
  713
  714
  715
  716               DO 120 m = mbot, mtop, -1
  717                  k = krcol + 2*( m-1 )
  718                  kms = k - incol
  719                  i2 = max( 1, ktop-incol )
  720                  i2 = max( i2, kms-(krcol-incol)+1 )
  721                  i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 )
  722                  t1 = v( 1, m )
  723                  t2 = t1*conjg( v( 2, m ) )
  724                  t3 = t1*conjg( v( 3, m ) )
  725                  DO 110 j = i2, i4
  726                     refsum = u( j, kms+1 ) + v( 2, m )*u( j, kms+2 )
  727     $                        + v( 3, m )*u( j, kms+3 )
  728                     u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
  729                     u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
  730                     u( j, kms+3 ) = u( j, kms+3 ) - refsum*t3
  731  110             CONTINUE
  732  120          CONTINUE
  733            ELSE IF( wantz ) THEN
  734
  735
  736
  737
  738
  739               DO 140 m = mbot, mtop, -1
  740                  k = krcol + 2*( m-1 )
  741                  t1 = v( 1, m )
  742                  t2 = t1*conjg( v( 2, m ) )
  743                  t3 = t1*conjg( v( 3, m ) )
  744                  DO 130 j = iloz, ihiz
  745                     refsum = z( j, k+1 ) + v( 2, m )*z( j, k+2 )
  746     $                        + v( 3, m )*z( j, k+3 )
  747                     z( j, k+1 ) = z( j, k+1 ) - refsum*t1
  748                     z( j, k+2 ) = z( j, k+2 ) - refsum*t2
  749                     z( j, k+3 ) = z( j, k+3 ) - refsum*t3
  750  130             CONTINUE
  751  140          CONTINUE
  752            END IF
  753
  754
  755
  756  145    CONTINUE
  757
  758
  759
  760
  761
  762         IF( accum ) THEN
  763            IF( wantt ) THEN
  764               jtop = 1
  765               jbot = n
  766            ELSE
  767               jtop = ktop
  768               jbot = kbot
  769            END IF
  770            k1 = max( 1, ktop-incol )
  771            nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
  772
  773
  774
  775            DO 150 jcol = min( ndcol, kbot ) + 1, jbot, nh
  776               jlen = min( nh, jbot-jcol+1 )
  777               CALL cgemm( 
'C', 
'N', nu, jlen, nu, one, u( k1, k1 ),
 
  778     $                     ldu, h( incol+k1, jcol ), ldh, zero, wh,
  779     $                     ldwh )
  780               CALL clacpy( 
'ALL', nu, jlen, wh, ldwh,
 
  781     $                      h( incol+k1, jcol ), ldh )
  782  150       CONTINUE
  783
  784
  785
  786            DO 160 jrow = jtop, max( ktop, incol ) - 1, nv
  787               jlen = min( nv, max( ktop, incol )-jrow )
  788               CALL cgemm( 
'N', 
'N', jlen, nu, nu, one,
 
  789     $                     h( jrow, incol+k1 ), ldh, u( k1, k1 ),
  790     $                     ldu, zero, wv, ldwv )
  791               CALL clacpy( 
'ALL', jlen, nu, wv, ldwv,
 
  792     $                      h( jrow, incol+k1 ), ldh )
  793  160       CONTINUE
  794
  795
  796
  797            IF( wantz ) THEN
  798               DO 170 jrow = iloz, ihiz, nv
  799                  jlen = min( nv, ihiz-jrow+1 )
  800                  CALL cgemm( 
'N', 
'N', jlen, nu, nu, one,
 
  801     $                        z( jrow, incol+k1 ), ldz, u( k1, k1 ),
  802     $                        ldu, zero, wv, ldwv )
  803                  CALL clacpy( 
'ALL', jlen, nu, wv, ldwv,
 
  804     $                         z( jrow, incol+k1 ), ldz )
  805  170          CONTINUE
  806            END IF
  807         END IF
  808  180 CONTINUE
  809
  810
  811
subroutine cgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
CGEMM
 
subroutine clacpy(uplo, m, n, a, lda, b, ldb)
CLACPY copies all or part of one two-dimensional array to another.
 
real function slamch(cmach)
SLAMCH
 
subroutine claqr1(n, h, ldh, s1, s2, v)
CLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and spe...
 
subroutine clarfg(n, alpha, x, incx, tau)
CLARFG generates an elementary reflector (Householder matrix).
 
subroutine claset(uplo, m, n, alpha, beta, a, lda)
CLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
 
subroutine ctrmm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
CTRMM