Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed chains of bulges in multishift QR algorithms. ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014).
257 IMPLICIT NONE
258
259
260
261
262
263
264 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
265 $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
266 LOGICAL WANTT, WANTZ
267
268
269 COMPLEX*16 H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ),
270 $ WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * )
271
272
273
274
275 COMPLEX*16 ZERO, ONE
276 parameter( zero = ( 0.0d0, 0.0d0 ),
277 $ one = ( 1.0d0, 0.0d0 ) )
278 DOUBLE PRECISION RZERO, RONE
279 parameter( rzero = 0.0d0, rone = 1.0d0 )
280
281
282 COMPLEX*16 ALPHA, BETA, CDUM, REFSUM, T1, T2, T3
283 DOUBLE PRECISION H11, H12, H21, H22, SAFMAX, SAFMIN, SCL,
284 $ SMLNUM, TST1, TST2, ULP
285 INTEGER I2, I4, INCOL, J, JBOT, JCOL, JLEN,
286 $ JROW, JTOP, K, K1, KDU, KMS, KRCOL,
287 $ M, M22, MBOT, MTOP, NBMPS, NDCOL,
288 $ NS, NU
289 LOGICAL ACCUM, BMP22
290
291
292 DOUBLE PRECISION DLAMCH
294
295
296
297 INTRINSIC abs, dble, dconjg, dimag, max, min, mod
298
299
300 COMPLEX*16 VT( 3 )
301
302
305
306
307 DOUBLE PRECISION CABS1
308
309
310 cabs1( cdum ) = abs( dble( cdum ) ) + abs( dimag( cdum ) )
311
312
313
314
315
316 IF( nshfts.LT.2 )
317 $ RETURN
318
319
320
321
322 IF( ktop.GE.kbot )
323 $ RETURN
324
325
326
327
328 ns = nshfts - mod( nshfts, 2 )
329
330
331
332 safmin =
dlamch(
'SAFE MINIMUM' )
333 safmax = rone / safmin
334 CALL dlabad( safmin, safmax )
335 ulp =
dlamch(
'PRECISION' )
336 smlnum = safmin*( dble( n ) / ulp )
337
338
339
340
341 accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
342
343
344
345 IF( ktop+2.LE.kbot )
346 $ h( ktop+2, ktop ) = zero
347
348
349
350 nbmps = ns / 2
351
352
353
354 kdu = 4*nbmps
355
356
357
358 DO 180 incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
359
360
361
362 IF( accum ) THEN
363 jtop = max( ktop, incol )
364 ELSE IF( wantt ) THEN
365 jtop = 1
366 ELSE
367 jtop = ktop
368 END IF
369
370 ndcol = incol + kdu
371 IF( accum )
372 $
CALL zlaset(
'ALL', kdu, kdu, zero, one, u, ldu )
373
374
375
376
377
378
379
380
381
382
383
384
385
386 DO 145 krcol = incol, min( incol+2*nbmps-1, kbot-2 )
387
388
389
390
391
392
393
394
395 mtop = max( 1, ( ktop-krcol ) / 2+1 )
396 mbot = min( nbmps, ( kbot-krcol-1 ) / 2 )
397 m22 = mbot + 1
398 bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+2*( m22-1 ) ).EQ.
399 $ ( kbot-2 )
400
401
402
403
404 IF ( bmp22 ) THEN
405
406
407
408
409 k = krcol + 2*( m22-1 )
410 IF( k.EQ.ktop-1 ) THEN
411 CALL zlaqr1( 2, h( k+1, k+1 ), ldh, s( 2*m22-1 ),
412 $ s( 2*m22 ), v( 1, m22 ) )
413 beta = v( 1, m22 )
414 CALL zlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
415 ELSE
416 beta = h( k+1, k )
417 v( 2, m22 ) = h( k+2, k )
418 CALL zlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
419 h( k+1, k ) = beta
420 h( k+2, k ) = zero
421 END IF
422
423
424
425
426
427 t1 = v( 1, m22 )
428 t2 = t1*dconjg( v( 2, m22 ) )
429 DO 30 j = jtop, min( kbot, k+3 )
430 refsum = h( j, k+1 ) + v( 2, m22 )*h( j, k+2 )
431 h( j, k+1 ) = h( j, k+1 ) - refsum*t1
432 h( j, k+2 ) = h( j, k+2 ) - refsum*t2
433 30 CONTINUE
434
435
436
437
438 IF( accum ) THEN
439 jbot = min( ndcol, kbot )
440 ELSE IF( wantt ) THEN
441 jbot = n
442 ELSE
443 jbot = kbot
444 END IF
445 t1 = dconjg( v( 1, m22 ) )
446 t2 = t1*v( 2, m22 )
447 DO 40 j = k+1, jbot
448 refsum = h( k+1, j ) +
449 $ dconjg( v( 2, m22 ) )*h( k+2, j )
450 h( k+1, j ) = h( k+1, j ) - refsum*t1
451 h( k+2, j ) = h( k+2, j ) - refsum*t2
452 40 CONTINUE
453
454
455
456
457
458
459
460
461
462
463 IF( k.GE.ktop ) THEN
464 IF( h( k+1, k ).NE.zero ) THEN
465 tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) )
466 IF( tst1.EQ.rzero ) THEN
467 IF( k.GE.ktop+1 )
468 $ tst1 = tst1 + cabs1( h( k, k-1 ) )
469 IF( k.GE.ktop+2 )
470 $ tst1 = tst1 + cabs1( h( k, k-2 ) )
471 IF( k.GE.ktop+3 )
472 $ tst1 = tst1 + cabs1( h( k, k-3 ) )
473 IF( k.LE.kbot-2 )
474 $ tst1 = tst1 + cabs1( h( k+2, k+1 ) )
475 IF( k.LE.kbot-3 )
476 $ tst1 = tst1 + cabs1( h( k+3, k+1 ) )
477 IF( k.LE.kbot-4 )
478 $ tst1 = tst1 + cabs1( h( k+4, k+1 ) )
479 END IF
480 IF( cabs1( h( k+1, k ) )
481 $ .LE.max( smlnum, ulp*tst1 ) ) THEN
482 h12 = max( cabs1( h( k+1, k ) ),
483 $ cabs1( h( k, k+1 ) ) )
484 h21 = min( cabs1( h( k+1, k ) ),
485 $ cabs1( h( k, k+1 ) ) )
486 h11 = max( cabs1( h( k+1, k+1 ) ),
487 $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
488 h22 = min( cabs1( h( k+1, k+1 ) ),
489 $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
490 scl = h11 + h12
491 tst2 = h22*( h11 / scl )
492
493 IF( tst2.EQ.rzero .OR. h21*( h12 / scl ).LE.
494 $ max( smlnum, ulp*tst2 ) )h( k+1, k ) = zero
495 END IF
496 END IF
497 END IF
498
499
500
501 IF( accum ) THEN
502 kms = k - incol
503 DO 50 j = max( 1, ktop-incol ), kdu
504 refsum = v( 1, m22 )*( u( j, kms+1 )+
505 $ v( 2, m22 )*u( j, kms+2 ) )
506 u( j, kms+1 ) = u( j, kms+1 ) - refsum
507 u( j, kms+2 ) = u( j, kms+2 ) -
508 $ refsum*dconjg( v( 2, m22 ) )
509 50 CONTINUE
510 ELSE IF( wantz ) THEN
511 DO 60 j = iloz, ihiz
512 refsum = v( 1, m22 )*( z( j, k+1 )+v( 2, m22 )*
513 $ z( j, k+2 ) )
514 z( j, k+1 ) = z( j, k+1 ) - refsum
515 z( j, k+2 ) = z( j, k+2 ) -
516 $ refsum*dconjg( v( 2, m22 ) )
517 60 CONTINUE
518 END IF
519 END IF
520
521
522
523 DO 80 m = mbot, mtop, -1
524 k = krcol + 2*( m-1 )
525 IF( k.EQ.ktop-1 ) THEN
526 CALL zlaqr1( 3, h( ktop, ktop ), ldh, s( 2*m-1 ),
527 $ s( 2*m ), v( 1, m ) )
528 alpha = v( 1, m )
529 CALL zlarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
530 ELSE
531
532
533
534
535
536 refsum = v( 1, m )*v( 3, m )*h( k+3, k+2 )
537 h( k+3, k ) = -refsum
538 h( k+3, k+1 ) = -refsum*dconjg( v( 2, m ) )
539 h( k+3, k+2 ) = h( k+3, k+2 ) -
540 $ refsum*dconjg( v( 3, m ) )
541
542
543
544
545 beta = h( k+1, k )
546 v( 2, m ) = h( k+2, k )
547 v( 3, m ) = h( k+3, k )
548 CALL zlarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
549
550
551
552
553
554
555 IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
556 $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
557
558
559
560 h( k+1, k ) = beta
561 h( k+2, k ) = zero
562 h( k+3, k ) = zero
563 ELSE
564
565
566
567
568
569
570
571 CALL zlaqr1( 3, h( k+1, k+1 ), ldh, s( 2*m-1 ),
572 $ s( 2*m ), vt )
573 alpha = vt( 1 )
574 CALL zlarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
575 refsum = dconjg( vt( 1 ) )*
576 $ ( h( k+1, k )+dconjg( vt( 2 ) )*
577 $ h( k+2, k ) )
578
579 IF( cabs1( h( k+2, k )-refsum*vt( 2 ) )+
580 $ cabs1( refsum*vt( 3 ) ).GT.ulp*
581 $ ( cabs1( h( k, k ) )+cabs1( h( k+1,
582 $ k+1 ) )+cabs1( h( k+2, k+2 ) ) ) ) THEN
583
584
585
586
587
588 h( k+1, k ) = beta
589 h( k+2, k ) = zero
590 h( k+3, k ) = zero
591 ELSE
592
593
594
595
596
597
598 h( k+1, k ) = h( k+1, k ) - refsum
599 h( k+2, k ) = zero
600 h( k+3, k ) = zero
601 v( 1, m ) = vt( 1 )
602 v( 2, m ) = vt( 2 )
603 v( 3, m ) = vt( 3 )
604 END IF
605 END IF
606 END IF
607
608
609
610
611
612
613
614 t1 = v( 1, m )
615 t2 = t1*dconjg( v( 2, m ) )
616 t3 = t1*dconjg( v( 3, m ) )
617 DO 70 j = jtop, min( kbot, k+3 )
618 refsum = h( j, k+1 ) + v( 2, m )*h( j, k+2 )
619 $ + v( 3, m )*h( j, k+3 )
620 h( j, k+1 ) = h( j, k+1 ) - refsum*t1
621 h( j, k+2 ) = h( j, k+2 ) - refsum*t2
622 h( j, k+3 ) = h( j, k+3 ) - refsum*t3
623 70 CONTINUE
624
625
626
627
628 t1 = dconjg( v( 1, m ) )
629 t2 = t1*v( 2, m )
630 t3 = t1*v( 3, m )
631 refsum = h( k+1, k+1 )
632 $ + dconjg( v( 2, m ) )*h( k+2, k+1 )
633 $ + dconjg( v( 3, m ) )*h( k+3, k+1 )
634 h( k+1, k+1 ) = h( k+1, k+1 ) - refsum*t1
635 h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*t2
636 h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*t3
637
638
639
640
641
642
643
644
645
646
647 IF( k.LT.ktop)
648 $ cycle
649 IF( h( k+1, k ).NE.zero ) THEN
650 tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) )
651 IF( tst1.EQ.rzero ) THEN
652 IF( k.GE.ktop+1 )
653 $ tst1 = tst1 + cabs1( h( k, k-1 ) )
654 IF( k.GE.ktop+2 )
655 $ tst1 = tst1 + cabs1( h( k, k-2 ) )
656 IF( k.GE.ktop+3 )
657 $ tst1 = tst1 + cabs1( h( k, k-3 ) )
658 IF( k.LE.kbot-2 )
659 $ tst1 = tst1 + cabs1( h( k+2, k+1 ) )
660 IF( k.LE.kbot-3 )
661 $ tst1 = tst1 + cabs1( h( k+3, k+1 ) )
662 IF( k.LE.kbot-4 )
663 $ tst1 = tst1 + cabs1( h( k+4, k+1 ) )
664 END IF
665 IF( cabs1( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
666 $ THEN
667 h12 = max( cabs1( h( k+1, k ) ),
668 $ cabs1( h( k, k+1 ) ) )
669 h21 = min( cabs1( h( k+1, k ) ),
670 $ cabs1( h( k, k+1 ) ) )
671 h11 = max( cabs1( h( k+1, k+1 ) ),
672 $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
673 h22 = min( cabs1( h( k+1, k+1 ) ),
674 $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
675 scl = h11 + h12
676 tst2 = h22*( h11 / scl )
677
678 IF( tst2.EQ.rzero .OR. h21*( h12 / scl ).LE.
679 $ max( smlnum, ulp*tst2 ) )h( k+1, k ) = zero
680 END IF
681 END IF
682 80 CONTINUE
683
684
685
686 IF( accum ) THEN
687 jbot = min( ndcol, kbot )
688 ELSE IF( wantt ) THEN
689 jbot = n
690 ELSE
691 jbot = kbot
692 END IF
693
694 DO 100 m = mbot, mtop, -1
695 k = krcol + 2*( m-1 )
696 t1 = dconjg( v( 1, m ) )
697 t2 = t1*v( 2, m )
698 t3 = t1*v( 3, m )
699 DO 90 j = max( ktop, krcol + 2*m ), jbot
700 refsum = h( k+1, j ) + dconjg( v( 2, m ) )*h( k+2, j )
701 $ + dconjg( v( 3, m ) )*h( k+3, j )
702 h( k+1, j ) = h( k+1, j ) - refsum*t1
703 h( k+2, j ) = h( k+2, j ) - refsum*t2
704 h( k+3, j ) = h( k+3, j ) - refsum*t3
705 90 CONTINUE
706 100 CONTINUE
707
708
709
710 IF( accum ) THEN
711
712
713
714
715
716 DO 120 m = mbot, mtop, -1
717 k = krcol + 2*( m-1 )
718 kms = k - incol
719 i2 = max( 1, ktop-incol )
720 i2 = max( i2, kms-(krcol-incol)+1 )
721 i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 )
722 t1 = v( 1, m )
723 t2 = t1*dconjg( v( 2, m ) )
724 t3 = t1*dconjg( v( 3, m ) )
725 DO 110 j = i2, i4
726 refsum = u( j, kms+1 ) + v( 2, m )*u( j, kms+2 )
727 $ + v( 3, m )*u( j, kms+3 )
728 u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
729 u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
730 u( j, kms+3 ) = u( j, kms+3 ) - refsum*t3
731 110 CONTINUE
732 120 CONTINUE
733 ELSE IF( wantz ) THEN
734
735
736
737
738
739 DO 140 m = mbot, mtop, -1
740 k = krcol + 2*( m-1 )
741 t1 = v( 1, m )
742 t2 = t1*dconjg( v( 2, m ) )
743 t3 = t1*dconjg( v( 3, m ) )
744 DO 130 j = iloz, ihiz
745 refsum = z( j, k+1 ) + v( 2, m )*z( j, k+2 )
746 $ + v( 3, m )*z( j, k+3 )
747 z( j, k+1 ) = z( j, k+1 ) - refsum*t1
748 z( j, k+2 ) = z( j, k+2 ) - refsum*t2
749 z( j, k+3 ) = z( j, k+3 ) - refsum*t3
750 130 CONTINUE
751 140 CONTINUE
752 END IF
753
754
755
756 145 CONTINUE
757
758
759
760
761
762 IF( accum ) THEN
763 IF( wantt ) THEN
764 jtop = 1
765 jbot = n
766 ELSE
767 jtop = ktop
768 jbot = kbot
769 END IF
770 k1 = max( 1, ktop-incol )
771 nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
772
773
774
775 DO 150 jcol = min( ndcol, kbot ) + 1, jbot, nh
776 jlen = min( nh, jbot-jcol+1 )
777 CALL zgemm(
'C',
'N', nu, jlen, nu, one, u( k1, k1 ),
778 $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
779 $ ldwh )
780 CALL zlacpy(
'ALL', nu, jlen, wh, ldwh,
781 $ h( incol+k1, jcol ), ldh )
782 150 CONTINUE
783
784
785
786 DO 160 jrow = jtop, max( ktop, incol ) - 1, nv
787 jlen = min( nv, max( ktop, incol )-jrow )
788 CALL zgemm(
'N',
'N', jlen, nu, nu, one,
789 $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
790 $ ldu, zero, wv, ldwv )
791 CALL zlacpy(
'ALL', jlen, nu, wv, ldwv,
792 $ h( jrow, incol+k1 ), ldh )
793 160 CONTINUE
794
795
796
797 IF( wantz ) THEN
798 DO 170 jrow = iloz, ihiz, nv
799 jlen = min( nv, ihiz-jrow+1 )
800 CALL zgemm(
'N',
'N', jlen, nu, nu, one,
801 $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
802 $ ldu, zero, wv, ldwv )
803 CALL zlacpy(
'ALL', jlen, nu, wv, ldwv,
804 $ z( jrow, incol+k1 ), ldz )
805 170 CONTINUE
806 END IF
807 END IF
808 180 CONTINUE
809
810
811
double precision function dlamch(CMACH)
DLAMCH
subroutine dlabad(SMALL, LARGE)
DLABAD
subroutine zgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
ZGEMM
subroutine ztrmm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
ZTRMM
subroutine zlacpy(UPLO, M, N, A, LDA, B, LDB)
ZLACPY copies all or part of one two-dimensional array to another.
subroutine zlaqr1(N, H, LDH, S1, S2, V)
ZLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and spe...
subroutine zlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
subroutine zlarfg(N, ALPHA, X, INCX, TAU)
ZLARFG generates an elementary reflector (Householder matrix).