Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed chains of bulges in multishift QR algorithms. ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014).
256 IMPLICIT NONE
257
258
259
260
261
262
263 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
264 $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
265 LOGICAL WANTT, WANTZ
266
267
268 COMPLEX*16 H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ),
269 $ WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * )
270
271
272
273
274 COMPLEX*16 ZERO, ONE
275 parameter( zero = ( 0.0d0, 0.0d0 ),
276 $ one = ( 1.0d0, 0.0d0 ) )
277 DOUBLE PRECISION RZERO, RONE
278 parameter( rzero = 0.0d0, rone = 1.0d0 )
279
280
281 COMPLEX*16 ALPHA, BETA, CDUM, REFSUM, T1, T2, T3
282 DOUBLE PRECISION H11, H12, H21, H22, SAFMAX, SAFMIN, SCL,
283 $ SMLNUM, TST1, TST2, ULP
284 INTEGER I2, I4, INCOL, J, JBOT, JCOL, JLEN,
285 $ JROW, JTOP, K, K1, KDU, KMS, KRCOL,
286 $ M, M22, MBOT, MTOP, NBMPS, NDCOL,
287 $ NS, NU
288 LOGICAL ACCUM, BMP22
289
290
291 DOUBLE PRECISION DLAMCH
293
294
295
296 INTRINSIC abs, dble, dconjg, dimag, max, min, mod
297
298
299 COMPLEX*16 VT( 3 )
300
301
304
305
306 DOUBLE PRECISION CABS1
307
308
309 cabs1( cdum ) = abs( dble( cdum ) ) + abs( dimag( cdum ) )
310
311
312
313
314
315 IF( nshfts.LT.2 )
316 $ RETURN
317
318
319
320
321 IF( ktop.GE.kbot )
322 $ RETURN
323
324
325
326
327 ns = nshfts - mod( nshfts, 2 )
328
329
330
331 safmin =
dlamch(
'SAFE MINIMUM' )
332 safmax = rone / safmin
333 ulp =
dlamch(
'PRECISION' )
334 smlnum = safmin*( dble( n ) / ulp )
335
336
337
338
339 accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
340
341
342
343 IF( ktop+2.LE.kbot )
344 $ h( ktop+2, ktop ) = zero
345
346
347
348 nbmps = ns / 2
349
350
351
352 kdu = 4*nbmps
353
354
355
356 DO 180 incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
357
358
359
360 IF( accum ) THEN
361 jtop = max( ktop, incol )
362 ELSE IF( wantt ) THEN
363 jtop = 1
364 ELSE
365 jtop = ktop
366 END IF
367
368 ndcol = incol + kdu
369 IF( accum )
370 $
CALL zlaset(
'ALL', kdu, kdu, zero, one, u, ldu )
371
372
373
374
375
376
377
378
379
380
381
382
383
384 DO 145 krcol = incol, min( incol+2*nbmps-1, kbot-2 )
385
386
387
388
389
390
391
392
393 mtop = max( 1, ( ktop-krcol ) / 2+1 )
394 mbot = min( nbmps, ( kbot-krcol-1 ) / 2 )
395 m22 = mbot + 1
396 bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+2*( m22-1 ) ).EQ.
397 $ ( kbot-2 )
398
399
400
401
402 IF ( bmp22 ) THEN
403
404
405
406
407 k = krcol + 2*( m22-1 )
408 IF( k.EQ.ktop-1 ) THEN
409 CALL zlaqr1( 2, h( k+1, k+1 ), ldh, s( 2*m22-1 ),
410 $ s( 2*m22 ), v( 1, m22 ) )
411 beta = v( 1, m22 )
412 CALL zlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
413 ELSE
414 beta = h( k+1, k )
415 v( 2, m22 ) = h( k+2, k )
416 CALL zlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
417 h( k+1, k ) = beta
418 h( k+2, k ) = zero
419 END IF
420
421
422
423
424
425 t1 = v( 1, m22 )
426 t2 = t1*dconjg( v( 2, m22 ) )
427 DO 30 j = jtop, min( kbot, k+3 )
428 refsum = h( j, k+1 ) + v( 2, m22 )*h( j, k+2 )
429 h( j, k+1 ) = h( j, k+1 ) - refsum*t1
430 h( j, k+2 ) = h( j, k+2 ) - refsum*t2
431 30 CONTINUE
432
433
434
435
436 IF( accum ) THEN
437 jbot = min( ndcol, kbot )
438 ELSE IF( wantt ) THEN
439 jbot = n
440 ELSE
441 jbot = kbot
442 END IF
443 t1 = dconjg( v( 1, m22 ) )
444 t2 = t1*v( 2, m22 )
445 DO 40 j = k+1, jbot
446 refsum = h( k+1, j ) +
447 $ dconjg( v( 2, m22 ) )*h( k+2, j )
448 h( k+1, j ) = h( k+1, j ) - refsum*t1
449 h( k+2, j ) = h( k+2, j ) - refsum*t2
450 40 CONTINUE
451
452
453
454
455
456
457
458
459
460
461 IF( k.GE.ktop ) THEN
462 IF( h( k+1, k ).NE.zero ) THEN
463 tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) )
464 IF( tst1.EQ.rzero ) THEN
465 IF( k.GE.ktop+1 )
466 $ tst1 = tst1 + cabs1( h( k, k-1 ) )
467 IF( k.GE.ktop+2 )
468 $ tst1 = tst1 + cabs1( h( k, k-2 ) )
469 IF( k.GE.ktop+3 )
470 $ tst1 = tst1 + cabs1( h( k, k-3 ) )
471 IF( k.LE.kbot-2 )
472 $ tst1 = tst1 + cabs1( h( k+2, k+1 ) )
473 IF( k.LE.kbot-3 )
474 $ tst1 = tst1 + cabs1( h( k+3, k+1 ) )
475 IF( k.LE.kbot-4 )
476 $ tst1 = tst1 + cabs1( h( k+4, k+1 ) )
477 END IF
478 IF( cabs1( h( k+1, k ) )
479 $ .LE.max( smlnum, ulp*tst1 ) ) THEN
480 h12 = max( cabs1( h( k+1, k ) ),
481 $ cabs1( h( k, k+1 ) ) )
482 h21 = min( cabs1( h( k+1, k ) ),
483 $ cabs1( h( k, k+1 ) ) )
484 h11 = max( cabs1( h( k+1, k+1 ) ),
485 $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
486 h22 = min( cabs1( h( k+1, k+1 ) ),
487 $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
488 scl = h11 + h12
489 tst2 = h22*( h11 / scl )
490
491 IF( tst2.EQ.rzero .OR. h21*( h12 / scl ).LE.
492 $ max( smlnum, ulp*tst2 ) )h( k+1, k ) = zero
493 END IF
494 END IF
495 END IF
496
497
498
499 IF( accum ) THEN
500 kms = k - incol
501 DO 50 j = max( 1, ktop-incol ), kdu
502 refsum = v( 1, m22 )*( u( j, kms+1 )+
503 $ v( 2, m22 )*u( j, kms+2 ) )
504 u( j, kms+1 ) = u( j, kms+1 ) - refsum
505 u( j, kms+2 ) = u( j, kms+2 ) -
506 $ refsum*dconjg( v( 2, m22 ) )
507 50 CONTINUE
508 ELSE IF( wantz ) THEN
509 DO 60 j = iloz, ihiz
510 refsum = v( 1, m22 )*( z( j, k+1 )+v( 2, m22 )*
511 $ z( j, k+2 ) )
512 z( j, k+1 ) = z( j, k+1 ) - refsum
513 z( j, k+2 ) = z( j, k+2 ) -
514 $ refsum*dconjg( v( 2, m22 ) )
515 60 CONTINUE
516 END IF
517 END IF
518
519
520
521 DO 80 m = mbot, mtop, -1
522 k = krcol + 2*( m-1 )
523 IF( k.EQ.ktop-1 ) THEN
524 CALL zlaqr1( 3, h( ktop, ktop ), ldh, s( 2*m-1 ),
525 $ s( 2*m ), v( 1, m ) )
526 alpha = v( 1, m )
527 CALL zlarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
528 ELSE
529
530
531
532
533
534 t1 = v( 1, m )
535 t2 = t1*dconjg( v( 2, m ) )
536 t3 = t1*dconjg( v( 3, m ) )
537 refsum = v( 3, m )*h( k+3, k+2 )
538 h( k+3, k ) = -refsum*t1
539 h( k+3, k+1 ) = -refsum*t2
540 h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*t3
541
542
543
544
545 beta = h( k+1, k )
546 v( 2, m ) = h( k+2, k )
547 v( 3, m ) = h( k+3, k )
548 CALL zlarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
549
550
551
552
553
554
555 IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
556 $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
557
558
559
560 h( k+1, k ) = beta
561 h( k+2, k ) = zero
562 h( k+3, k ) = zero
563 ELSE
564
565
566
567
568
569
570
571 CALL zlaqr1( 3, h( k+1, k+1 ), ldh, s( 2*m-1 ),
572 $ s( 2*m ), vt )
573 alpha = vt( 1 )
574 CALL zlarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
575 t1 = dconjg( vt( 1 ) )
576 t2 = t1*vt( 2 )
577 t3 = t1*vt( 3 )
578 refsum = h( k+1, k )+dconjg( vt( 2 ) )*h( k+2, k )
579
580 IF( cabs1( h( k+2, k )-refsum*t2 )+
581 $ cabs1( refsum*t3 ).GT.ulp*
582 $ ( cabs1( h( k, k ) )+cabs1( h( k+1,
583 $ k+1 ) )+cabs1( h( k+2, k+2 ) ) ) ) THEN
584
585
586
587
588
589 h( k+1, k ) = beta
590 h( k+2, k ) = zero
591 h( k+3, k ) = zero
592 ELSE
593
594
595
596
597
598
599 h( k+1, k ) = h( k+1, k ) - refsum*t1
600 h( k+2, k ) = zero
601 h( k+3, k ) = zero
602 v( 1, m ) = vt( 1 )
603 v( 2, m ) = vt( 2 )
604 v( 3, m ) = vt( 3 )
605 END IF
606 END IF
607 END IF
608
609
610
611
612
613
614
615 t1 = v( 1, m )
616 t2 = t1*dconjg( v( 2, m ) )
617 t3 = t1*dconjg( v( 3, m ) )
618 DO 70 j = jtop, min( kbot, k+3 )
619 refsum = h( j, k+1 ) + v( 2, m )*h( j, k+2 )
620 $ + v( 3, m )*h( j, k+3 )
621 h( j, k+1 ) = h( j, k+1 ) - refsum*t1
622 h( j, k+2 ) = h( j, k+2 ) - refsum*t2
623 h( j, k+3 ) = h( j, k+3 ) - refsum*t3
624 70 CONTINUE
625
626
627
628
629 t1 = dconjg( v( 1, m ) )
630 t2 = t1*v( 2, m )
631 t3 = t1*v( 3, m )
632 refsum = h( k+1, k+1 )
633 $ + dconjg( v( 2, m ) )*h( k+2, k+1 )
634 $ + dconjg( v( 3, m ) )*h( k+3, k+1 )
635 h( k+1, k+1 ) = h( k+1, k+1 ) - refsum*t1
636 h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*t2
637 h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*t3
638
639
640
641
642
643
644
645
646
647
648 IF( k.LT.ktop)
649 $ cycle
650 IF( h( k+1, k ).NE.zero ) THEN
651 tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) )
652 IF( tst1.EQ.rzero ) THEN
653 IF( k.GE.ktop+1 )
654 $ tst1 = tst1 + cabs1( h( k, k-1 ) )
655 IF( k.GE.ktop+2 )
656 $ tst1 = tst1 + cabs1( h( k, k-2 ) )
657 IF( k.GE.ktop+3 )
658 $ tst1 = tst1 + cabs1( h( k, k-3 ) )
659 IF( k.LE.kbot-2 )
660 $ tst1 = tst1 + cabs1( h( k+2, k+1 ) )
661 IF( k.LE.kbot-3 )
662 $ tst1 = tst1 + cabs1( h( k+3, k+1 ) )
663 IF( k.LE.kbot-4 )
664 $ tst1 = tst1 + cabs1( h( k+4, k+1 ) )
665 END IF
666 IF( cabs1( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
667 $ THEN
668 h12 = max( cabs1( h( k+1, k ) ),
669 $ cabs1( h( k, k+1 ) ) )
670 h21 = min( cabs1( h( k+1, k ) ),
671 $ cabs1( h( k, k+1 ) ) )
672 h11 = max( cabs1( h( k+1, k+1 ) ),
673 $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
674 h22 = min( cabs1( h( k+1, k+1 ) ),
675 $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
676 scl = h11 + h12
677 tst2 = h22*( h11 / scl )
678
679 IF( tst2.EQ.rzero .OR. h21*( h12 / scl ).LE.
680 $ max( smlnum, ulp*tst2 ) )h( k+1, k ) = zero
681 END IF
682 END IF
683 80 CONTINUE
684
685
686
687 IF( accum ) THEN
688 jbot = min( ndcol, kbot )
689 ELSE IF( wantt ) THEN
690 jbot = n
691 ELSE
692 jbot = kbot
693 END IF
694
695 DO 100 m = mbot, mtop, -1
696 k = krcol + 2*( m-1 )
697 t1 = dconjg( v( 1, m ) )
698 t2 = t1*v( 2, m )
699 t3 = t1*v( 3, m )
700 DO 90 j = max( ktop, krcol + 2*m ), jbot
701 refsum = h( k+1, j ) + dconjg( v( 2, m ) )*h( k+2, j )
702 $ + dconjg( v( 3, m ) )*h( k+3, j )
703 h( k+1, j ) = h( k+1, j ) - refsum*t1
704 h( k+2, j ) = h( k+2, j ) - refsum*t2
705 h( k+3, j ) = h( k+3, j ) - refsum*t3
706 90 CONTINUE
707 100 CONTINUE
708
709
710
711 IF( accum ) THEN
712
713
714
715
716
717 DO 120 m = mbot, mtop, -1
718 k = krcol + 2*( m-1 )
719 kms = k - incol
720 i2 = max( 1, ktop-incol )
721 i2 = max( i2, kms-(krcol-incol)+1 )
722 i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 )
723 t1 = v( 1, m )
724 t2 = t1*dconjg( v( 2, m ) )
725 t3 = t1*dconjg( v( 3, m ) )
726 DO 110 j = i2, i4
727 refsum = u( j, kms+1 ) + v( 2, m )*u( j, kms+2 )
728 $ + v( 3, m )*u( j, kms+3 )
729 u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
730 u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
731 u( j, kms+3 ) = u( j, kms+3 ) - refsum*t3
732 110 CONTINUE
733 120 CONTINUE
734 ELSE IF( wantz ) THEN
735
736
737
738
739
740 DO 140 m = mbot, mtop, -1
741 k = krcol + 2*( m-1 )
742 t1 = v( 1, m )
743 t2 = t1*dconjg( v( 2, m ) )
744 t3 = t1*dconjg( v( 3, m ) )
745 DO 130 j = iloz, ihiz
746 refsum = z( j, k+1 ) + v( 2, m )*z( j, k+2 )
747 $ + v( 3, m )*z( j, k+3 )
748 z( j, k+1 ) = z( j, k+1 ) - refsum*t1
749 z( j, k+2 ) = z( j, k+2 ) - refsum*t2
750 z( j, k+3 ) = z( j, k+3 ) - refsum*t3
751 130 CONTINUE
752 140 CONTINUE
753 END IF
754
755
756
757 145 CONTINUE
758
759
760
761
762
763 IF( accum ) THEN
764 IF( wantt ) THEN
765 jtop = 1
766 jbot = n
767 ELSE
768 jtop = ktop
769 jbot = kbot
770 END IF
771 k1 = max( 1, ktop-incol )
772 nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
773
774
775
776 DO 150 jcol = min( ndcol, kbot ) + 1, jbot, nh
777 jlen = min( nh, jbot-jcol+1 )
778 CALL zgemm(
'C',
'N', nu, jlen, nu, one, u( k1, k1 ),
779 $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
780 $ ldwh )
781 CALL zlacpy(
'ALL', nu, jlen, wh, ldwh,
782 $ h( incol+k1, jcol ), ldh )
783 150 CONTINUE
784
785
786
787 DO 160 jrow = jtop, max( ktop, incol ) - 1, nv
788 jlen = min( nv, max( ktop, incol )-jrow )
789 CALL zgemm(
'N',
'N', jlen, nu, nu, one,
790 $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
791 $ ldu, zero, wv, ldwv )
792 CALL zlacpy(
'ALL', jlen, nu, wv, ldwv,
793 $ h( jrow, incol+k1 ), ldh )
794 160 CONTINUE
795
796
797
798 IF( wantz ) THEN
799 DO 170 jrow = iloz, ihiz, nv
800 jlen = min( nv, ihiz-jrow+1 )
801 CALL zgemm(
'N',
'N', jlen, nu, nu, one,
802 $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
803 $ ldu, zero, wv, ldwv )
804 CALL zlacpy(
'ALL', jlen, nu, wv, ldwv,
805 $ z( jrow, incol+k1 ), ldz )
806 170 CONTINUE
807 END IF
808 END IF
809 180 CONTINUE
810
811
812
subroutine zgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
ZGEMM
subroutine zlacpy(uplo, m, n, a, lda, b, ldb)
ZLACPY copies all or part of one two-dimensional array to another.
double precision function dlamch(cmach)
DLAMCH
subroutine zlaqr1(n, h, ldh, s1, s2, v)
ZLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and spe...
subroutine zlarfg(n, alpha, x, incx, tau)
ZLARFG generates an elementary reflector (Householder matrix).
subroutine zlaset(uplo, m, n, alpha, beta, a, lda)
ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
subroutine ztrmm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
ZTRMM