Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed chains of bulges in multishift QR algorithms. ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014).
265 IMPLICIT NONE
266
267
268
269
270
271
272 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
273 $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
274 LOGICAL WANTT, WANTZ
275
276
277 REAL H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
278 $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
279 $ Z( LDZ, * )
280
281
282
283
284 REAL ZERO, ONE
285 parameter( zero = 0.0e0, one = 1.0e0 )
286
287
288 REAL ALPHA, BETA, H11, H12, H21, H22, REFSUM,
289 $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, T1, T2,
290 $ T3, TST1, TST2, ULP
291 INTEGER I, I2, I4, INCOL, J, JBOT, JCOL, JLEN,
292 $ JROW, JTOP, K, K1, KDU, KMS, KRCOL,
293 $ M, M22, MBOT, MTOP, NBMPS, NDCOL,
294 $ NS, NU
295 LOGICAL ACCUM, BMP22
296
297
298 REAL SLAMCH
300
301
302
303 INTRINSIC abs, max, min, mod, real
304
305
306 REAL VT( 3 )
307
308
310
311
312
313
314
315 IF( nshfts.LT.2 )
316 $ RETURN
317
318
319
320
321 IF( ktop.GE.kbot )
322 $ RETURN
323
324
325
326
327
328
329 DO 10 i = 1, nshfts - 2, 2
330 IF( si( i ).NE.-si( i+1 ) ) THEN
331
332 swap = sr( i )
333 sr( i ) = sr( i+1 )
334 sr( i+1 ) = sr( i+2 )
335 sr( i+2 ) = swap
336
337 swap = si( i )
338 si( i ) = si( i+1 )
339 si( i+1 ) = si( i+2 )
340 si( i+2 ) = swap
341 END IF
342 10 CONTINUE
343
344
345
346
347
348
349 ns = nshfts - mod( nshfts, 2 )
350
351
352
353 safmin =
slamch(
'SAFE MINIMUM' )
354 safmax = one / safmin
355 ulp =
slamch(
'PRECISION' )
356 smlnum = safmin*( real( n ) / ulp )
357
358
359
360
361 accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
362
363
364
365 IF( ktop+2.LE.kbot )
366 $ h( ktop+2, ktop ) = zero
367
368
369
370 nbmps = ns / 2
371
372
373
374 kdu = 4*nbmps
375
376
377
378 DO 180 incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
379
380
381
382 IF( accum ) THEN
383 jtop = max( ktop, incol )
384 ELSE IF( wantt ) THEN
385 jtop = 1
386 ELSE
387 jtop = ktop
388 END IF
389
390 ndcol = incol + kdu
391 IF( accum )
392 $
CALL slaset(
'ALL', kdu, kdu, zero, one, u, ldu )
393
394
395
396
397
398
399
400
401
402
403
404
405
406 DO 145 krcol = incol, min( incol+2*nbmps-1, kbot-2 )
407
408
409
410
411
412
413
414
415 mtop = max( 1, ( ktop-krcol ) / 2+1 )
416 mbot = min( nbmps, ( kbot-krcol-1 ) / 2 )
417 m22 = mbot + 1
418 bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+2*( m22-1 ) ).EQ.
419 $ ( kbot-2 )
420
421
422
423
424 IF ( bmp22 ) THEN
425
426
427
428
429 k = krcol + 2*( m22-1 )
430 IF( k.EQ.ktop-1 ) THEN
431 CALL slaqr1( 2, h( k+1, k+1 ), ldh, sr( 2*m22-1 ),
432 $ si( 2*m22-1 ), sr( 2*m22 ), si( 2*m22 ),
433 $ v( 1, m22 ) )
434 beta = v( 1, m22 )
435 CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
436 ELSE
437 beta = h( k+1, k )
438 v( 2, m22 ) = h( k+2, k )
439 CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
440 h( k+1, k ) = beta
441 h( k+2, k ) = zero
442 END IF
443
444
445
446
447
448 t1 = v( 1, m22 )
449 t2 = t1*v( 2, m22 )
450 DO 30 j = jtop, min( kbot, k+3 )
451 refsum = h( j, k+1 ) + v( 2, m22 )*h( j, k+2 )
452 h( j, k+1 ) = h( j, k+1 ) - refsum*t1
453 h( j, k+2 ) = h( j, k+2 ) - refsum*t2
454 30 CONTINUE
455
456
457
458
459 IF( accum ) THEN
460 jbot = min( ndcol, kbot )
461 ELSE IF( wantt ) THEN
462 jbot = n
463 ELSE
464 jbot = kbot
465 END IF
466 t1 = v( 1, m22 )
467 t2 = t1*v( 2, m22 )
468 DO 40 j = k+1, jbot
469 refsum = h( k+1, j ) + v( 2, m22 )*h( k+2, j )
470 h( k+1, j ) = h( k+1, j ) - refsum*t1
471 h( k+2, j ) = h( k+2, j ) - refsum*t2
472 40 CONTINUE
473
474
475
476
477
478
479
480
481
482
483 IF( k.GE.ktop ) THEN
484 IF( h( k+1, k ).NE.zero ) THEN
485 tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
486 IF( tst1.EQ.zero ) THEN
487 IF( k.GE.ktop+1 )
488 $ tst1 = tst1 + abs( h( k, k-1 ) )
489 IF( k.GE.ktop+2 )
490 $ tst1 = tst1 + abs( h( k, k-2 ) )
491 IF( k.GE.ktop+3 )
492 $ tst1 = tst1 + abs( h( k, k-3 ) )
493 IF( k.LE.kbot-2 )
494 $ tst1 = tst1 + abs( h( k+2, k+1 ) )
495 IF( k.LE.kbot-3 )
496 $ tst1 = tst1 + abs( h( k+3, k+1 ) )
497 IF( k.LE.kbot-4 )
498 $ tst1 = tst1 + abs( h( k+4, k+1 ) )
499 END IF
500 IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
501 $ THEN
502 h12 = max( abs( h( k+1, k ) ),
503 $ abs( h( k, k+1 ) ) )
504 h21 = min( abs( h( k+1, k ) ),
505 $ abs( h( k, k+1 ) ) )
506 h11 = max( abs( h( k+1, k+1 ) ),
507 $ abs( h( k, k )-h( k+1, k+1 ) ) )
508 h22 = min( abs( h( k+1, k+1 ) ),
509 $ abs( h( k, k )-h( k+1, k+1 ) ) )
510 scl = h11 + h12
511 tst2 = h22*( h11 / scl )
512
513 IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
514 $ max( smlnum, ulp*tst2 ) ) THEN
515 h( k+1, k ) = zero
516 END IF
517 END IF
518 END IF
519 END IF
520
521
522
523 IF( accum ) THEN
524 kms = k - incol
525 t1 = v( 1, m22 )
526 t2 = t1*v( 2, m22 )
527 DO 50 j = max( 1, ktop-incol ), kdu
528 refsum = u( j, kms+1 ) + v( 2, m22 )*u( j, kms+2 )
529 u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
530 u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
531 50 CONTINUE
532 ELSE IF( wantz ) THEN
533 t1 = v( 1, m22 )
534 t2 = t1*v( 2, m22 )
535 DO 60 j = iloz, ihiz
536 refsum = z( j, k+1 )+v( 2, m22 )*z( j, k+2 )
537 z( j, k+1 ) = z( j, k+1 ) - refsum*t1
538 z( j, k+2 ) = z( j, k+2 ) - refsum*t2
539 60 CONTINUE
540 END IF
541 END IF
542
543
544
545 DO 80 m = mbot, mtop, -1
546 k = krcol + 2*( m-1 )
547 IF( k.EQ.ktop-1 ) THEN
548 CALL slaqr1( 3, h( ktop, ktop ), ldh, sr( 2*m-1 ),
549 $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
550 $ v( 1, m ) )
551 alpha = v( 1, m )
552 CALL slarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
553 ELSE
554
555
556
557
558
559 t1 = v( 1, m )
560 t2 = t1*v( 2, m )
561 t3 = t1*v( 3, m )
562 refsum = v( 3, m )*h( k+3, k+2 )
563 h( k+3, k ) = -refsum*t1
564 h( k+3, k+1 ) = -refsum*t2
565 h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*t3
566
567
568
569
570 beta = h( k+1, k )
571 v( 2, m ) = h( k+2, k )
572 v( 3, m ) = h( k+3, k )
573 CALL slarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
574
575
576
577
578
579
580 IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
581 $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
582
583
584
585 h( k+1, k ) = beta
586 h( k+2, k ) = zero
587 h( k+3, k ) = zero
588 ELSE
589
590
591
592
593
594
595
596 CALL slaqr1( 3, h( k+1, k+1 ), ldh, sr( 2*m-1 ),
597 $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
598 $ vt )
599 alpha = vt( 1 )
600 CALL slarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
601 t1 = vt( 1 )
602 t2 = t1*vt( 2 )
603 t3 = t2*vt( 3 )
604 refsum = h( k+1, k )+vt( 2 )*h( k+2, k )
605
606 IF( abs( h( k+2, k )-refsum*t2 )+
607 $ abs( refsum*t3 ).GT.ulp*
608 $ ( abs( h( k, k ) )+abs( h( k+1,
609 $ k+1 ) )+abs( h( k+2, k+2 ) ) ) ) THEN
610
611
612
613
614
615 h( k+1, k ) = beta
616 h( k+2, k ) = zero
617 h( k+3, k ) = zero
618 ELSE
619
620
621
622
623
624
625 h( k+1, k ) = h( k+1, k ) - refsum*t1
626 h( k+2, k ) = zero
627 h( k+3, k ) = zero
628 v( 1, m ) = vt( 1 )
629 v( 2, m ) = vt( 2 )
630 v( 3, m ) = vt( 3 )
631 END IF
632 END IF
633 END IF
634
635
636
637
638
639
640
641 t1 = v( 1, m )
642 t2 = t1*v( 2, m )
643 t3 = t1*v( 3, m )
644 DO 70 j = jtop, min( kbot, k+3 )
645 refsum = h( j, k+1 ) + v( 2, m )*h( j, k+2 )
646 $ + v( 3, m )*h( j, k+3 )
647 h( j, k+1 ) = h( j, k+1 ) - refsum*t1
648 h( j, k+2 ) = h( j, k+2 ) - refsum*t2
649 h( j, k+3 ) = h( j, k+3 ) - refsum*t3
650 70 CONTINUE
651
652
653
654
655 refsum = h( k+1, k+1 ) + v( 2, m )*h( k+2, k+1 )
656 $ + v( 3, m )*h( k+3, k+1 )
657 h( k+1, k+1 ) = h( k+1, k+1 ) - refsum*t1
658 h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*t2
659 h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*t3
660
661
662
663
664
665
666
667
668
669
670 IF( k.LT.ktop)
671 $ cycle
672 IF( h( k+1, k ).NE.zero ) THEN
673 tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
674 IF( tst1.EQ.zero ) THEN
675 IF( k.GE.ktop+1 )
676 $ tst1 = tst1 + abs( h( k, k-1 ) )
677 IF( k.GE.ktop+2 )
678 $ tst1 = tst1 + abs( h( k, k-2 ) )
679 IF( k.GE.ktop+3 )
680 $ tst1 = tst1 + abs( h( k, k-3 ) )
681 IF( k.LE.kbot-2 )
682 $ tst1 = tst1 + abs( h( k+2, k+1 ) )
683 IF( k.LE.kbot-3 )
684 $ tst1 = tst1 + abs( h( k+3, k+1 ) )
685 IF( k.LE.kbot-4 )
686 $ tst1 = tst1 + abs( h( k+4, k+1 ) )
687 END IF
688 IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
689 $ THEN
690 h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
691 h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
692 h11 = max( abs( h( k+1, k+1 ) ),
693 $ abs( h( k, k )-h( k+1, k+1 ) ) )
694 h22 = min( abs( h( k+1, k+1 ) ),
695 $ abs( h( k, k )-h( k+1, k+1 ) ) )
696 scl = h11 + h12
697 tst2 = h22*( h11 / scl )
698
699 IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
700 $ max( smlnum, ulp*tst2 ) ) THEN
701 h( k+1, k ) = zero
702 END IF
703 END IF
704 END IF
705 80 CONTINUE
706
707
708
709 IF( accum ) THEN
710 jbot = min( ndcol, kbot )
711 ELSE IF( wantt ) THEN
712 jbot = n
713 ELSE
714 jbot = kbot
715 END IF
716
717 DO 100 m = mbot, mtop, -1
718 k = krcol + 2*( m-1 )
719 t1 = v( 1, m )
720 t2 = t1*v( 2, m )
721 t3 = t1*v( 3, m )
722 DO 90 j = max( ktop, krcol + 2*m ), jbot
723 refsum = h( k+1, j ) + v( 2, m )*h( k+2, j )
724 $ + v( 3, m )*h( k+3, j )
725 h( k+1, j ) = h( k+1, j ) - refsum*t1
726 h( k+2, j ) = h( k+2, j ) - refsum*t2
727 h( k+3, j ) = h( k+3, j ) - refsum*t3
728 90 CONTINUE
729 100 CONTINUE
730
731
732
733 IF( accum ) THEN
734
735
736
737
738
739 DO 120 m = mbot, mtop, -1
740 k = krcol + 2*( m-1 )
741 kms = k - incol
742 i2 = max( 1, ktop-incol )
743 i2 = max( i2, kms-(krcol-incol)+1 )
744 i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 )
745 t1 = v( 1, m )
746 t2 = t1*v( 2, m )
747 t3 = t1*v( 3, m )
748 DO 110 j = i2, i4
749 refsum = u( j, kms+1 ) + v( 2, m )*u( j, kms+2 )
750 $ + v( 3, m )*u( j, kms+3 )
751 u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
752 u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
753 u( j, kms+3 ) = u( j, kms+3 ) - refsum*t3
754 110 CONTINUE
755 120 CONTINUE
756 ELSE IF( wantz ) THEN
757
758
759
760
761
762 DO 140 m = mbot, mtop, -1
763 k = krcol + 2*( m-1 )
764 t1 = v( 1, m )
765 t2 = t1*v( 2, m )
766 t3 = t1*v( 3, m )
767 DO 130 j = iloz, ihiz
768 refsum = z( j, k+1 ) + v( 2, m )*z( j, k+2 )
769 $ + v( 3, m )*z( j, k+3 )
770 z( j, k+1 ) = z( j, k+1 ) - refsum*t1
771 z( j, k+2 ) = z( j, k+2 ) - refsum*t2
772 z( j, k+3 ) = z( j, k+3 ) - refsum*t3
773 130 CONTINUE
774 140 CONTINUE
775 END IF
776
777
778
779 145 CONTINUE
780
781
782
783
784
785 IF( accum ) THEN
786 IF( wantt ) THEN
787 jtop = 1
788 jbot = n
789 ELSE
790 jtop = ktop
791 jbot = kbot
792 END IF
793 k1 = max( 1, ktop-incol )
794 nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
795
796
797
798 DO 150 jcol = min( ndcol, kbot ) + 1, jbot, nh
799 jlen = min( nh, jbot-jcol+1 )
800 CALL sgemm(
'C',
'N', nu, jlen, nu, one, u( k1, k1 ),
801 $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
802 $ ldwh )
803 CALL slacpy(
'ALL', nu, jlen, wh, ldwh,
804 $ h( incol+k1, jcol ), ldh )
805 150 CONTINUE
806
807
808
809 DO 160 jrow = jtop, max( ktop, incol ) - 1, nv
810 jlen = min( nv, max( ktop, incol )-jrow )
811 CALL sgemm(
'N',
'N', jlen, nu, nu, one,
812 $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
813 $ ldu, zero, wv, ldwv )
814 CALL slacpy(
'ALL', jlen, nu, wv, ldwv,
815 $ h( jrow, incol+k1 ), ldh )
816 160 CONTINUE
817
818
819
820 IF( wantz ) THEN
821 DO 170 jrow = iloz, ihiz, nv
822 jlen = min( nv, ihiz-jrow+1 )
823 CALL sgemm(
'N',
'N', jlen, nu, nu, one,
824 $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
825 $ ldu, zero, wv, ldwv )
826 CALL slacpy(
'ALL', jlen, nu, wv, ldwv,
827 $ z( jrow, incol+k1 ), ldz )
828 170 CONTINUE
829 END IF
830 END IF
831 180 CONTINUE
832
833
834
subroutine sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
SGEMM
subroutine slacpy(uplo, m, n, a, lda, b, ldb)
SLACPY copies all or part of one two-dimensional array to another.
real function slamch(cmach)
SLAMCH
subroutine slaqr1(n, h, ldh, sr1, si1, sr2, si2, v)
SLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and spe...
subroutine slarfg(n, alpha, x, incx, tau)
SLARFG generates an elementary reflector (Householder matrix).
subroutine slaset(uplo, m, n, alpha, beta, a, lda)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
subroutine strmm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
STRMM