Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed chains of bulges in multishift QR algorithms. ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014).
263 IMPLICIT NONE
264
265
266
267
268
269
270 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
271 $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
272 LOGICAL WANTT, WANTZ
273
274
275 REAL H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
276 $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
277 $ Z( LDZ, * )
278
279
280
281
282 REAL ZERO, ONE
283 parameter( zero = 0.0e0, one = 1.0e0 )
284
285
286 REAL ALPHA, BETA, H11, H12, H21, H22, REFSUM,
287 $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, T1, T2,
288 $ T3, TST1, TST2, ULP
289 INTEGER I, I2, I4, INCOL, J, JBOT, JCOL, JLEN,
290 $ JROW, JTOP, K, K1, KDU, KMS, KRCOL,
291 $ M, M22, MBOT, MTOP, NBMPS, NDCOL,
292 $ NS, NU
293 LOGICAL ACCUM, BMP22
294
295
296 REAL SLAMCH
298
299
300
301 INTRINSIC abs, max, min, mod, real
302
303
304 REAL VT( 3 )
305
306
309
310
311
312
313
314 IF( nshfts.LT.2 )
315 $ RETURN
316
317
318
319
320 IF( ktop.GE.kbot )
321 $ RETURN
322
323
324
325
326
327
328 DO 10 i = 1, nshfts - 2, 2
329 IF( si( i ).NE.-si( i+1 ) ) THEN
330
331 swap = sr( i )
332 sr( i ) = sr( i+1 )
333 sr( i+1 ) = sr( i+2 )
334 sr( i+2 ) = swap
335
336 swap = si( i )
337 si( i ) = si( i+1 )
338 si( i+1 ) = si( i+2 )
339 si( i+2 ) = swap
340 END IF
341 10 CONTINUE
342
343
344
345
346
347
348 ns = nshfts - mod( nshfts, 2 )
349
350
351
352 safmin =
slamch(
'SAFE MINIMUM' )
353 safmax = one / safmin
354 ulp =
slamch(
'PRECISION' )
355 smlnum = safmin*( real( n ) / ulp )
356
357
358
359
360 accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
361
362
363
364 IF( ktop+2.LE.kbot )
365 $ h( ktop+2, ktop ) = zero
366
367
368
369 nbmps = ns / 2
370
371
372
373 kdu = 4*nbmps
374
375
376
377 DO 180 incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
378
379
380
381 IF( accum ) THEN
382 jtop = max( ktop, incol )
383 ELSE IF( wantt ) THEN
384 jtop = 1
385 ELSE
386 jtop = ktop
387 END IF
388
389 ndcol = incol + kdu
390 IF( accum )
391 $
CALL slaset(
'ALL', kdu, kdu, zero, one, u, ldu )
392
393
394
395
396
397
398
399
400
401
402
403
404
405 DO 145 krcol = incol, min( incol+2*nbmps-1, kbot-2 )
406
407
408
409
410
411
412
413
414 mtop = max( 1, ( ktop-krcol ) / 2+1 )
415 mbot = min( nbmps, ( kbot-krcol-1 ) / 2 )
416 m22 = mbot + 1
417 bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+2*( m22-1 ) ).EQ.
418 $ ( kbot-2 )
419
420
421
422
423 IF ( bmp22 ) THEN
424
425
426
427
428 k = krcol + 2*( m22-1 )
429 IF( k.EQ.ktop-1 ) THEN
430 CALL slaqr1( 2, h( k+1, k+1 ), ldh, sr( 2*m22-1 ),
431 $ si( 2*m22-1 ), sr( 2*m22 ), si( 2*m22 ),
432 $ v( 1, m22 ) )
433 beta = v( 1, m22 )
434 CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
435 ELSE
436 beta = h( k+1, k )
437 v( 2, m22 ) = h( k+2, k )
438 CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
439 h( k+1, k ) = beta
440 h( k+2, k ) = zero
441 END IF
442
443
444
445
446
447 t1 = v( 1, m22 )
448 t2 = t1*v( 2, m22 )
449 DO 30 j = jtop, min( kbot, k+3 )
450 refsum = h( j, k+1 ) + v( 2, m22 )*h( j, k+2 )
451 h( j, k+1 ) = h( j, k+1 ) - refsum*t1
452 h( j, k+2 ) = h( j, k+2 ) - refsum*t2
453 30 CONTINUE
454
455
456
457
458 IF( accum ) THEN
459 jbot = min( ndcol, kbot )
460 ELSE IF( wantt ) THEN
461 jbot = n
462 ELSE
463 jbot = kbot
464 END IF
465 t1 = v( 1, m22 )
466 t2 = t1*v( 2, m22 )
467 DO 40 j = k+1, jbot
468 refsum = h( k+1, j ) + v( 2, m22 )*h( k+2, j )
469 h( k+1, j ) = h( k+1, j ) - refsum*t1
470 h( k+2, j ) = h( k+2, j ) - refsum*t2
471 40 CONTINUE
472
473
474
475
476
477
478
479
480
481
482 IF( k.GE.ktop ) THEN
483 IF( h( k+1, k ).NE.zero ) THEN
484 tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
485 IF( tst1.EQ.zero ) THEN
486 IF( k.GE.ktop+1 )
487 $ tst1 = tst1 + abs( h( k, k-1 ) )
488 IF( k.GE.ktop+2 )
489 $ tst1 = tst1 + abs( h( k, k-2 ) )
490 IF( k.GE.ktop+3 )
491 $ tst1 = tst1 + abs( h( k, k-3 ) )
492 IF( k.LE.kbot-2 )
493 $ tst1 = tst1 + abs( h( k+2, k+1 ) )
494 IF( k.LE.kbot-3 )
495 $ tst1 = tst1 + abs( h( k+3, k+1 ) )
496 IF( k.LE.kbot-4 )
497 $ tst1 = tst1 + abs( h( k+4, k+1 ) )
498 END IF
499 IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
500 $ THEN
501 h12 = max( abs( h( k+1, k ) ),
502 $ abs( h( k, k+1 ) ) )
503 h21 = min( abs( h( k+1, k ) ),
504 $ abs( h( k, k+1 ) ) )
505 h11 = max( abs( h( k+1, k+1 ) ),
506 $ abs( h( k, k )-h( k+1, k+1 ) ) )
507 h22 = min( abs( h( k+1, k+1 ) ),
508 $ abs( h( k, k )-h( k+1, k+1 ) ) )
509 scl = h11 + h12
510 tst2 = h22*( h11 / scl )
511
512 IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
513 $ max( smlnum, ulp*tst2 ) ) THEN
514 h( k+1, k ) = zero
515 END IF
516 END IF
517 END IF
518 END IF
519
520
521
522 IF( accum ) THEN
523 kms = k - incol
524 t1 = v( 1, m22 )
525 t2 = t1*v( 2, m22 )
526 DO 50 j = max( 1, ktop-incol ), kdu
527 refsum = u( j, kms+1 ) + v( 2, m22 )*u( j, kms+2 )
528 u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
529 u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
530 50 CONTINUE
531 ELSE IF( wantz ) THEN
532 t1 = v( 1, m22 )
533 t2 = t1*v( 2, m22 )
534 DO 60 j = iloz, ihiz
535 refsum = z( j, k+1 )+v( 2, m22 )*z( j, k+2 )
536 z( j, k+1 ) = z( j, k+1 ) - refsum*t1
537 z( j, k+2 ) = z( j, k+2 ) - refsum*t2
538 60 CONTINUE
539 END IF
540 END IF
541
542
543
544 DO 80 m = mbot, mtop, -1
545 k = krcol + 2*( m-1 )
546 IF( k.EQ.ktop-1 ) THEN
547 CALL slaqr1( 3, h( ktop, ktop ), ldh, sr( 2*m-1 ),
548 $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
549 $ v( 1, m ) )
550 alpha = v( 1, m )
551 CALL slarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
552 ELSE
553
554
555
556
557
558 t1 = v( 1, m )
559 t2 = t1*v( 2, m )
560 t3 = t1*v( 3, m )
561 refsum = v( 3, m )*h( k+3, k+2 )
562 h( k+3, k ) = -refsum*t1
563 h( k+3, k+1 ) = -refsum*t2
564 h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*t3
565
566
567
568
569 beta = h( k+1, k )
570 v( 2, m ) = h( k+2, k )
571 v( 3, m ) = h( k+3, k )
572 CALL slarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
573
574
575
576
577
578
579 IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
580 $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
581
582
583
584 h( k+1, k ) = beta
585 h( k+2, k ) = zero
586 h( k+3, k ) = zero
587 ELSE
588
589
590
591
592
593
594
595 CALL slaqr1( 3, h( k+1, k+1 ), ldh, sr( 2*m-1 ),
596 $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
597 $ vt )
598 alpha = vt( 1 )
599 CALL slarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
600 t1 = vt( 1 )
601 t2 = t1*vt( 2 )
602 t3 = t2*vt( 3 )
603 refsum = h( k+1, k )+vt( 2 )*h( k+2, k )
604
605 IF( abs( h( k+2, k )-refsum*t2 )+
606 $ abs( refsum*t3 ).GT.ulp*
607 $ ( abs( h( k, k ) )+abs( h( k+1,
608 $ k+1 ) )+abs( h( k+2, k+2 ) ) ) ) THEN
609
610
611
612
613
614 h( k+1, k ) = beta
615 h( k+2, k ) = zero
616 h( k+3, k ) = zero
617 ELSE
618
619
620
621
622
623
624 h( k+1, k ) = h( k+1, k ) - refsum*t1
625 h( k+2, k ) = zero
626 h( k+3, k ) = zero
627 v( 1, m ) = vt( 1 )
628 v( 2, m ) = vt( 2 )
629 v( 3, m ) = vt( 3 )
630 END IF
631 END IF
632 END IF
633
634
635
636
637
638
639
640 t1 = v( 1, m )
641 t2 = t1*v( 2, m )
642 t3 = t1*v( 3, m )
643 DO 70 j = jtop, min( kbot, k+3 )
644 refsum = h( j, k+1 ) + v( 2, m )*h( j, k+2 )
645 $ + v( 3, m )*h( j, k+3 )
646 h( j, k+1 ) = h( j, k+1 ) - refsum*t1
647 h( j, k+2 ) = h( j, k+2 ) - refsum*t2
648 h( j, k+3 ) = h( j, k+3 ) - refsum*t3
649 70 CONTINUE
650
651
652
653
654 refsum = h( k+1, k+1 ) + v( 2, m )*h( k+2, k+1 )
655 $ + v( 3, m )*h( k+3, k+1 )
656 h( k+1, k+1 ) = h( k+1, k+1 ) - refsum*t1
657 h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*t2
658 h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*t3
659
660
661
662
663
664
665
666
667
668
669 IF( k.LT.ktop)
670 $ cycle
671 IF( h( k+1, k ).NE.zero ) THEN
672 tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
673 IF( tst1.EQ.zero ) THEN
674 IF( k.GE.ktop+1 )
675 $ tst1 = tst1 + abs( h( k, k-1 ) )
676 IF( k.GE.ktop+2 )
677 $ tst1 = tst1 + abs( h( k, k-2 ) )
678 IF( k.GE.ktop+3 )
679 $ tst1 = tst1 + abs( h( k, k-3 ) )
680 IF( k.LE.kbot-2 )
681 $ tst1 = tst1 + abs( h( k+2, k+1 ) )
682 IF( k.LE.kbot-3 )
683 $ tst1 = tst1 + abs( h( k+3, k+1 ) )
684 IF( k.LE.kbot-4 )
685 $ tst1 = tst1 + abs( h( k+4, k+1 ) )
686 END IF
687 IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
688 $ THEN
689 h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
690 h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
691 h11 = max( abs( h( k+1, k+1 ) ),
692 $ abs( h( k, k )-h( k+1, k+1 ) ) )
693 h22 = min( abs( h( k+1, k+1 ) ),
694 $ abs( h( k, k )-h( k+1, k+1 ) ) )
695 scl = h11 + h12
696 tst2 = h22*( h11 / scl )
697
698 IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
699 $ max( smlnum, ulp*tst2 ) ) THEN
700 h( k+1, k ) = zero
701 END IF
702 END IF
703 END IF
704 80 CONTINUE
705
706
707
708 IF( accum ) THEN
709 jbot = min( ndcol, kbot )
710 ELSE IF( wantt ) THEN
711 jbot = n
712 ELSE
713 jbot = kbot
714 END IF
715
716 DO 100 m = mbot, mtop, -1
717 k = krcol + 2*( m-1 )
718 t1 = v( 1, m )
719 t2 = t1*v( 2, m )
720 t3 = t1*v( 3, m )
721 DO 90 j = max( ktop, krcol + 2*m ), jbot
722 refsum = h( k+1, j ) + v( 2, m )*h( k+2, j )
723 $ + v( 3, m )*h( k+3, j )
724 h( k+1, j ) = h( k+1, j ) - refsum*t1
725 h( k+2, j ) = h( k+2, j ) - refsum*t2
726 h( k+3, j ) = h( k+3, j ) - refsum*t3
727 90 CONTINUE
728 100 CONTINUE
729
730
731
732 IF( accum ) THEN
733
734
735
736
737
738 DO 120 m = mbot, mtop, -1
739 k = krcol + 2*( m-1 )
740 kms = k - incol
741 i2 = max( 1, ktop-incol )
742 i2 = max( i2, kms-(krcol-incol)+1 )
743 i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 )
744 t1 = v( 1, m )
745 t2 = t1*v( 2, m )
746 t3 = t1*v( 3, m )
747 DO 110 j = i2, i4
748 refsum = u( j, kms+1 ) + v( 2, m )*u( j, kms+2 )
749 $ + v( 3, m )*u( j, kms+3 )
750 u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
751 u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
752 u( j, kms+3 ) = u( j, kms+3 ) - refsum*t3
753 110 CONTINUE
754 120 CONTINUE
755 ELSE IF( wantz ) THEN
756
757
758
759
760
761 DO 140 m = mbot, mtop, -1
762 k = krcol + 2*( m-1 )
763 t1 = v( 1, m )
764 t2 = t1*v( 2, m )
765 t3 = t1*v( 3, m )
766 DO 130 j = iloz, ihiz
767 refsum = z( j, k+1 ) + v( 2, m )*z( j, k+2 )
768 $ + v( 3, m )*z( j, k+3 )
769 z( j, k+1 ) = z( j, k+1 ) - refsum*t1
770 z( j, k+2 ) = z( j, k+2 ) - refsum*t2
771 z( j, k+3 ) = z( j, k+3 ) - refsum*t3
772 130 CONTINUE
773 140 CONTINUE
774 END IF
775
776
777
778 145 CONTINUE
779
780
781
782
783
784 IF( accum ) THEN
785 IF( wantt ) THEN
786 jtop = 1
787 jbot = n
788 ELSE
789 jtop = ktop
790 jbot = kbot
791 END IF
792 k1 = max( 1, ktop-incol )
793 nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
794
795
796
797 DO 150 jcol = min( ndcol, kbot ) + 1, jbot, nh
798 jlen = min( nh, jbot-jcol+1 )
799 CALL sgemm(
'C',
'N', nu, jlen, nu, one, u( k1, k1 ),
800 $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
801 $ ldwh )
802 CALL slacpy(
'ALL', nu, jlen, wh, ldwh,
803 $ h( incol+k1, jcol ), ldh )
804 150 CONTINUE
805
806
807
808 DO 160 jrow = jtop, max( ktop, incol ) - 1, nv
809 jlen = min( nv, max( ktop, incol )-jrow )
810 CALL sgemm(
'N',
'N', jlen, nu, nu, one,
811 $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
812 $ ldu, zero, wv, ldwv )
813 CALL slacpy(
'ALL', jlen, nu, wv, ldwv,
814 $ h( jrow, incol+k1 ), ldh )
815 160 CONTINUE
816
817
818
819 IF( wantz ) THEN
820 DO 170 jrow = iloz, ihiz, nv
821 jlen = min( nv, ihiz-jrow+1 )
822 CALL sgemm(
'N',
'N', jlen, nu, nu, one,
823 $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
824 $ ldu, zero, wv, ldwv )
825 CALL slacpy(
'ALL', jlen, nu, wv, ldwv,
826 $ z( jrow, incol+k1 ), ldz )
827 170 CONTINUE
828 END IF
829 END IF
830 180 CONTINUE
831
832
833
subroutine sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
SGEMM
subroutine slacpy(uplo, m, n, a, lda, b, ldb)
SLACPY copies all or part of one two-dimensional array to another.
real function slamch(cmach)
SLAMCH
subroutine slaqr1(n, h, ldh, sr1, si1, sr2, si2, v)
SLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and spe...
subroutine slarfg(n, alpha, x, incx, tau)
SLARFG generates an elementary reflector (Householder matrix).
subroutine slaset(uplo, m, n, alpha, beta, a, lda)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
subroutine strmm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
STRMM