Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed chains of bulges in multishift QR algorithms. ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014).
265 IMPLICIT NONE
266
267
268
269
270
271
272 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
273 $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
274 LOGICAL WANTT, WANTZ
275
276
277 REAL H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
278 $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
279 $ Z( LDZ, * )
280
281
282
283
284 REAL ZERO, ONE
285 parameter( zero = 0.0e0, one = 1.0e0 )
286
287
288 REAL ALPHA, BETA, H11, H12, H21, H22, REFSUM,
289 $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, T1, T2,
290 $ T3, TST1, TST2, ULP
291 INTEGER I, I2, I4, INCOL, J, JBOT, JCOL, JLEN,
292 $ JROW, JTOP, K, K1, KDU, KMS, KRCOL,
293 $ M, M22, MBOT, MTOP, NBMPS, NDCOL,
294 $ NS, NU
295 LOGICAL ACCUM, BMP22
296
297
298 REAL SLAMCH
300
301
302
303 INTRINSIC abs, max, min, mod, real
304
305
306 REAL VT( 3 )
307
308
311
312
313
314
315
316 IF( nshfts.LT.2 )
317 $ RETURN
318
319
320
321
322 IF( ktop.GE.kbot )
323 $ RETURN
324
325
326
327
328
329
330 DO 10 i = 1, nshfts - 2, 2
331 IF( si( i ).NE.-si( i+1 ) ) THEN
332
333 swap = sr( i )
334 sr( i ) = sr( i+1 )
335 sr( i+1 ) = sr( i+2 )
336 sr( i+2 ) = swap
337
338 swap = si( i )
339 si( i ) = si( i+1 )
340 si( i+1 ) = si( i+2 )
341 si( i+2 ) = swap
342 END IF
343 10 CONTINUE
344
345
346
347
348
349
350 ns = nshfts - mod( nshfts, 2 )
351
352
353
354 safmin =
slamch(
'SAFE MINIMUM' )
355 safmax = one / safmin
356 CALL slabad( safmin, safmax )
357 ulp =
slamch(
'PRECISION' )
358 smlnum = safmin*( real( n ) / ulp )
359
360
361
362
363 accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
364
365
366
367 IF( ktop+2.LE.kbot )
368 $ h( ktop+2, ktop ) = zero
369
370
371
372 nbmps = ns / 2
373
374
375
376 kdu = 4*nbmps
377
378
379
380 DO 180 incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
381
382
383
384 IF( accum ) THEN
385 jtop = max( ktop, incol )
386 ELSE IF( wantt ) THEN
387 jtop = 1
388 ELSE
389 jtop = ktop
390 END IF
391
392 ndcol = incol + kdu
393 IF( accum )
394 $
CALL slaset(
'ALL', kdu, kdu, zero, one, u, ldu )
395
396
397
398
399
400
401
402
403
404
405
406
407
408 DO 145 krcol = incol, min( incol+2*nbmps-1, kbot-2 )
409
410
411
412
413
414
415
416
417 mtop = max( 1, ( ktop-krcol ) / 2+1 )
418 mbot = min( nbmps, ( kbot-krcol-1 ) / 2 )
419 m22 = mbot + 1
420 bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+2*( m22-1 ) ).EQ.
421 $ ( kbot-2 )
422
423
424
425
426 IF ( bmp22 ) THEN
427
428
429
430
431 k = krcol + 2*( m22-1 )
432 IF( k.EQ.ktop-1 ) THEN
433 CALL slaqr1( 2, h( k+1, k+1 ), ldh, sr( 2*m22-1 ),
434 $ si( 2*m22-1 ), sr( 2*m22 ), si( 2*m22 ),
435 $ v( 1, m22 ) )
436 beta = v( 1, m22 )
437 CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
438 ELSE
439 beta = h( k+1, k )
440 v( 2, m22 ) = h( k+2, k )
441 CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
442 h( k+1, k ) = beta
443 h( k+2, k ) = zero
444 END IF
445
446
447
448
449
450 t1 = v( 1, m22 )
451 t2 = t1*v( 2, m22 )
452 DO 30 j = jtop, min( kbot, k+3 )
453 refsum = h( j, k+1 ) + v( 2, m22 )*h( j, k+2 )
454 h( j, k+1 ) = h( j, k+1 ) - refsum*t1
455 h( j, k+2 ) = h( j, k+2 ) - refsum*t2
456 30 CONTINUE
457
458
459
460
461 IF( accum ) THEN
462 jbot = min( ndcol, kbot )
463 ELSE IF( wantt ) THEN
464 jbot = n
465 ELSE
466 jbot = kbot
467 END IF
468 t1 = v( 1, m22 )
469 t2 = t1*v( 2, m22 )
470 DO 40 j = k+1, jbot
471 refsum = h( k+1, j ) + v( 2, m22 )*h( k+2, j )
472 h( k+1, j ) = h( k+1, j ) - refsum*t1
473 h( k+2, j ) = h( k+2, j ) - refsum*t2
474 40 CONTINUE
475
476
477
478
479
480
481
482
483
484
485 IF( k.GE.ktop ) THEN
486 IF( h( k+1, k ).NE.zero ) THEN
487 tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
488 IF( tst1.EQ.zero ) THEN
489 IF( k.GE.ktop+1 )
490 $ tst1 = tst1 + abs( h( k, k-1 ) )
491 IF( k.GE.ktop+2 )
492 $ tst1 = tst1 + abs( h( k, k-2 ) )
493 IF( k.GE.ktop+3 )
494 $ tst1 = tst1 + abs( h( k, k-3 ) )
495 IF( k.LE.kbot-2 )
496 $ tst1 = tst1 + abs( h( k+2, k+1 ) )
497 IF( k.LE.kbot-3 )
498 $ tst1 = tst1 + abs( h( k+3, k+1 ) )
499 IF( k.LE.kbot-4 )
500 $ tst1 = tst1 + abs( h( k+4, k+1 ) )
501 END IF
502 IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
503 $ THEN
504 h12 = max( abs( h( k+1, k ) ),
505 $ abs( h( k, k+1 ) ) )
506 h21 = min( abs( h( k+1, k ) ),
507 $ abs( h( k, k+1 ) ) )
508 h11 = max( abs( h( k+1, k+1 ) ),
509 $ abs( h( k, k )-h( k+1, k+1 ) ) )
510 h22 = min( abs( h( k+1, k+1 ) ),
511 $ abs( h( k, k )-h( k+1, k+1 ) ) )
512 scl = h11 + h12
513 tst2 = h22*( h11 / scl )
514
515 IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
516 $ max( smlnum, ulp*tst2 ) ) THEN
517 h( k+1, k ) = zero
518 END IF
519 END IF
520 END IF
521 END IF
522
523
524
525 IF( accum ) THEN
526 kms = k - incol
527 t1 = v( 1, m22 )
528 t2 = t1*v( 2, m22 )
529 DO 50 j = max( 1, ktop-incol ), kdu
530 refsum = u( j, kms+1 ) + v( 2, m22 )*u( j, kms+2 )
531 u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
532 u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
533 50 CONTINUE
534 ELSE IF( wantz ) THEN
535 t1 = v( 1, m22 )
536 t2 = t1*v( 2, m22 )
537 DO 60 j = iloz, ihiz
538 refsum = z( j, k+1 )+v( 2, m22 )*z( j, k+2 )
539 z( j, k+1 ) = z( j, k+1 ) - refsum*t1
540 z( j, k+2 ) = z( j, k+2 ) - refsum*t2
541 60 CONTINUE
542 END IF
543 END IF
544
545
546
547 DO 80 m = mbot, mtop, -1
548 k = krcol + 2*( m-1 )
549 IF( k.EQ.ktop-1 ) THEN
550 CALL slaqr1( 3, h( ktop, ktop ), ldh, sr( 2*m-1 ),
551 $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
552 $ v( 1, m ) )
553 alpha = v( 1, m )
554 CALL slarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
555 ELSE
556
557
558
559
560
561 refsum = v( 1, m )*v( 3, m )*h( k+3, k+2 )
562 h( k+3, k ) = -refsum
563 h( k+3, k+1 ) = -refsum*v( 2, m )
564 h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*v( 3, m )
565
566
567
568
569 beta = h( k+1, k )
570 v( 2, m ) = h( k+2, k )
571 v( 3, m ) = h( k+3, k )
572 CALL slarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
573
574
575
576
577
578
579 IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
580 $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
581
582
583
584 h( k+1, k ) = beta
585 h( k+2, k ) = zero
586 h( k+3, k ) = zero
587 ELSE
588
589
590
591
592
593
594
595 CALL slaqr1( 3, h( k+1, k+1 ), ldh, sr( 2*m-1 ),
596 $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
597 $ vt )
598 alpha = vt( 1 )
599 CALL slarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
600 refsum = vt( 1 )*( h( k+1, k )+vt( 2 )*
601 $ h( k+2, k ) )
602
603 IF( abs( h( k+2, k )-refsum*vt( 2 ) )+
604 $ abs( refsum*vt( 3 ) ).GT.ulp*
605 $ ( abs( h( k, k ) )+abs( h( k+1,
606 $ k+1 ) )+abs( h( k+2, k+2 ) ) ) ) THEN
607
608
609
610
611
612 h( k+1, k ) = beta
613 h( k+2, k ) = zero
614 h( k+3, k ) = zero
615 ELSE
616
617
618
619
620
621
622 h( k+1, k ) = h( k+1, k ) - refsum
623 h( k+2, k ) = zero
624 h( k+3, k ) = zero
625 v( 1, m ) = vt( 1 )
626 v( 2, m ) = vt( 2 )
627 v( 3, m ) = vt( 3 )
628 END IF
629 END IF
630 END IF
631
632
633
634
635
636
637
638 t1 = v( 1, m )
639 t2 = t1*v( 2, m )
640 t3 = t1*v( 3, m )
641 DO 70 j = jtop, min( kbot, k+3 )
642 refsum = h( j, k+1 ) + v( 2, m )*h( j, k+2 )
643 $ + v( 3, m )*h( j, k+3 )
644 h( j, k+1 ) = h( j, k+1 ) - refsum*t1
645 h( j, k+2 ) = h( j, k+2 ) - refsum*t2
646 h( j, k+3 ) = h( j, k+3 ) - refsum*t3
647 70 CONTINUE
648
649
650
651
652 refsum = h( k+1, k+1 ) + v( 2, m )*h( k+2, k+1 )
653 $ + v( 3, m )*h( k+3, k+1 )
654 h( k+1, k+1 ) = h( k+1, k+1 ) - refsum*t1
655 h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*t2
656 h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*t3
657
658
659
660
661
662
663
664
665
666
667 IF( k.LT.ktop)
668 $ cycle
669 IF( h( k+1, k ).NE.zero ) THEN
670 tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
671 IF( tst1.EQ.zero ) THEN
672 IF( k.GE.ktop+1 )
673 $ tst1 = tst1 + abs( h( k, k-1 ) )
674 IF( k.GE.ktop+2 )
675 $ tst1 = tst1 + abs( h( k, k-2 ) )
676 IF( k.GE.ktop+3 )
677 $ tst1 = tst1 + abs( h( k, k-3 ) )
678 IF( k.LE.kbot-2 )
679 $ tst1 = tst1 + abs( h( k+2, k+1 ) )
680 IF( k.LE.kbot-3 )
681 $ tst1 = tst1 + abs( h( k+3, k+1 ) )
682 IF( k.LE.kbot-4 )
683 $ tst1 = tst1 + abs( h( k+4, k+1 ) )
684 END IF
685 IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
686 $ THEN
687 h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
688 h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
689 h11 = max( abs( h( k+1, k+1 ) ),
690 $ abs( h( k, k )-h( k+1, k+1 ) ) )
691 h22 = min( abs( h( k+1, k+1 ) ),
692 $ abs( h( k, k )-h( k+1, k+1 ) ) )
693 scl = h11 + h12
694 tst2 = h22*( h11 / scl )
695
696 IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
697 $ max( smlnum, ulp*tst2 ) ) THEN
698 h( k+1, k ) = zero
699 END IF
700 END IF
701 END IF
702 80 CONTINUE
703
704
705
706 IF( accum ) THEN
707 jbot = min( ndcol, kbot )
708 ELSE IF( wantt ) THEN
709 jbot = n
710 ELSE
711 jbot = kbot
712 END IF
713
714 DO 100 m = mbot, mtop, -1
715 k = krcol + 2*( m-1 )
716 t1 = v( 1, m )
717 t2 = t1*v( 2, m )
718 t3 = t1*v( 3, m )
719 DO 90 j = max( ktop, krcol + 2*m ), jbot
720 refsum = h( k+1, j ) + v( 2, m )*h( k+2, j )
721 $ + v( 3, m )*h( k+3, j )
722 h( k+1, j ) = h( k+1, j ) - refsum*t1
723 h( k+2, j ) = h( k+2, j ) - refsum*t2
724 h( k+3, j ) = h( k+3, j ) - refsum*t3
725 90 CONTINUE
726 100 CONTINUE
727
728
729
730 IF( accum ) THEN
731
732
733
734
735
736 DO 120 m = mbot, mtop, -1
737 k = krcol + 2*( m-1 )
738 kms = k - incol
739 i2 = max( 1, ktop-incol )
740 i2 = max( i2, kms-(krcol-incol)+1 )
741 i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 )
742 t1 = v( 1, m )
743 t2 = t1*v( 2, m )
744 t3 = t1*v( 3, m )
745 DO 110 j = i2, i4
746 refsum = u( j, kms+1 ) + v( 2, m )*u( j, kms+2 )
747 $ + v( 3, m )*u( j, kms+3 )
748 u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
749 u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
750 u( j, kms+3 ) = u( j, kms+3 ) - refsum*t3
751 110 CONTINUE
752 120 CONTINUE
753 ELSE IF( wantz ) THEN
754
755
756
757
758
759 DO 140 m = mbot, mtop, -1
760 k = krcol + 2*( m-1 )
761 t1 = v( 1, m )
762 t2 = t1*v( 2, m )
763 t3 = t1*v( 3, m )
764 DO 130 j = iloz, ihiz
765 refsum = z( j, k+1 ) + v( 2, m )*z( j, k+2 )
766 $ + v( 3, m )*z( j, k+3 )
767 z( j, k+1 ) = z( j, k+1 ) - refsum*t1
768 z( j, k+2 ) = z( j, k+2 ) - refsum*t2
769 z( j, k+3 ) = z( j, k+3 ) - refsum*t3
770 130 CONTINUE
771 140 CONTINUE
772 END IF
773
774
775
776 145 CONTINUE
777
778
779
780
781
782 IF( accum ) THEN
783 IF( wantt ) THEN
784 jtop = 1
785 jbot = n
786 ELSE
787 jtop = ktop
788 jbot = kbot
789 END IF
790 k1 = max( 1, ktop-incol )
791 nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
792
793
794
795 DO 150 jcol = min( ndcol, kbot ) + 1, jbot, nh
796 jlen = min( nh, jbot-jcol+1 )
797 CALL sgemm(
'C',
'N', nu, jlen, nu, one, u( k1, k1 ),
798 $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
799 $ ldwh )
800 CALL slacpy(
'ALL', nu, jlen, wh, ldwh,
801 $ h( incol+k1, jcol ), ldh )
802 150 CONTINUE
803
804
805
806 DO 160 jrow = jtop, max( ktop, incol ) - 1, nv
807 jlen = min( nv, max( ktop, incol )-jrow )
808 CALL sgemm(
'N',
'N', jlen, nu, nu, one,
809 $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
810 $ ldu, zero, wv, ldwv )
811 CALL slacpy(
'ALL', jlen, nu, wv, ldwv,
812 $ h( jrow, incol+k1 ), ldh )
813 160 CONTINUE
814
815
816
817 IF( wantz ) THEN
818 DO 170 jrow = iloz, ihiz, nv
819 jlen = min( nv, ihiz-jrow+1 )
820 CALL sgemm(
'N',
'N', jlen, nu, nu, one,
821 $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
822 $ ldu, zero, wv, ldwv )
823 CALL slacpy(
'ALL', jlen, nu, wv, ldwv,
824 $ z( jrow, incol+k1 ), ldz )
825 170 CONTINUE
826 END IF
827 END IF
828 180 CONTINUE
829
830
831
subroutine slabad(SMALL, LARGE)
SLABAD
subroutine slaset(UPLO, M, N, ALPHA, BETA, A, LDA)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
subroutine slacpy(UPLO, M, N, A, LDA, B, LDB)
SLACPY copies all or part of one two-dimensional array to another.
subroutine slarfg(N, ALPHA, X, INCX, TAU)
SLARFG generates an elementary reflector (Householder matrix).
subroutine slaqr1(N, H, LDH, SR1, SI1, SR2, SI2, V)
SLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and spe...
subroutine strmm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
STRMM
subroutine sgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
SGEMM
real function slamch(CMACH)
SLAMCH