Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed chains of bulges in multishift QR algorithms. ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014).
256 IMPLICIT NONE
257
258
259
260
261
262
263 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
264 $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
265 LOGICAL WANTT, WANTZ
266
267
268 COMPLEX H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ),
269 $ WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * )
270
271
272
273
274 COMPLEX ZERO, ONE
275 parameter( zero = ( 0.0e0, 0.0e0 ),
276 $ one = ( 1.0e0, 0.0e0 ) )
277 REAL RZERO, RONE
278 parameter( rzero = 0.0e0, rone = 1.0e0 )
279
280
281 COMPLEX ALPHA, BETA, CDUM, REFSUM, T1, T2, T3
282 REAL H11, H12, H21, H22, SAFMAX, SAFMIN, SCL,
283 $ SMLNUM, TST1, TST2, ULP
284 INTEGER I2, I4, INCOL, J, JBOT, JCOL, JLEN,
285 $ JROW, JTOP, K, K1, KDU, KMS, KRCOL,
286 $ M, M22, MBOT, MTOP, NBMPS, NDCOL,
287 $ NS, NU
288 LOGICAL ACCUM, BMP22
289
290
291 REAL SLAMCH
293
294
295
296 INTRINSIC abs, aimag, conjg, max, min, mod, real
297
298
299 COMPLEX VT( 3 )
300
301
304
305
306 REAL CABS1
307
308
309 cabs1( cdum ) = abs( real( cdum ) ) + abs( aimag( cdum ) )
310
311
312
313
314
315 IF( nshfts.LT.2 )
316 $ RETURN
317
318
319
320
321 IF( ktop.GE.kbot )
322 $ RETURN
323
324
325
326
327 ns = nshfts - mod( nshfts, 2 )
328
329
330
331 safmin =
slamch(
'SAFE MINIMUM' )
332 safmax = rone / safmin
333 ulp =
slamch(
'PRECISION' )
334 smlnum = safmin*( real( n ) / ulp )
335
336
337
338
339 accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
340
341
342
343 IF( ktop+2.LE.kbot )
344 $ h( ktop+2, ktop ) = zero
345
346
347
348 nbmps = ns / 2
349
350
351
352 kdu = 4*nbmps
353
354
355
356 DO 180 incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
357
358
359
360 IF( accum ) THEN
361 jtop = max( ktop, incol )
362 ELSE IF( wantt ) THEN
363 jtop = 1
364 ELSE
365 jtop = ktop
366 END IF
367
368 ndcol = incol + kdu
369 IF( accum )
370 $
CALL claset(
'ALL', kdu, kdu, zero, one, u, ldu )
371
372
373
374
375
376
377
378
379
380
381
382
383
384 DO 145 krcol = incol, min( incol+2*nbmps-1, kbot-2 )
385
386
387
388
389
390
391
392
393 mtop = max( 1, ( ktop-krcol ) / 2+1 )
394 mbot = min( nbmps, ( kbot-krcol-1 ) / 2 )
395 m22 = mbot + 1
396 bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+2*( m22-1 ) ).EQ.
397 $ ( kbot-2 )
398
399
400
401
402 IF ( bmp22 ) THEN
403
404
405
406
407 k = krcol + 2*( m22-1 )
408 IF( k.EQ.ktop-1 ) THEN
409 CALL claqr1( 2, h( k+1, k+1 ), ldh, s( 2*m22-1 ),
410 $ s( 2*m22 ), v( 1, m22 ) )
411 beta = v( 1, m22 )
412 CALL clarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
413 ELSE
414 beta = h( k+1, k )
415 v( 2, m22 ) = h( k+2, k )
416 CALL clarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
417 h( k+1, k ) = beta
418 h( k+2, k ) = zero
419 END IF
420
421
422
423
424
425 t1 = v( 1, m22 )
426 t2 = t1*conjg( v( 2, m22 ) )
427 DO 30 j = jtop, min( kbot, k+3 )
428 refsum = h( j, k+1 ) + v( 2, m22 )*h( j, k+2 )
429 h( j, k+1 ) = h( j, k+1 ) - refsum*t1
430 h( j, k+2 ) = h( j, k+2 ) - refsum*t2
431 30 CONTINUE
432
433
434
435
436 IF( accum ) THEN
437 jbot = min( ndcol, kbot )
438 ELSE IF( wantt ) THEN
439 jbot = n
440 ELSE
441 jbot = kbot
442 END IF
443 t1 = conjg( v( 1, m22 ) )
444 t2 = t1*v( 2, m22 )
445 DO 40 j = k+1, jbot
446 refsum = h( k+1, j ) +
447 $ conjg( v( 2, m22 ) )*h( k+2, j )
448 h( k+1, j ) = h( k+1, j ) - refsum*t1
449 h( k+2, j ) = h( k+2, j ) - refsum*t2
450 40 CONTINUE
451
452
453
454
455
456
457
458
459
460
461 IF( k.GE.ktop) THEN
462 IF( h( k+1, k ).NE.zero ) THEN
463 tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) )
464 IF( tst1.EQ.rzero ) THEN
465 IF( k.GE.ktop+1 )
466 $ tst1 = tst1 + cabs1( h( k, k-1 ) )
467 IF( k.GE.ktop+2 )
468 $ tst1 = tst1 + cabs1( h( k, k-2 ) )
469 IF( k.GE.ktop+3 )
470 $ tst1 = tst1 + cabs1( h( k, k-3 ) )
471 IF( k.LE.kbot-2 )
472 $ tst1 = tst1 + cabs1( h( k+2, k+1 ) )
473 IF( k.LE.kbot-3 )
474 $ tst1 = tst1 + cabs1( h( k+3, k+1 ) )
475 IF( k.LE.kbot-4 )
476 $ tst1 = tst1 + cabs1( h( k+4, k+1 ) )
477 END IF
478 IF( cabs1( h( k+1, k ) )
479 $ .LE.max( smlnum, ulp*tst1 ) ) THEN
480 h12 = max( cabs1( h( k+1, k ) ),
481 $ cabs1( h( k, k+1 ) ) )
482 h21 = min( cabs1( h( k+1, k ) ),
483 $ cabs1( h( k, k+1 ) ) )
484 h11 = max( cabs1( h( k+1, k+1 ) ),
485 $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
486 h22 = min( cabs1( h( k+1, k+1 ) ),
487 $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
488 scl = h11 + h12
489 tst2 = h22*( h11 / scl )
490
491 IF( tst2.EQ.rzero .OR. h21*( h12 / scl ).LE.
492 $ max( smlnum, ulp*tst2 ) )h( k+1, k ) = zero
493 END IF
494 END IF
495 END IF
496
497
498
499 IF( accum ) THEN
500 kms = k - incol
501 DO 50 j = max( 1, ktop-incol ), kdu
502 refsum = v( 1, m22 )*( u( j, kms+1 )+
503 $ v( 2, m22 )*u( j, kms+2 ) )
504 u( j, kms+1 ) = u( j, kms+1 ) - refsum
505 u( j, kms+2 ) = u( j, kms+2 ) -
506 $ refsum*conjg( v( 2, m22 ) )
507 50 CONTINUE
508 ELSE IF( wantz ) THEN
509 DO 60 j = iloz, ihiz
510 refsum = v( 1, m22 )*( z( j, k+1 )+v( 2, m22 )*
511 $ z( j, k+2 ) )
512 z( j, k+1 ) = z( j, k+1 ) - refsum
513 z( j, k+2 ) = z( j, k+2 ) -
514 $ refsum*conjg( v( 2, m22 ) )
515 60 CONTINUE
516 END IF
517 END IF
518
519
520
521 DO 80 m = mbot, mtop, -1
522 k = krcol + 2*( m-1 )
523 IF( k.EQ.ktop-1 ) THEN
524 CALL claqr1( 3, h( ktop, ktop ), ldh, s( 2*m-1 ),
525 $ s( 2*m ), v( 1, m ) )
526 alpha = v( 1, m )
527 CALL clarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
528 ELSE
529
530
531
532
533
534 t1 = v( 1, m )
535 t2 = t1*conjg( v( 2, m ) )
536 t3 = t1*conjg( v( 3, m ) )
537 refsum = v( 3, m )*h( k+3, k+2 )
538 h( k+3, k ) = -refsum*t1
539 h( k+3, k+1 ) = -refsum*t2
540 h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*t3
541
542
543
544
545 beta = h( k+1, k )
546 v( 2, m ) = h( k+2, k )
547 v( 3, m ) = h( k+3, k )
548 CALL clarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
549
550
551
552
553
554
555 IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
556 $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
557
558
559
560 h( k+1, k ) = beta
561 h( k+2, k ) = zero
562 h( k+3, k ) = zero
563 ELSE
564
565
566
567
568
569
570
571 CALL claqr1( 3, h( k+1, k+1 ), ldh, s( 2*m-1 ),
572 $ s( 2*m ), vt )
573 alpha = vt( 1 )
574 CALL clarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
575 t1 = conjg( vt( 1 ) )
576 t2 = t1*vt( 2 )
577 t3 = t1*vt( 3 )
578 refsum = h( k+1, k )+conjg( vt( 2 ) )*h( k+2, k )
579
580 IF( cabs1( h( k+2, k )-refsum*t2 )+
581 $ cabs1( refsum*t3 ).GT.ulp*
582 $ ( cabs1( h( k, k ) )+cabs1( h( k+1,
583 $ k+1 ) )+cabs1( h( k+2, k+2 ) ) ) ) THEN
584
585
586
587
588
589 h( k+1, k ) = beta
590 h( k+2, k ) = zero
591 h( k+3, k ) = zero
592 ELSE
593
594
595
596
597
598
599 h( k+1, k ) = h( k+1, k ) - refsum*t1
600 h( k+2, k ) = zero
601 h( k+3, k ) = zero
602 v( 1, m ) = vt( 1 )
603 v( 2, m ) = vt( 2 )
604 v( 3, m ) = vt( 3 )
605 END IF
606 END IF
607 END IF
608
609
610
611
612
613
614
615 t1 = v( 1, m )
616 t2 = t1*conjg( v( 2, m ) )
617 t3 = t1*conjg( v( 3, m ) )
618 DO 70 j = jtop, min( kbot, k+3 )
619 refsum = h( j, k+1 ) + v( 2, m )*h( j, k+2 )
620 $ + v( 3, m )*h( j, k+3 )
621 h( j, k+1 ) = h( j, k+1 ) - refsum*t1
622 h( j, k+2 ) = h( j, k+2 ) - refsum*t2
623 h( j, k+3 ) = h( j, k+3 ) - refsum*t3
624 70 CONTINUE
625
626
627
628
629 t1 = conjg( v( 1, m ) )
630 t2 = t1*v( 2, m )
631 t3 = t1*v( 3, m )
632 refsum = h( k+1, k+1 ) + conjg( v( 2, m ) )*h( k+2, k+1 )
633 $ + conjg( v( 3, m ) )*h( k+3, k+1 )
634 h( k+1, k+1 ) = h( k+1, k+1 ) - refsum*t1
635 h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*t2
636 h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*t3
637
638
639
640
641
642
643
644
645
646
647 IF( k.LT.ktop)
648 $ cycle
649 IF( h( k+1, k ).NE.zero ) THEN
650 tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) )
651 IF( tst1.EQ.rzero ) THEN
652 IF( k.GE.ktop+1 )
653 $ tst1 = tst1 + cabs1( h( k, k-1 ) )
654 IF( k.GE.ktop+2 )
655 $ tst1 = tst1 + cabs1( h( k, k-2 ) )
656 IF( k.GE.ktop+3 )
657 $ tst1 = tst1 + cabs1( h( k, k-3 ) )
658 IF( k.LE.kbot-2 )
659 $ tst1 = tst1 + cabs1( h( k+2, k+1 ) )
660 IF( k.LE.kbot-3 )
661 $ tst1 = tst1 + cabs1( h( k+3, k+1 ) )
662 IF( k.LE.kbot-4 )
663 $ tst1 = tst1 + cabs1( h( k+4, k+1 ) )
664 END IF
665 IF( cabs1( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
666 $ THEN
667 h12 = max( cabs1( h( k+1, k ) ),
668 $ cabs1( h( k, k+1 ) ) )
669 h21 = min( cabs1( h( k+1, k ) ),
670 $ cabs1( h( k, k+1 ) ) )
671 h11 = max( cabs1( h( k+1, k+1 ) ),
672 $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
673 h22 = min( cabs1( h( k+1, k+1 ) ),
674 $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
675 scl = h11 + h12
676 tst2 = h22*( h11 / scl )
677
678 IF( tst2.EQ.rzero .OR. h21*( h12 / scl ).LE.
679 $ max( smlnum, ulp*tst2 ) )h( k+1, k ) = zero
680 END IF
681 END IF
682 80 CONTINUE
683
684
685
686 IF( accum ) THEN
687 jbot = min( ndcol, kbot )
688 ELSE IF( wantt ) THEN
689 jbot = n
690 ELSE
691 jbot = kbot
692 END IF
693
694 DO 100 m = mbot, mtop, -1
695 k = krcol + 2*( m-1 )
696 t1 = conjg( v( 1, m ) )
697 t2 = t1*v( 2, m )
698 t3 = t1*v( 3, m )
699 DO 90 j = max( ktop, krcol + 2*m ), jbot
700 refsum = h( k+1, j ) + conjg( v( 2, m ) )*
701 $ h( k+2, j ) + conjg( v( 3, m ) )*h( k+3, j )
702 h( k+1, j ) = h( k+1, j ) - refsum*t1
703 h( k+2, j ) = h( k+2, j ) - refsum*t2
704 h( k+3, j ) = h( k+3, j ) - refsum*t3
705 90 CONTINUE
706 100 CONTINUE
707
708
709
710 IF( accum ) THEN
711
712
713
714
715
716 DO 120 m = mbot, mtop, -1
717 k = krcol + 2*( m-1 )
718 kms = k - incol
719 i2 = max( 1, ktop-incol )
720 i2 = max( i2, kms-(krcol-incol)+1 )
721 i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 )
722 t1 = v( 1, m )
723 t2 = t1*conjg( v( 2, m ) )
724 t3 = t1*conjg( v( 3, m ) )
725 DO 110 j = i2, i4
726 refsum = u( j, kms+1 ) + v( 2, m )*u( j, kms+2 )
727 $ + v( 3, m )*u( j, kms+3 )
728 u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
729 u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
730 u( j, kms+3 ) = u( j, kms+3 ) - refsum*t3
731 110 CONTINUE
732 120 CONTINUE
733 ELSE IF( wantz ) THEN
734
735
736
737
738
739 DO 140 m = mbot, mtop, -1
740 k = krcol + 2*( m-1 )
741 t1 = v( 1, m )
742 t2 = t1*conjg( v( 2, m ) )
743 t3 = t1*conjg( v( 3, m ) )
744 DO 130 j = iloz, ihiz
745 refsum = z( j, k+1 ) + v( 2, m )*z( j, k+2 )
746 $ + v( 3, m )*z( j, k+3 )
747 z( j, k+1 ) = z( j, k+1 ) - refsum*t1
748 z( j, k+2 ) = z( j, k+2 ) - refsum*t2
749 z( j, k+3 ) = z( j, k+3 ) - refsum*t3
750 130 CONTINUE
751 140 CONTINUE
752 END IF
753
754
755
756 145 CONTINUE
757
758
759
760
761
762 IF( accum ) THEN
763 IF( wantt ) THEN
764 jtop = 1
765 jbot = n
766 ELSE
767 jtop = ktop
768 jbot = kbot
769 END IF
770 k1 = max( 1, ktop-incol )
771 nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
772
773
774
775 DO 150 jcol = min( ndcol, kbot ) + 1, jbot, nh
776 jlen = min( nh, jbot-jcol+1 )
777 CALL cgemm(
'C',
'N', nu, jlen, nu, one, u( k1, k1 ),
778 $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
779 $ ldwh )
780 CALL clacpy(
'ALL', nu, jlen, wh, ldwh,
781 $ h( incol+k1, jcol ), ldh )
782 150 CONTINUE
783
784
785
786 DO 160 jrow = jtop, max( ktop, incol ) - 1, nv
787 jlen = min( nv, max( ktop, incol )-jrow )
788 CALL cgemm(
'N',
'N', jlen, nu, nu, one,
789 $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
790 $ ldu, zero, wv, ldwv )
791 CALL clacpy(
'ALL', jlen, nu, wv, ldwv,
792 $ h( jrow, incol+k1 ), ldh )
793 160 CONTINUE
794
795
796
797 IF( wantz ) THEN
798 DO 170 jrow = iloz, ihiz, nv
799 jlen = min( nv, ihiz-jrow+1 )
800 CALL cgemm(
'N',
'N', jlen, nu, nu, one,
801 $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
802 $ ldu, zero, wv, ldwv )
803 CALL clacpy(
'ALL', jlen, nu, wv, ldwv,
804 $ z( jrow, incol+k1 ), ldz )
805 170 CONTINUE
806 END IF
807 END IF
808 180 CONTINUE
809
810
811
subroutine cgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
CGEMM
subroutine clacpy(uplo, m, n, a, lda, b, ldb)
CLACPY copies all or part of one two-dimensional array to another.
real function slamch(cmach)
SLAMCH
subroutine claqr1(n, h, ldh, s1, s2, v)
CLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and spe...
subroutine clarfg(n, alpha, x, incx, tau)
CLARFG generates an elementary reflector (Householder matrix).
subroutine claset(uplo, m, n, alpha, beta, a, lda)
CLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
subroutine ctrmm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
CTRMM