229
230
231
232
233
234 IMPLICIT NONE
235
236
237 CHARACTER COMPQ, COMPZ
238 INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK
239
240
241 DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
242 $ Z( LDZ, * ), WORK( * )
243
244
245
246
247
248 DOUBLE PRECISION ZERO, ONE
249 parameter( zero = 0.0d+0, one = 1.0d+0 )
250
251
252 LOGICAL BLK22, INITQ, INITZ, LQUERY, WANTQ, WANTZ
253 CHARACTER*1 COMPQ2, COMPZ2
254 INTEGER COLA, I, IERR, J, J0, JCOL, JJ, JROW, K,
255 $ KACC22, LEN, LWKOPT, N2NB, NB, NBLST, NBMIN,
256 $ NH, NNB, NX, PPW, PPWO, PW, TOP, TOPQ
257 DOUBLE PRECISION C, C1, C2, S, S1, S2, TEMP, TEMP1, TEMP2, TEMP3
258
259
260 LOGICAL LSAME
261 INTEGER ILAENV
263
264
268
269
270 INTRINSIC dble, max
271
272
273
274
275
276 info = 0
277 nb =
ilaenv( 1,
'DGGHD3',
' ', n, ilo, ihi, -1 )
278 nh = ihi - ilo + 1
279 IF( nh.LE.1 ) THEN
280 lwkopt = 1
281 ELSE
282 lwkopt = 6*n*nb
283 END IF
284 work( 1 ) = dble( lwkopt )
285 initq =
lsame( compq,
'I' )
286 wantq = initq .OR.
lsame( compq,
'V' )
287 initz =
lsame( compz,
'I' )
288 wantz = initz .OR.
lsame( compz,
'V' )
289 lquery = ( lwork.EQ.-1 )
290
291 IF( .NOT.
lsame( compq,
'N' ) .AND. .NOT.wantq )
THEN
292 info = -1
293 ELSE IF( .NOT.
lsame( compz,
'N' ) .AND. .NOT.wantz )
THEN
294 info = -2
295 ELSE IF( n.LT.0 ) THEN
296 info = -3
297 ELSE IF( ilo.LT.1 ) THEN
298 info = -4
299 ELSE IF( ihi.GT.n .OR. ihi.LT.ilo-1 ) THEN
300 info = -5
301 ELSE IF( lda.LT.max( 1, n ) ) THEN
302 info = -7
303 ELSE IF( ldb.LT.max( 1, n ) ) THEN
304 info = -9
305 ELSE IF( ( wantq .AND. ldq.LT.n ) .OR. ldq.LT.1 ) THEN
306 info = -11
307 ELSE IF( ( wantz .AND. ldz.LT.n ) .OR. ldz.LT.1 ) THEN
308 info = -13
309 ELSE IF( lwork.LT.1 .AND. .NOT.lquery ) THEN
310 info = -15
311 END IF
312 IF( info.NE.0 ) THEN
313 CALL xerbla(
'DGGHD3', -info )
314 RETURN
315 ELSE IF( lquery ) THEN
316 RETURN
317 END IF
318
319
320
321 IF( initq )
322 $
CALL dlaset(
'All', n, n, zero, one, q, ldq )
323 IF( initz )
324 $
CALL dlaset(
'All', n, n, zero, one, z, ldz )
325
326
327
328 IF( n.GT.1 )
329 $
CALL dlaset(
'Lower', n-1, n-1, zero, zero, b(2, 1), ldb )
330
331
332
333 IF( nh.LE.1 ) THEN
334 work( 1 ) = one
335 RETURN
336 END IF
337
338
339
340 nbmin =
ilaenv( 2,
'DGGHD3',
' ', n, ilo, ihi, -1 )
341 IF( nb.GT.1 .AND. nb.LT.nh ) THEN
342
343
344
345 nx = max( nb,
ilaenv( 3,
'DGGHD3',
' ', n, ilo, ihi, -1 ) )
346 IF( nx.LT.nh ) THEN
347
348
349
350 IF( lwork.LT.lwkopt ) THEN
351
352
353
354
355
356 nbmin = max( 2,
ilaenv( 2,
'DGGHD3',
' ', n, ilo, ihi,
357 $ -1 ) )
358 IF( lwork.GE.6*n*nbmin ) THEN
359 nb = lwork / ( 6*n )
360 ELSE
361 nb = 1
362 END IF
363 END IF
364 END IF
365 END IF
366
367 IF( nb.LT.nbmin .OR. nb.GE.nh ) THEN
368
369
370
371 jcol = ilo
372
373 ELSE
374
375
376
377 kacc22 =
ilaenv( 16,
'DGGHD3',
' ', n, ilo, ihi, -1 )
378 blk22 = kacc22.EQ.2
379 DO jcol = ilo, ihi-2, nb
380 nnb = min( nb, ihi-jcol-1 )
381
382
383
384
385
386
387
388 n2nb = ( ihi-jcol-1 ) / nnb - 1
389 nblst = ihi - jcol - n2nb*nnb
390 CALL dlaset(
'All', nblst, nblst, zero, one, work,
391 $ nblst )
392 pw = nblst * nblst + 1
393 DO i = 1, n2nb
394 CALL dlaset(
'All', 2*nnb, 2*nnb, zero, one,
395 $ work( pw ), 2*nnb )
396 pw = pw + 4*nnb*nnb
397 END DO
398
399
400
401 DO j = jcol, jcol+nnb-1
402
403
404
405
406 DO i = ihi, j+2, -1
407 temp = a( i-1, j )
408 CALL dlartg( temp, a( i, j ), c, s, a( i-1, j ) )
409 a( i, j ) = c
410 b( i, j ) = s
411 END DO
412
413
414
415 ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1
416 len = 2 + j - jcol
417 jrow = j + n2nb*nnb + 2
418 DO i = ihi, jrow, -1
419 c = a( i, j )
420 s = b( i, j )
421 DO jj = ppw, ppw+len-1
422 temp = work( jj + nblst )
423 work( jj + nblst ) = c*temp - s*work( jj )
424 work( jj ) = s*temp + c*work( jj )
425 END DO
426 len = len + 1
427 ppw = ppw - nblst - 1
428 END DO
429
430 ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb
431 j0 = jrow - nnb
432 DO jrow = j0, j+2, -nnb
433 ppw = ppwo
434 len = 2 + j - jcol
435 DO i = jrow+nnb-1, jrow, -1
436 c = a( i, j )
437 s = b( i, j )
438 DO jj = ppw, ppw+len-1
439 temp = work( jj + 2*nnb )
440 work( jj + 2*nnb ) = c*temp - s*work( jj )
441 work( jj ) = s*temp + c*work( jj )
442 END DO
443 len = len + 1
444 ppw = ppw - 2*nnb - 1
445 END DO
446 ppwo = ppwo + 4*nnb*nnb
447 END DO
448
449
450
451
452 IF( jcol.LE.2 ) THEN
453 top = 0
454 ELSE
455 top = jcol
456 END IF
457
458
459
460
461 DO jj = n, j+1, -1
462
463
464
465 DO i = min( jj+1, ihi ), j+2, -1
466 c = a( i, j )
467 s = b( i, j )
468 temp = b( i, jj )
469 b( i, jj ) = c*temp - s*b( i-1, jj )
470 b( i-1, jj ) = s*temp + c*b( i-1, jj )
471 END DO
472
473
474
475 IF( jj.LT.ihi ) THEN
476 temp = b( jj+1, jj+1 )
477 CALL dlartg( temp, b( jj+1, jj ), c, s,
478 $ b( jj+1, jj+1 ) )
479 b( jj+1, jj ) = zero
480 CALL drot( jj-top, b( top+1, jj+1 ), 1,
481 $ b( top+1, jj ), 1, c, s )
482 a( jj+1, j ) = c
483 b( jj+1, j ) = -s
484 END IF
485 END DO
486
487
488
489
490
491
492
493
494 jj = mod( ihi-j-1, 3 )
495 DO i = ihi-j-3, jj+1, -3
496 c = a( j+1+i, j )
497 s = -b( j+1+i, j )
498 c1 = a( j+2+i, j )
499 s1 = -b( j+2+i, j )
500 c2 = a( j+3+i, j )
501 s2 = -b( j+3+i, j )
502
503 DO k = top+1, ihi
504 temp = a( k, j+i )
505 temp1 = a( k, j+i+1 )
506 temp2 = a( k, j+i+2 )
507 temp3 = a( k, j+i+3 )
508 a( k, j+i+3 ) = c2*temp3 + s2*temp2
509 temp2 = -s2*temp3 + c2*temp2
510 a( k, j+i+2 ) = c1*temp2 + s1*temp1
511 temp1 = -s1*temp2 + c1*temp1
512 a( k, j+i+1 ) = c*temp1 + s*temp
513 a( k, j+i ) = -s*temp1 + c*temp
514 END DO
515 END DO
516
517 IF( jj.GT.0 ) THEN
518 DO i = jj, 1, -1
519 CALL drot( ihi-top, a( top+1, j+i+1 ), 1,
520 $ a( top+1, j+i ), 1, a( j+1+i, j ),
521 $ -b( j+1+i, j ) )
522 END DO
523 END IF
524
525
526
527 IF ( j .LT. jcol + nnb - 1 ) THEN
528 len = 1 + j - jcol
529
530
531
532
533
534
535
536
537
538
539
540 jrow = ihi - nblst + 1
541 CALL dgemv(
'Transpose', nblst, len, one, work,
542 $ nblst, a( jrow, j+1 ), 1, zero,
543 $ work( pw ), 1 )
544 ppw = pw + len
545 DO i = jrow, jrow+nblst-len-1
546 work( ppw ) = a( i, j+1 )
547 ppw = ppw + 1
548 END DO
549 CALL dtrmv(
'Lower',
'Transpose',
'Non-unit',
550 $ nblst-len, work( len*nblst + 1 ), nblst,
551 $ work( pw+len ), 1 )
552 CALL dgemv(
'Transpose', len, nblst-len, one,
553 $ work( (len+1)*nblst - len + 1 ), nblst,
554 $ a( jrow+nblst-len, j+1 ), 1, one,
555 $ work( pw+len ), 1 )
556 ppw = pw
557 DO i = jrow, jrow+nblst-1
558 a( i, j+1 ) = work( ppw )
559 ppw = ppw + 1
560 END DO
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575 ppwo = 1 + nblst*nblst
576 j0 = jrow - nnb
577 DO jrow = j0, jcol+1, -nnb
578 ppw = pw + len
579 DO i = jrow, jrow+nnb-1
580 work( ppw ) = a( i, j+1 )
581 ppw = ppw + 1
582 END DO
583 ppw = pw
584 DO i = jrow+nnb, jrow+nnb+len-1
585 work( ppw ) = a( i, j+1 )
586 ppw = ppw + 1
587 END DO
588 CALL dtrmv(
'Upper',
'Transpose',
'Non-unit',
589 $ len,
590 $ work( ppwo + nnb ), 2*nnb, work( pw ),
591 $ 1 )
592 CALL dtrmv(
'Lower',
'Transpose',
'Non-unit',
593 $ nnb,
594 $ work( ppwo + 2*len*nnb ),
595 $ 2*nnb, work( pw + len ), 1 )
596 CALL dgemv(
'Transpose', nnb, len, one,
597 $ work( ppwo ), 2*nnb, a( jrow, j+1 ), 1,
598 $ one, work( pw ), 1 )
599 CALL dgemv(
'Transpose', len, nnb, one,
600 $ work( ppwo + 2*len*nnb + nnb ), 2*nnb,
601 $ a( jrow+nnb, j+1 ), 1, one,
602 $ work( pw+len ), 1 )
603 ppw = pw
604 DO i = jrow, jrow+len+nnb-1
605 a( i, j+1 ) = work( ppw )
606 ppw = ppw + 1
607 END DO
608 ppwo = ppwo + 4*nnb*nnb
609 END DO
610 END IF
611 END DO
612
613
614
615 cola = n - jcol - nnb + 1
616 j = ihi - nblst + 1
617 CALL dgemm(
'Transpose',
'No Transpose', nblst,
618 $ cola, nblst, one, work, nblst,
619 $ a( j, jcol+nnb ), lda, zero, work( pw ),
620 $ nblst )
621 CALL dlacpy(
'All', nblst, cola, work( pw ), nblst,
622 $ a( j, jcol+nnb ), lda )
623 ppwo = nblst*nblst + 1
624 j0 = j - nnb
625 DO j = j0, jcol+1, -nnb
626 IF ( blk22 ) THEN
627
628
629
630
631
632
633
634
635
636
637 CALL dorm22(
'Left',
'Transpose', 2*nnb, cola, nnb,
638 $ nnb, work( ppwo ), 2*nnb,
639 $ a( j, jcol+nnb ), lda, work( pw ),
640 $ lwork-pw+1, ierr )
641 ELSE
642
643
644
645 CALL dgemm(
'Transpose',
'No Transpose', 2*nnb,
646 $ cola, 2*nnb, one, work( ppwo ), 2*nnb,
647 $ a( j, jcol+nnb ), lda, zero, work( pw ),
648 $ 2*nnb )
649 CALL dlacpy(
'All', 2*nnb, cola, work( pw ), 2*nnb,
650 $ a( j, jcol+nnb ), lda )
651 END IF
652 ppwo = ppwo + 4*nnb*nnb
653 END DO
654
655
656
657 IF( wantq ) THEN
658 j = ihi - nblst + 1
659 IF ( initq ) THEN
660 topq = max( 2, j - jcol + 1 )
661 nh = ihi - topq + 1
662 ELSE
663 topq = 1
664 nh = n
665 END IF
666 CALL dgemm(
'No Transpose',
'No Transpose', nh,
667 $ nblst, nblst, one, q( topq, j ), ldq,
668 $ work, nblst, zero, work( pw ), nh )
669 CALL dlacpy(
'All', nh, nblst, work( pw ), nh,
670 $ q( topq, j ), ldq )
671 ppwo = nblst*nblst + 1
672 j0 = j - nnb
673 DO j = j0, jcol+1, -nnb
674 IF ( initq ) THEN
675 topq = max( 2, j - jcol + 1 )
676 nh = ihi - topq + 1
677 END IF
678 IF ( blk22 ) THEN
679
680
681
682 CALL dorm22(
'Right',
'No Transpose', nh, 2*nnb,
683 $ nnb, nnb, work( ppwo ), 2*nnb,
684 $ q( topq, j ), ldq, work( pw ),
685 $ lwork-pw+1, ierr )
686 ELSE
687
688
689
690 CALL dgemm(
'No Transpose',
'No Transpose', nh,
691 $ 2*nnb, 2*nnb, one, q( topq, j ), ldq,
692 $ work( ppwo ), 2*nnb, zero, work( pw ),
693 $ nh )
694 CALL dlacpy(
'All', nh, 2*nnb, work( pw ), nh,
695 $ q( topq, j ), ldq )
696 END IF
697 ppwo = ppwo + 4*nnb*nnb
698 END DO
699 END IF
700
701
702
703 IF ( wantz .OR. top.GT.0 ) THEN
704
705
706
707
708 CALL dlaset(
'All', nblst, nblst, zero, one, work,
709 $ nblst )
710 pw = nblst * nblst + 1
711 DO i = 1, n2nb
712 CALL dlaset(
'All', 2*nnb, 2*nnb, zero, one,
713 $ work( pw ), 2*nnb )
714 pw = pw + 4*nnb*nnb
715 END DO
716
717
718
719 DO j = jcol, jcol+nnb-1
720 ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1
721 len = 2 + j - jcol
722 jrow = j + n2nb*nnb + 2
723 DO i = ihi, jrow, -1
724 c = a( i, j )
725 a( i, j ) = zero
726 s = b( i, j )
727 b( i, j ) = zero
728 DO jj = ppw, ppw+len-1
729 temp = work( jj + nblst )
730 work( jj + nblst ) = c*temp - s*work( jj )
731 work( jj ) = s*temp + c*work( jj )
732 END DO
733 len = len + 1
734 ppw = ppw - nblst - 1
735 END DO
736
737 ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb
738 j0 = jrow - nnb
739 DO jrow = j0, j+2, -nnb
740 ppw = ppwo
741 len = 2 + j - jcol
742 DO i = jrow+nnb-1, jrow, -1
743 c = a( i, j )
744 a( i, j ) = zero
745 s = b( i, j )
746 b( i, j ) = zero
747 DO jj = ppw, ppw+len-1
748 temp = work( jj + 2*nnb )
749 work( jj + 2*nnb ) = c*temp - s*work( jj )
750 work( jj ) = s*temp + c*work( jj )
751 END DO
752 len = len + 1
753 ppw = ppw - 2*nnb - 1
754 END DO
755 ppwo = ppwo + 4*nnb*nnb
756 END DO
757 END DO
758 ELSE
759
760 CALL dlaset(
'Lower', ihi - jcol - 1, nnb, zero, zero,
761 $ a( jcol + 2, jcol ), lda )
762 CALL dlaset(
'Lower', ihi - jcol - 1, nnb, zero, zero,
763 $ b( jcol + 2, jcol ), ldb )
764 END IF
765
766
767
768 IF ( top.GT.0 ) THEN
769 j = ihi - nblst + 1
770 CALL dgemm(
'No Transpose',
'No Transpose', top,
771 $ nblst, nblst, one, a( 1, j ), lda,
772 $ work, nblst, zero, work( pw ), top )
773 CALL dlacpy(
'All', top, nblst, work( pw ), top,
774 $ a( 1, j ), lda )
775 ppwo = nblst*nblst + 1
776 j0 = j - nnb
777 DO j = j0, jcol+1, -nnb
778 IF ( blk22 ) THEN
779
780
781
782 CALL dorm22(
'Right',
'No Transpose', top,
783 $ 2*nnb,
784 $ nnb, nnb, work( ppwo ), 2*nnb,
785 $ a( 1, j ), lda, work( pw ),
786 $ lwork-pw+1, ierr )
787 ELSE
788
789
790
791 CALL dgemm(
'No Transpose',
'No Transpose', top,
792 $ 2*nnb, 2*nnb, one, a( 1, j ), lda,
793 $ work( ppwo ), 2*nnb, zero,
794 $ work( pw ), top )
795 CALL dlacpy(
'All', top, 2*nnb, work( pw ), top,
796 $ a( 1, j ), lda )
797 END IF
798 ppwo = ppwo + 4*nnb*nnb
799 END DO
800
801 j = ihi - nblst + 1
802 CALL dgemm(
'No Transpose',
'No Transpose', top,
803 $ nblst, nblst, one, b( 1, j ), ldb,
804 $ work, nblst, zero, work( pw ), top )
805 CALL dlacpy(
'All', top, nblst, work( pw ), top,
806 $ b( 1, j ), ldb )
807 ppwo = nblst*nblst + 1
808 j0 = j - nnb
809 DO j = j0, jcol+1, -nnb
810 IF ( blk22 ) THEN
811
812
813
814 CALL dorm22(
'Right',
'No Transpose', top,
815 $ 2*nnb,
816 $ nnb, nnb, work( ppwo ), 2*nnb,
817 $ b( 1, j ), ldb, work( pw ),
818 $ lwork-pw+1, ierr )
819 ELSE
820
821
822
823 CALL dgemm(
'No Transpose',
'No Transpose', top,
824 $ 2*nnb, 2*nnb, one, b( 1, j ), ldb,
825 $ work( ppwo ), 2*nnb, zero,
826 $ work( pw ), top )
827 CALL dlacpy(
'All', top, 2*nnb, work( pw ), top,
828 $ b( 1, j ), ldb )
829 END IF
830 ppwo = ppwo + 4*nnb*nnb
831 END DO
832 END IF
833
834
835
836 IF( wantz ) THEN
837 j = ihi - nblst + 1
838 IF ( initq ) THEN
839 topq = max( 2, j - jcol + 1 )
840 nh = ihi - topq + 1
841 ELSE
842 topq = 1
843 nh = n
844 END IF
845 CALL dgemm(
'No Transpose',
'No Transpose', nh,
846 $ nblst, nblst, one, z( topq, j ), ldz,
847 $ work, nblst, zero, work( pw ), nh )
848 CALL dlacpy(
'All', nh, nblst, work( pw ), nh,
849 $ z( topq, j ), ldz )
850 ppwo = nblst*nblst + 1
851 j0 = j - nnb
852 DO j = j0, jcol+1, -nnb
853 IF ( initq ) THEN
854 topq = max( 2, j - jcol + 1 )
855 nh = ihi - topq + 1
856 END IF
857 IF ( blk22 ) THEN
858
859
860
861 CALL dorm22(
'Right',
'No Transpose', nh, 2*nnb,
862 $ nnb, nnb, work( ppwo ), 2*nnb,
863 $ z( topq, j ), ldz, work( pw ),
864 $ lwork-pw+1, ierr )
865 ELSE
866
867
868
869 CALL dgemm(
'No Transpose',
'No Transpose', nh,
870 $ 2*nnb, 2*nnb, one, z( topq, j ), ldz,
871 $ work( ppwo ), 2*nnb, zero, work( pw ),
872 $ nh )
873 CALL dlacpy(
'All', nh, 2*nnb, work( pw ), nh,
874 $ z( topq, j ), ldz )
875 END IF
876 ppwo = ppwo + 4*nnb*nnb
877 END DO
878 END IF
879 END DO
880 END IF
881
882
883
884
885 compq2 = compq
886 compz2 = compz
887 IF ( jcol.NE.ilo ) THEN
888 IF ( wantq )
889 $ compq2 = 'V'
890 IF ( wantz )
891 $ compz2 = 'V'
892 END IF
893
894 IF ( jcol.LT.ihi )
895 $
CALL dgghrd( compq2, compz2, n, jcol, ihi, a, lda, b, ldb,
896 $ q,
897 $ ldq, z, ldz, ierr )
898
899 work( 1 ) = dble( lwkopt )
900
901 RETURN
902
903
904
subroutine xerbla(srname, info)
subroutine dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
DGEMM
subroutine dgemv(trans, m, n, alpha, a, lda, x, incx, beta, y, incy)
DGEMV
subroutine dgghrd(compq, compz, n, ilo, ihi, a, lda, b, ldb, q, ldq, z, ldz, info)
DGGHRD
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
subroutine dlacpy(uplo, m, n, a, lda, b, ldb)
DLACPY copies all or part of one two-dimensional array to another.
subroutine dlartg(f, g, c, s, r)
DLARTG generates a plane rotation with real cosine and real sine.
subroutine dlaset(uplo, m, n, alpha, beta, a, lda)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
logical function lsame(ca, cb)
LSAME
subroutine drot(n, dx, incx, dy, incy, c, s)
DROT
subroutine dtrmv(uplo, trans, diag, n, a, lda, x, incx)
DTRMV
subroutine dorm22(side, trans, m, n, n1, n2, q, ldq, c, ldc, work, lwork, info)
DORM22 multiplies a general matrix by a banded orthogonal matrix.