230
231
232
233
234
235 IMPLICIT NONE
236
237
238 CHARACTER COMPQ, COMPZ
239 INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK
240
241
242 DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
243 $ Z( LDZ, * ), WORK( * )
244
245
246
247
248
249 DOUBLE PRECISION ZERO, ONE
250 parameter( zero = 0.0d+0, one = 1.0d+0 )
251
252
253 LOGICAL BLK22, INITQ, INITZ, LQUERY, WANTQ, WANTZ
254 CHARACTER*1 COMPQ2, COMPZ2
255 INTEGER COLA, I, IERR, J, J0, JCOL, JJ, JROW, K,
256 $ KACC22, LEN, LWKOPT, N2NB, NB, NBLST, NBMIN,
257 $ NH, NNB, NX, PPW, PPWO, PW, TOP, TOPQ
258 DOUBLE PRECISION C, C1, C2, S, S1, S2, TEMP, TEMP1, TEMP2, TEMP3
259
260
261 LOGICAL LSAME
262 INTEGER ILAENV
264
265
268
269
270 INTRINSIC dble, max
271
272
273
274
275
276 info = 0
277 nb =
ilaenv( 1,
'DGGHD3',
' ', n, ilo, ihi, -1 )
278 lwkopt = max( 6*n*nb, 1 )
279 work( 1 ) = dble( lwkopt )
280 initq =
lsame( compq,
'I' )
281 wantq = initq .OR.
lsame( compq,
'V' )
282 initz =
lsame( compz,
'I' )
283 wantz = initz .OR.
lsame( compz,
'V' )
284 lquery = ( lwork.EQ.-1 )
285
286 IF( .NOT.
lsame( compq,
'N' ) .AND. .NOT.wantq )
THEN
287 info = -1
288 ELSE IF( .NOT.
lsame( compz,
'N' ) .AND. .NOT.wantz )
THEN
289 info = -2
290 ELSE IF( n.LT.0 ) THEN
291 info = -3
292 ELSE IF( ilo.LT.1 ) THEN
293 info = -4
294 ELSE IF( ihi.GT.n .OR. ihi.LT.ilo-1 ) THEN
295 info = -5
296 ELSE IF( lda.LT.max( 1, n ) ) THEN
297 info = -7
298 ELSE IF( ldb.LT.max( 1, n ) ) THEN
299 info = -9
300 ELSE IF( ( wantq .AND. ldq.LT.n ) .OR. ldq.LT.1 ) THEN
301 info = -11
302 ELSE IF( ( wantz .AND. ldz.LT.n ) .OR. ldz.LT.1 ) THEN
303 info = -13
304 ELSE IF( lwork.LT.1 .AND. .NOT.lquery ) THEN
305 info = -15
306 END IF
307 IF( info.NE.0 ) THEN
308 CALL xerbla(
'DGGHD3', -info )
309 RETURN
310 ELSE IF( lquery ) THEN
311 RETURN
312 END IF
313
314
315
316 IF( initq )
317 $
CALL dlaset(
'All', n, n, zero, one, q, ldq )
318 IF( initz )
319 $
CALL dlaset(
'All', n, n, zero, one, z, ldz )
320
321
322
323 IF( n.GT.1 )
324 $
CALL dlaset(
'Lower', n-1, n-1, zero, zero, b(2, 1), ldb )
325
326
327
328 nh = ihi - ilo + 1
329 IF( nh.LE.1 ) THEN
330 work( 1 ) = one
331 RETURN
332 END IF
333
334
335
336 nbmin =
ilaenv( 2,
'DGGHD3',
' ', n, ilo, ihi, -1 )
337 IF( nb.GT.1 .AND. nb.LT.nh ) THEN
338
339
340
341 nx = max( nb,
ilaenv( 3,
'DGGHD3',
' ', n, ilo, ihi, -1 ) )
342 IF( nx.LT.nh ) THEN
343
344
345
346 IF( lwork.LT.lwkopt ) THEN
347
348
349
350
351
352 nbmin = max( 2,
ilaenv( 2,
'DGGHD3',
' ', n, ilo, ihi,
353 $ -1 ) )
354 IF( lwork.GE.6*n*nbmin ) THEN
355 nb = lwork / ( 6*n )
356 ELSE
357 nb = 1
358 END IF
359 END IF
360 END IF
361 END IF
362
363 IF( nb.LT.nbmin .OR. nb.GE.nh ) THEN
364
365
366
367 jcol = ilo
368
369 ELSE
370
371
372
373 kacc22 =
ilaenv( 16,
'DGGHD3',
' ', n, ilo, ihi, -1 )
374 blk22 = kacc22.EQ.2
375 DO jcol = ilo, ihi-2, nb
376 nnb = min( nb, ihi-jcol-1 )
377
378
379
380
381
382
383
384 n2nb = ( ihi-jcol-1 ) / nnb - 1
385 nblst = ihi - jcol - n2nb*nnb
386 CALL dlaset(
'All', nblst, nblst, zero, one, work, nblst )
387 pw = nblst * nblst + 1
388 DO i = 1, n2nb
389 CALL dlaset(
'All', 2*nnb, 2*nnb, zero, one,
390 $ work( pw ), 2*nnb )
391 pw = pw + 4*nnb*nnb
392 END DO
393
394
395
396 DO j = jcol, jcol+nnb-1
397
398
399
400
401 DO i = ihi, j+2, -1
402 temp = a( i-1, j )
403 CALL dlartg( temp, a( i, j ), c, s, a( i-1, j ) )
404 a( i, j ) = c
405 b( i, j ) = s
406 END DO
407
408
409
410 ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1
411 len = 2 + j - jcol
412 jrow = j + n2nb*nnb + 2
413 DO i = ihi, jrow, -1
414 c = a( i, j )
415 s = b( i, j )
416 DO jj = ppw, ppw+len-1
417 temp = work( jj + nblst )
418 work( jj + nblst ) = c*temp - s*work( jj )
419 work( jj ) = s*temp + c*work( jj )
420 END DO
421 len = len + 1
422 ppw = ppw - nblst - 1
423 END DO
424
425 ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb
426 j0 = jrow - nnb
427 DO jrow = j0, j+2, -nnb
428 ppw = ppwo
429 len = 2 + j - jcol
430 DO i = jrow+nnb-1, jrow, -1
431 c = a( i, j )
432 s = b( i, j )
433 DO jj = ppw, ppw+len-1
434 temp = work( jj + 2*nnb )
435 work( jj + 2*nnb ) = c*temp - s*work( jj )
436 work( jj ) = s*temp + c*work( jj )
437 END DO
438 len = len + 1
439 ppw = ppw - 2*nnb - 1
440 END DO
441 ppwo = ppwo + 4*nnb*nnb
442 END DO
443
444
445
446
447 IF( jcol.LE.2 ) THEN
448 top = 0
449 ELSE
450 top = jcol
451 END IF
452
453
454
455
456 DO jj = n, j+1, -1
457
458
459
460 DO i = min( jj+1, ihi ), j+2, -1
461 c = a( i, j )
462 s = b( i, j )
463 temp = b( i, jj )
464 b( i, jj ) = c*temp - s*b( i-1, jj )
465 b( i-1, jj ) = s*temp + c*b( i-1, jj )
466 END DO
467
468
469
470 IF( jj.LT.ihi ) THEN
471 temp = b( jj+1, jj+1 )
472 CALL dlartg( temp, b( jj+1, jj ), c, s,
473 $ b( jj+1, jj+1 ) )
474 b( jj+1, jj ) = zero
475 CALL drot( jj-top, b( top+1, jj+1 ), 1,
476 $ b( top+1, jj ), 1, c, s )
477 a( jj+1, j ) = c
478 b( jj+1, j ) = -s
479 END IF
480 END DO
481
482
483
484
485
486
487
488
489 jj = mod( ihi-j-1, 3 )
490 DO i = ihi-j-3, jj+1, -3
491 c = a( j+1+i, j )
492 s = -b( j+1+i, j )
493 c1 = a( j+2+i, j )
494 s1 = -b( j+2+i, j )
495 c2 = a( j+3+i, j )
496 s2 = -b( j+3+i, j )
497
498 DO k = top+1, ihi
499 temp = a( k, j+i )
500 temp1 = a( k, j+i+1 )
501 temp2 = a( k, j+i+2 )
502 temp3 = a( k, j+i+3 )
503 a( k, j+i+3 ) = c2*temp3 + s2*temp2
504 temp2 = -s2*temp3 + c2*temp2
505 a( k, j+i+2 ) = c1*temp2 + s1*temp1
506 temp1 = -s1*temp2 + c1*temp1
507 a( k, j+i+1 ) = c*temp1 + s*temp
508 a( k, j+i ) = -s*temp1 + c*temp
509 END DO
510 END DO
511
512 IF( jj.GT.0 ) THEN
513 DO i = jj, 1, -1
514 CALL drot( ihi-top, a( top+1, j+i+1 ), 1,
515 $ a( top+1, j+i ), 1, a( j+1+i, j ),
516 $ -b( j+1+i, j ) )
517 END DO
518 END IF
519
520
521
522 IF ( j .LT. jcol + nnb - 1 ) THEN
523 len = 1 + j - jcol
524
525
526
527
528
529
530
531
532
533
534
535 jrow = ihi - nblst + 1
536 CALL dgemv(
'Transpose', nblst, len, one, work,
537 $ nblst, a( jrow, j+1 ), 1, zero,
538 $ work( pw ), 1 )
539 ppw = pw + len
540 DO i = jrow, jrow+nblst-len-1
541 work( ppw ) = a( i, j+1 )
542 ppw = ppw + 1
543 END DO
544 CALL dtrmv(
'Lower',
'Transpose',
'Non-unit',
545 $ nblst-len, work( len*nblst + 1 ), nblst,
546 $ work( pw+len ), 1 )
547 CALL dgemv(
'Transpose', len, nblst-len, one,
548 $ work( (len+1)*nblst - len + 1 ), nblst,
549 $ a( jrow+nblst-len, j+1 ), 1, one,
550 $ work( pw+len ), 1 )
551 ppw = pw
552 DO i = jrow, jrow+nblst-1
553 a( i, j+1 ) = work( ppw )
554 ppw = ppw + 1
555 END DO
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570 ppwo = 1 + nblst*nblst
571 j0 = jrow - nnb
572 DO jrow = j0, jcol+1, -nnb
573 ppw = pw + len
574 DO i = jrow, jrow+nnb-1
575 work( ppw ) = a( i, j+1 )
576 ppw = ppw + 1
577 END DO
578 ppw = pw
579 DO i = jrow+nnb, jrow+nnb+len-1
580 work( ppw ) = a( i, j+1 )
581 ppw = ppw + 1
582 END DO
583 CALL dtrmv(
'Upper',
'Transpose',
'Non-unit', len,
584 $ work( ppwo + nnb ), 2*nnb, work( pw ),
585 $ 1 )
586 CALL dtrmv(
'Lower',
'Transpose',
'Non-unit', nnb,
587 $ work( ppwo + 2*len*nnb ),
588 $ 2*nnb, work( pw + len ), 1 )
589 CALL dgemv(
'Transpose', nnb, len, one,
590 $ work( ppwo ), 2*nnb, a( jrow, j+1 ), 1,
591 $ one, work( pw ), 1 )
592 CALL dgemv(
'Transpose', len, nnb, one,
593 $ work( ppwo + 2*len*nnb + nnb ), 2*nnb,
594 $ a( jrow+nnb, j+1 ), 1, one,
595 $ work( pw+len ), 1 )
596 ppw = pw
597 DO i = jrow, jrow+len+nnb-1
598 a( i, j+1 ) = work( ppw )
599 ppw = ppw + 1
600 END DO
601 ppwo = ppwo + 4*nnb*nnb
602 END DO
603 END IF
604 END DO
605
606
607
608 cola = n - jcol - nnb + 1
609 j = ihi - nblst + 1
610 CALL dgemm(
'Transpose',
'No Transpose', nblst,
611 $ cola, nblst, one, work, nblst,
612 $ a( j, jcol+nnb ), lda, zero, work( pw ),
613 $ nblst )
614 CALL dlacpy(
'All', nblst, cola, work( pw ), nblst,
615 $ a( j, jcol+nnb ), lda )
616 ppwo = nblst*nblst + 1
617 j0 = j - nnb
618 DO j = j0, jcol+1, -nnb
619 IF ( blk22 ) THEN
620
621
622
623
624
625
626
627
628
629
630 CALL dorm22(
'Left',
'Transpose', 2*nnb, cola, nnb,
631 $ nnb, work( ppwo ), 2*nnb,
632 $ a( j, jcol+nnb ), lda, work( pw ),
633 $ lwork-pw+1, ierr )
634 ELSE
635
636
637
638 CALL dgemm(
'Transpose',
'No Transpose', 2*nnb,
639 $ cola, 2*nnb, one, work( ppwo ), 2*nnb,
640 $ a( j, jcol+nnb ), lda, zero, work( pw ),
641 $ 2*nnb )
642 CALL dlacpy(
'All', 2*nnb, cola, work( pw ), 2*nnb,
643 $ a( j, jcol+nnb ), lda )
644 END IF
645 ppwo = ppwo + 4*nnb*nnb
646 END DO
647
648
649
650 IF( wantq ) THEN
651 j = ihi - nblst + 1
652 IF ( initq ) THEN
653 topq = max( 2, j - jcol + 1 )
654 nh = ihi - topq + 1
655 ELSE
656 topq = 1
657 nh = n
658 END IF
659 CALL dgemm(
'No Transpose',
'No Transpose', nh,
660 $ nblst, nblst, one, q( topq, j ), ldq,
661 $ work, nblst, zero, work( pw ), nh )
662 CALL dlacpy(
'All', nh, nblst, work( pw ), nh,
663 $ q( topq, j ), ldq )
664 ppwo = nblst*nblst + 1
665 j0 = j - nnb
666 DO j = j0, jcol+1, -nnb
667 IF ( initq ) THEN
668 topq = max( 2, j - jcol + 1 )
669 nh = ihi - topq + 1
670 END IF
671 IF ( blk22 ) THEN
672
673
674
675 CALL dorm22(
'Right',
'No Transpose', nh, 2*nnb,
676 $ nnb, nnb, work( ppwo ), 2*nnb,
677 $ q( topq, j ), ldq, work( pw ),
678 $ lwork-pw+1, ierr )
679 ELSE
680
681
682
683 CALL dgemm(
'No Transpose',
'No Transpose', nh,
684 $ 2*nnb, 2*nnb, one, q( topq, j ), ldq,
685 $ work( ppwo ), 2*nnb, zero, work( pw ),
686 $ nh )
687 CALL dlacpy(
'All', nh, 2*nnb, work( pw ), nh,
688 $ q( topq, j ), ldq )
689 END IF
690 ppwo = ppwo + 4*nnb*nnb
691 END DO
692 END IF
693
694
695
696 IF ( wantz .OR. top.GT.0 ) THEN
697
698
699
700
701 CALL dlaset(
'All', nblst, nblst, zero, one, work,
702 $ nblst )
703 pw = nblst * nblst + 1
704 DO i = 1, n2nb
705 CALL dlaset(
'All', 2*nnb, 2*nnb, zero, one,
706 $ work( pw ), 2*nnb )
707 pw = pw + 4*nnb*nnb
708 END DO
709
710
711
712 DO j = jcol, jcol+nnb-1
713 ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1
714 len = 2 + j - jcol
715 jrow = j + n2nb*nnb + 2
716 DO i = ihi, jrow, -1
717 c = a( i, j )
718 a( i, j ) = zero
719 s = b( i, j )
720 b( i, j ) = zero
721 DO jj = ppw, ppw+len-1
722 temp = work( jj + nblst )
723 work( jj + nblst ) = c*temp - s*work( jj )
724 work( jj ) = s*temp + c*work( jj )
725 END DO
726 len = len + 1
727 ppw = ppw - nblst - 1
728 END DO
729
730 ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb
731 j0 = jrow - nnb
732 DO jrow = j0, j+2, -nnb
733 ppw = ppwo
734 len = 2 + j - jcol
735 DO i = jrow+nnb-1, jrow, -1
736 c = a( i, j )
737 a( i, j ) = zero
738 s = b( i, j )
739 b( i, j ) = zero
740 DO jj = ppw, ppw+len-1
741 temp = work( jj + 2*nnb )
742 work( jj + 2*nnb ) = c*temp - s*work( jj )
743 work( jj ) = s*temp + c*work( jj )
744 END DO
745 len = len + 1
746 ppw = ppw - 2*nnb - 1
747 END DO
748 ppwo = ppwo + 4*nnb*nnb
749 END DO
750 END DO
751 ELSE
752
753 CALL dlaset(
'Lower', ihi - jcol - 1, nnb, zero, zero,
754 $ a( jcol + 2, jcol ), lda )
755 CALL dlaset(
'Lower', ihi - jcol - 1, nnb, zero, zero,
756 $ b( jcol + 2, jcol ), ldb )
757 END IF
758
759
760
761 IF ( top.GT.0 ) THEN
762 j = ihi - nblst + 1
763 CALL dgemm(
'No Transpose',
'No Transpose', top,
764 $ nblst, nblst, one, a( 1, j ), lda,
765 $ work, nblst, zero, work( pw ), top )
766 CALL dlacpy(
'All', top, nblst, work( pw ), top,
767 $ a( 1, j ), lda )
768 ppwo = nblst*nblst + 1
769 j0 = j - nnb
770 DO j = j0, jcol+1, -nnb
771 IF ( blk22 ) THEN
772
773
774
775 CALL dorm22(
'Right',
'No Transpose', top, 2*nnb,
776 $ nnb, nnb, work( ppwo ), 2*nnb,
777 $ a( 1, j ), lda, work( pw ),
778 $ lwork-pw+1, ierr )
779 ELSE
780
781
782
783 CALL dgemm(
'No Transpose',
'No Transpose', top,
784 $ 2*nnb, 2*nnb, one, a( 1, j ), lda,
785 $ work( ppwo ), 2*nnb, zero,
786 $ work( pw ), top )
787 CALL dlacpy(
'All', top, 2*nnb, work( pw ), top,
788 $ a( 1, j ), lda )
789 END IF
790 ppwo = ppwo + 4*nnb*nnb
791 END DO
792
793 j = ihi - nblst + 1
794 CALL dgemm(
'No Transpose',
'No Transpose', top,
795 $ nblst, nblst, one, b( 1, j ), ldb,
796 $ work, nblst, zero, work( pw ), top )
797 CALL dlacpy(
'All', top, nblst, work( pw ), top,
798 $ b( 1, j ), ldb )
799 ppwo = nblst*nblst + 1
800 j0 = j - nnb
801 DO j = j0, jcol+1, -nnb
802 IF ( blk22 ) THEN
803
804
805
806 CALL dorm22(
'Right',
'No Transpose', top, 2*nnb,
807 $ nnb, nnb, work( ppwo ), 2*nnb,
808 $ b( 1, j ), ldb, work( pw ),
809 $ lwork-pw+1, ierr )
810 ELSE
811
812
813
814 CALL dgemm(
'No Transpose',
'No Transpose', top,
815 $ 2*nnb, 2*nnb, one, b( 1, j ), ldb,
816 $ work( ppwo ), 2*nnb, zero,
817 $ work( pw ), top )
818 CALL dlacpy(
'All', top, 2*nnb, work( pw ), top,
819 $ b( 1, j ), ldb )
820 END IF
821 ppwo = ppwo + 4*nnb*nnb
822 END DO
823 END IF
824
825
826
827 IF( wantz ) THEN
828 j = ihi - nblst + 1
829 IF ( initq ) THEN
830 topq = max( 2, j - jcol + 1 )
831 nh = ihi - topq + 1
832 ELSE
833 topq = 1
834 nh = n
835 END IF
836 CALL dgemm(
'No Transpose',
'No Transpose', nh,
837 $ nblst, nblst, one, z( topq, j ), ldz,
838 $ work, nblst, zero, work( pw ), nh )
839 CALL dlacpy(
'All', nh, nblst, work( pw ), nh,
840 $ z( topq, j ), ldz )
841 ppwo = nblst*nblst + 1
842 j0 = j - nnb
843 DO j = j0, jcol+1, -nnb
844 IF ( initq ) THEN
845 topq = max( 2, j - jcol + 1 )
846 nh = ihi - topq + 1
847 END IF
848 IF ( blk22 ) THEN
849
850
851
852 CALL dorm22(
'Right',
'No Transpose', nh, 2*nnb,
853 $ nnb, nnb, work( ppwo ), 2*nnb,
854 $ z( topq, j ), ldz, work( pw ),
855 $ lwork-pw+1, ierr )
856 ELSE
857
858
859
860 CALL dgemm(
'No Transpose',
'No Transpose', nh,
861 $ 2*nnb, 2*nnb, one, z( topq, j ), ldz,
862 $ work( ppwo ), 2*nnb, zero, work( pw ),
863 $ nh )
864 CALL dlacpy(
'All', nh, 2*nnb, work( pw ), nh,
865 $ z( topq, j ), ldz )
866 END IF
867 ppwo = ppwo + 4*nnb*nnb
868 END DO
869 END IF
870 END DO
871 END IF
872
873
874
875
876 compq2 = compq
877 compz2 = compz
878 IF ( jcol.NE.ilo ) THEN
879 IF ( wantq )
880 $ compq2 = 'V'
881 IF ( wantz )
882 $ compz2 = 'V'
883 END IF
884
885 IF ( jcol.LT.ihi )
886 $
CALL dgghrd( compq2, compz2, n, jcol, ihi, a, lda, b, ldb, q,
887 $ ldq, z, ldz, ierr )
888 work( 1 ) = dble( lwkopt )
889
890 RETURN
891
892
893
subroutine xerbla(srname, info)
subroutine dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
DGEMM
subroutine dgemv(trans, m, n, alpha, a, lda, x, incx, beta, y, incy)
DGEMV
subroutine dgghrd(compq, compz, n, ilo, ihi, a, lda, b, ldb, q, ldq, z, ldz, info)
DGGHRD
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
subroutine dlacpy(uplo, m, n, a, lda, b, ldb)
DLACPY copies all or part of one two-dimensional array to another.
subroutine dlartg(f, g, c, s, r)
DLARTG generates a plane rotation with real cosine and real sine.
subroutine dlaset(uplo, m, n, alpha, beta, a, lda)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
logical function lsame(ca, cb)
LSAME
subroutine drot(n, dx, incx, dy, incy, c, s)
DROT
subroutine dtrmv(uplo, trans, diag, n, a, lda, x, incx)
DTRMV
subroutine dorm22(side, trans, m, n, n1, n2, q, ldq, c, ldc, work, lwork, info)
DORM22 multiplies a general matrix by a banded orthogonal matrix.