229
230
231
232
233
234 IMPLICIT NONE
235
236
237 CHARACTER COMPQ, COMPZ
238 INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK
239
240
241 REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
242 $ Z( LDZ, * ), WORK( * )
243
244
245
246
247
248 REAL ZERO, ONE
249 parameter( zero = 0.0e+0, one = 1.0e+0 )
250
251
252 LOGICAL BLK22, INITQ, INITZ, LQUERY, WANTQ, WANTZ
253 CHARACTER*1 COMPQ2, COMPZ2
254 INTEGER COLA, I, IERR, J, J0, JCOL, JJ, JROW, K,
255 $ KACC22, LEN, LWKOPT, N2NB, NB, NBLST, NBMIN,
256 $ NH, NNB, NX, PPW, PPWO, PW, TOP, TOPQ
257 REAL C, C1, C2, S, S1, S2, TEMP, TEMP1, TEMP2, TEMP3
258
259
260 LOGICAL LSAME
261 INTEGER ILAENV
262 REAL SROUNDUP_LWORK
264
265
269
270
271 INTRINSIC max
272
273
274
275
276
277 info = 0
278 nb =
ilaenv( 1,
'SGGHD3',
' ', n, ilo, ihi, -1 )
279 nh = ihi - ilo + 1
280 IF( nh.LE.1 ) THEN
281 lwkopt = 1
282 ELSE
283 lwkopt = 6*n*nb
284 END IF
286 initq =
lsame( compq,
'I' )
287 wantq = initq .OR.
lsame( compq,
'V' )
288 initz =
lsame( compz,
'I' )
289 wantz = initz .OR.
lsame( compz,
'V' )
290 lquery = ( lwork.EQ.-1 )
291
292 IF( .NOT.
lsame( compq,
'N' ) .AND. .NOT.wantq )
THEN
293 info = -1
294 ELSE IF( .NOT.
lsame( compz,
'N' ) .AND. .NOT.wantz )
THEN
295 info = -2
296 ELSE IF( n.LT.0 ) THEN
297 info = -3
298 ELSE IF( ilo.LT.1 ) THEN
299 info = -4
300 ELSE IF( ihi.GT.n .OR. ihi.LT.ilo-1 ) THEN
301 info = -5
302 ELSE IF( lda.LT.max( 1, n ) ) THEN
303 info = -7
304 ELSE IF( ldb.LT.max( 1, n ) ) THEN
305 info = -9
306 ELSE IF( ( wantq .AND. ldq.LT.n ) .OR. ldq.LT.1 ) THEN
307 info = -11
308 ELSE IF( ( wantz .AND. ldz.LT.n ) .OR. ldz.LT.1 ) THEN
309 info = -13
310 ELSE IF( lwork.LT.1 .AND. .NOT.lquery ) THEN
311 info = -15
312 END IF
313 IF( info.NE.0 ) THEN
314 CALL xerbla(
'SGGHD3', -info )
315 RETURN
316 ELSE IF( lquery ) THEN
317 RETURN
318 END IF
319
320
321
322 IF( initq )
323 $
CALL slaset(
'All', n, n, zero, one, q, ldq )
324 IF( initz )
325 $
CALL slaset(
'All', n, n, zero, one, z, ldz )
326
327
328
329 IF( n.GT.1 )
330 $
CALL slaset(
'Lower', n-1, n-1, zero, zero, b(2, 1), ldb )
331
332
333
334 IF( nh.LE.1 ) THEN
335 work( 1 ) = one
336 RETURN
337 END IF
338
339
340
341 nbmin =
ilaenv( 2,
'SGGHD3',
' ', n, ilo, ihi, -1 )
342 IF( nb.GT.1 .AND. nb.LT.nh ) THEN
343
344
345
346 nx = max( nb,
ilaenv( 3,
'SGGHD3',
' ', n, ilo, ihi, -1 ) )
347 IF( nx.LT.nh ) THEN
348
349
350
351 IF( lwork.LT.lwkopt ) THEN
352
353
354
355
356
357 nbmin = max( 2,
ilaenv( 2,
'SGGHD3',
' ', n, ilo, ihi,
358 $ -1 ) )
359 IF( lwork.GE.6*n*nbmin ) THEN
360 nb = lwork / ( 6*n )
361 ELSE
362 nb = 1
363 END IF
364 END IF
365 END IF
366 END IF
367
368 IF( nb.LT.nbmin .OR. nb.GE.nh ) THEN
369
370
371
372 jcol = ilo
373
374 ELSE
375
376
377
378 kacc22 =
ilaenv( 16,
'SGGHD3',
' ', n, ilo, ihi, -1 )
379 blk22 = kacc22.EQ.2
380 DO jcol = ilo, ihi-2, nb
381 nnb = min( nb, ihi-jcol-1 )
382
383
384
385
386
387
388
389 n2nb = ( ihi-jcol-1 ) / nnb - 1
390 nblst = ihi - jcol - n2nb*nnb
391 CALL slaset(
'All', nblst, nblst, zero, one, work,
392 $ nblst )
393 pw = nblst * nblst + 1
394 DO i = 1, n2nb
395 CALL slaset(
'All', 2*nnb, 2*nnb, zero, one,
396 $ work( pw ), 2*nnb )
397 pw = pw + 4*nnb*nnb
398 END DO
399
400
401
402 DO j = jcol, jcol+nnb-1
403
404
405
406
407 DO i = ihi, j+2, -1
408 temp = a( i-1, j )
409 CALL slartg( temp, a( i, j ), c, s, a( i-1, j ) )
410 a( i, j ) = c
411 b( i, j ) = s
412 END DO
413
414
415
416 ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1
417 len = 2 + j - jcol
418 jrow = j + n2nb*nnb + 2
419 DO i = ihi, jrow, -1
420 c = a( i, j )
421 s = b( i, j )
422 DO jj = ppw, ppw+len-1
423 temp = work( jj + nblst )
424 work( jj + nblst ) = c*temp - s*work( jj )
425 work( jj ) = s*temp + c*work( jj )
426 END DO
427 len = len + 1
428 ppw = ppw - nblst - 1
429 END DO
430
431 ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb
432 j0 = jrow - nnb
433 DO jrow = j0, j+2, -nnb
434 ppw = ppwo
435 len = 2 + j - jcol
436 DO i = jrow+nnb-1, jrow, -1
437 c = a( i, j )
438 s = b( i, j )
439 DO jj = ppw, ppw+len-1
440 temp = work( jj + 2*nnb )
441 work( jj + 2*nnb ) = c*temp - s*work( jj )
442 work( jj ) = s*temp + c*work( jj )
443 END DO
444 len = len + 1
445 ppw = ppw - 2*nnb - 1
446 END DO
447 ppwo = ppwo + 4*nnb*nnb
448 END DO
449
450
451
452
453 IF( jcol.LE.2 ) THEN
454 top = 0
455 ELSE
456 top = jcol
457 END IF
458
459
460
461
462 DO jj = n, j+1, -1
463
464
465
466 DO i = min( jj+1, ihi ), j+2, -1
467 c = a( i, j )
468 s = b( i, j )
469 temp = b( i, jj )
470 b( i, jj ) = c*temp - s*b( i-1, jj )
471 b( i-1, jj ) = s*temp + c*b( i-1, jj )
472 END DO
473
474
475
476 IF( jj.LT.ihi ) THEN
477 temp = b( jj+1, jj+1 )
478 CALL slartg( temp, b( jj+1, jj ), c, s,
479 $ b( jj+1, jj+1 ) )
480 b( jj+1, jj ) = zero
481 CALL srot( jj-top, b( top+1, jj+1 ), 1,
482 $ b( top+1, jj ), 1, c, s )
483 a( jj+1, j ) = c
484 b( jj+1, j ) = -s
485 END IF
486 END DO
487
488
489
490
491
492
493
494
495 jj = mod( ihi-j-1, 3 )
496 DO i = ihi-j-3, jj+1, -3
497 c = a( j+1+i, j )
498 s = -b( j+1+i, j )
499 c1 = a( j+2+i, j )
500 s1 = -b( j+2+i, j )
501 c2 = a( j+3+i, j )
502 s2 = -b( j+3+i, j )
503
504 DO k = top+1, ihi
505 temp = a( k, j+i )
506 temp1 = a( k, j+i+1 )
507 temp2 = a( k, j+i+2 )
508 temp3 = a( k, j+i+3 )
509 a( k, j+i+3 ) = c2*temp3 + s2*temp2
510 temp2 = -s2*temp3 + c2*temp2
511 a( k, j+i+2 ) = c1*temp2 + s1*temp1
512 temp1 = -s1*temp2 + c1*temp1
513 a( k, j+i+1 ) = c*temp1 + s*temp
514 a( k, j+i ) = -s*temp1 + c*temp
515 END DO
516 END DO
517
518 IF( jj.GT.0 ) THEN
519 DO i = jj, 1, -1
520 CALL srot( ihi-top, a( top+1, j+i+1 ), 1,
521 $ a( top+1, j+i ), 1, a( j+1+i, j ),
522 $ -b( j+1+i, j ) )
523 END DO
524 END IF
525
526
527
528 IF ( j .LT. jcol + nnb - 1 ) THEN
529 len = 1 + j - jcol
530
531
532
533
534
535
536
537
538
539
540
541 jrow = ihi - nblst + 1
542 CALL sgemv(
'Transpose', nblst, len, one, work,
543 $ nblst, a( jrow, j+1 ), 1, zero,
544 $ work( pw ), 1 )
545 ppw = pw + len
546 DO i = jrow, jrow+nblst-len-1
547 work( ppw ) = a( i, j+1 )
548 ppw = ppw + 1
549 END DO
550 CALL strmv(
'Lower',
'Transpose',
'Non-unit',
551 $ nblst-len, work( len*nblst + 1 ), nblst,
552 $ work( pw+len ), 1 )
553 CALL sgemv(
'Transpose', len, nblst-len, one,
554 $ work( (len+1)*nblst - len + 1 ), nblst,
555 $ a( jrow+nblst-len, j+1 ), 1, one,
556 $ work( pw+len ), 1 )
557 ppw = pw
558 DO i = jrow, jrow+nblst-1
559 a( i, j+1 ) = work( ppw )
560 ppw = ppw + 1
561 END DO
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576 ppwo = 1 + nblst*nblst
577 j0 = jrow - nnb
578 DO jrow = j0, jcol+1, -nnb
579 ppw = pw + len
580 DO i = jrow, jrow+nnb-1
581 work( ppw ) = a( i, j+1 )
582 ppw = ppw + 1
583 END DO
584 ppw = pw
585 DO i = jrow+nnb, jrow+nnb+len-1
586 work( ppw ) = a( i, j+1 )
587 ppw = ppw + 1
588 END DO
589 CALL strmv(
'Upper',
'Transpose',
'Non-unit',
590 $ len,
591 $ work( ppwo + nnb ), 2*nnb, work( pw ),
592 $ 1 )
593 CALL strmv(
'Lower',
'Transpose',
'Non-unit',
594 $ nnb,
595 $ work( ppwo + 2*len*nnb ),
596 $ 2*nnb, work( pw + len ), 1 )
597 CALL sgemv(
'Transpose', nnb, len, one,
598 $ work( ppwo ), 2*nnb, a( jrow, j+1 ), 1,
599 $ one, work( pw ), 1 )
600 CALL sgemv(
'Transpose', len, nnb, one,
601 $ work( ppwo + 2*len*nnb + nnb ), 2*nnb,
602 $ a( jrow+nnb, j+1 ), 1, one,
603 $ work( pw+len ), 1 )
604 ppw = pw
605 DO i = jrow, jrow+len+nnb-1
606 a( i, j+1 ) = work( ppw )
607 ppw = ppw + 1
608 END DO
609 ppwo = ppwo + 4*nnb*nnb
610 END DO
611 END IF
612 END DO
613
614
615
616 cola = n - jcol - nnb + 1
617 j = ihi - nblst + 1
618 CALL sgemm(
'Transpose',
'No Transpose', nblst,
619 $ cola, nblst, one, work, nblst,
620 $ a( j, jcol+nnb ), lda, zero, work( pw ),
621 $ nblst )
622 CALL slacpy(
'All', nblst, cola, work( pw ), nblst,
623 $ a( j, jcol+nnb ), lda )
624 ppwo = nblst*nblst + 1
625 j0 = j - nnb
626 DO j = j0, jcol+1, -nnb
627 IF ( blk22 ) THEN
628
629
630
631
632
633
634
635
636
637
638 CALL sorm22(
'Left',
'Transpose', 2*nnb, cola, nnb,
639 $ nnb, work( ppwo ), 2*nnb,
640 $ a( j, jcol+nnb ), lda, work( pw ),
641 $ lwork-pw+1, ierr )
642 ELSE
643
644
645
646 CALL sgemm(
'Transpose',
'No Transpose', 2*nnb,
647 $ cola, 2*nnb, one, work( ppwo ), 2*nnb,
648 $ a( j, jcol+nnb ), lda, zero, work( pw ),
649 $ 2*nnb )
650 CALL slacpy(
'All', 2*nnb, cola, work( pw ), 2*nnb,
651 $ a( j, jcol+nnb ), lda )
652 END IF
653 ppwo = ppwo + 4*nnb*nnb
654 END DO
655
656
657
658 IF( wantq ) THEN
659 j = ihi - nblst + 1
660 IF ( initq ) THEN
661 topq = max( 2, j - jcol + 1 )
662 nh = ihi - topq + 1
663 ELSE
664 topq = 1
665 nh = n
666 END IF
667 CALL sgemm(
'No Transpose',
'No Transpose', nh,
668 $ nblst, nblst, one, q( topq, j ), ldq,
669 $ work, nblst, zero, work( pw ), nh )
670 CALL slacpy(
'All', nh, nblst, work( pw ), nh,
671 $ q( topq, j ), ldq )
672 ppwo = nblst*nblst + 1
673 j0 = j - nnb
674 DO j = j0, jcol+1, -nnb
675 IF ( initq ) THEN
676 topq = max( 2, j - jcol + 1 )
677 nh = ihi - topq + 1
678 END IF
679 IF ( blk22 ) THEN
680
681
682
683 CALL sorm22(
'Right',
'No Transpose', nh, 2*nnb,
684 $ nnb, nnb, work( ppwo ), 2*nnb,
685 $ q( topq, j ), ldq, work( pw ),
686 $ lwork-pw+1, ierr )
687 ELSE
688
689
690
691 CALL sgemm(
'No Transpose',
'No Transpose', nh,
692 $ 2*nnb, 2*nnb, one, q( topq, j ), ldq,
693 $ work( ppwo ), 2*nnb, zero, work( pw ),
694 $ nh )
695 CALL slacpy(
'All', nh, 2*nnb, work( pw ), nh,
696 $ q( topq, j ), ldq )
697 END IF
698 ppwo = ppwo + 4*nnb*nnb
699 END DO
700 END IF
701
702
703
704 IF ( wantz .OR. top.GT.0 ) THEN
705
706
707
708
709 CALL slaset(
'All', nblst, nblst, zero, one, work,
710 $ nblst )
711 pw = nblst * nblst + 1
712 DO i = 1, n2nb
713 CALL slaset(
'All', 2*nnb, 2*nnb, zero, one,
714 $ work( pw ), 2*nnb )
715 pw = pw + 4*nnb*nnb
716 END DO
717
718
719
720 DO j = jcol, jcol+nnb-1
721 ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1
722 len = 2 + j - jcol
723 jrow = j + n2nb*nnb + 2
724 DO i = ihi, jrow, -1
725 c = a( i, j )
726 a( i, j ) = zero
727 s = b( i, j )
728 b( i, j ) = zero
729 DO jj = ppw, ppw+len-1
730 temp = work( jj + nblst )
731 work( jj + nblst ) = c*temp - s*work( jj )
732 work( jj ) = s*temp + c*work( jj )
733 END DO
734 len = len + 1
735 ppw = ppw - nblst - 1
736 END DO
737
738 ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb
739 j0 = jrow - nnb
740 DO jrow = j0, j+2, -nnb
741 ppw = ppwo
742 len = 2 + j - jcol
743 DO i = jrow+nnb-1, jrow, -1
744 c = a( i, j )
745 a( i, j ) = zero
746 s = b( i, j )
747 b( i, j ) = zero
748 DO jj = ppw, ppw+len-1
749 temp = work( jj + 2*nnb )
750 work( jj + 2*nnb ) = c*temp - s*work( jj )
751 work( jj ) = s*temp + c*work( jj )
752 END DO
753 len = len + 1
754 ppw = ppw - 2*nnb - 1
755 END DO
756 ppwo = ppwo + 4*nnb*nnb
757 END DO
758 END DO
759 ELSE
760
761 CALL slaset(
'Lower', ihi - jcol - 1, nnb, zero, zero,
762 $ a( jcol + 2, jcol ), lda )
763 CALL slaset(
'Lower', ihi - jcol - 1, nnb, zero, zero,
764 $ b( jcol + 2, jcol ), ldb )
765 END IF
766
767
768
769 IF ( top.GT.0 ) THEN
770 j = ihi - nblst + 1
771 CALL sgemm(
'No Transpose',
'No Transpose', top,
772 $ nblst, nblst, one, a( 1, j ), lda,
773 $ work, nblst, zero, work( pw ), top )
774 CALL slacpy(
'All', top, nblst, work( pw ), top,
775 $ a( 1, j ), lda )
776 ppwo = nblst*nblst + 1
777 j0 = j - nnb
778 DO j = j0, jcol+1, -nnb
779 IF ( blk22 ) THEN
780
781
782
783 CALL sorm22(
'Right',
'No Transpose', top,
784 $ 2*nnb,
785 $ nnb, nnb, work( ppwo ), 2*nnb,
786 $ a( 1, j ), lda, work( pw ),
787 $ lwork-pw+1, ierr )
788 ELSE
789
790
791
792 CALL sgemm(
'No Transpose',
'No Transpose', top,
793 $ 2*nnb, 2*nnb, one, a( 1, j ), lda,
794 $ work( ppwo ), 2*nnb, zero,
795 $ work( pw ), top )
796 CALL slacpy(
'All', top, 2*nnb, work( pw ), top,
797 $ a( 1, j ), lda )
798 END IF
799 ppwo = ppwo + 4*nnb*nnb
800 END DO
801
802 j = ihi - nblst + 1
803 CALL sgemm(
'No Transpose',
'No Transpose', top,
804 $ nblst, nblst, one, b( 1, j ), ldb,
805 $ work, nblst, zero, work( pw ), top )
806 CALL slacpy(
'All', top, nblst, work( pw ), top,
807 $ b( 1, j ), ldb )
808 ppwo = nblst*nblst + 1
809 j0 = j - nnb
810 DO j = j0, jcol+1, -nnb
811 IF ( blk22 ) THEN
812
813
814
815 CALL sorm22(
'Right',
'No Transpose', top,
816 $ 2*nnb,
817 $ nnb, nnb, work( ppwo ), 2*nnb,
818 $ b( 1, j ), ldb, work( pw ),
819 $ lwork-pw+1, ierr )
820 ELSE
821
822
823
824 CALL sgemm(
'No Transpose',
'No Transpose', top,
825 $ 2*nnb, 2*nnb, one, b( 1, j ), ldb,
826 $ work( ppwo ), 2*nnb, zero,
827 $ work( pw ), top )
828 CALL slacpy(
'All', top, 2*nnb, work( pw ), top,
829 $ b( 1, j ), ldb )
830 END IF
831 ppwo = ppwo + 4*nnb*nnb
832 END DO
833 END IF
834
835
836
837 IF( wantz ) THEN
838 j = ihi - nblst + 1
839 IF ( initq ) THEN
840 topq = max( 2, j - jcol + 1 )
841 nh = ihi - topq + 1
842 ELSE
843 topq = 1
844 nh = n
845 END IF
846 CALL sgemm(
'No Transpose',
'No Transpose', nh,
847 $ nblst, nblst, one, z( topq, j ), ldz,
848 $ work, nblst, zero, work( pw ), nh )
849 CALL slacpy(
'All', nh, nblst, work( pw ), nh,
850 $ z( topq, j ), ldz )
851 ppwo = nblst*nblst + 1
852 j0 = j - nnb
853 DO j = j0, jcol+1, -nnb
854 IF ( initq ) THEN
855 topq = max( 2, j - jcol + 1 )
856 nh = ihi - topq + 1
857 END IF
858 IF ( blk22 ) THEN
859
860
861
862 CALL sorm22(
'Right',
'No Transpose', nh, 2*nnb,
863 $ nnb, nnb, work( ppwo ), 2*nnb,
864 $ z( topq, j ), ldz, work( pw ),
865 $ lwork-pw+1, ierr )
866 ELSE
867
868
869
870 CALL sgemm(
'No Transpose',
'No Transpose', nh,
871 $ 2*nnb, 2*nnb, one, z( topq, j ), ldz,
872 $ work( ppwo ), 2*nnb, zero, work( pw ),
873 $ nh )
874 CALL slacpy(
'All', nh, 2*nnb, work( pw ), nh,
875 $ z( topq, j ), ldz )
876 END IF
877 ppwo = ppwo + 4*nnb*nnb
878 END DO
879 END IF
880 END DO
881 END IF
882
883
884
885
886 compq2 = compq
887 compz2 = compz
888 IF ( jcol.NE.ilo ) THEN
889 IF ( wantq )
890 $ compq2 = 'V'
891 IF ( wantz )
892 $ compz2 = 'V'
893 END IF
894
895 IF ( jcol.LT.ihi )
896 $
CALL sgghrd( compq2, compz2, n, jcol, ihi, a, lda, b, ldb,
897 $ q,
898 $ ldq, z, ldz, ierr )
899
901
902 RETURN
903
904
905
subroutine xerbla(srname, info)
subroutine sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
SGEMM
subroutine sgemv(trans, m, n, alpha, a, lda, x, incx, beta, y, incy)
SGEMV
subroutine sgghrd(compq, compz, n, ilo, ihi, a, lda, b, ldb, q, ldq, z, ldz, info)
SGGHRD
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
subroutine slacpy(uplo, m, n, a, lda, b, ldb)
SLACPY copies all or part of one two-dimensional array to another.
subroutine slartg(f, g, c, s, r)
SLARTG generates a plane rotation with real cosine and real sine.
subroutine slaset(uplo, m, n, alpha, beta, a, lda)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
logical function lsame(ca, cb)
LSAME
subroutine srot(n, sx, incx, sy, incy, c, s)
SROT
real function sroundup_lwork(lwork)
SROUNDUP_LWORK
subroutine strmv(uplo, trans, diag, n, a, lda, x, incx)
STRMV
subroutine sorm22(side, trans, m, n, n1, n2, q, ldq, c, ldc, work, lwork, info)
SORM22 multiplies a general matrix by a banded orthogonal matrix.