230
231
232
233
234
235 IMPLICIT NONE
236
237
238 CHARACTER COMPQ, COMPZ
239 INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK
240
241
242 REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
243 $ Z( LDZ, * ), WORK( * )
244
245
246
247
248
249 REAL ZERO, ONE
250 parameter( zero = 0.0e+0, one = 1.0e+0 )
251
252
253 LOGICAL BLK22, INITQ, INITZ, LQUERY, WANTQ, WANTZ
254 CHARACTER*1 COMPQ2, COMPZ2
255 INTEGER COLA, I, IERR, J, J0, JCOL, JJ, JROW, K,
256 $ KACC22, LEN, LWKOPT, N2NB, NB, NBLST, NBMIN,
257 $ NH, NNB, NX, PPW, PPWO, PW, TOP, TOPQ
258 REAL C, C1, C2, S, S1, S2, TEMP, TEMP1, TEMP2, TEMP3
259
260
261 LOGICAL LSAME
262 INTEGER ILAENV
263 REAL SROUNDUP_LWORK
265
266
269
270
271 INTRINSIC max
272
273
274
275
276
277 info = 0
278 nb =
ilaenv( 1,
'SGGHD3',
' ', n, ilo, ihi, -1 )
279 lwkopt = max( 6*n*nb, 1 )
281 initq =
lsame( compq,
'I' )
282 wantq = initq .OR.
lsame( compq,
'V' )
283 initz =
lsame( compz,
'I' )
284 wantz = initz .OR.
lsame( compz,
'V' )
285 lquery = ( lwork.EQ.-1 )
286
287 IF( .NOT.
lsame( compq,
'N' ) .AND. .NOT.wantq )
THEN
288 info = -1
289 ELSE IF( .NOT.
lsame( compz,
'N' ) .AND. .NOT.wantz )
THEN
290 info = -2
291 ELSE IF( n.LT.0 ) THEN
292 info = -3
293 ELSE IF( ilo.LT.1 ) THEN
294 info = -4
295 ELSE IF( ihi.GT.n .OR. ihi.LT.ilo-1 ) THEN
296 info = -5
297 ELSE IF( lda.LT.max( 1, n ) ) THEN
298 info = -7
299 ELSE IF( ldb.LT.max( 1, n ) ) THEN
300 info = -9
301 ELSE IF( ( wantq .AND. ldq.LT.n ) .OR. ldq.LT.1 ) THEN
302 info = -11
303 ELSE IF( ( wantz .AND. ldz.LT.n ) .OR. ldz.LT.1 ) THEN
304 info = -13
305 ELSE IF( lwork.LT.1 .AND. .NOT.lquery ) THEN
306 info = -15
307 END IF
308 IF( info.NE.0 ) THEN
309 CALL xerbla(
'SGGHD3', -info )
310 RETURN
311 ELSE IF( lquery ) THEN
312 RETURN
313 END IF
314
315
316
317 IF( initq )
318 $
CALL slaset(
'All', n, n, zero, one, q, ldq )
319 IF( initz )
320 $
CALL slaset(
'All', n, n, zero, one, z, ldz )
321
322
323
324 IF( n.GT.1 )
325 $
CALL slaset(
'Lower', n-1, n-1, zero, zero, b(2, 1), ldb )
326
327
328
329 nh = ihi - ilo + 1
330 IF( nh.LE.1 ) THEN
331 work( 1 ) = one
332 RETURN
333 END IF
334
335
336
337 nbmin =
ilaenv( 2,
'SGGHD3',
' ', n, ilo, ihi, -1 )
338 IF( nb.GT.1 .AND. nb.LT.nh ) THEN
339
340
341
342 nx = max( nb,
ilaenv( 3,
'SGGHD3',
' ', n, ilo, ihi, -1 ) )
343 IF( nx.LT.nh ) THEN
344
345
346
347 IF( lwork.LT.lwkopt ) THEN
348
349
350
351
352
353 nbmin = max( 2,
ilaenv( 2,
'SGGHD3',
' ', n, ilo, ihi,
354 $ -1 ) )
355 IF( lwork.GE.6*n*nbmin ) THEN
356 nb = lwork / ( 6*n )
357 ELSE
358 nb = 1
359 END IF
360 END IF
361 END IF
362 END IF
363
364 IF( nb.LT.nbmin .OR. nb.GE.nh ) THEN
365
366
367
368 jcol = ilo
369
370 ELSE
371
372
373
374 kacc22 =
ilaenv( 16,
'SGGHD3',
' ', n, ilo, ihi, -1 )
375 blk22 = kacc22.EQ.2
376 DO jcol = ilo, ihi-2, nb
377 nnb = min( nb, ihi-jcol-1 )
378
379
380
381
382
383
384
385 n2nb = ( ihi-jcol-1 ) / nnb - 1
386 nblst = ihi - jcol - n2nb*nnb
387 CALL slaset(
'All', nblst, nblst, zero, one, work, nblst )
388 pw = nblst * nblst + 1
389 DO i = 1, n2nb
390 CALL slaset(
'All', 2*nnb, 2*nnb, zero, one,
391 $ work( pw ), 2*nnb )
392 pw = pw + 4*nnb*nnb
393 END DO
394
395
396
397 DO j = jcol, jcol+nnb-1
398
399
400
401
402 DO i = ihi, j+2, -1
403 temp = a( i-1, j )
404 CALL slartg( temp, a( i, j ), c, s, a( i-1, j ) )
405 a( i, j ) = c
406 b( i, j ) = s
407 END DO
408
409
410
411 ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1
412 len = 2 + j - jcol
413 jrow = j + n2nb*nnb + 2
414 DO i = ihi, jrow, -1
415 c = a( i, j )
416 s = b( i, j )
417 DO jj = ppw, ppw+len-1
418 temp = work( jj + nblst )
419 work( jj + nblst ) = c*temp - s*work( jj )
420 work( jj ) = s*temp + c*work( jj )
421 END DO
422 len = len + 1
423 ppw = ppw - nblst - 1
424 END DO
425
426 ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb
427 j0 = jrow - nnb
428 DO jrow = j0, j+2, -nnb
429 ppw = ppwo
430 len = 2 + j - jcol
431 DO i = jrow+nnb-1, jrow, -1
432 c = a( i, j )
433 s = b( i, j )
434 DO jj = ppw, ppw+len-1
435 temp = work( jj + 2*nnb )
436 work( jj + 2*nnb ) = c*temp - s*work( jj )
437 work( jj ) = s*temp + c*work( jj )
438 END DO
439 len = len + 1
440 ppw = ppw - 2*nnb - 1
441 END DO
442 ppwo = ppwo + 4*nnb*nnb
443 END DO
444
445
446
447
448 IF( jcol.LE.2 ) THEN
449 top = 0
450 ELSE
451 top = jcol
452 END IF
453
454
455
456
457 DO jj = n, j+1, -1
458
459
460
461 DO i = min( jj+1, ihi ), j+2, -1
462 c = a( i, j )
463 s = b( i, j )
464 temp = b( i, jj )
465 b( i, jj ) = c*temp - s*b( i-1, jj )
466 b( i-1, jj ) = s*temp + c*b( i-1, jj )
467 END DO
468
469
470
471 IF( jj.LT.ihi ) THEN
472 temp = b( jj+1, jj+1 )
473 CALL slartg( temp, b( jj+1, jj ), c, s,
474 $ b( jj+1, jj+1 ) )
475 b( jj+1, jj ) = zero
476 CALL srot( jj-top, b( top+1, jj+1 ), 1,
477 $ b( top+1, jj ), 1, c, s )
478 a( jj+1, j ) = c
479 b( jj+1, j ) = -s
480 END IF
481 END DO
482
483
484
485
486
487
488
489
490 jj = mod( ihi-j-1, 3 )
491 DO i = ihi-j-3, jj+1, -3
492 c = a( j+1+i, j )
493 s = -b( j+1+i, j )
494 c1 = a( j+2+i, j )
495 s1 = -b( j+2+i, j )
496 c2 = a( j+3+i, j )
497 s2 = -b( j+3+i, j )
498
499 DO k = top+1, ihi
500 temp = a( k, j+i )
501 temp1 = a( k, j+i+1 )
502 temp2 = a( k, j+i+2 )
503 temp3 = a( k, j+i+3 )
504 a( k, j+i+3 ) = c2*temp3 + s2*temp2
505 temp2 = -s2*temp3 + c2*temp2
506 a( k, j+i+2 ) = c1*temp2 + s1*temp1
507 temp1 = -s1*temp2 + c1*temp1
508 a( k, j+i+1 ) = c*temp1 + s*temp
509 a( k, j+i ) = -s*temp1 + c*temp
510 END DO
511 END DO
512
513 IF( jj.GT.0 ) THEN
514 DO i = jj, 1, -1
515 CALL srot( ihi-top, a( top+1, j+i+1 ), 1,
516 $ a( top+1, j+i ), 1, a( j+1+i, j ),
517 $ -b( j+1+i, j ) )
518 END DO
519 END IF
520
521
522
523 IF ( j .LT. jcol + nnb - 1 ) THEN
524 len = 1 + j - jcol
525
526
527
528
529
530
531
532
533
534
535
536 jrow = ihi - nblst + 1
537 CALL sgemv(
'Transpose', nblst, len, one, work,
538 $ nblst, a( jrow, j+1 ), 1, zero,
539 $ work( pw ), 1 )
540 ppw = pw + len
541 DO i = jrow, jrow+nblst-len-1
542 work( ppw ) = a( i, j+1 )
543 ppw = ppw + 1
544 END DO
545 CALL strmv(
'Lower',
'Transpose',
'Non-unit',
546 $ nblst-len, work( len*nblst + 1 ), nblst,
547 $ work( pw+len ), 1 )
548 CALL sgemv(
'Transpose', len, nblst-len, one,
549 $ work( (len+1)*nblst - len + 1 ), nblst,
550 $ a( jrow+nblst-len, j+1 ), 1, one,
551 $ work( pw+len ), 1 )
552 ppw = pw
553 DO i = jrow, jrow+nblst-1
554 a( i, j+1 ) = work( ppw )
555 ppw = ppw + 1
556 END DO
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571 ppwo = 1 + nblst*nblst
572 j0 = jrow - nnb
573 DO jrow = j0, jcol+1, -nnb
574 ppw = pw + len
575 DO i = jrow, jrow+nnb-1
576 work( ppw ) = a( i, j+1 )
577 ppw = ppw + 1
578 END DO
579 ppw = pw
580 DO i = jrow+nnb, jrow+nnb+len-1
581 work( ppw ) = a( i, j+1 )
582 ppw = ppw + 1
583 END DO
584 CALL strmv(
'Upper',
'Transpose',
'Non-unit', len,
585 $ work( ppwo + nnb ), 2*nnb, work( pw ),
586 $ 1 )
587 CALL strmv(
'Lower',
'Transpose',
'Non-unit', nnb,
588 $ work( ppwo + 2*len*nnb ),
589 $ 2*nnb, work( pw + len ), 1 )
590 CALL sgemv(
'Transpose', nnb, len, one,
591 $ work( ppwo ), 2*nnb, a( jrow, j+1 ), 1,
592 $ one, work( pw ), 1 )
593 CALL sgemv(
'Transpose', len, nnb, one,
594 $ work( ppwo + 2*len*nnb + nnb ), 2*nnb,
595 $ a( jrow+nnb, j+1 ), 1, one,
596 $ work( pw+len ), 1 )
597 ppw = pw
598 DO i = jrow, jrow+len+nnb-1
599 a( i, j+1 ) = work( ppw )
600 ppw = ppw + 1
601 END DO
602 ppwo = ppwo + 4*nnb*nnb
603 END DO
604 END IF
605 END DO
606
607
608
609 cola = n - jcol - nnb + 1
610 j = ihi - nblst + 1
611 CALL sgemm(
'Transpose',
'No Transpose', nblst,
612 $ cola, nblst, one, work, nblst,
613 $ a( j, jcol+nnb ), lda, zero, work( pw ),
614 $ nblst )
615 CALL slacpy(
'All', nblst, cola, work( pw ), nblst,
616 $ a( j, jcol+nnb ), lda )
617 ppwo = nblst*nblst + 1
618 j0 = j - nnb
619 DO j = j0, jcol+1, -nnb
620 IF ( blk22 ) THEN
621
622
623
624
625
626
627
628
629
630
631 CALL sorm22(
'Left',
'Transpose', 2*nnb, cola, nnb,
632 $ nnb, work( ppwo ), 2*nnb,
633 $ a( j, jcol+nnb ), lda, work( pw ),
634 $ lwork-pw+1, ierr )
635 ELSE
636
637
638
639 CALL sgemm(
'Transpose',
'No Transpose', 2*nnb,
640 $ cola, 2*nnb, one, work( ppwo ), 2*nnb,
641 $ a( j, jcol+nnb ), lda, zero, work( pw ),
642 $ 2*nnb )
643 CALL slacpy(
'All', 2*nnb, cola, work( pw ), 2*nnb,
644 $ a( j, jcol+nnb ), lda )
645 END IF
646 ppwo = ppwo + 4*nnb*nnb
647 END DO
648
649
650
651 IF( wantq ) THEN
652 j = ihi - nblst + 1
653 IF ( initq ) THEN
654 topq = max( 2, j - jcol + 1 )
655 nh = ihi - topq + 1
656 ELSE
657 topq = 1
658 nh = n
659 END IF
660 CALL sgemm(
'No Transpose',
'No Transpose', nh,
661 $ nblst, nblst, one, q( topq, j ), ldq,
662 $ work, nblst, zero, work( pw ), nh )
663 CALL slacpy(
'All', nh, nblst, work( pw ), nh,
664 $ q( topq, j ), ldq )
665 ppwo = nblst*nblst + 1
666 j0 = j - nnb
667 DO j = j0, jcol+1, -nnb
668 IF ( initq ) THEN
669 topq = max( 2, j - jcol + 1 )
670 nh = ihi - topq + 1
671 END IF
672 IF ( blk22 ) THEN
673
674
675
676 CALL sorm22(
'Right',
'No Transpose', nh, 2*nnb,
677 $ nnb, nnb, work( ppwo ), 2*nnb,
678 $ q( topq, j ), ldq, work( pw ),
679 $ lwork-pw+1, ierr )
680 ELSE
681
682
683
684 CALL sgemm(
'No Transpose',
'No Transpose', nh,
685 $ 2*nnb, 2*nnb, one, q( topq, j ), ldq,
686 $ work( ppwo ), 2*nnb, zero, work( pw ),
687 $ nh )
688 CALL slacpy(
'All', nh, 2*nnb, work( pw ), nh,
689 $ q( topq, j ), ldq )
690 END IF
691 ppwo = ppwo + 4*nnb*nnb
692 END DO
693 END IF
694
695
696
697 IF ( wantz .OR. top.GT.0 ) THEN
698
699
700
701
702 CALL slaset(
'All', nblst, nblst, zero, one, work,
703 $ nblst )
704 pw = nblst * nblst + 1
705 DO i = 1, n2nb
706 CALL slaset(
'All', 2*nnb, 2*nnb, zero, one,
707 $ work( pw ), 2*nnb )
708 pw = pw + 4*nnb*nnb
709 END DO
710
711
712
713 DO j = jcol, jcol+nnb-1
714 ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1
715 len = 2 + j - jcol
716 jrow = j + n2nb*nnb + 2
717 DO i = ihi, jrow, -1
718 c = a( i, j )
719 a( i, j ) = zero
720 s = b( i, j )
721 b( i, j ) = zero
722 DO jj = ppw, ppw+len-1
723 temp = work( jj + nblst )
724 work( jj + nblst ) = c*temp - s*work( jj )
725 work( jj ) = s*temp + c*work( jj )
726 END DO
727 len = len + 1
728 ppw = ppw - nblst - 1
729 END DO
730
731 ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb
732 j0 = jrow - nnb
733 DO jrow = j0, j+2, -nnb
734 ppw = ppwo
735 len = 2 + j - jcol
736 DO i = jrow+nnb-1, jrow, -1
737 c = a( i, j )
738 a( i, j ) = zero
739 s = b( i, j )
740 b( i, j ) = zero
741 DO jj = ppw, ppw+len-1
742 temp = work( jj + 2*nnb )
743 work( jj + 2*nnb ) = c*temp - s*work( jj )
744 work( jj ) = s*temp + c*work( jj )
745 END DO
746 len = len + 1
747 ppw = ppw - 2*nnb - 1
748 END DO
749 ppwo = ppwo + 4*nnb*nnb
750 END DO
751 END DO
752 ELSE
753
754 CALL slaset(
'Lower', ihi - jcol - 1, nnb, zero, zero,
755 $ a( jcol + 2, jcol ), lda )
756 CALL slaset(
'Lower', ihi - jcol - 1, nnb, zero, zero,
757 $ b( jcol + 2, jcol ), ldb )
758 END IF
759
760
761
762 IF ( top.GT.0 ) THEN
763 j = ihi - nblst + 1
764 CALL sgemm(
'No Transpose',
'No Transpose', top,
765 $ nblst, nblst, one, a( 1, j ), lda,
766 $ work, nblst, zero, work( pw ), top )
767 CALL slacpy(
'All', top, nblst, work( pw ), top,
768 $ a( 1, j ), lda )
769 ppwo = nblst*nblst + 1
770 j0 = j - nnb
771 DO j = j0, jcol+1, -nnb
772 IF ( blk22 ) THEN
773
774
775
776 CALL sorm22(
'Right',
'No Transpose', top, 2*nnb,
777 $ nnb, nnb, work( ppwo ), 2*nnb,
778 $ a( 1, j ), lda, work( pw ),
779 $ lwork-pw+1, ierr )
780 ELSE
781
782
783
784 CALL sgemm(
'No Transpose',
'No Transpose', top,
785 $ 2*nnb, 2*nnb, one, a( 1, j ), lda,
786 $ work( ppwo ), 2*nnb, zero,
787 $ work( pw ), top )
788 CALL slacpy(
'All', top, 2*nnb, work( pw ), top,
789 $ a( 1, j ), lda )
790 END IF
791 ppwo = ppwo + 4*nnb*nnb
792 END DO
793
794 j = ihi - nblst + 1
795 CALL sgemm(
'No Transpose',
'No Transpose', top,
796 $ nblst, nblst, one, b( 1, j ), ldb,
797 $ work, nblst, zero, work( pw ), top )
798 CALL slacpy(
'All', top, nblst, work( pw ), top,
799 $ b( 1, j ), ldb )
800 ppwo = nblst*nblst + 1
801 j0 = j - nnb
802 DO j = j0, jcol+1, -nnb
803 IF ( blk22 ) THEN
804
805
806
807 CALL sorm22(
'Right',
'No Transpose', top, 2*nnb,
808 $ nnb, nnb, work( ppwo ), 2*nnb,
809 $ b( 1, j ), ldb, work( pw ),
810 $ lwork-pw+1, ierr )
811 ELSE
812
813
814
815 CALL sgemm(
'No Transpose',
'No Transpose', top,
816 $ 2*nnb, 2*nnb, one, b( 1, j ), ldb,
817 $ work( ppwo ), 2*nnb, zero,
818 $ work( pw ), top )
819 CALL slacpy(
'All', top, 2*nnb, work( pw ), top,
820 $ b( 1, j ), ldb )
821 END IF
822 ppwo = ppwo + 4*nnb*nnb
823 END DO
824 END IF
825
826
827
828 IF( wantz ) THEN
829 j = ihi - nblst + 1
830 IF ( initq ) THEN
831 topq = max( 2, j - jcol + 1 )
832 nh = ihi - topq + 1
833 ELSE
834 topq = 1
835 nh = n
836 END IF
837 CALL sgemm(
'No Transpose',
'No Transpose', nh,
838 $ nblst, nblst, one, z( topq, j ), ldz,
839 $ work, nblst, zero, work( pw ), nh )
840 CALL slacpy(
'All', nh, nblst, work( pw ), nh,
841 $ z( topq, j ), ldz )
842 ppwo = nblst*nblst + 1
843 j0 = j - nnb
844 DO j = j0, jcol+1, -nnb
845 IF ( initq ) THEN
846 topq = max( 2, j - jcol + 1 )
847 nh = ihi - topq + 1
848 END IF
849 IF ( blk22 ) THEN
850
851
852
853 CALL sorm22(
'Right',
'No Transpose', nh, 2*nnb,
854 $ nnb, nnb, work( ppwo ), 2*nnb,
855 $ z( topq, j ), ldz, work( pw ),
856 $ lwork-pw+1, ierr )
857 ELSE
858
859
860
861 CALL sgemm(
'No Transpose',
'No Transpose', nh,
862 $ 2*nnb, 2*nnb, one, z( topq, j ), ldz,
863 $ work( ppwo ), 2*nnb, zero, work( pw ),
864 $ nh )
865 CALL slacpy(
'All', nh, 2*nnb, work( pw ), nh,
866 $ z( topq, j ), ldz )
867 END IF
868 ppwo = ppwo + 4*nnb*nnb
869 END DO
870 END IF
871 END DO
872 END IF
873
874
875
876
877 compq2 = compq
878 compz2 = compz
879 IF ( jcol.NE.ilo ) THEN
880 IF ( wantq )
881 $ compq2 = 'V'
882 IF ( wantz )
883 $ compz2 = 'V'
884 END IF
885
886 IF ( jcol.LT.ihi )
887 $
CALL sgghrd( compq2, compz2, n, jcol, ihi, a, lda, b, ldb, q,
888 $ ldq, z, ldz, ierr )
890
891 RETURN
892
893
894
subroutine xerbla(srname, info)
subroutine sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
SGEMM
subroutine sgemv(trans, m, n, alpha, a, lda, x, incx, beta, y, incy)
SGEMV
subroutine sgghrd(compq, compz, n, ilo, ihi, a, lda, b, ldb, q, ldq, z, ldz, info)
SGGHRD
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
subroutine slacpy(uplo, m, n, a, lda, b, ldb)
SLACPY copies all or part of one two-dimensional array to another.
subroutine slartg(f, g, c, s, r)
SLARTG generates a plane rotation with real cosine and real sine.
subroutine slaset(uplo, m, n, alpha, beta, a, lda)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
logical function lsame(ca, cb)
LSAME
subroutine srot(n, sx, incx, sy, incy, c, s)
SROT
real function sroundup_lwork(lwork)
SROUNDUP_LWORK
subroutine strmv(uplo, trans, diag, n, a, lda, x, incx)
STRMV
subroutine sorm22(side, trans, m, n, n1, n2, q, ldq, c, ldc, work, lwork, info)
SORM22 multiplies a general matrix by a banded orthogonal matrix.