LAPACK 3.12.1
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ sgghd3()

subroutine sgghd3 ( character compq,
character compz,
integer n,
integer ilo,
integer ihi,
real, dimension( lda, * ) a,
integer lda,
real, dimension( ldb, * ) b,
integer ldb,
real, dimension( ldq, * ) q,
integer ldq,
real, dimension( ldz, * ) z,
integer ldz,
real, dimension( * ) work,
integer lwork,
integer info )

SGGHD3

Download SGGHD3 + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> SGGHD3 reduces a pair of real matrices (A,B) to generalized upper
!> Hessenberg form using orthogonal transformations, where A is a
!> general matrix and B is upper triangular.  The form of the
!> generalized eigenvalue problem is
!>    A*x = lambda*B*x,
!> and B is typically made upper triangular by computing its QR
!> factorization and moving the orthogonal matrix Q to the left side
!> of the equation.
!>
!> This subroutine simultaneously reduces A to a Hessenberg matrix H:
!>    Q**T*A*Z = H
!> and transforms B to another upper triangular matrix T:
!>    Q**T*B*Z = T
!> in order to reduce the problem to its standard form
!>    H*y = lambda*T*y
!> where y = Z**T*x.
!>
!> The orthogonal matrices Q and Z are determined as products of Givens
!> rotations.  They may either be formed explicitly, or they may be
!> postmultiplied into input matrices Q1 and Z1, so that
!>
!>      Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T
!>
!>      Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T
!>
!> If Q1 is the orthogonal matrix from the QR factorization of B in the
!> original equation A*x = lambda*B*x, then SGGHD3 reduces the original
!> problem to generalized Hessenberg form.
!>
!> This is a blocked variant of SGGHRD, using matrix-matrix
!> multiplications for parts of the computation to enhance performance.
!> 
Parameters
[in]COMPQ
!>          COMPQ is CHARACTER*1
!>          = 'N': do not compute Q;
!>          = 'I': Q is initialized to the unit matrix, and the
!>                 orthogonal matrix Q is returned;
!>          = 'V': Q must contain an orthogonal matrix Q1 on entry,
!>                 and the product Q1*Q is returned.
!> 
[in]COMPZ
!>          COMPZ is CHARACTER*1
!>          = 'N': do not compute Z;
!>          = 'I': Z is initialized to the unit matrix, and the
!>                 orthogonal matrix Z is returned;
!>          = 'V': Z must contain an orthogonal matrix Z1 on entry,
!>                 and the product Z1*Z is returned.
!> 
[in]N
!>          N is INTEGER
!>          The order of the matrices A and B.  N >= 0.
!> 
[in]ILO
!>          ILO is INTEGER
!> 
[in]IHI
!>          IHI is INTEGER
!>
!>          ILO and IHI mark the rows and columns of A which are to be
!>          reduced.  It is assumed that A is already upper triangular
!>          in rows and columns 1:ILO-1 and IHI+1:N.  ILO and IHI are
!>          normally set by a previous call to SGGBAL; otherwise they
!>          should be set to 1 and N respectively.
!>          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
!> 
[in,out]A
!>          A is REAL array, dimension (LDA, N)
!>          On entry, the N-by-N general matrix to be reduced.
!>          On exit, the upper triangle and the first subdiagonal of A
!>          are overwritten with the upper Hessenberg matrix H, and the
!>          rest is set to zero.
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A.  LDA >= max(1,N).
!> 
[in,out]B
!>          B is REAL array, dimension (LDB, N)
!>          On entry, the N-by-N upper triangular matrix B.
!>          On exit, the upper triangular matrix T = Q**T B Z.  The
!>          elements below the diagonal are set to zero.
!> 
[in]LDB
!>          LDB is INTEGER
!>          The leading dimension of the array B.  LDB >= max(1,N).
!> 
[in,out]Q
!>          Q is REAL array, dimension (LDQ, N)
!>          On entry, if COMPQ = 'V', the orthogonal matrix Q1,
!>          typically from the QR factorization of B.
!>          On exit, if COMPQ='I', the orthogonal matrix Q, and if
!>          COMPQ = 'V', the product Q1*Q.
!>          Not referenced if COMPQ='N'.
!> 
[in]LDQ
!>          LDQ is INTEGER
!>          The leading dimension of the array Q.
!>          LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise.
!> 
[in,out]Z
!>          Z is REAL array, dimension (LDZ, N)
!>          On entry, if COMPZ = 'V', the orthogonal matrix Z1.
!>          On exit, if COMPZ='I', the orthogonal matrix Z, and if
!>          COMPZ = 'V', the product Z1*Z.
!>          Not referenced if COMPZ='N'.
!> 
[in]LDZ
!>          LDZ is INTEGER
!>          The leading dimension of the array Z.
!>          LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise.
!> 
[out]WORK
!>          WORK is REAL array, dimension (MAX(1,LWORK))
!>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          The length of the array WORK. LWORK >= 1.
!>          For optimum performance LWORK >= 6*N*NB, where NB is the
!>          optimal blocksize.
!>
!>          If LWORK = -1, then a workspace query is assumed; the routine
!>          only calculates the optimal size of the WORK array, returns
!>          this value as the first entry of the WORK array, and no error
!>          message related to LWORK is issued by XERBLA.
!> 
[out]INFO
!>          INFO is INTEGER
!>          = 0:  successful exit.
!>          < 0:  if INFO = -i, the i-th argument had an illegal value.
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Further Details:
!>
!>  This routine reduces A to Hessenberg form and maintains B in triangular form
!>  using a blocked variant of Moler and Stewart's original algorithm,
!>  as described by Kagstrom, Kressner, Quintana-Orti, and Quintana-Orti
!>  (BIT 2008).
!> 

Definition at line 226 of file sgghd3.f.

229*
230* -- LAPACK computational routine --
231* -- LAPACK is a software package provided by Univ. of Tennessee, --
232* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
233*
234 IMPLICIT NONE
235*
236* .. Scalar Arguments ..
237 CHARACTER COMPQ, COMPZ
238 INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK
239* ..
240* .. Array Arguments ..
241 REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
242 $ Z( LDZ, * ), WORK( * )
243* ..
244*
245* =====================================================================
246*
247* .. Parameters ..
248 REAL ZERO, ONE
249 parameter( zero = 0.0e+0, one = 1.0e+0 )
250* ..
251* .. Local Scalars ..
252 LOGICAL BLK22, INITQ, INITZ, LQUERY, WANTQ, WANTZ
253 CHARACTER*1 COMPQ2, COMPZ2
254 INTEGER COLA, I, IERR, J, J0, JCOL, JJ, JROW, K,
255 $ KACC22, LEN, LWKOPT, N2NB, NB, NBLST, NBMIN,
256 $ NH, NNB, NX, PPW, PPWO, PW, TOP, TOPQ
257 REAL C, C1, C2, S, S1, S2, TEMP, TEMP1, TEMP2, TEMP3
258* ..
259* .. External Functions ..
260 LOGICAL LSAME
261 INTEGER ILAENV
262 REAL SROUNDUP_LWORK
263 EXTERNAL ilaenv, lsame, sroundup_lwork
264* ..
265* .. External Subroutines ..
266 EXTERNAL sgghrd, slartg, slaset, sorm22, srot,
267 $ sgemm,
269* ..
270* .. Intrinsic Functions ..
271 INTRINSIC max
272* ..
273* .. Executable Statements ..
274*
275* Decode and test the input parameters.
276*
277 info = 0
278 nb = ilaenv( 1, 'SGGHD3', ' ', n, ilo, ihi, -1 )
279 nh = ihi - ilo + 1
280 IF( nh.LE.1 ) THEN
281 lwkopt = 1
282 ELSE
283 lwkopt = 6*n*nb
284 END IF
285 work( 1 ) = sroundup_lwork( lwkopt )
286 initq = lsame( compq, 'I' )
287 wantq = initq .OR. lsame( compq, 'V' )
288 initz = lsame( compz, 'I' )
289 wantz = initz .OR. lsame( compz, 'V' )
290 lquery = ( lwork.EQ.-1 )
291*
292 IF( .NOT.lsame( compq, 'N' ) .AND. .NOT.wantq ) THEN
293 info = -1
294 ELSE IF( .NOT.lsame( compz, 'N' ) .AND. .NOT.wantz ) THEN
295 info = -2
296 ELSE IF( n.LT.0 ) THEN
297 info = -3
298 ELSE IF( ilo.LT.1 ) THEN
299 info = -4
300 ELSE IF( ihi.GT.n .OR. ihi.LT.ilo-1 ) THEN
301 info = -5
302 ELSE IF( lda.LT.max( 1, n ) ) THEN
303 info = -7
304 ELSE IF( ldb.LT.max( 1, n ) ) THEN
305 info = -9
306 ELSE IF( ( wantq .AND. ldq.LT.n ) .OR. ldq.LT.1 ) THEN
307 info = -11
308 ELSE IF( ( wantz .AND. ldz.LT.n ) .OR. ldz.LT.1 ) THEN
309 info = -13
310 ELSE IF( lwork.LT.1 .AND. .NOT.lquery ) THEN
311 info = -15
312 END IF
313 IF( info.NE.0 ) THEN
314 CALL xerbla( 'SGGHD3', -info )
315 RETURN
316 ELSE IF( lquery ) THEN
317 RETURN
318 END IF
319*
320* Initialize Q and Z if desired.
321*
322 IF( initq )
323 $ CALL slaset( 'All', n, n, zero, one, q, ldq )
324 IF( initz )
325 $ CALL slaset( 'All', n, n, zero, one, z, ldz )
326*
327* Zero out lower triangle of B.
328*
329 IF( n.GT.1 )
330 $ CALL slaset( 'Lower', n-1, n-1, zero, zero, b(2, 1), ldb )
331*
332* Quick return if possible
333*
334 IF( nh.LE.1 ) THEN
335 work( 1 ) = one
336 RETURN
337 END IF
338*
339* Determine the blocksize.
340*
341 nbmin = ilaenv( 2, 'SGGHD3', ' ', n, ilo, ihi, -1 )
342 IF( nb.GT.1 .AND. nb.LT.nh ) THEN
343*
344* Determine when to use unblocked instead of blocked code.
345*
346 nx = max( nb, ilaenv( 3, 'SGGHD3', ' ', n, ilo, ihi, -1 ) )
347 IF( nx.LT.nh ) THEN
348*
349* Determine if workspace is large enough for blocked code.
350*
351 IF( lwork.LT.lwkopt ) THEN
352*
353* Not enough workspace to use optimal NB: determine the
354* minimum value of NB, and reduce NB or force use of
355* unblocked code.
356*
357 nbmin = max( 2, ilaenv( 2, 'SGGHD3', ' ', n, ilo, ihi,
358 $ -1 ) )
359 IF( lwork.GE.6*n*nbmin ) THEN
360 nb = lwork / ( 6*n )
361 ELSE
362 nb = 1
363 END IF
364 END IF
365 END IF
366 END IF
367*
368 IF( nb.LT.nbmin .OR. nb.GE.nh ) THEN
369*
370* Use unblocked code below
371*
372 jcol = ilo
373*
374 ELSE
375*
376* Use blocked code
377*
378 kacc22 = ilaenv( 16, 'SGGHD3', ' ', n, ilo, ihi, -1 )
379 blk22 = kacc22.EQ.2
380 DO jcol = ilo, ihi-2, nb
381 nnb = min( nb, ihi-jcol-1 )
382*
383* Initialize small orthogonal factors that will hold the
384* accumulated Givens rotations in workspace.
385* N2NB denotes the number of 2*NNB-by-2*NNB factors
386* NBLST denotes the (possibly smaller) order of the last
387* factor.
388*
389 n2nb = ( ihi-jcol-1 ) / nnb - 1
390 nblst = ihi - jcol - n2nb*nnb
391 CALL slaset( 'All', nblst, nblst, zero, one, work,
392 $ nblst )
393 pw = nblst * nblst + 1
394 DO i = 1, n2nb
395 CALL slaset( 'All', 2*nnb, 2*nnb, zero, one,
396 $ work( pw ), 2*nnb )
397 pw = pw + 4*nnb*nnb
398 END DO
399*
400* Reduce columns JCOL:JCOL+NNB-1 of A to Hessenberg form.
401*
402 DO j = jcol, jcol+nnb-1
403*
404* Reduce Jth column of A. Store cosines and sines in Jth
405* column of A and B, respectively.
406*
407 DO i = ihi, j+2, -1
408 temp = a( i-1, j )
409 CALL slartg( temp, a( i, j ), c, s, a( i-1, j ) )
410 a( i, j ) = c
411 b( i, j ) = s
412 END DO
413*
414* Accumulate Givens rotations into workspace array.
415*
416 ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1
417 len = 2 + j - jcol
418 jrow = j + n2nb*nnb + 2
419 DO i = ihi, jrow, -1
420 c = a( i, j )
421 s = b( i, j )
422 DO jj = ppw, ppw+len-1
423 temp = work( jj + nblst )
424 work( jj + nblst ) = c*temp - s*work( jj )
425 work( jj ) = s*temp + c*work( jj )
426 END DO
427 len = len + 1
428 ppw = ppw - nblst - 1
429 END DO
430*
431 ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb
432 j0 = jrow - nnb
433 DO jrow = j0, j+2, -nnb
434 ppw = ppwo
435 len = 2 + j - jcol
436 DO i = jrow+nnb-1, jrow, -1
437 c = a( i, j )
438 s = b( i, j )
439 DO jj = ppw, ppw+len-1
440 temp = work( jj + 2*nnb )
441 work( jj + 2*nnb ) = c*temp - s*work( jj )
442 work( jj ) = s*temp + c*work( jj )
443 END DO
444 len = len + 1
445 ppw = ppw - 2*nnb - 1
446 END DO
447 ppwo = ppwo + 4*nnb*nnb
448 END DO
449*
450* TOP denotes the number of top rows in A and B that will
451* not be updated during the next steps.
452*
453 IF( jcol.LE.2 ) THEN
454 top = 0
455 ELSE
456 top = jcol
457 END IF
458*
459* Propagate transformations through B and replace stored
460* left sines/cosines by right sines/cosines.
461*
462 DO jj = n, j+1, -1
463*
464* Update JJth column of B.
465*
466 DO i = min( jj+1, ihi ), j+2, -1
467 c = a( i, j )
468 s = b( i, j )
469 temp = b( i, jj )
470 b( i, jj ) = c*temp - s*b( i-1, jj )
471 b( i-1, jj ) = s*temp + c*b( i-1, jj )
472 END DO
473*
474* Annihilate B( JJ+1, JJ ).
475*
476 IF( jj.LT.ihi ) THEN
477 temp = b( jj+1, jj+1 )
478 CALL slartg( temp, b( jj+1, jj ), c, s,
479 $ b( jj+1, jj+1 ) )
480 b( jj+1, jj ) = zero
481 CALL srot( jj-top, b( top+1, jj+1 ), 1,
482 $ b( top+1, jj ), 1, c, s )
483 a( jj+1, j ) = c
484 b( jj+1, j ) = -s
485 END IF
486 END DO
487*
488* Update A by transformations from right.
489* Explicit loop unrolling provides better performance
490* compared to SLASR.
491* CALL SLASR( 'Right', 'Variable', 'Backward', IHI-TOP,
492* $ IHI-J, A( J+2, J ), B( J+2, J ),
493* $ A( TOP+1, J+1 ), LDA )
494*
495 jj = mod( ihi-j-1, 3 )
496 DO i = ihi-j-3, jj+1, -3
497 c = a( j+1+i, j )
498 s = -b( j+1+i, j )
499 c1 = a( j+2+i, j )
500 s1 = -b( j+2+i, j )
501 c2 = a( j+3+i, j )
502 s2 = -b( j+3+i, j )
503*
504 DO k = top+1, ihi
505 temp = a( k, j+i )
506 temp1 = a( k, j+i+1 )
507 temp2 = a( k, j+i+2 )
508 temp3 = a( k, j+i+3 )
509 a( k, j+i+3 ) = c2*temp3 + s2*temp2
510 temp2 = -s2*temp3 + c2*temp2
511 a( k, j+i+2 ) = c1*temp2 + s1*temp1
512 temp1 = -s1*temp2 + c1*temp1
513 a( k, j+i+1 ) = c*temp1 + s*temp
514 a( k, j+i ) = -s*temp1 + c*temp
515 END DO
516 END DO
517*
518 IF( jj.GT.0 ) THEN
519 DO i = jj, 1, -1
520 CALL srot( ihi-top, a( top+1, j+i+1 ), 1,
521 $ a( top+1, j+i ), 1, a( j+1+i, j ),
522 $ -b( j+1+i, j ) )
523 END DO
524 END IF
525*
526* Update (J+1)th column of A by transformations from left.
527*
528 IF ( j .LT. jcol + nnb - 1 ) THEN
529 len = 1 + j - jcol
530*
531* Multiply with the trailing accumulated orthogonal
532* matrix, which takes the form
533*
534* [ U11 U12 ]
535* U = [ ],
536* [ U21 U22 ]
537*
538* where U21 is a LEN-by-LEN matrix and U12 is lower
539* triangular.
540*
541 jrow = ihi - nblst + 1
542 CALL sgemv( 'Transpose', nblst, len, one, work,
543 $ nblst, a( jrow, j+1 ), 1, zero,
544 $ work( pw ), 1 )
545 ppw = pw + len
546 DO i = jrow, jrow+nblst-len-1
547 work( ppw ) = a( i, j+1 )
548 ppw = ppw + 1
549 END DO
550 CALL strmv( 'Lower', 'Transpose', 'Non-unit',
551 $ nblst-len, work( len*nblst + 1 ), nblst,
552 $ work( pw+len ), 1 )
553 CALL sgemv( 'Transpose', len, nblst-len, one,
554 $ work( (len+1)*nblst - len + 1 ), nblst,
555 $ a( jrow+nblst-len, j+1 ), 1, one,
556 $ work( pw+len ), 1 )
557 ppw = pw
558 DO i = jrow, jrow+nblst-1
559 a( i, j+1 ) = work( ppw )
560 ppw = ppw + 1
561 END DO
562*
563* Multiply with the other accumulated orthogonal
564* matrices, which take the form
565*
566* [ U11 U12 0 ]
567* [ ]
568* U = [ U21 U22 0 ],
569* [ ]
570* [ 0 0 I ]
571*
572* where I denotes the (NNB-LEN)-by-(NNB-LEN) identity
573* matrix, U21 is a LEN-by-LEN upper triangular matrix
574* and U12 is an NNB-by-NNB lower triangular matrix.
575*
576 ppwo = 1 + nblst*nblst
577 j0 = jrow - nnb
578 DO jrow = j0, jcol+1, -nnb
579 ppw = pw + len
580 DO i = jrow, jrow+nnb-1
581 work( ppw ) = a( i, j+1 )
582 ppw = ppw + 1
583 END DO
584 ppw = pw
585 DO i = jrow+nnb, jrow+nnb+len-1
586 work( ppw ) = a( i, j+1 )
587 ppw = ppw + 1
588 END DO
589 CALL strmv( 'Upper', 'Transpose', 'Non-unit',
590 $ len,
591 $ work( ppwo + nnb ), 2*nnb, work( pw ),
592 $ 1 )
593 CALL strmv( 'Lower', 'Transpose', 'Non-unit',
594 $ nnb,
595 $ work( ppwo + 2*len*nnb ),
596 $ 2*nnb, work( pw + len ), 1 )
597 CALL sgemv( 'Transpose', nnb, len, one,
598 $ work( ppwo ), 2*nnb, a( jrow, j+1 ), 1,
599 $ one, work( pw ), 1 )
600 CALL sgemv( 'Transpose', len, nnb, one,
601 $ work( ppwo + 2*len*nnb + nnb ), 2*nnb,
602 $ a( jrow+nnb, j+1 ), 1, one,
603 $ work( pw+len ), 1 )
604 ppw = pw
605 DO i = jrow, jrow+len+nnb-1
606 a( i, j+1 ) = work( ppw )
607 ppw = ppw + 1
608 END DO
609 ppwo = ppwo + 4*nnb*nnb
610 END DO
611 END IF
612 END DO
613*
614* Apply accumulated orthogonal matrices to A.
615*
616 cola = n - jcol - nnb + 1
617 j = ihi - nblst + 1
618 CALL sgemm( 'Transpose', 'No Transpose', nblst,
619 $ cola, nblst, one, work, nblst,
620 $ a( j, jcol+nnb ), lda, zero, work( pw ),
621 $ nblst )
622 CALL slacpy( 'All', nblst, cola, work( pw ), nblst,
623 $ a( j, jcol+nnb ), lda )
624 ppwo = nblst*nblst + 1
625 j0 = j - nnb
626 DO j = j0, jcol+1, -nnb
627 IF ( blk22 ) THEN
628*
629* Exploit the structure of
630*
631* [ U11 U12 ]
632* U = [ ]
633* [ U21 U22 ],
634*
635* where all blocks are NNB-by-NNB, U21 is upper
636* triangular and U12 is lower triangular.
637*
638 CALL sorm22( 'Left', 'Transpose', 2*nnb, cola, nnb,
639 $ nnb, work( ppwo ), 2*nnb,
640 $ a( j, jcol+nnb ), lda, work( pw ),
641 $ lwork-pw+1, ierr )
642 ELSE
643*
644* Ignore the structure of U.
645*
646 CALL sgemm( 'Transpose', 'No Transpose', 2*nnb,
647 $ cola, 2*nnb, one, work( ppwo ), 2*nnb,
648 $ a( j, jcol+nnb ), lda, zero, work( pw ),
649 $ 2*nnb )
650 CALL slacpy( 'All', 2*nnb, cola, work( pw ), 2*nnb,
651 $ a( j, jcol+nnb ), lda )
652 END IF
653 ppwo = ppwo + 4*nnb*nnb
654 END DO
655*
656* Apply accumulated orthogonal matrices to Q.
657*
658 IF( wantq ) THEN
659 j = ihi - nblst + 1
660 IF ( initq ) THEN
661 topq = max( 2, j - jcol + 1 )
662 nh = ihi - topq + 1
663 ELSE
664 topq = 1
665 nh = n
666 END IF
667 CALL sgemm( 'No Transpose', 'No Transpose', nh,
668 $ nblst, nblst, one, q( topq, j ), ldq,
669 $ work, nblst, zero, work( pw ), nh )
670 CALL slacpy( 'All', nh, nblst, work( pw ), nh,
671 $ q( topq, j ), ldq )
672 ppwo = nblst*nblst + 1
673 j0 = j - nnb
674 DO j = j0, jcol+1, -nnb
675 IF ( initq ) THEN
676 topq = max( 2, j - jcol + 1 )
677 nh = ihi - topq + 1
678 END IF
679 IF ( blk22 ) THEN
680*
681* Exploit the structure of U.
682*
683 CALL sorm22( 'Right', 'No Transpose', nh, 2*nnb,
684 $ nnb, nnb, work( ppwo ), 2*nnb,
685 $ q( topq, j ), ldq, work( pw ),
686 $ lwork-pw+1, ierr )
687 ELSE
688*
689* Ignore the structure of U.
690*
691 CALL sgemm( 'No Transpose', 'No Transpose', nh,
692 $ 2*nnb, 2*nnb, one, q( topq, j ), ldq,
693 $ work( ppwo ), 2*nnb, zero, work( pw ),
694 $ nh )
695 CALL slacpy( 'All', nh, 2*nnb, work( pw ), nh,
696 $ q( topq, j ), ldq )
697 END IF
698 ppwo = ppwo + 4*nnb*nnb
699 END DO
700 END IF
701*
702* Accumulate right Givens rotations if required.
703*
704 IF ( wantz .OR. top.GT.0 ) THEN
705*
706* Initialize small orthogonal factors that will hold the
707* accumulated Givens rotations in workspace.
708*
709 CALL slaset( 'All', nblst, nblst, zero, one, work,
710 $ nblst )
711 pw = nblst * nblst + 1
712 DO i = 1, n2nb
713 CALL slaset( 'All', 2*nnb, 2*nnb, zero, one,
714 $ work( pw ), 2*nnb )
715 pw = pw + 4*nnb*nnb
716 END DO
717*
718* Accumulate Givens rotations into workspace array.
719*
720 DO j = jcol, jcol+nnb-1
721 ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1
722 len = 2 + j - jcol
723 jrow = j + n2nb*nnb + 2
724 DO i = ihi, jrow, -1
725 c = a( i, j )
726 a( i, j ) = zero
727 s = b( i, j )
728 b( i, j ) = zero
729 DO jj = ppw, ppw+len-1
730 temp = work( jj + nblst )
731 work( jj + nblst ) = c*temp - s*work( jj )
732 work( jj ) = s*temp + c*work( jj )
733 END DO
734 len = len + 1
735 ppw = ppw - nblst - 1
736 END DO
737*
738 ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb
739 j0 = jrow - nnb
740 DO jrow = j0, j+2, -nnb
741 ppw = ppwo
742 len = 2 + j - jcol
743 DO i = jrow+nnb-1, jrow, -1
744 c = a( i, j )
745 a( i, j ) = zero
746 s = b( i, j )
747 b( i, j ) = zero
748 DO jj = ppw, ppw+len-1
749 temp = work( jj + 2*nnb )
750 work( jj + 2*nnb ) = c*temp - s*work( jj )
751 work( jj ) = s*temp + c*work( jj )
752 END DO
753 len = len + 1
754 ppw = ppw - 2*nnb - 1
755 END DO
756 ppwo = ppwo + 4*nnb*nnb
757 END DO
758 END DO
759 ELSE
760*
761 CALL slaset( 'Lower', ihi - jcol - 1, nnb, zero, zero,
762 $ a( jcol + 2, jcol ), lda )
763 CALL slaset( 'Lower', ihi - jcol - 1, nnb, zero, zero,
764 $ b( jcol + 2, jcol ), ldb )
765 END IF
766*
767* Apply accumulated orthogonal matrices to A and B.
768*
769 IF ( top.GT.0 ) THEN
770 j = ihi - nblst + 1
771 CALL sgemm( 'No Transpose', 'No Transpose', top,
772 $ nblst, nblst, one, a( 1, j ), lda,
773 $ work, nblst, zero, work( pw ), top )
774 CALL slacpy( 'All', top, nblst, work( pw ), top,
775 $ a( 1, j ), lda )
776 ppwo = nblst*nblst + 1
777 j0 = j - nnb
778 DO j = j0, jcol+1, -nnb
779 IF ( blk22 ) THEN
780*
781* Exploit the structure of U.
782*
783 CALL sorm22( 'Right', 'No Transpose', top,
784 $ 2*nnb,
785 $ nnb, nnb, work( ppwo ), 2*nnb,
786 $ a( 1, j ), lda, work( pw ),
787 $ lwork-pw+1, ierr )
788 ELSE
789*
790* Ignore the structure of U.
791*
792 CALL sgemm( 'No Transpose', 'No Transpose', top,
793 $ 2*nnb, 2*nnb, one, a( 1, j ), lda,
794 $ work( ppwo ), 2*nnb, zero,
795 $ work( pw ), top )
796 CALL slacpy( 'All', top, 2*nnb, work( pw ), top,
797 $ a( 1, j ), lda )
798 END IF
799 ppwo = ppwo + 4*nnb*nnb
800 END DO
801*
802 j = ihi - nblst + 1
803 CALL sgemm( 'No Transpose', 'No Transpose', top,
804 $ nblst, nblst, one, b( 1, j ), ldb,
805 $ work, nblst, zero, work( pw ), top )
806 CALL slacpy( 'All', top, nblst, work( pw ), top,
807 $ b( 1, j ), ldb )
808 ppwo = nblst*nblst + 1
809 j0 = j - nnb
810 DO j = j0, jcol+1, -nnb
811 IF ( blk22 ) THEN
812*
813* Exploit the structure of U.
814*
815 CALL sorm22( 'Right', 'No Transpose', top,
816 $ 2*nnb,
817 $ nnb, nnb, work( ppwo ), 2*nnb,
818 $ b( 1, j ), ldb, work( pw ),
819 $ lwork-pw+1, ierr )
820 ELSE
821*
822* Ignore the structure of U.
823*
824 CALL sgemm( 'No Transpose', 'No Transpose', top,
825 $ 2*nnb, 2*nnb, one, b( 1, j ), ldb,
826 $ work( ppwo ), 2*nnb, zero,
827 $ work( pw ), top )
828 CALL slacpy( 'All', top, 2*nnb, work( pw ), top,
829 $ b( 1, j ), ldb )
830 END IF
831 ppwo = ppwo + 4*nnb*nnb
832 END DO
833 END IF
834*
835* Apply accumulated orthogonal matrices to Z.
836*
837 IF( wantz ) THEN
838 j = ihi - nblst + 1
839 IF ( initq ) THEN
840 topq = max( 2, j - jcol + 1 )
841 nh = ihi - topq + 1
842 ELSE
843 topq = 1
844 nh = n
845 END IF
846 CALL sgemm( 'No Transpose', 'No Transpose', nh,
847 $ nblst, nblst, one, z( topq, j ), ldz,
848 $ work, nblst, zero, work( pw ), nh )
849 CALL slacpy( 'All', nh, nblst, work( pw ), nh,
850 $ z( topq, j ), ldz )
851 ppwo = nblst*nblst + 1
852 j0 = j - nnb
853 DO j = j0, jcol+1, -nnb
854 IF ( initq ) THEN
855 topq = max( 2, j - jcol + 1 )
856 nh = ihi - topq + 1
857 END IF
858 IF ( blk22 ) THEN
859*
860* Exploit the structure of U.
861*
862 CALL sorm22( 'Right', 'No Transpose', nh, 2*nnb,
863 $ nnb, nnb, work( ppwo ), 2*nnb,
864 $ z( topq, j ), ldz, work( pw ),
865 $ lwork-pw+1, ierr )
866 ELSE
867*
868* Ignore the structure of U.
869*
870 CALL sgemm( 'No Transpose', 'No Transpose', nh,
871 $ 2*nnb, 2*nnb, one, z( topq, j ), ldz,
872 $ work( ppwo ), 2*nnb, zero, work( pw ),
873 $ nh )
874 CALL slacpy( 'All', nh, 2*nnb, work( pw ), nh,
875 $ z( topq, j ), ldz )
876 END IF
877 ppwo = ppwo + 4*nnb*nnb
878 END DO
879 END IF
880 END DO
881 END IF
882*
883* Use unblocked code to reduce the rest of the matrix
884* Avoid re-initialization of modified Q and Z.
885*
886 compq2 = compq
887 compz2 = compz
888 IF ( jcol.NE.ilo ) THEN
889 IF ( wantq )
890 $ compq2 = 'V'
891 IF ( wantz )
892 $ compz2 = 'V'
893 END IF
894*
895 IF ( jcol.LT.ihi )
896 $ CALL sgghrd( compq2, compz2, n, jcol, ihi, a, lda, b, ldb,
897 $ q,
898 $ ldq, z, ldz, ierr )
899*
900 work( 1 ) = sroundup_lwork( lwkopt )
901*
902 RETURN
903*
904* End of SGGHD3
905*
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
SGEMM
Definition sgemm.f:188
subroutine sgemv(trans, m, n, alpha, a, lda, x, incx, beta, y, incy)
SGEMV
Definition sgemv.f:158
subroutine sgghrd(compq, compz, n, ilo, ihi, a, lda, b, ldb, q, ldq, z, ldz, info)
SGGHRD
Definition sgghrd.f:206
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
Definition ilaenv.f:160
subroutine slacpy(uplo, m, n, a, lda, b, ldb)
SLACPY copies all or part of one two-dimensional array to another.
Definition slacpy.f:101
subroutine slartg(f, g, c, s, r)
SLARTG generates a plane rotation with real cosine and real sine.
Definition slartg.f90:111
subroutine slaset(uplo, m, n, alpha, beta, a, lda)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition slaset.f:108
logical function lsame(ca, cb)
LSAME
Definition lsame.f:48
subroutine srot(n, sx, incx, sy, incy, c, s)
SROT
Definition srot.f:92
real function sroundup_lwork(lwork)
SROUNDUP_LWORK
subroutine strmv(uplo, trans, diag, n, a, lda, x, incx)
STRMV
Definition strmv.f:147
subroutine sorm22(side, trans, m, n, n1, n2, q, ldq, c, ldc, work, lwork, info)
SORM22 multiplies a general matrix by a banded orthogonal matrix.
Definition sorm22.f:161
Here is the call graph for this function:
Here is the caller graph for this function: