LAPACK 3.12.1
LAPACK: Linear Algebra PACKage
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
sgghd3.f
Go to the documentation of this file.
1*> \brief \b SGGHD3
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> Download SGGHD3 + dependencies
9*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgghd3.f">
10*> [TGZ]</a>
11*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgghd3.f">
12*> [ZIP]</a>
13*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgghd3.f">
14*> [TXT]</a>
15*
16* Definition:
17* ===========
18*
19* SUBROUTINE SGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q,
20* LDQ, Z, LDZ, WORK, LWORK, INFO )
21*
22* .. Scalar Arguments ..
23* CHARACTER COMPQ, COMPZ
24* INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK
25* ..
26* .. Array Arguments ..
27* REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
28* $ Z( LDZ, * ), WORK( * )
29* ..
30*
31*
32*> \par Purpose:
33* =============
34*>
35*> \verbatim
36*>
37*> SGGHD3 reduces a pair of real matrices (A,B) to generalized upper
38*> Hessenberg form using orthogonal transformations, where A is a
39*> general matrix and B is upper triangular. The form of the
40*> generalized eigenvalue problem is
41*> A*x = lambda*B*x,
42*> and B is typically made upper triangular by computing its QR
43*> factorization and moving the orthogonal matrix Q to the left side
44*> of the equation.
45*>
46*> This subroutine simultaneously reduces A to a Hessenberg matrix H:
47*> Q**T*A*Z = H
48*> and transforms B to another upper triangular matrix T:
49*> Q**T*B*Z = T
50*> in order to reduce the problem to its standard form
51*> H*y = lambda*T*y
52*> where y = Z**T*x.
53*>
54*> The orthogonal matrices Q and Z are determined as products of Givens
55*> rotations. They may either be formed explicitly, or they may be
56*> postmultiplied into input matrices Q1 and Z1, so that
57*>
58*> Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T
59*>
60*> Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T
61*>
62*> If Q1 is the orthogonal matrix from the QR factorization of B in the
63*> original equation A*x = lambda*B*x, then SGGHD3 reduces the original
64*> problem to generalized Hessenberg form.
65*>
66*> This is a blocked variant of SGGHRD, using matrix-matrix
67*> multiplications for parts of the computation to enhance performance.
68*> \endverbatim
69*
70* Arguments:
71* ==========
72*
73*> \param[in] COMPQ
74*> \verbatim
75*> COMPQ is CHARACTER*1
76*> = 'N': do not compute Q;
77*> = 'I': Q is initialized to the unit matrix, and the
78*> orthogonal matrix Q is returned;
79*> = 'V': Q must contain an orthogonal matrix Q1 on entry,
80*> and the product Q1*Q is returned.
81*> \endverbatim
82*>
83*> \param[in] COMPZ
84*> \verbatim
85*> COMPZ is CHARACTER*1
86*> = 'N': do not compute Z;
87*> = 'I': Z is initialized to the unit matrix, and the
88*> orthogonal matrix Z is returned;
89*> = 'V': Z must contain an orthogonal matrix Z1 on entry,
90*> and the product Z1*Z is returned.
91*> \endverbatim
92*>
93*> \param[in] N
94*> \verbatim
95*> N is INTEGER
96*> The order of the matrices A and B. N >= 0.
97*> \endverbatim
98*>
99*> \param[in] ILO
100*> \verbatim
101*> ILO is INTEGER
102*> \endverbatim
103*>
104*> \param[in] IHI
105*> \verbatim
106*> IHI is INTEGER
107*>
108*> ILO and IHI mark the rows and columns of A which are to be
109*> reduced. It is assumed that A is already upper triangular
110*> in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are
111*> normally set by a previous call to SGGBAL; otherwise they
112*> should be set to 1 and N respectively.
113*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
114*> \endverbatim
115*>
116*> \param[in,out] A
117*> \verbatim
118*> A is REAL array, dimension (LDA, N)
119*> On entry, the N-by-N general matrix to be reduced.
120*> On exit, the upper triangle and the first subdiagonal of A
121*> are overwritten with the upper Hessenberg matrix H, and the
122*> rest is set to zero.
123*> \endverbatim
124*>
125*> \param[in] LDA
126*> \verbatim
127*> LDA is INTEGER
128*> The leading dimension of the array A. LDA >= max(1,N).
129*> \endverbatim
130*>
131*> \param[in,out] B
132*> \verbatim
133*> B is REAL array, dimension (LDB, N)
134*> On entry, the N-by-N upper triangular matrix B.
135*> On exit, the upper triangular matrix T = Q**T B Z. The
136*> elements below the diagonal are set to zero.
137*> \endverbatim
138*>
139*> \param[in] LDB
140*> \verbatim
141*> LDB is INTEGER
142*> The leading dimension of the array B. LDB >= max(1,N).
143*> \endverbatim
144*>
145*> \param[in,out] Q
146*> \verbatim
147*> Q is REAL array, dimension (LDQ, N)
148*> On entry, if COMPQ = 'V', the orthogonal matrix Q1,
149*> typically from the QR factorization of B.
150*> On exit, if COMPQ='I', the orthogonal matrix Q, and if
151*> COMPQ = 'V', the product Q1*Q.
152*> Not referenced if COMPQ='N'.
153*> \endverbatim
154*>
155*> \param[in] LDQ
156*> \verbatim
157*> LDQ is INTEGER
158*> The leading dimension of the array Q.
159*> LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise.
160*> \endverbatim
161*>
162*> \param[in,out] Z
163*> \verbatim
164*> Z is REAL array, dimension (LDZ, N)
165*> On entry, if COMPZ = 'V', the orthogonal matrix Z1.
166*> On exit, if COMPZ='I', the orthogonal matrix Z, and if
167*> COMPZ = 'V', the product Z1*Z.
168*> Not referenced if COMPZ='N'.
169*> \endverbatim
170*>
171*> \param[in] LDZ
172*> \verbatim
173*> LDZ is INTEGER
174*> The leading dimension of the array Z.
175*> LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise.
176*> \endverbatim
177*>
178*> \param[out] WORK
179*> \verbatim
180*> WORK is REAL array, dimension (MAX(1,LWORK))
181*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
182*> \endverbatim
183*>
184*> \param[in] LWORK
185*> \verbatim
186*> LWORK is INTEGER
187*> The length of the array WORK. LWORK >= 1.
188*> For optimum performance LWORK >= 6*N*NB, where NB is the
189*> optimal blocksize.
190*>
191*> If LWORK = -1, then a workspace query is assumed; the routine
192*> only calculates the optimal size of the WORK array, returns
193*> this value as the first entry of the WORK array, and no error
194*> message related to LWORK is issued by XERBLA.
195*> \endverbatim
196*>
197*> \param[out] INFO
198*> \verbatim
199*> INFO is INTEGER
200*> = 0: successful exit.
201*> < 0: if INFO = -i, the i-th argument had an illegal value.
202*> \endverbatim
203*
204* Authors:
205* ========
206*
207*> \author Univ. of Tennessee
208*> \author Univ. of California Berkeley
209*> \author Univ. of Colorado Denver
210*> \author NAG Ltd.
211*
212*> \ingroup gghd3
213*
214*> \par Further Details:
215* =====================
216*>
217*> \verbatim
218*>
219*> This routine reduces A to Hessenberg form and maintains B in triangular form
220*> using a blocked variant of Moler and Stewart's original algorithm,
221*> as described by Kagstrom, Kressner, Quintana-Orti, and Quintana-Orti
222*> (BIT 2008).
223*> \endverbatim
224*>
225* =====================================================================
226 SUBROUTINE sgghd3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB,
227 $ Q,
228 $ LDQ, Z, LDZ, WORK, LWORK, INFO )
229*
230* -- LAPACK computational routine --
231* -- LAPACK is a software package provided by Univ. of Tennessee, --
232* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
233*
234 IMPLICIT NONE
235*
236* .. Scalar Arguments ..
237 CHARACTER COMPQ, COMPZ
238 INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK
239* ..
240* .. Array Arguments ..
241 REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
242 $ Z( LDZ, * ), WORK( * )
243* ..
244*
245* =====================================================================
246*
247* .. Parameters ..
248 REAL ZERO, ONE
249 PARAMETER ( ZERO = 0.0e+0, one = 1.0e+0 )
250* ..
251* .. Local Scalars ..
252 LOGICAL BLK22, INITQ, INITZ, LQUERY, WANTQ, WANTZ
253 CHARACTER*1 COMPQ2, COMPZ2
254 INTEGER COLA, I, IERR, J, J0, JCOL, JJ, JROW, K,
255 $ kacc22, len, lwkopt, n2nb, nb, nblst, nbmin,
256 $ nh, nnb, nx, ppw, ppwo, pw, top, topq
257 REAL C, C1, C2, S, S1, S2, TEMP, TEMP1, TEMP2, TEMP3
258* ..
259* .. External Functions ..
260 LOGICAL LSAME
261 INTEGER ILAENV
262 REAL SROUNDUP_LWORK
263 EXTERNAL ilaenv, lsame, sroundup_lwork
264* ..
265* .. External Subroutines ..
266 EXTERNAL sgghrd, slartg, slaset, sorm22, srot,
267 $ sgemm,
269* ..
270* .. Intrinsic Functions ..
271 INTRINSIC max
272* ..
273* .. Executable Statements ..
274*
275* Decode and test the input parameters.
276*
277 info = 0
278 nb = ilaenv( 1, 'SGGHD3', ' ', n, ilo, ihi, -1 )
279 nh = ihi - ilo + 1
280 IF( nh.LE.1 ) THEN
281 lwkopt = 1
282 ELSE
283 lwkopt = 6*n*nb
284 END IF
285 work( 1 ) = sroundup_lwork( lwkopt )
286 initq = lsame( compq, 'I' )
287 wantq = initq .OR. lsame( compq, 'V' )
288 initz = lsame( compz, 'I' )
289 wantz = initz .OR. lsame( compz, 'V' )
290 lquery = ( lwork.EQ.-1 )
291*
292 IF( .NOT.lsame( compq, 'N' ) .AND. .NOT.wantq ) THEN
293 info = -1
294 ELSE IF( .NOT.lsame( compz, 'N' ) .AND. .NOT.wantz ) THEN
295 info = -2
296 ELSE IF( n.LT.0 ) THEN
297 info = -3
298 ELSE IF( ilo.LT.1 ) THEN
299 info = -4
300 ELSE IF( ihi.GT.n .OR. ihi.LT.ilo-1 ) THEN
301 info = -5
302 ELSE IF( lda.LT.max( 1, n ) ) THEN
303 info = -7
304 ELSE IF( ldb.LT.max( 1, n ) ) THEN
305 info = -9
306 ELSE IF( ( wantq .AND. ldq.LT.n ) .OR. ldq.LT.1 ) THEN
307 info = -11
308 ELSE IF( ( wantz .AND. ldz.LT.n ) .OR. ldz.LT.1 ) THEN
309 info = -13
310 ELSE IF( lwork.LT.1 .AND. .NOT.lquery ) THEN
311 info = -15
312 END IF
313 IF( info.NE.0 ) THEN
314 CALL xerbla( 'SGGHD3', -info )
315 RETURN
316 ELSE IF( lquery ) THEN
317 RETURN
318 END IF
319*
320* Initialize Q and Z if desired.
321*
322 IF( initq )
323 $ CALL slaset( 'All', n, n, zero, one, q, ldq )
324 IF( initz )
325 $ CALL slaset( 'All', n, n, zero, one, z, ldz )
326*
327* Zero out lower triangle of B.
328*
329 IF( n.GT.1 )
330 $ CALL slaset( 'Lower', n-1, n-1, zero, zero, b(2, 1), ldb )
331*
332* Quick return if possible
333*
334 IF( nh.LE.1 ) THEN
335 work( 1 ) = one
336 RETURN
337 END IF
338*
339* Determine the blocksize.
340*
341 nbmin = ilaenv( 2, 'SGGHD3', ' ', n, ilo, ihi, -1 )
342 IF( nb.GT.1 .AND. nb.LT.nh ) THEN
343*
344* Determine when to use unblocked instead of blocked code.
345*
346 nx = max( nb, ilaenv( 3, 'SGGHD3', ' ', n, ilo, ihi, -1 ) )
347 IF( nx.LT.nh ) THEN
348*
349* Determine if workspace is large enough for blocked code.
350*
351 IF( lwork.LT.lwkopt ) THEN
352*
353* Not enough workspace to use optimal NB: determine the
354* minimum value of NB, and reduce NB or force use of
355* unblocked code.
356*
357 nbmin = max( 2, ilaenv( 2, 'SGGHD3', ' ', n, ilo, ihi,
358 $ -1 ) )
359 IF( lwork.GE.6*n*nbmin ) THEN
360 nb = lwork / ( 6*n )
361 ELSE
362 nb = 1
363 END IF
364 END IF
365 END IF
366 END IF
367*
368 IF( nb.LT.nbmin .OR. nb.GE.nh ) THEN
369*
370* Use unblocked code below
371*
372 jcol = ilo
373*
374 ELSE
375*
376* Use blocked code
377*
378 kacc22 = ilaenv( 16, 'SGGHD3', ' ', n, ilo, ihi, -1 )
379 blk22 = kacc22.EQ.2
380 DO jcol = ilo, ihi-2, nb
381 nnb = min( nb, ihi-jcol-1 )
382*
383* Initialize small orthogonal factors that will hold the
384* accumulated Givens rotations in workspace.
385* N2NB denotes the number of 2*NNB-by-2*NNB factors
386* NBLST denotes the (possibly smaller) order of the last
387* factor.
388*
389 n2nb = ( ihi-jcol-1 ) / nnb - 1
390 nblst = ihi - jcol - n2nb*nnb
391 CALL slaset( 'All', nblst, nblst, zero, one, work,
392 $ nblst )
393 pw = nblst * nblst + 1
394 DO i = 1, n2nb
395 CALL slaset( 'All', 2*nnb, 2*nnb, zero, one,
396 $ work( pw ), 2*nnb )
397 pw = pw + 4*nnb*nnb
398 END DO
399*
400* Reduce columns JCOL:JCOL+NNB-1 of A to Hessenberg form.
401*
402 DO j = jcol, jcol+nnb-1
403*
404* Reduce Jth column of A. Store cosines and sines in Jth
405* column of A and B, respectively.
406*
407 DO i = ihi, j+2, -1
408 temp = a( i-1, j )
409 CALL slartg( temp, a( i, j ), c, s, a( i-1, j ) )
410 a( i, j ) = c
411 b( i, j ) = s
412 END DO
413*
414* Accumulate Givens rotations into workspace array.
415*
416 ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1
417 len = 2 + j - jcol
418 jrow = j + n2nb*nnb + 2
419 DO i = ihi, jrow, -1
420 c = a( i, j )
421 s = b( i, j )
422 DO jj = ppw, ppw+len-1
423 temp = work( jj + nblst )
424 work( jj + nblst ) = c*temp - s*work( jj )
425 work( jj ) = s*temp + c*work( jj )
426 END DO
427 len = len + 1
428 ppw = ppw - nblst - 1
429 END DO
430*
431 ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb
432 j0 = jrow - nnb
433 DO jrow = j0, j+2, -nnb
434 ppw = ppwo
435 len = 2 + j - jcol
436 DO i = jrow+nnb-1, jrow, -1
437 c = a( i, j )
438 s = b( i, j )
439 DO jj = ppw, ppw+len-1
440 temp = work( jj + 2*nnb )
441 work( jj + 2*nnb ) = c*temp - s*work( jj )
442 work( jj ) = s*temp + c*work( jj )
443 END DO
444 len = len + 1
445 ppw = ppw - 2*nnb - 1
446 END DO
447 ppwo = ppwo + 4*nnb*nnb
448 END DO
449*
450* TOP denotes the number of top rows in A and B that will
451* not be updated during the next steps.
452*
453 IF( jcol.LE.2 ) THEN
454 top = 0
455 ELSE
456 top = jcol
457 END IF
458*
459* Propagate transformations through B and replace stored
460* left sines/cosines by right sines/cosines.
461*
462 DO jj = n, j+1, -1
463*
464* Update JJth column of B.
465*
466 DO i = min( jj+1, ihi ), j+2, -1
467 c = a( i, j )
468 s = b( i, j )
469 temp = b( i, jj )
470 b( i, jj ) = c*temp - s*b( i-1, jj )
471 b( i-1, jj ) = s*temp + c*b( i-1, jj )
472 END DO
473*
474* Annihilate B( JJ+1, JJ ).
475*
476 IF( jj.LT.ihi ) THEN
477 temp = b( jj+1, jj+1 )
478 CALL slartg( temp, b( jj+1, jj ), c, s,
479 $ b( jj+1, jj+1 ) )
480 b( jj+1, jj ) = zero
481 CALL srot( jj-top, b( top+1, jj+1 ), 1,
482 $ b( top+1, jj ), 1, c, s )
483 a( jj+1, j ) = c
484 b( jj+1, j ) = -s
485 END IF
486 END DO
487*
488* Update A by transformations from right.
489* Explicit loop unrolling provides better performance
490* compared to SLASR.
491* CALL SLASR( 'Right', 'Variable', 'Backward', IHI-TOP,
492* $ IHI-J, A( J+2, J ), B( J+2, J ),
493* $ A( TOP+1, J+1 ), LDA )
494*
495 jj = mod( ihi-j-1, 3 )
496 DO i = ihi-j-3, jj+1, -3
497 c = a( j+1+i, j )
498 s = -b( j+1+i, j )
499 c1 = a( j+2+i, j )
500 s1 = -b( j+2+i, j )
501 c2 = a( j+3+i, j )
502 s2 = -b( j+3+i, j )
503*
504 DO k = top+1, ihi
505 temp = a( k, j+i )
506 temp1 = a( k, j+i+1 )
507 temp2 = a( k, j+i+2 )
508 temp3 = a( k, j+i+3 )
509 a( k, j+i+3 ) = c2*temp3 + s2*temp2
510 temp2 = -s2*temp3 + c2*temp2
511 a( k, j+i+2 ) = c1*temp2 + s1*temp1
512 temp1 = -s1*temp2 + c1*temp1
513 a( k, j+i+1 ) = c*temp1 + s*temp
514 a( k, j+i ) = -s*temp1 + c*temp
515 END DO
516 END DO
517*
518 IF( jj.GT.0 ) THEN
519 DO i = jj, 1, -1
520 CALL srot( ihi-top, a( top+1, j+i+1 ), 1,
521 $ a( top+1, j+i ), 1, a( j+1+i, j ),
522 $ -b( j+1+i, j ) )
523 END DO
524 END IF
525*
526* Update (J+1)th column of A by transformations from left.
527*
528 IF ( j .LT. jcol + nnb - 1 ) THEN
529 len = 1 + j - jcol
530*
531* Multiply with the trailing accumulated orthogonal
532* matrix, which takes the form
533*
534* [ U11 U12 ]
535* U = [ ],
536* [ U21 U22 ]
537*
538* where U21 is a LEN-by-LEN matrix and U12 is lower
539* triangular.
540*
541 jrow = ihi - nblst + 1
542 CALL sgemv( 'Transpose', nblst, len, one, work,
543 $ nblst, a( jrow, j+1 ), 1, zero,
544 $ work( pw ), 1 )
545 ppw = pw + len
546 DO i = jrow, jrow+nblst-len-1
547 work( ppw ) = a( i, j+1 )
548 ppw = ppw + 1
549 END DO
550 CALL strmv( 'Lower', 'Transpose', 'Non-unit',
551 $ nblst-len, work( len*nblst + 1 ), nblst,
552 $ work( pw+len ), 1 )
553 CALL sgemv( 'Transpose', len, nblst-len, one,
554 $ work( (len+1)*nblst - len + 1 ), nblst,
555 $ a( jrow+nblst-len, j+1 ), 1, one,
556 $ work( pw+len ), 1 )
557 ppw = pw
558 DO i = jrow, jrow+nblst-1
559 a( i, j+1 ) = work( ppw )
560 ppw = ppw + 1
561 END DO
562*
563* Multiply with the other accumulated orthogonal
564* matrices, which take the form
565*
566* [ U11 U12 0 ]
567* [ ]
568* U = [ U21 U22 0 ],
569* [ ]
570* [ 0 0 I ]
571*
572* where I denotes the (NNB-LEN)-by-(NNB-LEN) identity
573* matrix, U21 is a LEN-by-LEN upper triangular matrix
574* and U12 is an NNB-by-NNB lower triangular matrix.
575*
576 ppwo = 1 + nblst*nblst
577 j0 = jrow - nnb
578 DO jrow = j0, jcol+1, -nnb
579 ppw = pw + len
580 DO i = jrow, jrow+nnb-1
581 work( ppw ) = a( i, j+1 )
582 ppw = ppw + 1
583 END DO
584 ppw = pw
585 DO i = jrow+nnb, jrow+nnb+len-1
586 work( ppw ) = a( i, j+1 )
587 ppw = ppw + 1
588 END DO
589 CALL strmv( 'Upper', 'Transpose', 'Non-unit',
590 $ len,
591 $ work( ppwo + nnb ), 2*nnb, work( pw ),
592 $ 1 )
593 CALL strmv( 'Lower', 'Transpose', 'Non-unit',
594 $ nnb,
595 $ work( ppwo + 2*len*nnb ),
596 $ 2*nnb, work( pw + len ), 1 )
597 CALL sgemv( 'Transpose', nnb, len, one,
598 $ work( ppwo ), 2*nnb, a( jrow, j+1 ), 1,
599 $ one, work( pw ), 1 )
600 CALL sgemv( 'Transpose', len, nnb, one,
601 $ work( ppwo + 2*len*nnb + nnb ), 2*nnb,
602 $ a( jrow+nnb, j+1 ), 1, one,
603 $ work( pw+len ), 1 )
604 ppw = pw
605 DO i = jrow, jrow+len+nnb-1
606 a( i, j+1 ) = work( ppw )
607 ppw = ppw + 1
608 END DO
609 ppwo = ppwo + 4*nnb*nnb
610 END DO
611 END IF
612 END DO
613*
614* Apply accumulated orthogonal matrices to A.
615*
616 cola = n - jcol - nnb + 1
617 j = ihi - nblst + 1
618 CALL sgemm( 'Transpose', 'No Transpose', nblst,
619 $ cola, nblst, one, work, nblst,
620 $ a( j, jcol+nnb ), lda, zero, work( pw ),
621 $ nblst )
622 CALL slacpy( 'All', nblst, cola, work( pw ), nblst,
623 $ a( j, jcol+nnb ), lda )
624 ppwo = nblst*nblst + 1
625 j0 = j - nnb
626 DO j = j0, jcol+1, -nnb
627 IF ( blk22 ) THEN
628*
629* Exploit the structure of
630*
631* [ U11 U12 ]
632* U = [ ]
633* [ U21 U22 ],
634*
635* where all blocks are NNB-by-NNB, U21 is upper
636* triangular and U12 is lower triangular.
637*
638 CALL sorm22( 'Left', 'Transpose', 2*nnb, cola, nnb,
639 $ nnb, work( ppwo ), 2*nnb,
640 $ a( j, jcol+nnb ), lda, work( pw ),
641 $ lwork-pw+1, ierr )
642 ELSE
643*
644* Ignore the structure of U.
645*
646 CALL sgemm( 'Transpose', 'No Transpose', 2*nnb,
647 $ cola, 2*nnb, one, work( ppwo ), 2*nnb,
648 $ a( j, jcol+nnb ), lda, zero, work( pw ),
649 $ 2*nnb )
650 CALL slacpy( 'All', 2*nnb, cola, work( pw ), 2*nnb,
651 $ a( j, jcol+nnb ), lda )
652 END IF
653 ppwo = ppwo + 4*nnb*nnb
654 END DO
655*
656* Apply accumulated orthogonal matrices to Q.
657*
658 IF( wantq ) THEN
659 j = ihi - nblst + 1
660 IF ( initq ) THEN
661 topq = max( 2, j - jcol + 1 )
662 nh = ihi - topq + 1
663 ELSE
664 topq = 1
665 nh = n
666 END IF
667 CALL sgemm( 'No Transpose', 'No Transpose', nh,
668 $ nblst, nblst, one, q( topq, j ), ldq,
669 $ work, nblst, zero, work( pw ), nh )
670 CALL slacpy( 'All', nh, nblst, work( pw ), nh,
671 $ q( topq, j ), ldq )
672 ppwo = nblst*nblst + 1
673 j0 = j - nnb
674 DO j = j0, jcol+1, -nnb
675 IF ( initq ) THEN
676 topq = max( 2, j - jcol + 1 )
677 nh = ihi - topq + 1
678 END IF
679 IF ( blk22 ) THEN
680*
681* Exploit the structure of U.
682*
683 CALL sorm22( 'Right', 'No Transpose', nh, 2*nnb,
684 $ nnb, nnb, work( ppwo ), 2*nnb,
685 $ q( topq, j ), ldq, work( pw ),
686 $ lwork-pw+1, ierr )
687 ELSE
688*
689* Ignore the structure of U.
690*
691 CALL sgemm( 'No Transpose', 'No Transpose', nh,
692 $ 2*nnb, 2*nnb, one, q( topq, j ), ldq,
693 $ work( ppwo ), 2*nnb, zero, work( pw ),
694 $ nh )
695 CALL slacpy( 'All', nh, 2*nnb, work( pw ), nh,
696 $ q( topq, j ), ldq )
697 END IF
698 ppwo = ppwo + 4*nnb*nnb
699 END DO
700 END IF
701*
702* Accumulate right Givens rotations if required.
703*
704 IF ( wantz .OR. top.GT.0 ) THEN
705*
706* Initialize small orthogonal factors that will hold the
707* accumulated Givens rotations in workspace.
708*
709 CALL slaset( 'All', nblst, nblst, zero, one, work,
710 $ nblst )
711 pw = nblst * nblst + 1
712 DO i = 1, n2nb
713 CALL slaset( 'All', 2*nnb, 2*nnb, zero, one,
714 $ work( pw ), 2*nnb )
715 pw = pw + 4*nnb*nnb
716 END DO
717*
718* Accumulate Givens rotations into workspace array.
719*
720 DO j = jcol, jcol+nnb-1
721 ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1
722 len = 2 + j - jcol
723 jrow = j + n2nb*nnb + 2
724 DO i = ihi, jrow, -1
725 c = a( i, j )
726 a( i, j ) = zero
727 s = b( i, j )
728 b( i, j ) = zero
729 DO jj = ppw, ppw+len-1
730 temp = work( jj + nblst )
731 work( jj + nblst ) = c*temp - s*work( jj )
732 work( jj ) = s*temp + c*work( jj )
733 END DO
734 len = len + 1
735 ppw = ppw - nblst - 1
736 END DO
737*
738 ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb
739 j0 = jrow - nnb
740 DO jrow = j0, j+2, -nnb
741 ppw = ppwo
742 len = 2 + j - jcol
743 DO i = jrow+nnb-1, jrow, -1
744 c = a( i, j )
745 a( i, j ) = zero
746 s = b( i, j )
747 b( i, j ) = zero
748 DO jj = ppw, ppw+len-1
749 temp = work( jj + 2*nnb )
750 work( jj + 2*nnb ) = c*temp - s*work( jj )
751 work( jj ) = s*temp + c*work( jj )
752 END DO
753 len = len + 1
754 ppw = ppw - 2*nnb - 1
755 END DO
756 ppwo = ppwo + 4*nnb*nnb
757 END DO
758 END DO
759 ELSE
760*
761 CALL slaset( 'Lower', ihi - jcol - 1, nnb, zero, zero,
762 $ a( jcol + 2, jcol ), lda )
763 CALL slaset( 'Lower', ihi - jcol - 1, nnb, zero, zero,
764 $ b( jcol + 2, jcol ), ldb )
765 END IF
766*
767* Apply accumulated orthogonal matrices to A and B.
768*
769 IF ( top.GT.0 ) THEN
770 j = ihi - nblst + 1
771 CALL sgemm( 'No Transpose', 'No Transpose', top,
772 $ nblst, nblst, one, a( 1, j ), lda,
773 $ work, nblst, zero, work( pw ), top )
774 CALL slacpy( 'All', top, nblst, work( pw ), top,
775 $ a( 1, j ), lda )
776 ppwo = nblst*nblst + 1
777 j0 = j - nnb
778 DO j = j0, jcol+1, -nnb
779 IF ( blk22 ) THEN
780*
781* Exploit the structure of U.
782*
783 CALL sorm22( 'Right', 'No Transpose', top,
784 $ 2*nnb,
785 $ nnb, nnb, work( ppwo ), 2*nnb,
786 $ a( 1, j ), lda, work( pw ),
787 $ lwork-pw+1, ierr )
788 ELSE
789*
790* Ignore the structure of U.
791*
792 CALL sgemm( 'No Transpose', 'No Transpose', top,
793 $ 2*nnb, 2*nnb, one, a( 1, j ), lda,
794 $ work( ppwo ), 2*nnb, zero,
795 $ work( pw ), top )
796 CALL slacpy( 'All', top, 2*nnb, work( pw ), top,
797 $ a( 1, j ), lda )
798 END IF
799 ppwo = ppwo + 4*nnb*nnb
800 END DO
801*
802 j = ihi - nblst + 1
803 CALL sgemm( 'No Transpose', 'No Transpose', top,
804 $ nblst, nblst, one, b( 1, j ), ldb,
805 $ work, nblst, zero, work( pw ), top )
806 CALL slacpy( 'All', top, nblst, work( pw ), top,
807 $ b( 1, j ), ldb )
808 ppwo = nblst*nblst + 1
809 j0 = j - nnb
810 DO j = j0, jcol+1, -nnb
811 IF ( blk22 ) THEN
812*
813* Exploit the structure of U.
814*
815 CALL sorm22( 'Right', 'No Transpose', top,
816 $ 2*nnb,
817 $ nnb, nnb, work( ppwo ), 2*nnb,
818 $ b( 1, j ), ldb, work( pw ),
819 $ lwork-pw+1, ierr )
820 ELSE
821*
822* Ignore the structure of U.
823*
824 CALL sgemm( 'No Transpose', 'No Transpose', top,
825 $ 2*nnb, 2*nnb, one, b( 1, j ), ldb,
826 $ work( ppwo ), 2*nnb, zero,
827 $ work( pw ), top )
828 CALL slacpy( 'All', top, 2*nnb, work( pw ), top,
829 $ b( 1, j ), ldb )
830 END IF
831 ppwo = ppwo + 4*nnb*nnb
832 END DO
833 END IF
834*
835* Apply accumulated orthogonal matrices to Z.
836*
837 IF( wantz ) THEN
838 j = ihi - nblst + 1
839 IF ( initq ) THEN
840 topq = max( 2, j - jcol + 1 )
841 nh = ihi - topq + 1
842 ELSE
843 topq = 1
844 nh = n
845 END IF
846 CALL sgemm( 'No Transpose', 'No Transpose', nh,
847 $ nblst, nblst, one, z( topq, j ), ldz,
848 $ work, nblst, zero, work( pw ), nh )
849 CALL slacpy( 'All', nh, nblst, work( pw ), nh,
850 $ z( topq, j ), ldz )
851 ppwo = nblst*nblst + 1
852 j0 = j - nnb
853 DO j = j0, jcol+1, -nnb
854 IF ( initq ) THEN
855 topq = max( 2, j - jcol + 1 )
856 nh = ihi - topq + 1
857 END IF
858 IF ( blk22 ) THEN
859*
860* Exploit the structure of U.
861*
862 CALL sorm22( 'Right', 'No Transpose', nh, 2*nnb,
863 $ nnb, nnb, work( ppwo ), 2*nnb,
864 $ z( topq, j ), ldz, work( pw ),
865 $ lwork-pw+1, ierr )
866 ELSE
867*
868* Ignore the structure of U.
869*
870 CALL sgemm( 'No Transpose', 'No Transpose', nh,
871 $ 2*nnb, 2*nnb, one, z( topq, j ), ldz,
872 $ work( ppwo ), 2*nnb, zero, work( pw ),
873 $ nh )
874 CALL slacpy( 'All', nh, 2*nnb, work( pw ), nh,
875 $ z( topq, j ), ldz )
876 END IF
877 ppwo = ppwo + 4*nnb*nnb
878 END DO
879 END IF
880 END DO
881 END IF
882*
883* Use unblocked code to reduce the rest of the matrix
884* Avoid re-initialization of modified Q and Z.
885*
886 compq2 = compq
887 compz2 = compz
888 IF ( jcol.NE.ilo ) THEN
889 IF ( wantq )
890 $ compq2 = 'V'
891 IF ( wantz )
892 $ compz2 = 'V'
893 END IF
894*
895 IF ( jcol.LT.ihi )
896 $ CALL sgghrd( compq2, compz2, n, jcol, ihi, a, lda, b, ldb,
897 $ q,
898 $ ldq, z, ldz, ierr )
899*
900 work( 1 ) = sroundup_lwork( lwkopt )
901*
902 RETURN
903*
904* End of SGGHD3
905*
906 END
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
SGEMM
Definition sgemm.f:188
subroutine sgemv(trans, m, n, alpha, a, lda, x, incx, beta, y, incy)
SGEMV
Definition sgemv.f:158
subroutine sgghd3(compq, compz, n, ilo, ihi, a, lda, b, ldb, q, ldq, z, ldz, work, lwork, info)
SGGHD3
Definition sgghd3.f:229
subroutine sgghrd(compq, compz, n, ilo, ihi, a, lda, b, ldb, q, ldq, z, ldz, info)
SGGHRD
Definition sgghrd.f:206
subroutine slacpy(uplo, m, n, a, lda, b, ldb)
SLACPY copies all or part of one two-dimensional array to another.
Definition slacpy.f:101
subroutine slartg(f, g, c, s, r)
SLARTG generates a plane rotation with real cosine and real sine.
Definition slartg.f90:111
subroutine slaset(uplo, m, n, alpha, beta, a, lda)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition slaset.f:108
subroutine srot(n, sx, incx, sy, incy, c, s)
SROT
Definition srot.f:92
subroutine strmv(uplo, trans, diag, n, a, lda, x, incx)
STRMV
Definition strmv.f:147
subroutine sorm22(side, trans, m, n, n1, n2, q, ldq, c, ldc, work, lwork, info)
SORM22 multiplies a general matrix by a banded orthogonal matrix.
Definition sorm22.f:161