LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
sgghd3.f
1*> \brief \b SGGHD3
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download SGGHD3 + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgghd3.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgghd3.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgghd3.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18* Definition:
19* ===========
20*
21* SUBROUTINE SGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q,
22* LDQ, Z, LDZ, WORK, LWORK, INFO )
23*
24* .. Scalar Arguments ..
25* CHARACTER COMPQ, COMPZ
26* INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK
27* ..
28* .. Array Arguments ..
29* REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
30* $ Z( LDZ, * ), WORK( * )
31* ..
32*
33*
34*> \par Purpose:
35* =============
36*>
37*> \verbatim
38*>
39*> SGGHD3 reduces a pair of real matrices (A,B) to generalized upper
40*> Hessenberg form using orthogonal transformations, where A is a
41*> general matrix and B is upper triangular. The form of the
42*> generalized eigenvalue problem is
43*> A*x = lambda*B*x,
44*> and B is typically made upper triangular by computing its QR
45*> factorization and moving the orthogonal matrix Q to the left side
46*> of the equation.
47*>
48*> This subroutine simultaneously reduces A to a Hessenberg matrix H:
49*> Q**T*A*Z = H
50*> and transforms B to another upper triangular matrix T:
51*> Q**T*B*Z = T
52*> in order to reduce the problem to its standard form
53*> H*y = lambda*T*y
54*> where y = Z**T*x.
55*>
56*> The orthogonal matrices Q and Z are determined as products of Givens
57*> rotations. They may either be formed explicitly, or they may be
58*> postmultiplied into input matrices Q1 and Z1, so that
59*>
60*> Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T
61*>
62*> Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T
63*>
64*> If Q1 is the orthogonal matrix from the QR factorization of B in the
65*> original equation A*x = lambda*B*x, then SGGHD3 reduces the original
66*> problem to generalized Hessenberg form.
67*>
68*> This is a blocked variant of SGGHRD, using matrix-matrix
69*> multiplications for parts of the computation to enhance performance.
70*> \endverbatim
71*
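*> \par Example:
*  =============
*>
*> \verbatim
*>
*>  A minimal sketch of the typical call sequence, assuming square
*>  N-by-N matrices, no prior balancing (ILO = 1, IHI = N), a
*>  Householder scalar array TAU( N ), and a WORK array large enough
*>  for every call; this mirrors the preprocessing performed by the
*>  SGGES-type drivers.
*>
*>  Triangularize B = Q1*R and fold Q1**T into A:
*>
*>     CALL SGEQRF( N, N, B, LDB, TAU, WORK, LWORK, INFO )
*>     CALL SORMQR( 'Left', 'Transpose', N, N, N, B, LDB, TAU,
*>    $             A, LDA, WORK, LWORK, INFO )
*>
*>  Form Q1 explicitly so that COMPQ = 'V' accumulates into it:
*>
*>     CALL SLACPY( 'Full', N, N, B, LDB, Q, LDQ )
*>     CALL SORGQR( N, N, N, Q, LDQ, TAU, WORK, LWORK, INFO )
*>
*>  Reduce (A,B) to generalized Hessenberg form.  Z is initialized
*>  inside SGGHD3 because COMPZ = 'I'; the Householder vectors left
*>  below the diagonal of B by SGEQRF are harmless because SGGHD3
*>  zeroes the strictly lower triangle of B on entry:
*>
*>     CALL SGGHD3( 'V', 'I', N, 1, N, A, LDA, B, LDB, Q, LDQ,
*>    $             Z, LDZ, WORK, LWORK, INFO )
*> \endverbatim
*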
72* Arguments:
73* ==========
74*
75*> \param[in] COMPQ
76*> \verbatim
77*> COMPQ is CHARACTER*1
78*> = 'N': do not compute Q;
79*> = 'I': Q is initialized to the unit matrix, and the
80*> orthogonal matrix Q is returned;
81*> = 'V': Q must contain an orthogonal matrix Q1 on entry,
82*> and the product Q1*Q is returned.
83*> \endverbatim
84*>
85*> \param[in] COMPZ
86*> \verbatim
87*> COMPZ is CHARACTER*1
88*> = 'N': do not compute Z;
89*> = 'I': Z is initialized to the unit matrix, and the
90*> orthogonal matrix Z is returned;
91*> = 'V': Z must contain an orthogonal matrix Z1 on entry,
92*> and the product Z1*Z is returned.
93*> \endverbatim
94*>
95*> \param[in] N
96*> \verbatim
97*> N is INTEGER
98*> The order of the matrices A and B. N >= 0.
99*> \endverbatim
100*>
101*> \param[in] ILO
102*> \verbatim
103*> ILO is INTEGER
104*> \endverbatim
105*>
106*> \param[in] IHI
107*> \verbatim
108*> IHI is INTEGER
109*>
110*> ILO and IHI mark the rows and columns of A which are to be
111*> reduced. It is assumed that A is already upper triangular
112*> in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are
113*> normally set by a previous call to SGGBAL; otherwise they
114*> should be set to 1 and N respectively.
115*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
116*> \endverbatim
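*>
*> \verbatim
*>          A minimal sketch of obtaining ILO and IHI from SGGBAL before
*>          calling SGGHD3, assuming arrays LSCALE( N ), RSCALE( N ) and
*>          a work array for SGGBAL are available ('P' requests
*>          permutations only):
*>
*>             CALL SGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, LSCALE,
*>            $             RSCALE, WORK, INFO )
*> \endverbatim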
117*>
118*> \param[in,out] A
119*> \verbatim
120*> A is REAL array, dimension (LDA, N)
121*> On entry, the N-by-N general matrix to be reduced.
122*> On exit, the upper triangle and the first subdiagonal of A
123*> are overwritten with the upper Hessenberg matrix H, and the
124*> rest is set to zero.
125*> \endverbatim
126*>
127*> \param[in] LDA
128*> \verbatim
129*> LDA is INTEGER
130*> The leading dimension of the array A. LDA >= max(1,N).
131*> \endverbatim
132*>
133*> \param[in,out] B
134*> \verbatim
135*> B is REAL array, dimension (LDB, N)
136*> On entry, the N-by-N upper triangular matrix B.
137*> On exit, the upper triangular matrix T = Q**T B Z. The
138*> elements below the diagonal are set to zero.
139*> \endverbatim
140*>
141*> \param[in] LDB
142*> \verbatim
143*> LDB is INTEGER
144*> The leading dimension of the array B. LDB >= max(1,N).
145*> \endverbatim
146*>
147*> \param[in,out] Q
148*> \verbatim
149*> Q is REAL array, dimension (LDQ, N)
150*> On entry, if COMPQ = 'V', the orthogonal matrix Q1,
151*> typically from the QR factorization of B.
152*> On exit, if COMPQ='I', the orthogonal matrix Q, and if
153*> COMPQ = 'V', the product Q1*Q.
154*> Not referenced if COMPQ='N'.
155*> \endverbatim
156*>
157*> \param[in] LDQ
158*> \verbatim
159*> LDQ is INTEGER
160*> The leading dimension of the array Q.
161*> LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise.
162*> \endverbatim
163*>
164*> \param[in,out] Z
165*> \verbatim
166*> Z is REAL array, dimension (LDZ, N)
167*> On entry, if COMPZ = 'V', the orthogonal matrix Z1.
168*> On exit, if COMPZ='I', the orthogonal matrix Z, and if
169*> COMPZ = 'V', the product Z1*Z.
170*> Not referenced if COMPZ='N'.
171*> \endverbatim
172*>
173*> \param[in] LDZ
174*> \verbatim
175*> LDZ is INTEGER
176*> The leading dimension of the array Z.
177*> LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise.
178*> \endverbatim
179*>
180*> \param[out] WORK
181*> \verbatim
182*> WORK is REAL array, dimension (LWORK)
183*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
184*> \endverbatim
185*>
186*> \param[in] LWORK
187*> \verbatim
188*> LWORK is INTEGER
189*> The length of the array WORK. LWORK >= 1.
190*> For optimum performance LWORK >= 6*N*NB, where NB is the
191*> optimal blocksize.
192*>
193*> If LWORK = -1, then a workspace query is assumed; the routine
194*> only calculates the optimal size of the WORK array, returns
195*> this value as the first entry of the WORK array, and no error
196*> message related to LWORK is issued by XERBLA.
197*> \endverbatim
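*>
*> \verbatim
*>          A minimal sketch of the workspace query, assuming WORK holds
*>          at least one element when the query is made:
*>
*>             LWORK = -1
*>             CALL SGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB,
*>            $             Q, LDQ, Z, LDZ, WORK, LWORK, INFO )
*>             LWORK = INT( WORK( 1 ) )
*>
*>          after which WORK can be (re)allocated with LWORK elements and
*>          the reduction performed by a second call with LWORK > 0.
*> \endverbatim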
198*>
199*> \param[out] INFO
200*> \verbatim
201*> INFO is INTEGER
202*> = 0: successful exit.
203*> < 0: if INFO = -i, the i-th argument had an illegal value.
204*> \endverbatim
205*
206* Authors:
207* ========
208*
209*> \author Univ. of Tennessee
210*> \author Univ. of California Berkeley
211*> \author Univ. of Colorado Denver
212*> \author NAG Ltd.
213*
214*> \ingroup gghd3
215*
216*> \par Further Details:
217* =====================
218*>
219*> \verbatim
220*>
221*> This routine reduces A to Hessenberg form and maintains B in triangular form
222*> using a blocked variant of Moler and Stewart's original algorithm,
223*> as described by Kagstrom, Kressner, Quintana-Orti, and Quintana-Orti
224*> (BIT 2008).
225*> \endverbatim
226*>
227* =====================================================================
228 SUBROUTINE sgghd3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q,
229 $ LDQ, Z, LDZ, WORK, LWORK, INFO )
230*
231* -- LAPACK computational routine --
232* -- LAPACK is a software package provided by Univ. of Tennessee, --
233* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
234*
235 IMPLICIT NONE
236*
237* .. Scalar Arguments ..
238 CHARACTER COMPQ, COMPZ
239 INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK
240* ..
241* .. Array Arguments ..
242 REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
243 $ z( ldz, * ), work( * )
244* ..
245*
246* =====================================================================
247*
248* .. Parameters ..
249 REAL ZERO, ONE
250 parameter( zero = 0.0e+0, one = 1.0e+0 )
251* ..
252* .. Local Scalars ..
253 LOGICAL BLK22, INITQ, INITZ, LQUERY, WANTQ, WANTZ
254 CHARACTER*1 COMPQ2, COMPZ2
255 INTEGER COLA, I, IERR, J, J0, JCOL, JJ, JROW, K,
256 $ kacc22, len, lwkopt, n2nb, nb, nblst, nbmin,
257 $ nh, nnb, nx, ppw, ppwo, pw, top, topq
258 REAL C, C1, C2, S, S1, S2, TEMP, TEMP1, TEMP2, TEMP3
259* ..
260* .. External Functions ..
261 LOGICAL LSAME
262 INTEGER ILAENV
263 REAL SROUNDUP_LWORK
264 EXTERNAL ilaenv, lsame, sroundup_lwork
265* ..
266* .. External Subroutines ..
267 EXTERNAL sgghrd, slartg, slaset, sorm22, srot, sgemm,
268 $ sgemv, strmv, slacpy, xerbla
269*
270* .. Intrinsic Functions ..
271 INTRINSIC max
272* ..
273* .. Executable Statements ..
274*
275* Decode and test the input parameters.
276*
277 info = 0
278 nb = ilaenv( 1, 'SGGHD3', ' ', n, ilo, ihi, -1 )
279 lwkopt = max( 6*n*nb, 1 )
280 work( 1 ) = sroundup_lwork( lwkopt )
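*
*     The bound 6*N*NB is, roughly, 4*N*NB for the accumulated
*     Givens-rotation factors kept at the front of WORK plus 2*N*NB
*     for the panel buffer WORK( PW ) used by the blocked
*     matrix-matrix updates below.
*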
281 initq = lsame( compq, 'I' )
282 wantq = initq .OR. lsame( compq, 'V' )
283 initz = lsame( compz, 'I' )
284 wantz = initz .OR. lsame( compz, 'V' )
285 lquery = ( lwork.EQ.-1 )
286*
287 IF( .NOT.lsame( compq, 'N' ) .AND. .NOT.wantq ) THEN
288 info = -1
289 ELSE IF( .NOT.lsame( compz, 'N' ) .AND. .NOT.wantz ) THEN
290 info = -2
291 ELSE IF( n.LT.0 ) THEN
292 info = -3
293 ELSE IF( ilo.LT.1 ) THEN
294 info = -4
295 ELSE IF( ihi.GT.n .OR. ihi.LT.ilo-1 ) THEN
296 info = -5
297 ELSE IF( lda.LT.max( 1, n ) ) THEN
298 info = -7
299 ELSE IF( ldb.LT.max( 1, n ) ) THEN
300 info = -9
301 ELSE IF( ( wantq .AND. ldq.LT.n ) .OR. ldq.LT.1 ) THEN
302 info = -11
303 ELSE IF( ( wantz .AND. ldz.LT.n ) .OR. ldz.LT.1 ) THEN
304 info = -13
305 ELSE IF( lwork.LT.1 .AND. .NOT.lquery ) THEN
306 info = -15
307 END IF
308 IF( info.NE.0 ) THEN
309 CALL xerbla( 'SGGHD3', -info )
310 RETURN
311 ELSE IF( lquery ) THEN
312 RETURN
313 END IF
314*
315* Initialize Q and Z if desired.
316*
317 IF( initq )
318 $ CALL slaset( 'All', n, n, zero, one, q, ldq )
319 IF( initz )
320 $ CALL slaset( 'All', n, n, zero, one, z, ldz )
321*
322* Zero out lower triangle of B.
323*
324 IF( n.GT.1 )
325 $ CALL slaset( 'Lower', n-1, n-1, zero, zero, b(2, 1), ldb )
326*
327* Quick return if possible
328*
329 nh = ihi - ilo + 1
330 IF( nh.LE.1 ) THEN
331 work( 1 ) = one
332 RETURN
333 END IF
334*
335* Determine the blocksize.
336*
337 nbmin = ilaenv( 2, 'SGGHD3', ' ', n, ilo, ihi, -1 )
338 IF( nb.GT.1 .AND. nb.LT.nh ) THEN
339*
340* Determine when to use unblocked instead of blocked code.
341*
342 nx = max( nb, ilaenv( 3, 'SGGHD3', ' ', n, ilo, ihi, -1 ) )
343 IF( nx.LT.nh ) THEN
344*
345* Determine if workspace is large enough for blocked code.
346*
347 IF( lwork.LT.lwkopt ) THEN
348*
349* Not enough workspace to use optimal NB: determine the
350* minimum value of NB, and reduce NB or force use of
351* unblocked code.
352*
353 nbmin = max( 2, ilaenv( 2, 'SGGHD3', ' ', n, ilo, ihi,
354 $ -1 ) )
355 IF( lwork.GE.6*n*nbmin ) THEN
356 nb = lwork / ( 6*n )
357 ELSE
358 nb = 1
359 END IF
360 END IF
361 END IF
362 END IF
363*
364 IF( nb.LT.nbmin .OR. nb.GE.nh ) THEN
365*
366* Use unblocked code below
367*
368 jcol = ilo
369*
370 ELSE
371*
372* Use blocked code
373*
374 kacc22 = ilaenv( 16, 'SGGHD3', ' ', n, ilo, ihi, -1 )
375 blk22 = kacc22.EQ.2
376 DO jcol = ilo, ihi-2, nb
377 nnb = min( nb, ihi-jcol-1 )
378*
379* Initialize small orthogonal factors that will hold the
380* accumulated Givens rotations in workspace.
381* N2NB denotes the number of 2*NNB-by-2*NNB factors
382* NBLST denotes the (possibly smaller) order of the last
383* factor.
384*
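*           The factors are stored one after another in WORK:
*              WORK( 1 )              the NBLST-by-NBLST factor
*              WORK( NBLST**2 + 1 )   the first 2*NNB-by-2*NNB factor
*              ...                    the remaining 2*NNB-by-2*NNB
*                                     factors, each 4*NNB*NNB entries
*                                     further on
*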
385 n2nb = ( ihi-jcol-1 ) / nnb - 1
386 nblst = ihi - jcol - n2nb*nnb
387 CALL slaset( 'All', nblst, nblst, zero, one, work, nblst )
388 pw = nblst * nblst + 1
389 DO i = 1, n2nb
390 CALL slaset( 'All', 2*nnb, 2*nnb, zero, one,
391 $ work( pw ), 2*nnb )
392 pw = pw + 4*nnb*nnb
393 END DO
394*
395* Reduce columns JCOL:JCOL+NNB-1 of A to Hessenberg form.
396*
397 DO j = jcol, jcol+nnb-1
398*
399* Reduce Jth column of A. Store cosines and sines in Jth
400* column of A and B, respectively.
401*
402 DO i = ihi, j+2, -1
403 temp = a( i-1, j )
404 CALL slartg( temp, a( i, j ), c, s, a( i-1, j ) )
405 a( i, j ) = c
406 b( i, j ) = s
407 END DO
408*
409* Accumulate Givens rotations into workspace array.
410*
411 ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1
412 len = 2 + j - jcol
413 jrow = j + n2nb*nnb + 2
414 DO i = ihi, jrow, -1
415 c = a( i, j )
416 s = b( i, j )
417 DO jj = ppw, ppw+len-1
418 temp = work( jj + nblst )
419 work( jj + nblst ) = c*temp - s*work( jj )
420 work( jj ) = s*temp + c*work( jj )
421 END DO
422 len = len + 1
423 ppw = ppw - nblst - 1
424 END DO
425*
426 ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb
427 j0 = jrow - nnb
428 DO jrow = j0, j+2, -nnb
429 ppw = ppwo
430 len = 2 + j - jcol
431 DO i = jrow+nnb-1, jrow, -1
432 c = a( i, j )
433 s = b( i, j )
434 DO jj = ppw, ppw+len-1
435 temp = work( jj + 2*nnb )
436 work( jj + 2*nnb ) = c*temp - s*work( jj )
437 work( jj ) = s*temp + c*work( jj )
438 END DO
439 len = len + 1
440 ppw = ppw - 2*nnb - 1
441 END DO
442 ppwo = ppwo + 4*nnb*nnb
443 END DO
444*
445* TOP denotes the number of top rows in A and B that will
446* not be updated during the next steps.
447*
448 IF( jcol.LE.2 ) THEN
449 top = 0
450 ELSE
451 top = jcol
452 END IF
453*
454* Propagate transformations through B and replace stored
455* left sines/cosines by right sines/cosines.
456*
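*              Each left rotation applied to B creates a fill-in
*              element B( JJ+1, JJ ); the column rotation that
*              annihilates this fill-in is the "right" rotation whose
*              parameters are stored in A( JJ+1, J ) and B( JJ+1, J )
*              for the later updates from the right.
*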
457 DO jj = n, j+1, -1
458*
459* Update JJth column of B.
460*
461 DO i = min( jj+1, ihi ), j+2, -1
462 c = a( i, j )
463 s = b( i, j )
464 temp = b( i, jj )
465 b( i, jj ) = c*temp - s*b( i-1, jj )
466 b( i-1, jj ) = s*temp + c*b( i-1, jj )
467 END DO
468*
469* Annihilate B( JJ+1, JJ ).
470*
471 IF( jj.LT.ihi ) THEN
472 temp = b( jj+1, jj+1 )
473 CALL slartg( temp, b( jj+1, jj ), c, s,
474 $ b( jj+1, jj+1 ) )
475 b( jj+1, jj ) = zero
476 CALL srot( jj-top, b( top+1, jj+1 ), 1,
477 $ b( top+1, jj ), 1, c, s )
478 a( jj+1, j ) = c
479 b( jj+1, j ) = -s
480 END IF
481 END DO
482*
483* Update A by transformations from right.
484* Explicit loop unrolling provides better performance
485* compared to SLASR.
486* CALL SLASR( 'Right', 'Variable', 'Backward', IHI-TOP,
487* $ IHI-J, A( J+2, J ), B( J+2, J ),
488* $ A( TOP+1, J+1 ), LDA )
489*
490 jj = mod( ihi-j-1, 3 )
491 DO i = ihi-j-3, jj+1, -3
492 c = a( j+1+i, j )
493 s = -b( j+1+i, j )
494 c1 = a( j+2+i, j )
495 s1 = -b( j+2+i, j )
496 c2 = a( j+3+i, j )
497 s2 = -b( j+3+i, j )
498*
499 DO k = top+1, ihi
500 temp = a( k, j+i )
501 temp1 = a( k, j+i+1 )
502 temp2 = a( k, j+i+2 )
503 temp3 = a( k, j+i+3 )
504 a( k, j+i+3 ) = c2*temp3 + s2*temp2
505 temp2 = -s2*temp3 + c2*temp2
506 a( k, j+i+2 ) = c1*temp2 + s1*temp1
507 temp1 = -s1*temp2 + c1*temp1
508 a( k, j+i+1 ) = c*temp1 + s*temp
509 a( k, j+i ) = -s*temp1 + c*temp
510 END DO
511 END DO
512*
513 IF( jj.GT.0 ) THEN
514 DO i = jj, 1, -1
515 CALL srot( ihi-top, a( top+1, j+i+1 ), 1,
516 $ a( top+1, j+i ), 1, a( j+1+i, j ),
517 $ -b( j+1+i, j ) )
518 END DO
519 END IF
520*
521* Update (J+1)th column of A by transformations from left.
522*
523 IF ( j .LT. jcol + nnb - 1 ) THEN
524 len = 1 + j - jcol
525*
526* Multiply with the trailing accumulated orthogonal
527* matrix, which takes the form
528*
529* [ U11 U12 ]
530* U = [ ],
531* [ U21 U22 ]
532*
533* where U21 is a LEN-by-LEN matrix and U12 is lower
534* triangular.
535*
536 jrow = ihi - nblst + 1
537 CALL sgemv( 'Transpose', nblst, len, one, work,
538 $ nblst, a( jrow, j+1 ), 1, zero,
539 $ work( pw ), 1 )
540 ppw = pw + len
541 DO i = jrow, jrow+nblst-len-1
542 work( ppw ) = a( i, j+1 )
543 ppw = ppw + 1
544 END DO
545 CALL strmv( 'Lower', 'Transpose', 'Non-unit',
546 $ nblst-len, work( len*nblst + 1 ), nblst,
547 $ work( pw+len ), 1 )
548 CALL sgemv( 'Transpose', len, nblst-len, one,
549 $ work( (len+1)*nblst - len + 1 ), nblst,
550 $ a( jrow+nblst-len, j+1 ), 1, one,
551 $ work( pw+len ), 1 )
552 ppw = pw
553 DO i = jrow, jrow+nblst-1
554 a( i, j+1 ) = work( ppw )
555 ppw = ppw + 1
556 END DO
557*
558* Multiply with the other accumulated orthogonal
559* matrices, which take the form
560*
561* [ U11 U12 0 ]
562* [ ]
563* U = [ U21 U22 0 ],
564* [ ]
565* [ 0 0 I ]
566*
567* where I denotes the (NNB-LEN)-by-(NNB-LEN) identity
568* matrix, U21 is a LEN-by-LEN upper triangular matrix
569* and U12 is an NNB-by-NNB lower triangular matrix.
570*
571 ppwo = 1 + nblst*nblst
572 j0 = jrow - nnb
573 DO jrow = j0, jcol+1, -nnb
574 ppw = pw + len
575 DO i = jrow, jrow+nnb-1
576 work( ppw ) = a( i, j+1 )
577 ppw = ppw + 1
578 END DO
579 ppw = pw
580 DO i = jrow+nnb, jrow+nnb+len-1
581 work( ppw ) = a( i, j+1 )
582 ppw = ppw + 1
583 END DO
584 CALL strmv( 'Upper', 'Transpose', 'Non-unit', len,
585 $ work( ppwo + nnb ), 2*nnb, work( pw ),
586 $ 1 )
587 CALL strmv( 'Lower', 'Transpose', 'Non-unit', nnb,
588 $ work( ppwo + 2*len*nnb ),
589 $ 2*nnb, work( pw + len ), 1 )
590 CALL sgemv( 'Transpose', nnb, len, one,
591 $ work( ppwo ), 2*nnb, a( jrow, j+1 ), 1,
592 $ one, work( pw ), 1 )
593 CALL sgemv( 'Transpose', len, nnb, one,
594 $ work( ppwo + 2*len*nnb + nnb ), 2*nnb,
595 $ a( jrow+nnb, j+1 ), 1, one,
596 $ work( pw+len ), 1 )
597 ppw = pw
598 DO i = jrow, jrow+len+nnb-1
599 a( i, j+1 ) = work( ppw )
600 ppw = ppw + 1
601 END DO
602 ppwo = ppwo + 4*nnb*nnb
603 END DO
604 END IF
605 END DO
606*
607* Apply accumulated orthogonal matrices to A.
608*
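*        This is the level-3 part of the update: the NBLST-by-NBLST and
*        2*NNB-by-2*NNB factors accumulated above are applied to
*        columns JCOL+NNB:N of A with SGEMM, or with SORM22 when the
*        2-by-2 block structure of a factor is exploited.
*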
609 cola = n - jcol - nnb + 1
610 j = ihi - nblst + 1
611 CALL sgemm( 'Transpose', 'No Transpose', nblst,
612 $ cola, nblst, one, work, nblst,
613 $ a( j, jcol+nnb ), lda, zero, work( pw ),
614 $ nblst )
615 CALL slacpy( 'All', nblst, cola, work( pw ), nblst,
616 $ a( j, jcol+nnb ), lda )
617 ppwo = nblst*nblst + 1
618 j0 = j - nnb
619 DO j = j0, jcol+1, -nnb
620 IF ( blk22 ) THEN
621*
622* Exploit the structure of
623*
624* [ U11 U12 ]
625* U = [ ]
626* [ U21 U22 ],
627*
628* where all blocks are NNB-by-NNB, U21 is upper
629* triangular and U12 is lower triangular.
630*
631 CALL sorm22( 'Left', 'Transpose', 2*nnb, cola, nnb,
632 $ nnb, work( ppwo ), 2*nnb,
633 $ a( j, jcol+nnb ), lda, work( pw ),
634 $ lwork-pw+1, ierr )
635 ELSE
636*
637* Ignore the structure of U.
638*
639 CALL sgemm( 'Transpose', 'No Transpose', 2*nnb,
640 $ cola, 2*nnb, one, work( ppwo ), 2*nnb,
641 $ a( j, jcol+nnb ), lda, zero, work( pw ),
642 $ 2*nnb )
643 CALL slacpy( 'All', 2*nnb, cola, work( pw ), 2*nnb,
644 $ a( j, jcol+nnb ), lda )
645 END IF
646 ppwo = ppwo + 4*nnb*nnb
647 END DO
648*
649* Apply accumulated orthogonal matrices to Q.
650*
651 IF( wantq ) THEN
652 j = ihi - nblst + 1
653 IF ( initq ) THEN
654 topq = max( 2, j - jcol + 1 )
655 nh = ihi - topq + 1
656 ELSE
657 topq = 1
658 nh = n
659 END IF
660 CALL sgemm( 'No Transpose', 'No Transpose', nh,
661 $ nblst, nblst, one, q( topq, j ), ldq,
662 $ work, nblst, zero, work( pw ), nh )
663 CALL slacpy( 'All', nh, nblst, work( pw ), nh,
664 $ q( topq, j ), ldq )
665 ppwo = nblst*nblst + 1
666 j0 = j - nnb
667 DO j = j0, jcol+1, -nnb
668 IF ( initq ) THEN
669 topq = max( 2, j - jcol + 1 )
670 nh = ihi - topq + 1
671 END IF
672 IF ( blk22 ) THEN
673*
674* Exploit the structure of U.
675*
676 CALL sorm22( 'Right', 'No Transpose', nh, 2*nnb,
677 $ nnb, nnb, work( ppwo ), 2*nnb,
678 $ q( topq, j ), ldq, work( pw ),
679 $ lwork-pw+1, ierr )
680 ELSE
681*
682* Ignore the structure of U.
683*
684 CALL sgemm( 'No Transpose', 'No Transpose', nh,
685 $ 2*nnb, 2*nnb, one, q( topq, j ), ldq,
686 $ work( ppwo ), 2*nnb, zero, work( pw ),
687 $ nh )
688 CALL slacpy( 'All', nh, 2*nnb, work( pw ), nh,
689 $ q( topq, j ), ldq )
690 END IF
691 ppwo = ppwo + 4*nnb*nnb
692 END DO
693 END IF
694*
695* Accumulate right Givens rotations if required.
696*
697 IF ( wantz .OR. top.GT.0 ) THEN
698*
699* Initialize small orthogonal factors that will hold the
700* accumulated Givens rotations in workspace.
701*
702 CALL slaset( 'All', nblst, nblst, zero, one, work,
703 $ nblst )
704 pw = nblst * nblst + 1
705 DO i = 1, n2nb
706 CALL slaset( 'All', 2*nnb, 2*nnb, zero, one,
707 $ work( pw ), 2*nnb )
708 pw = pw + 4*nnb*nnb
709 END DO
710*
711* Accumulate Givens rotations into workspace array.
712*
713 DO j = jcol, jcol+nnb-1
714 ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1
715 len = 2 + j - jcol
716 jrow = j + n2nb*nnb + 2
717 DO i = ihi, jrow, -1
718 c = a( i, j )
719 a( i, j ) = zero
720 s = b( i, j )
721 b( i, j ) = zero
722 DO jj = ppw, ppw+len-1
723 temp = work( jj + nblst )
724 work( jj + nblst ) = c*temp - s*work( jj )
725 work( jj ) = s*temp + c*work( jj )
726 END DO
727 len = len + 1
728 ppw = ppw - nblst - 1
729 END DO
730*
731 ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb
732 j0 = jrow - nnb
733 DO jrow = j0, j+2, -nnb
734 ppw = ppwo
735 len = 2 + j - jcol
736 DO i = jrow+nnb-1, jrow, -1
737 c = a( i, j )
738 a( i, j ) = zero
739 s = b( i, j )
740 b( i, j ) = zero
741 DO jj = ppw, ppw+len-1
742 temp = work( jj + 2*nnb )
743 work( jj + 2*nnb ) = c*temp - s*work( jj )
744 work( jj ) = s*temp + c*work( jj )
745 END DO
746 len = len + 1
747 ppw = ppw - 2*nnb - 1
748 END DO
749 ppwo = ppwo + 4*nnb*nnb
750 END DO
751 END DO
752 ELSE
753*
754 CALL slaset( 'Lower', ihi - jcol - 1, nnb, zero, zero,
755 $ a( jcol + 2, jcol ), lda )
756 CALL slaset( 'Lower', ihi - jcol - 1, nnb, zero, zero,
757 $ b( jcol + 2, jcol ), ldb )
758 END IF
759*
760* Apply accumulated orthogonal matrices to A and B.
761*
762 IF ( top.GT.0 ) THEN
763 j = ihi - nblst + 1
764 CALL sgemm( 'No Transpose', 'No Transpose', top,
765 $ nblst, nblst, one, a( 1, j ), lda,
766 $ work, nblst, zero, work( pw ), top )
767 CALL slacpy( 'All', top, nblst, work( pw ), top,
768 $ a( 1, j ), lda )
769 ppwo = nblst*nblst + 1
770 j0 = j - nnb
771 DO j = j0, jcol+1, -nnb
772 IF ( blk22 ) THEN
773*
774* Exploit the structure of U.
775*
776 CALL sorm22( 'Right', 'No Transpose', top, 2*nnb,
777 $ nnb, nnb, work( ppwo ), 2*nnb,
778 $ a( 1, j ), lda, work( pw ),
779 $ lwork-pw+1, ierr )
780 ELSE
781*
782* Ignore the structure of U.
783*
784 CALL sgemm( 'No Transpose', 'No Transpose', top,
785 $ 2*nnb, 2*nnb, one, a( 1, j ), lda,
786 $ work( ppwo ), 2*nnb, zero,
787 $ work( pw ), top )
788 CALL slacpy( 'All', top, 2*nnb, work( pw ), top,
789 $ a( 1, j ), lda )
790 END IF
791 ppwo = ppwo + 4*nnb*nnb
792 END DO
793*
794 j = ihi - nblst + 1
795 CALL sgemm( 'No Transpose', 'No Transpose', top,
796 $ nblst, nblst, one, b( 1, j ), ldb,
797 $ work, nblst, zero, work( pw ), top )
798 CALL slacpy( 'All', top, nblst, work( pw ), top,
799 $ b( 1, j ), ldb )
800 ppwo = nblst*nblst + 1
801 j0 = j - nnb
802 DO j = j0, jcol+1, -nnb
803 IF ( blk22 ) THEN
804*
805* Exploit the structure of U.
806*
807 CALL sorm22( 'Right', 'No Transpose', top, 2*nnb,
808 $ nnb, nnb, work( ppwo ), 2*nnb,
809 $ b( 1, j ), ldb, work( pw ),
810 $ lwork-pw+1, ierr )
811 ELSE
812*
813* Ignore the structure of U.
814*
815 CALL sgemm( 'No Transpose', 'No Transpose', top,
816 $ 2*nnb, 2*nnb, one, b( 1, j ), ldb,
817 $ work( ppwo ), 2*nnb, zero,
818 $ work( pw ), top )
819 CALL slacpy( 'All', top, 2*nnb, work( pw ), top,
820 $ b( 1, j ), ldb )
821 END IF
822 ppwo = ppwo + 4*nnb*nnb
823 END DO
824 END IF
825*
826* Apply accumulated orthogonal matrices to Z.
827*
828 IF( wantz ) THEN
829 j = ihi - nblst + 1
830 IF ( initq ) THEN
831 topq = max( 2, j - jcol + 1 )
832 nh = ihi - topq + 1
833 ELSE
834 topq = 1
835 nh = n
836 END IF
837 CALL sgemm( 'No Transpose', 'No Transpose', nh,
838 $ nblst, nblst, one, z( topq, j ), ldz,
839 $ work, nblst, zero, work( pw ), nh )
840 CALL slacpy( 'All', nh, nblst, work( pw ), nh,
841 $ z( topq, j ), ldz )
842 ppwo = nblst*nblst + 1
843 j0 = j - nnb
844 DO j = j0, jcol+1, -nnb
845 IF ( initq ) THEN
846 topq = max( 2, j - jcol + 1 )
847 nh = ihi - topq + 1
848 END IF
849 IF ( blk22 ) THEN
850*
851* Exploit the structure of U.
852*
853 CALL sorm22( 'Right', 'No Transpose', nh, 2*nnb,
854 $ nnb, nnb, work( ppwo ), 2*nnb,
855 $ z( topq, j ), ldz, work( pw ),
856 $ lwork-pw+1, ierr )
857 ELSE
858*
859* Ignore the structure of U.
860*
861 CALL sgemm( 'No Transpose', 'No Transpose', nh,
862 $ 2*nnb, 2*nnb, one, z( topq, j ), ldz,
863 $ work( ppwo ), 2*nnb, zero, work( pw ),
864 $ nh )
865 CALL slacpy( 'All', nh, 2*nnb, work( pw ), nh,
866 $ z( topq, j ), ldz )
867 END IF
868 ppwo = ppwo + 4*nnb*nnb
869 END DO
870 END IF
871 END DO
872 END IF
873*
874* Use unblocked code to reduce the rest of the matrix
875* Avoid re-initialization of modified Q and Z.
876*
877 compq2 = compq
878 compz2 = compz
879 IF ( jcol.NE.ilo ) THEN
880 IF ( wantq )
881 $ compq2 = 'V'
882 IF ( wantz )
883 $ compz2 = 'V'
884 END IF
885*
886 IF ( jcol.LT.ihi )
887 $ CALL sgghrd( compq2, compz2, n, jcol, ihi, a, lda, b, ldb, q,
888 $ ldq, z, ldz, ierr )
889 work( 1 ) = sroundup_lwork( lwkopt )
890*
891 RETURN
892*
893* End of SGGHD3
894*
895 END