LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
dgghd3.f
Go to the documentation of this file.
1*> \brief \b DGGHD3
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download DGGHD3 + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgghd3.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgghd3.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgghd3.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18* Definition:
19* ===========
20*
21* SUBROUTINE DGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q,
22* LDQ, Z, LDZ, WORK, LWORK, INFO )
23*
24* .. Scalar Arguments ..
25* CHARACTER COMPQ, COMPZ
26* INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK
27* ..
28* .. Array Arguments ..
29* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
30* $ Z( LDZ, * ), WORK( * )
31* ..
32*
33*
34*> \par Purpose:
35* =============
36*>
37*> \verbatim
38*>
39*> DGGHD3 reduces a pair of real matrices (A,B) to generalized upper
40*> Hessenberg form using orthogonal transformations, where A is a
41*> general matrix and B is upper triangular. The form of the
42*> generalized eigenvalue problem is
43*> A*x = lambda*B*x,
44*> and B is typically made upper triangular by computing its QR
45*> factorization and moving the orthogonal matrix Q to the left side
46*> of the equation.
47*>
48*> This subroutine simultaneously reduces A to a Hessenberg matrix H:
49*> Q**T*A*Z = H
50*> and transforms B to another upper triangular matrix T:
51*> Q**T*B*Z = T
52*> in order to reduce the problem to its standard form
53*> H*y = lambda*T*y
54*> where y = Z**T*x.
55*>
56*> The orthogonal matrices Q and Z are determined as products of Givens
57*> rotations. They may either be formed explicitly, or they may be
58*> postmultiplied into input matrices Q1 and Z1, so that
59*>
60*> Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T
61*>
62*> Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T
63*>
64*> If Q1 is the orthogonal matrix from the QR factorization of B in the
65*> original equation A*x = lambda*B*x, then DGGHD3 reduces the original
66*> problem to generalized Hessenberg form.
67*>
68*> This is a blocked variant of DGGHRD, using matrix-matrix
69*> multiplications for parts of the computation to enhance performance.
70*> \endverbatim
71*
72* Arguments:
73* ==========
74*
75*> \param[in] COMPQ
76*> \verbatim
77*> COMPQ is CHARACTER*1
78*> = 'N': do not compute Q;
79*> = 'I': Q is initialized to the unit matrix, and the
80*> orthogonal matrix Q is returned;
81*> = 'V': Q must contain an orthogonal matrix Q1 on entry,
82*> and the product Q1*Q is returned.
83*> \endverbatim
84*>
85*> \param[in] COMPZ
86*> \verbatim
87*> COMPZ is CHARACTER*1
88*> = 'N': do not compute Z;
89*> = 'I': Z is initialized to the unit matrix, and the
90*> orthogonal matrix Z is returned;
91*> = 'V': Z must contain an orthogonal matrix Z1 on entry,
92*> and the product Z1*Z is returned.
93*> \endverbatim
94*>
95*> \param[in] N
96*> \verbatim
97*> N is INTEGER
98*> The order of the matrices A and B. N >= 0.
99*> \endverbatim
100*>
101*> \param[in] ILO
102*> \verbatim
103*> ILO is INTEGER
104*> \endverbatim
105*>
106*> \param[in] IHI
107*> \verbatim
108*> IHI is INTEGER
109*>
110*> ILO and IHI mark the rows and columns of A which are to be
111*> reduced. It is assumed that A is already upper triangular
112*> in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are
113*> normally set by a previous call to DGGBAL; otherwise they
114*> should be set to 1 and N respectively.
115*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
116*> \endverbatim
117*>
118*> \param[in,out] A
119*> \verbatim
120*> A is DOUBLE PRECISION array, dimension (LDA, N)
121*> On entry, the N-by-N general matrix to be reduced.
122*> On exit, the upper triangle and the first subdiagonal of A
123*> are overwritten with the upper Hessenberg matrix H, and the
124*> rest is set to zero.
125*> \endverbatim
126*>
127*> \param[in] LDA
128*> \verbatim
129*> LDA is INTEGER
130*> The leading dimension of the array A. LDA >= max(1,N).
131*> \endverbatim
132*>
133*> \param[in,out] B
134*> \verbatim
135*> B is DOUBLE PRECISION array, dimension (LDB, N)
136*> On entry, the N-by-N upper triangular matrix B.
137*> On exit, the upper triangular matrix T = Q**T B Z. The
138*> elements below the diagonal are set to zero.
139*> \endverbatim
140*>
141*> \param[in] LDB
142*> \verbatim
143*> LDB is INTEGER
144*> The leading dimension of the array B. LDB >= max(1,N).
145*> \endverbatim
146*>
147*> \param[in,out] Q
148*> \verbatim
149*> Q is DOUBLE PRECISION array, dimension (LDQ, N)
150*> On entry, if COMPQ = 'V', the orthogonal matrix Q1,
151*> typically from the QR factorization of B.
152*> On exit, if COMPQ='I', the orthogonal matrix Q, and if
153*> COMPQ = 'V', the product Q1*Q.
154*> Not referenced if COMPQ='N'.
155*> \endverbatim
156*>
157*> \param[in] LDQ
158*> \verbatim
159*> LDQ is INTEGER
160*> The leading dimension of the array Q.
161*> LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise.
162*> \endverbatim
163*>
164*> \param[in,out] Z
165*> \verbatim
166*> Z is DOUBLE PRECISION array, dimension (LDZ, N)
167*> On entry, if COMPZ = 'V', the orthogonal matrix Z1.
168*> On exit, if COMPZ='I', the orthogonal matrix Z, and if
169*> COMPZ = 'V', the product Z1*Z.
170*> Not referenced if COMPZ='N'.
171*> \endverbatim
172*>
173*> \param[in] LDZ
174*> \verbatim
175*> LDZ is INTEGER
176*> The leading dimension of the array Z.
177*> LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise.
178*> \endverbatim
179*>
180*> \param[out] WORK
181*> \verbatim
182*> WORK is DOUBLE PRECISION array, dimension (LWORK)
183*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
184*> \endverbatim
185*>
186*> \param[in] LWORK
187*> \verbatim
188*> LWORK is INTEGER
189*> The length of the array WORK. LWORK >= 1.
190*> For optimum performance LWORK >= 6*N*NB, where NB is the
191*> optimal blocksize.
192*>
193*> If LWORK = -1, then a workspace query is assumed; the routine
194*> only calculates the optimal size of the WORK array, returns
195*> this value as the first entry of the WORK array, and no error
196*> message related to LWORK is issued by XERBLA.
197*> \endverbatim
198*>
199*> \param[out] INFO
200*> \verbatim
201*> INFO is INTEGER
202*> = 0: successful exit.
203*> < 0: if INFO = -i, the i-th argument had an illegal value.
204*> \endverbatim
205*
206* Authors:
207* ========
208*
209*> \author Univ. of Tennessee
210*> \author Univ. of California Berkeley
211*> \author Univ. of Colorado Denver
212*> \author NAG Ltd.
213*
214*> \ingroup gghd3
215*
216*> \par Further Details:
217* =====================
218*>
219*> \verbatim
220*>
221*> This routine reduces A to Hessenberg form and maintains B in triangular form
222*> using a blocked variant of Moler and Stewart's original algorithm,
223*> as described by Kagstrom, Kressner, Quintana-Orti, and Quintana-Orti
224*> (BIT 2008).
225*> \endverbatim
226*>
227* =====================================================================
228 SUBROUTINE dgghd3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q,
229 $ LDQ, Z, LDZ, WORK, LWORK, INFO )
230*
231* -- LAPACK computational routine --
232* -- LAPACK is a software package provided by Univ. of Tennessee, --
233* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
234*
235 IMPLICIT NONE
236*
237* .. Scalar Arguments ..
238 CHARACTER COMPQ, COMPZ
239 INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK
240* ..
241* .. Array Arguments ..
242 DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
243 $ z( ldz, * ), work( * )
244* ..
245*
246* =====================================================================
247*
248* .. Parameters ..
249 DOUBLE PRECISION ZERO, ONE
250 parameter( zero = 0.0d+0, one = 1.0d+0 )
251* ..
252* .. Local Scalars ..
253 LOGICAL BLK22, INITQ, INITZ, LQUERY, WANTQ, WANTZ
254 CHARACTER*1 COMPQ2, COMPZ2
255 INTEGER COLA, I, IERR, J, J0, JCOL, JJ, JROW, K,
256 $ kacc22, len, lwkopt, n2nb, nb, nblst, nbmin,
257 $ nh, nnb, nx, ppw, ppwo, pw, top, topq
258 DOUBLE PRECISION C, C1, C2, S, S1, S2, TEMP, TEMP1, TEMP2, TEMP3
259* ..
260* .. External Functions ..
261 LOGICAL LSAME
262 INTEGER ILAENV
263 EXTERNAL ilaenv, lsame
264* ..
265* .. External Subroutines ..
266 EXTERNAL dgghrd, dlartg, dlaset, dorm22, drot, dgemm,
268* ..
269* .. Intrinsic Functions ..
270 INTRINSIC dble, max
271* ..
272* .. Executable Statements ..
273*
274* Decode and test the input parameters.
275*
276 info = 0
277 nb = ilaenv( 1, 'DGGHD3', ' ', n, ilo, ihi, -1 )
278 lwkopt = max( 6*n*nb, 1 )
279 work( 1 ) = dble( lwkopt )
280 initq = lsame( compq, 'I' )
281 wantq = initq .OR. lsame( compq, 'V' )
282 initz = lsame( compz, 'I' )
283 wantz = initz .OR. lsame( compz, 'V' )
284 lquery = ( lwork.EQ.-1 )
285*
286 IF( .NOT.lsame( compq, 'N' ) .AND. .NOT.wantq ) THEN
287 info = -1
288 ELSE IF( .NOT.lsame( compz, 'N' ) .AND. .NOT.wantz ) THEN
289 info = -2
290 ELSE IF( n.LT.0 ) THEN
291 info = -3
292 ELSE IF( ilo.LT.1 ) THEN
293 info = -4
294 ELSE IF( ihi.GT.n .OR. ihi.LT.ilo-1 ) THEN
295 info = -5
296 ELSE IF( lda.LT.max( 1, n ) ) THEN
297 info = -7
298 ELSE IF( ldb.LT.max( 1, n ) ) THEN
299 info = -9
300 ELSE IF( ( wantq .AND. ldq.LT.n ) .OR. ldq.LT.1 ) THEN
301 info = -11
302 ELSE IF( ( wantz .AND. ldz.LT.n ) .OR. ldz.LT.1 ) THEN
303 info = -13
304 ELSE IF( lwork.LT.1 .AND. .NOT.lquery ) THEN
305 info = -15
306 END IF
307 IF( info.NE.0 ) THEN
308 CALL xerbla( 'DGGHD3', -info )
309 RETURN
310 ELSE IF( lquery ) THEN
311 RETURN
312 END IF
313*
314* Initialize Q and Z if desired.
315*
316 IF( initq )
317 $ CALL dlaset( 'All', n, n, zero, one, q, ldq )
318 IF( initz )
319 $ CALL dlaset( 'All', n, n, zero, one, z, ldz )
320*
321* Zero out lower triangle of B.
322*
323 IF( n.GT.1 )
324 $ CALL dlaset( 'Lower', n-1, n-1, zero, zero, b(2, 1), ldb )
325*
326* Quick return if possible
327*
328 nh = ihi - ilo + 1
329 IF( nh.LE.1 ) THEN
330 work( 1 ) = one
331 RETURN
332 END IF
333*
334* Determine the blocksize.
335*
336 nbmin = ilaenv( 2, 'DGGHD3', ' ', n, ilo, ihi, -1 )
337 IF( nb.GT.1 .AND. nb.LT.nh ) THEN
338*
339* Determine when to use unblocked instead of blocked code.
340*
341 nx = max( nb, ilaenv( 3, 'DGGHD3', ' ', n, ilo, ihi, -1 ) )
342 IF( nx.LT.nh ) THEN
343*
344* Determine if workspace is large enough for blocked code.
345*
346 IF( lwork.LT.lwkopt ) THEN
347*
348* Not enough workspace to use optimal NB: determine the
349* minimum value of NB, and reduce NB or force use of
350* unblocked code.
351*
352 nbmin = max( 2, ilaenv( 2, 'DGGHD3', ' ', n, ilo, ihi,
353 $ -1 ) )
354 IF( lwork.GE.6*n*nbmin ) THEN
355 nb = lwork / ( 6*n )
356 ELSE
357 nb = 1
358 END IF
359 END IF
360 END IF
361 END IF
362*
363 IF( nb.LT.nbmin .OR. nb.GE.nh ) THEN
364*
365* Use unblocked code below
366*
367 jcol = ilo
368*
369 ELSE
370*
371* Use blocked code
372*
373 kacc22 = ilaenv( 16, 'DGGHD3', ' ', n, ilo, ihi, -1 )
374 blk22 = kacc22.EQ.2
375 DO jcol = ilo, ihi-2, nb
376 nnb = min( nb, ihi-jcol-1 )
377*
378* Initialize small orthogonal factors that will hold the
379* accumulated Givens rotations in workspace.
380* N2NB denotes the number of 2*NNB-by-2*NNB factors
381* NBLST denotes the (possibly smaller) order of the last
382* factor.
383*
384 n2nb = ( ihi-jcol-1 ) / nnb - 1
385 nblst = ihi - jcol - n2nb*nnb
386 CALL dlaset( 'All', nblst, nblst, zero, one, work, nblst )
387 pw = nblst * nblst + 1
388 DO i = 1, n2nb
389 CALL dlaset( 'All', 2*nnb, 2*nnb, zero, one,
390 $ work( pw ), 2*nnb )
391 pw = pw + 4*nnb*nnb
392 END DO
393*
394* Reduce columns JCOL:JCOL+NNB-1 of A to Hessenberg form.
395*
396 DO j = jcol, jcol+nnb-1
397*
398* Reduce Jth column of A. Store cosines and sines in Jth
399* column of A and B, respectively.
400*
401 DO i = ihi, j+2, -1
402 temp = a( i-1, j )
403 CALL dlartg( temp, a( i, j ), c, s, a( i-1, j ) )
404 a( i, j ) = c
405 b( i, j ) = s
406 END DO
407*
408* Accumulate Givens rotations into workspace array.
409*
410 ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1
411 len = 2 + j - jcol
412 jrow = j + n2nb*nnb + 2
413 DO i = ihi, jrow, -1
414 c = a( i, j )
415 s = b( i, j )
416 DO jj = ppw, ppw+len-1
417 temp = work( jj + nblst )
418 work( jj + nblst ) = c*temp - s*work( jj )
419 work( jj ) = s*temp + c*work( jj )
420 END DO
421 len = len + 1
422 ppw = ppw - nblst - 1
423 END DO
424*
425 ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb
426 j0 = jrow - nnb
427 DO jrow = j0, j+2, -nnb
428 ppw = ppwo
429 len = 2 + j - jcol
430 DO i = jrow+nnb-1, jrow, -1
431 c = a( i, j )
432 s = b( i, j )
433 DO jj = ppw, ppw+len-1
434 temp = work( jj + 2*nnb )
435 work( jj + 2*nnb ) = c*temp - s*work( jj )
436 work( jj ) = s*temp + c*work( jj )
437 END DO
438 len = len + 1
439 ppw = ppw - 2*nnb - 1
440 END DO
441 ppwo = ppwo + 4*nnb*nnb
442 END DO
443*
444* TOP denotes the number of top rows in A and B that will
445* not be updated during the next steps.
446*
447 IF( jcol.LE.2 ) THEN
448 top = 0
449 ELSE
450 top = jcol
451 END IF
452*
453* Propagate transformations through B and replace stored
454* left sines/cosines by right sines/cosines.
455*
456 DO jj = n, j+1, -1
457*
458* Update JJth column of B.
459*
460 DO i = min( jj+1, ihi ), j+2, -1
461 c = a( i, j )
462 s = b( i, j )
463 temp = b( i, jj )
464 b( i, jj ) = c*temp - s*b( i-1, jj )
465 b( i-1, jj ) = s*temp + c*b( i-1, jj )
466 END DO
467*
468* Annihilate B( JJ+1, JJ ).
469*
470 IF( jj.LT.ihi ) THEN
471 temp = b( jj+1, jj+1 )
472 CALL dlartg( temp, b( jj+1, jj ), c, s,
473 $ b( jj+1, jj+1 ) )
474 b( jj+1, jj ) = zero
475 CALL drot( jj-top, b( top+1, jj+1 ), 1,
476 $ b( top+1, jj ), 1, c, s )
477 a( jj+1, j ) = c
478 b( jj+1, j ) = -s
479 END IF
480 END DO
481*
482* Update A by transformations from right.
483* Explicit loop unrolling provides better performance
484* compared to DLASR.
485* CALL DLASR( 'Right', 'Variable', 'Backward', IHI-TOP,
486* $ IHI-J, A( J+2, J ), B( J+2, J ),
487* $ A( TOP+1, J+1 ), LDA )
488*
489 jj = mod( ihi-j-1, 3 )
490 DO i = ihi-j-3, jj+1, -3
491 c = a( j+1+i, j )
492 s = -b( j+1+i, j )
493 c1 = a( j+2+i, j )
494 s1 = -b( j+2+i, j )
495 c2 = a( j+3+i, j )
496 s2 = -b( j+3+i, j )
497*
498 DO k = top+1, ihi
499 temp = a( k, j+i )
500 temp1 = a( k, j+i+1 )
501 temp2 = a( k, j+i+2 )
502 temp3 = a( k, j+i+3 )
503 a( k, j+i+3 ) = c2*temp3 + s2*temp2
504 temp2 = -s2*temp3 + c2*temp2
505 a( k, j+i+2 ) = c1*temp2 + s1*temp1
506 temp1 = -s1*temp2 + c1*temp1
507 a( k, j+i+1 ) = c*temp1 + s*temp
508 a( k, j+i ) = -s*temp1 + c*temp
509 END DO
510 END DO
511*
512 IF( jj.GT.0 ) THEN
513 DO i = jj, 1, -1
514 CALL drot( ihi-top, a( top+1, j+i+1 ), 1,
515 $ a( top+1, j+i ), 1, a( j+1+i, j ),
516 $ -b( j+1+i, j ) )
517 END DO
518 END IF
519*
520* Update (J+1)th column of A by transformations from left.
521*
522 IF ( j .LT. jcol + nnb - 1 ) THEN
523 len = 1 + j - jcol
524*
525* Multiply with the trailing accumulated orthogonal
526* matrix, which takes the form
527*
528* [ U11 U12 ]
529* U = [ ],
530* [ U21 U22 ]
531*
532* where U21 is a LEN-by-LEN matrix and U12 is lower
533* triangular.
534*
535 jrow = ihi - nblst + 1
536 CALL dgemv( 'Transpose', nblst, len, one, work,
537 $ nblst, a( jrow, j+1 ), 1, zero,
538 $ work( pw ), 1 )
539 ppw = pw + len
540 DO i = jrow, jrow+nblst-len-1
541 work( ppw ) = a( i, j+1 )
542 ppw = ppw + 1
543 END DO
544 CALL dtrmv( 'Lower', 'Transpose', 'Non-unit',
545 $ nblst-len, work( len*nblst + 1 ), nblst,
546 $ work( pw+len ), 1 )
547 CALL dgemv( 'Transpose', len, nblst-len, one,
548 $ work( (len+1)*nblst - len + 1 ), nblst,
549 $ a( jrow+nblst-len, j+1 ), 1, one,
550 $ work( pw+len ), 1 )
551 ppw = pw
552 DO i = jrow, jrow+nblst-1
553 a( i, j+1 ) = work( ppw )
554 ppw = ppw + 1
555 END DO
556*
557* Multiply with the other accumulated orthogonal
558* matrices, which take the form
559*
560* [ U11 U12 0 ]
561* [ ]
562* U = [ U21 U22 0 ],
563* [ ]
564* [ 0 0 I ]
565*
566* where I denotes the (NNB-LEN)-by-(NNB-LEN) identity
567* matrix, U21 is a LEN-by-LEN upper triangular matrix
568* and U12 is an NNB-by-NNB lower triangular matrix.
569*
570 ppwo = 1 + nblst*nblst
571 j0 = jrow - nnb
572 DO jrow = j0, jcol+1, -nnb
573 ppw = pw + len
574 DO i = jrow, jrow+nnb-1
575 work( ppw ) = a( i, j+1 )
576 ppw = ppw + 1
577 END DO
578 ppw = pw
579 DO i = jrow+nnb, jrow+nnb+len-1
580 work( ppw ) = a( i, j+1 )
581 ppw = ppw + 1
582 END DO
583 CALL dtrmv( 'Upper', 'Transpose', 'Non-unit', len,
584 $ work( ppwo + nnb ), 2*nnb, work( pw ),
585 $ 1 )
586 CALL dtrmv( 'Lower', 'Transpose', 'Non-unit', nnb,
587 $ work( ppwo + 2*len*nnb ),
588 $ 2*nnb, work( pw + len ), 1 )
589 CALL dgemv( 'Transpose', nnb, len, one,
590 $ work( ppwo ), 2*nnb, a( jrow, j+1 ), 1,
591 $ one, work( pw ), 1 )
592 CALL dgemv( 'Transpose', len, nnb, one,
593 $ work( ppwo + 2*len*nnb + nnb ), 2*nnb,
594 $ a( jrow+nnb, j+1 ), 1, one,
595 $ work( pw+len ), 1 )
596 ppw = pw
597 DO i = jrow, jrow+len+nnb-1
598 a( i, j+1 ) = work( ppw )
599 ppw = ppw + 1
600 END DO
601 ppwo = ppwo + 4*nnb*nnb
602 END DO
603 END IF
604 END DO
605*
606* Apply accumulated orthogonal matrices to A.
607*
608 cola = n - jcol - nnb + 1
609 j = ihi - nblst + 1
610 CALL dgemm( 'Transpose', 'No Transpose', nblst,
611 $ cola, nblst, one, work, nblst,
612 $ a( j, jcol+nnb ), lda, zero, work( pw ),
613 $ nblst )
614 CALL dlacpy( 'All', nblst, cola, work( pw ), nblst,
615 $ a( j, jcol+nnb ), lda )
616 ppwo = nblst*nblst + 1
617 j0 = j - nnb
618 DO j = j0, jcol+1, -nnb
619 IF ( blk22 ) THEN
620*
621* Exploit the structure of
622*
623* [ U11 U12 ]
624* U = [ ]
625* [ U21 U22 ],
626*
627* where all blocks are NNB-by-NNB, U21 is upper
628* triangular and U12 is lower triangular.
629*
630 CALL dorm22( 'Left', 'Transpose', 2*nnb, cola, nnb,
631 $ nnb, work( ppwo ), 2*nnb,
632 $ a( j, jcol+nnb ), lda, work( pw ),
633 $ lwork-pw+1, ierr )
634 ELSE
635*
636* Ignore the structure of U.
637*
638 CALL dgemm( 'Transpose', 'No Transpose', 2*nnb,
639 $ cola, 2*nnb, one, work( ppwo ), 2*nnb,
640 $ a( j, jcol+nnb ), lda, zero, work( pw ),
641 $ 2*nnb )
642 CALL dlacpy( 'All', 2*nnb, cola, work( pw ), 2*nnb,
643 $ a( j, jcol+nnb ), lda )
644 END IF
645 ppwo = ppwo + 4*nnb*nnb
646 END DO
647*
648* Apply accumulated orthogonal matrices to Q.
649*
650 IF( wantq ) THEN
651 j = ihi - nblst + 1
652 IF ( initq ) THEN
653 topq = max( 2, j - jcol + 1 )
654 nh = ihi - topq + 1
655 ELSE
656 topq = 1
657 nh = n
658 END IF
659 CALL dgemm( 'No Transpose', 'No Transpose', nh,
660 $ nblst, nblst, one, q( topq, j ), ldq,
661 $ work, nblst, zero, work( pw ), nh )
662 CALL dlacpy( 'All', nh, nblst, work( pw ), nh,
663 $ q( topq, j ), ldq )
664 ppwo = nblst*nblst + 1
665 j0 = j - nnb
666 DO j = j0, jcol+1, -nnb
667 IF ( initq ) THEN
668 topq = max( 2, j - jcol + 1 )
669 nh = ihi - topq + 1
670 END IF
671 IF ( blk22 ) THEN
672*
673* Exploit the structure of U.
674*
675 CALL dorm22( 'Right', 'No Transpose', nh, 2*nnb,
676 $ nnb, nnb, work( ppwo ), 2*nnb,
677 $ q( topq, j ), ldq, work( pw ),
678 $ lwork-pw+1, ierr )
679 ELSE
680*
681* Ignore the structure of U.
682*
683 CALL dgemm( 'No Transpose', 'No Transpose', nh,
684 $ 2*nnb, 2*nnb, one, q( topq, j ), ldq,
685 $ work( ppwo ), 2*nnb, zero, work( pw ),
686 $ nh )
687 CALL dlacpy( 'All', nh, 2*nnb, work( pw ), nh,
688 $ q( topq, j ), ldq )
689 END IF
690 ppwo = ppwo + 4*nnb*nnb
691 END DO
692 END IF
693*
694* Accumulate right Givens rotations if required.
695*
696 IF ( wantz .OR. top.GT.0 ) THEN
697*
698* Initialize small orthogonal factors that will hold the
699* accumulated Givens rotations in workspace.
700*
701 CALL dlaset( 'All', nblst, nblst, zero, one, work,
702 $ nblst )
703 pw = nblst * nblst + 1
704 DO i = 1, n2nb
705 CALL dlaset( 'All', 2*nnb, 2*nnb, zero, one,
706 $ work( pw ), 2*nnb )
707 pw = pw + 4*nnb*nnb
708 END DO
709*
710* Accumulate Givens rotations into workspace array.
711*
712 DO j = jcol, jcol+nnb-1
713 ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1
714 len = 2 + j - jcol
715 jrow = j + n2nb*nnb + 2
716 DO i = ihi, jrow, -1
717 c = a( i, j )
718 a( i, j ) = zero
719 s = b( i, j )
720 b( i, j ) = zero
721 DO jj = ppw, ppw+len-1
722 temp = work( jj + nblst )
723 work( jj + nblst ) = c*temp - s*work( jj )
724 work( jj ) = s*temp + c*work( jj )
725 END DO
726 len = len + 1
727 ppw = ppw - nblst - 1
728 END DO
729*
730 ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb
731 j0 = jrow - nnb
732 DO jrow = j0, j+2, -nnb
733 ppw = ppwo
734 len = 2 + j - jcol
735 DO i = jrow+nnb-1, jrow, -1
736 c = a( i, j )
737 a( i, j ) = zero
738 s = b( i, j )
739 b( i, j ) = zero
740 DO jj = ppw, ppw+len-1
741 temp = work( jj + 2*nnb )
742 work( jj + 2*nnb ) = c*temp - s*work( jj )
743 work( jj ) = s*temp + c*work( jj )
744 END DO
745 len = len + 1
746 ppw = ppw - 2*nnb - 1
747 END DO
748 ppwo = ppwo + 4*nnb*nnb
749 END DO
750 END DO
751 ELSE
752*
753 CALL dlaset( 'Lower', ihi - jcol - 1, nnb, zero, zero,
754 $ a( jcol + 2, jcol ), lda )
755 CALL dlaset( 'Lower', ihi - jcol - 1, nnb, zero, zero,
756 $ b( jcol + 2, jcol ), ldb )
757 END IF
758*
759* Apply accumulated orthogonal matrices to A and B.
760*
761 IF ( top.GT.0 ) THEN
762 j = ihi - nblst + 1
763 CALL dgemm( 'No Transpose', 'No Transpose', top,
764 $ nblst, nblst, one, a( 1, j ), lda,
765 $ work, nblst, zero, work( pw ), top )
766 CALL dlacpy( 'All', top, nblst, work( pw ), top,
767 $ a( 1, j ), lda )
768 ppwo = nblst*nblst + 1
769 j0 = j - nnb
770 DO j = j0, jcol+1, -nnb
771 IF ( blk22 ) THEN
772*
773* Exploit the structure of U.
774*
775 CALL dorm22( 'Right', 'No Transpose', top, 2*nnb,
776 $ nnb, nnb, work( ppwo ), 2*nnb,
777 $ a( 1, j ), lda, work( pw ),
778 $ lwork-pw+1, ierr )
779 ELSE
780*
781* Ignore the structure of U.
782*
783 CALL dgemm( 'No Transpose', 'No Transpose', top,
784 $ 2*nnb, 2*nnb, one, a( 1, j ), lda,
785 $ work( ppwo ), 2*nnb, zero,
786 $ work( pw ), top )
787 CALL dlacpy( 'All', top, 2*nnb, work( pw ), top,
788 $ a( 1, j ), lda )
789 END IF
790 ppwo = ppwo + 4*nnb*nnb
791 END DO
792*
793 j = ihi - nblst + 1
794 CALL dgemm( 'No Transpose', 'No Transpose', top,
795 $ nblst, nblst, one, b( 1, j ), ldb,
796 $ work, nblst, zero, work( pw ), top )
797 CALL dlacpy( 'All', top, nblst, work( pw ), top,
798 $ b( 1, j ), ldb )
799 ppwo = nblst*nblst + 1
800 j0 = j - nnb
801 DO j = j0, jcol+1, -nnb
802 IF ( blk22 ) THEN
803*
804* Exploit the structure of U.
805*
806 CALL dorm22( 'Right', 'No Transpose', top, 2*nnb,
807 $ nnb, nnb, work( ppwo ), 2*nnb,
808 $ b( 1, j ), ldb, work( pw ),
809 $ lwork-pw+1, ierr )
810 ELSE
811*
812* Ignore the structure of U.
813*
814 CALL dgemm( 'No Transpose', 'No Transpose', top,
815 $ 2*nnb, 2*nnb, one, b( 1, j ), ldb,
816 $ work( ppwo ), 2*nnb, zero,
817 $ work( pw ), top )
818 CALL dlacpy( 'All', top, 2*nnb, work( pw ), top,
819 $ b( 1, j ), ldb )
820 END IF
821 ppwo = ppwo + 4*nnb*nnb
822 END DO
823 END IF
824*
825* Apply accumulated orthogonal matrices to Z.
826*
827 IF( wantz ) THEN
828 j = ihi - nblst + 1
829 IF ( initq ) THEN
830 topq = max( 2, j - jcol + 1 )
831 nh = ihi - topq + 1
832 ELSE
833 topq = 1
834 nh = n
835 END IF
836 CALL dgemm( 'No Transpose', 'No Transpose', nh,
837 $ nblst, nblst, one, z( topq, j ), ldz,
838 $ work, nblst, zero, work( pw ), nh )
839 CALL dlacpy( 'All', nh, nblst, work( pw ), nh,
840 $ z( topq, j ), ldz )
841 ppwo = nblst*nblst + 1
842 j0 = j - nnb
843 DO j = j0, jcol+1, -nnb
844 IF ( initq ) THEN
845 topq = max( 2, j - jcol + 1 )
846 nh = ihi - topq + 1
847 END IF
848 IF ( blk22 ) THEN
849*
850* Exploit the structure of U.
851*
852 CALL dorm22( 'Right', 'No Transpose', nh, 2*nnb,
853 $ nnb, nnb, work( ppwo ), 2*nnb,
854 $ z( topq, j ), ldz, work( pw ),
855 $ lwork-pw+1, ierr )
856 ELSE
857*
858* Ignore the structure of U.
859*
860 CALL dgemm( 'No Transpose', 'No Transpose', nh,
861 $ 2*nnb, 2*nnb, one, z( topq, j ), ldz,
862 $ work( ppwo ), 2*nnb, zero, work( pw ),
863 $ nh )
864 CALL dlacpy( 'All', nh, 2*nnb, work( pw ), nh,
865 $ z( topq, j ), ldz )
866 END IF
867 ppwo = ppwo + 4*nnb*nnb
868 END DO
869 END IF
870 END DO
871 END IF
872*
873* Use unblocked code to reduce the rest of the matrix
874* Avoid re-initialization of modified Q and Z.
875*
876 compq2 = compq
877 compz2 = compz
878 IF ( jcol.NE.ilo ) THEN
879 IF ( wantq )
880 $ compq2 = 'V'
881 IF ( wantz )
882 $ compz2 = 'V'
883 END IF
884*
885 IF ( jcol.LT.ihi )
886 $ CALL dgghrd( compq2, compz2, n, jcol, ihi, a, lda, b, ldb, q,
887 $ ldq, z, ldz, ierr )
888 work( 1 ) = dble( lwkopt )
889*
890 RETURN
891*
892* End of DGGHD3
893*
894 END
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
DGEMM
Definition dgemm.f:188
subroutine dgemv(trans, m, n, alpha, a, lda, x, incx, beta, y, incy)
DGEMV
Definition dgemv.f:158
subroutine dgghd3(compq, compz, n, ilo, ihi, a, lda, b, ldb, q, ldq, z, ldz, work, lwork, info)
DGGHD3
Definition dgghd3.f:230
subroutine dgghrd(compq, compz, n, ilo, ihi, a, lda, b, ldb, q, ldq, z, ldz, info)
DGGHRD
Definition dgghrd.f:207
subroutine dlacpy(uplo, m, n, a, lda, b, ldb)
DLACPY copies all or part of one two-dimensional array to another.
Definition dlacpy.f:103
subroutine dlartg(f, g, c, s, r)
DLARTG generates a plane rotation with real cosine and real sine.
Definition dlartg.f90:111
subroutine dlaset(uplo, m, n, alpha, beta, a, lda)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition dlaset.f:110
subroutine drot(n, dx, incx, dy, incy, c, s)
DROT
Definition drot.f:92
subroutine dtrmv(uplo, trans, diag, n, a, lda, x, incx)
DTRMV
Definition dtrmv.f:147
subroutine dorm22(side, trans, m, n, n1, n2, q, ldq, c, ldc, work, lwork, info)
DORM22 multiplies a general matrix by a banded orthogonal matrix.
Definition dorm22.f:163