LAPACK 3.12.1
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
dgghd3.f
Go to the documentation of this file.
1*> \brief \b DGGHD3
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> Download DGGHD3 + dependencies
9*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgghd3.f">
10*> [TGZ]</a>
11*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgghd3.f">
12*> [ZIP]</a>
13*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgghd3.f">
14*> [TXT]</a>
15*
16* Definition:
17* ===========
18*
19* SUBROUTINE DGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q,
20* LDQ, Z, LDZ, WORK, LWORK, INFO )
21*
22* .. Scalar Arguments ..
23* CHARACTER COMPQ, COMPZ
24* INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK
25* ..
26* .. Array Arguments ..
27* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
28* $ Z( LDZ, * ), WORK( * )
29* ..
30*
31*
32*> \par Purpose:
33* =============
34*>
35*> \verbatim
36*>
37*> DGGHD3 reduces a pair of real matrices (A,B) to generalized upper
38*> Hessenberg form using orthogonal transformations, where A is a
39*> general matrix and B is upper triangular. The form of the
40*> generalized eigenvalue problem is
41*> A*x = lambda*B*x,
42*> and B is typically made upper triangular by computing its QR
43*> factorization and moving the orthogonal matrix Q to the left side
44*> of the equation.
45*>
46*> This subroutine simultaneously reduces A to a Hessenberg matrix H:
47*> Q**T*A*Z = H
48*> and transforms B to another upper triangular matrix T:
49*> Q**T*B*Z = T
50*> in order to reduce the problem to its standard form
51*> H*y = lambda*T*y
52*> where y = Z**T*x.
53*>
54*> The orthogonal matrices Q and Z are determined as products of Givens
55*> rotations. They may either be formed explicitly, or they may be
56*> postmultiplied into input matrices Q1 and Z1, so that
57*>
58*> Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T
59*>
60*> Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T
61*>
62*> If Q1 is the orthogonal matrix from the QR factorization of B in the
63*> original equation A*x = lambda*B*x, then DGGHD3 reduces the original
64*> problem to generalized Hessenberg form.
65*>
66*> This is a blocked variant of DGGHRD, using matrix-matrix
67*> multiplications for parts of the computation to enhance performance.
68*> \endverbatim
69*
70* Arguments:
71* ==========
72*
73*> \param[in] COMPQ
74*> \verbatim
75*> COMPQ is CHARACTER*1
76*> = 'N': do not compute Q;
77*> = 'I': Q is initialized to the unit matrix, and the
78*> orthogonal matrix Q is returned;
79*> = 'V': Q must contain an orthogonal matrix Q1 on entry,
80*> and the product Q1*Q is returned.
81*> \endverbatim
82*>
83*> \param[in] COMPZ
84*> \verbatim
85*> COMPZ is CHARACTER*1
86*> = 'N': do not compute Z;
87*> = 'I': Z is initialized to the unit matrix, and the
88*> orthogonal matrix Z is returned;
89*> = 'V': Z must contain an orthogonal matrix Z1 on entry,
90*> and the product Z1*Z is returned.
91*> \endverbatim
92*>
93*> \param[in] N
94*> \verbatim
95*> N is INTEGER
96*> The order of the matrices A and B. N >= 0.
97*> \endverbatim
98*>
99*> \param[in] ILO
100*> \verbatim
101*> ILO is INTEGER
102*> \endverbatim
103*>
104*> \param[in] IHI
105*> \verbatim
106*> IHI is INTEGER
107*>
108*> ILO and IHI mark the rows and columns of A which are to be
109*> reduced. It is assumed that A is already upper triangular
110*> in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are
111*> normally set by a previous call to DGGBAL; otherwise they
112*> should be set to 1 and N respectively.
113*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
114*> \endverbatim
115*>
116*> \param[in,out] A
117*> \verbatim
118*> A is DOUBLE PRECISION array, dimension (LDA, N)
119*> On entry, the N-by-N general matrix to be reduced.
120*> On exit, the upper triangle and the first subdiagonal of A
121*> are overwritten with the upper Hessenberg matrix H, and the
122*> rest is set to zero.
123*> \endverbatim
124*>
125*> \param[in] LDA
126*> \verbatim
127*> LDA is INTEGER
128*> The leading dimension of the array A. LDA >= max(1,N).
129*> \endverbatim
130*>
131*> \param[in,out] B
132*> \verbatim
133*> B is DOUBLE PRECISION array, dimension (LDB, N)
134*> On entry, the N-by-N upper triangular matrix B.
135*> On exit, the upper triangular matrix T = Q**T B Z. The
136*> elements below the diagonal are set to zero.
137*> \endverbatim
138*>
139*> \param[in] LDB
140*> \verbatim
141*> LDB is INTEGER
142*> The leading dimension of the array B. LDB >= max(1,N).
143*> \endverbatim
144*>
145*> \param[in,out] Q
146*> \verbatim
147*> Q is DOUBLE PRECISION array, dimension (LDQ, N)
148*> On entry, if COMPQ = 'V', the orthogonal matrix Q1,
149*> typically from the QR factorization of B.
150*> On exit, if COMPQ='I', the orthogonal matrix Q, and if
151*> COMPQ = 'V', the product Q1*Q.
152*> Not referenced if COMPQ='N'.
153*> \endverbatim
154*>
155*> \param[in] LDQ
156*> \verbatim
157*> LDQ is INTEGER
158*> The leading dimension of the array Q.
159*> LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise.
160*> \endverbatim
161*>
162*> \param[in,out] Z
163*> \verbatim
164*> Z is DOUBLE PRECISION array, dimension (LDZ, N)
165*> On entry, if COMPZ = 'V', the orthogonal matrix Z1.
166*> On exit, if COMPZ='I', the orthogonal matrix Z, and if
167*> COMPZ = 'V', the product Z1*Z.
168*> Not referenced if COMPZ='N'.
169*> \endverbatim
170*>
171*> \param[in] LDZ
172*> \verbatim
173*> LDZ is INTEGER
174*> The leading dimension of the array Z.
175*> LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise.
176*> \endverbatim
177*>
178*> \param[out] WORK
179*> \verbatim
180*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
181*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
182*> \endverbatim
183*>
184*> \param[in] LWORK
185*> \verbatim
186*> LWORK is INTEGER
187*> The length of the array WORK. LWORK >= 1.
188*> For optimum performance LWORK >= 6*N*NB, where NB is the
189*> optimal blocksize.
190*>
191*> If LWORK = -1, then a workspace query is assumed; the routine
192*> only calculates the optimal size of the WORK array, returns
193*> this value as the first entry of the WORK array, and no error
194*> message related to LWORK is issued by XERBLA.
195*> \endverbatim
196*>
197*> \param[out] INFO
198*> \verbatim
199*> INFO is INTEGER
200*> = 0: successful exit.
201*> < 0: if INFO = -i, the i-th argument had an illegal value.
202*> \endverbatim
203*
204* Authors:
205* ========
206*
207*> \author Univ. of Tennessee
208*> \author Univ. of California Berkeley
209*> \author Univ. of Colorado Denver
210*> \author NAG Ltd.
211*
212*> \ingroup gghd3
213*
214*> \par Further Details:
215* =====================
216*>
217*> \verbatim
218*>
219*> This routine reduces A to Hessenberg form and maintains B in triangular form
220*> using a blocked variant of Moler and Stewart's original algorithm,
221*> as described by Kagstrom, Kressner, Quintana-Orti, and Quintana-Orti
222*> (BIT 2008).
223*> \endverbatim
224*>
225* =====================================================================
226 SUBROUTINE dgghd3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB,
227 $ Q,
228 $ LDQ, Z, LDZ, WORK, LWORK, INFO )
229*
230* -- LAPACK computational routine --
231* -- LAPACK is a software package provided by Univ. of Tennessee, --
232* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
233*
234 IMPLICIT NONE
235*
236* .. Scalar Arguments ..
237 CHARACTER COMPQ, COMPZ
238 INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK
239* ..
240* .. Array Arguments ..
241 DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
242 $ Z( LDZ, * ), WORK( * )
243* ..
244*
245* =====================================================================
246*
247* .. Parameters ..
248 DOUBLE PRECISION ZERO, ONE
249 PARAMETER ( ZERO = 0.0d+0, one = 1.0d+0 )
250* ..
251* .. Local Scalars ..
252 LOGICAL BLK22, INITQ, INITZ, LQUERY, WANTQ, WANTZ
253 CHARACTER*1 COMPQ2, COMPZ2
254 INTEGER COLA, I, IERR, J, J0, JCOL, JJ, JROW, K,
255 $ kacc22, len, lwkopt, n2nb, nb, nblst, nbmin,
256 $ nh, nnb, nx, ppw, ppwo, pw, top, topq
257 DOUBLE PRECISION C, C1, C2, S, S1, S2, TEMP, TEMP1, TEMP2, TEMP3
258* ..
259* .. External Functions ..
260 LOGICAL LSAME
261 INTEGER ILAENV
262 EXTERNAL ilaenv, lsame
263* ..
264* .. External Subroutines ..
265 EXTERNAL dgghrd, dlartg, dlaset, dorm22, drot,
266 $ dgemm,
268* ..
269* .. Intrinsic Functions ..
270 INTRINSIC dble, max
271* ..
272* .. Executable Statements ..
273*
274* Decode and test the input parameters.
275*
276 info = 0
277 nb = ilaenv( 1, 'DGGHD3', ' ', n, ilo, ihi, -1 )
278 nh = ihi - ilo + 1
279 IF( nh.LE.1 ) THEN
280 lwkopt = 1
281 ELSE
282 lwkopt = 6*n*nb
283 END IF
284 work( 1 ) = dble( lwkopt )
285 initq = lsame( compq, 'I' )
286 wantq = initq .OR. lsame( compq, 'V' )
287 initz = lsame( compz, 'I' )
288 wantz = initz .OR. lsame( compz, 'V' )
289 lquery = ( lwork.EQ.-1 )
290*
291 IF( .NOT.lsame( compq, 'N' ) .AND. .NOT.wantq ) THEN
292 info = -1
293 ELSE IF( .NOT.lsame( compz, 'N' ) .AND. .NOT.wantz ) THEN
294 info = -2
295 ELSE IF( n.LT.0 ) THEN
296 info = -3
297 ELSE IF( ilo.LT.1 ) THEN
298 info = -4
299 ELSE IF( ihi.GT.n .OR. ihi.LT.ilo-1 ) THEN
300 info = -5
301 ELSE IF( lda.LT.max( 1, n ) ) THEN
302 info = -7
303 ELSE IF( ldb.LT.max( 1, n ) ) THEN
304 info = -9
305 ELSE IF( ( wantq .AND. ldq.LT.n ) .OR. ldq.LT.1 ) THEN
306 info = -11
307 ELSE IF( ( wantz .AND. ldz.LT.n ) .OR. ldz.LT.1 ) THEN
308 info = -13
309 ELSE IF( lwork.LT.1 .AND. .NOT.lquery ) THEN
310 info = -15
311 END IF
312 IF( info.NE.0 ) THEN
313 CALL xerbla( 'DGGHD3', -info )
314 RETURN
315 ELSE IF( lquery ) THEN
316 RETURN
317 END IF
318*
319* Initialize Q and Z if desired.
320*
321 IF( initq )
322 $ CALL dlaset( 'All', n, n, zero, one, q, ldq )
323 IF( initz )
324 $ CALL dlaset( 'All', n, n, zero, one, z, ldz )
325*
326* Zero out lower triangle of B.
327*
328 IF( n.GT.1 )
329 $ CALL dlaset( 'Lower', n-1, n-1, zero, zero, b(2, 1), ldb )
330*
331* Quick return if possible
332*
333 IF( nh.LE.1 ) THEN
334 work( 1 ) = one
335 RETURN
336 END IF
337*
338* Determine the blocksize.
339*
340 nbmin = ilaenv( 2, 'DGGHD3', ' ', n, ilo, ihi, -1 )
341 IF( nb.GT.1 .AND. nb.LT.nh ) THEN
342*
343* Determine when to use unblocked instead of blocked code.
344*
345 nx = max( nb, ilaenv( 3, 'DGGHD3', ' ', n, ilo, ihi, -1 ) )
346 IF( nx.LT.nh ) THEN
347*
348* Determine if workspace is large enough for blocked code.
349*
350 IF( lwork.LT.lwkopt ) THEN
351*
352* Not enough workspace to use optimal NB: determine the
353* minimum value of NB, and reduce NB or force use of
354* unblocked code.
355*
356 nbmin = max( 2, ilaenv( 2, 'DGGHD3', ' ', n, ilo, ihi,
357 $ -1 ) )
358 IF( lwork.GE.6*n*nbmin ) THEN
359 nb = lwork / ( 6*n )
360 ELSE
361 nb = 1
362 END IF
363 END IF
364 END IF
365 END IF
366*
367 IF( nb.LT.nbmin .OR. nb.GE.nh ) THEN
368*
369* Use unblocked code below
370*
371 jcol = ilo
372*
373 ELSE
374*
375* Use blocked code
376*
377 kacc22 = ilaenv( 16, 'DGGHD3', ' ', n, ilo, ihi, -1 )
378 blk22 = kacc22.EQ.2
379 DO jcol = ilo, ihi-2, nb
380 nnb = min( nb, ihi-jcol-1 )
381*
382* Initialize small orthogonal factors that will hold the
383* accumulated Givens rotations in workspace.
384* N2NB denotes the number of 2*NNB-by-2*NNB factors
385* NBLST denotes the (possibly smaller) order of the last
386* factor.
387*
388 n2nb = ( ihi-jcol-1 ) / nnb - 1
389 nblst = ihi - jcol - n2nb*nnb
390 CALL dlaset( 'All', nblst, nblst, zero, one, work,
391 $ nblst )
392 pw = nblst * nblst + 1
393 DO i = 1, n2nb
394 CALL dlaset( 'All', 2*nnb, 2*nnb, zero, one,
395 $ work( pw ), 2*nnb )
396 pw = pw + 4*nnb*nnb
397 END DO
398*
399* Reduce columns JCOL:JCOL+NNB-1 of A to Hessenberg form.
400*
401 DO j = jcol, jcol+nnb-1
402*
403* Reduce Jth column of A. Store cosines and sines in Jth
404* column of A and B, respectively.
405*
406 DO i = ihi, j+2, -1
407 temp = a( i-1, j )
408 CALL dlartg( temp, a( i, j ), c, s, a( i-1, j ) )
409 a( i, j ) = c
410 b( i, j ) = s
411 END DO
412*
413* Accumulate Givens rotations into workspace array.
414*
415 ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1
416 len = 2 + j - jcol
417 jrow = j + n2nb*nnb + 2
418 DO i = ihi, jrow, -1
419 c = a( i, j )
420 s = b( i, j )
421 DO jj = ppw, ppw+len-1
422 temp = work( jj + nblst )
423 work( jj + nblst ) = c*temp - s*work( jj )
424 work( jj ) = s*temp + c*work( jj )
425 END DO
426 len = len + 1
427 ppw = ppw - nblst - 1
428 END DO
429*
430 ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb
431 j0 = jrow - nnb
432 DO jrow = j0, j+2, -nnb
433 ppw = ppwo
434 len = 2 + j - jcol
435 DO i = jrow+nnb-1, jrow, -1
436 c = a( i, j )
437 s = b( i, j )
438 DO jj = ppw, ppw+len-1
439 temp = work( jj + 2*nnb )
440 work( jj + 2*nnb ) = c*temp - s*work( jj )
441 work( jj ) = s*temp + c*work( jj )
442 END DO
443 len = len + 1
444 ppw = ppw - 2*nnb - 1
445 END DO
446 ppwo = ppwo + 4*nnb*nnb
447 END DO
448*
449* TOP denotes the number of top rows in A and B that will
450* not be updated during the next steps.
451*
452 IF( jcol.LE.2 ) THEN
453 top = 0
454 ELSE
455 top = jcol
456 END IF
457*
458* Propagate transformations through B and replace stored
459* left sines/cosines by right sines/cosines.
460*
461 DO jj = n, j+1, -1
462*
463* Update JJth column of B.
464*
465 DO i = min( jj+1, ihi ), j+2, -1
466 c = a( i, j )
467 s = b( i, j )
468 temp = b( i, jj )
469 b( i, jj ) = c*temp - s*b( i-1, jj )
470 b( i-1, jj ) = s*temp + c*b( i-1, jj )
471 END DO
472*
473* Annihilate B( JJ+1, JJ ).
474*
475 IF( jj.LT.ihi ) THEN
476 temp = b( jj+1, jj+1 )
477 CALL dlartg( temp, b( jj+1, jj ), c, s,
478 $ b( jj+1, jj+1 ) )
479 b( jj+1, jj ) = zero
480 CALL drot( jj-top, b( top+1, jj+1 ), 1,
481 $ b( top+1, jj ), 1, c, s )
482 a( jj+1, j ) = c
483 b( jj+1, j ) = -s
484 END IF
485 END DO
486*
487* Update A by transformations from right.
488* Explicit loop unrolling provides better performance
489* compared to DLASR.
490* CALL DLASR( 'Right', 'Variable', 'Backward', IHI-TOP,
491* $ IHI-J, A( J+2, J ), B( J+2, J ),
492* $ A( TOP+1, J+1 ), LDA )
493*
494 jj = mod( ihi-j-1, 3 )
495 DO i = ihi-j-3, jj+1, -3
496 c = a( j+1+i, j )
497 s = -b( j+1+i, j )
498 c1 = a( j+2+i, j )
499 s1 = -b( j+2+i, j )
500 c2 = a( j+3+i, j )
501 s2 = -b( j+3+i, j )
502*
503 DO k = top+1, ihi
504 temp = a( k, j+i )
505 temp1 = a( k, j+i+1 )
506 temp2 = a( k, j+i+2 )
507 temp3 = a( k, j+i+3 )
508 a( k, j+i+3 ) = c2*temp3 + s2*temp2
509 temp2 = -s2*temp3 + c2*temp2
510 a( k, j+i+2 ) = c1*temp2 + s1*temp1
511 temp1 = -s1*temp2 + c1*temp1
512 a( k, j+i+1 ) = c*temp1 + s*temp
513 a( k, j+i ) = -s*temp1 + c*temp
514 END DO
515 END DO
516*
517 IF( jj.GT.0 ) THEN
518 DO i = jj, 1, -1
519 CALL drot( ihi-top, a( top+1, j+i+1 ), 1,
520 $ a( top+1, j+i ), 1, a( j+1+i, j ),
521 $ -b( j+1+i, j ) )
522 END DO
523 END IF
524*
525* Update (J+1)th column of A by transformations from left.
526*
527 IF ( j .LT. jcol + nnb - 1 ) THEN
528 len = 1 + j - jcol
529*
530* Multiply with the trailing accumulated orthogonal
531* matrix, which takes the form
532*
533* [ U11 U12 ]
534* U = [ ],
535* [ U21 U22 ]
536*
537* where U21 is a LEN-by-LEN matrix and U12 is lower
538* triangular.
539*
540 jrow = ihi - nblst + 1
541 CALL dgemv( 'Transpose', nblst, len, one, work,
542 $ nblst, a( jrow, j+1 ), 1, zero,
543 $ work( pw ), 1 )
544 ppw = pw + len
545 DO i = jrow, jrow+nblst-len-1
546 work( ppw ) = a( i, j+1 )
547 ppw = ppw + 1
548 END DO
549 CALL dtrmv( 'Lower', 'Transpose', 'Non-unit',
550 $ nblst-len, work( len*nblst + 1 ), nblst,
551 $ work( pw+len ), 1 )
552 CALL dgemv( 'Transpose', len, nblst-len, one,
553 $ work( (len+1)*nblst - len + 1 ), nblst,
554 $ a( jrow+nblst-len, j+1 ), 1, one,
555 $ work( pw+len ), 1 )
556 ppw = pw
557 DO i = jrow, jrow+nblst-1
558 a( i, j+1 ) = work( ppw )
559 ppw = ppw + 1
560 END DO
561*
562* Multiply with the other accumulated orthogonal
563* matrices, which take the form
564*
565* [ U11 U12 0 ]
566* [ ]
567* U = [ U21 U22 0 ],
568* [ ]
569* [ 0 0 I ]
570*
571* where I denotes the (NNB-LEN)-by-(NNB-LEN) identity
572* matrix, U21 is a LEN-by-LEN upper triangular matrix
573* and U12 is an NNB-by-NNB lower triangular matrix.
574*
575 ppwo = 1 + nblst*nblst
576 j0 = jrow - nnb
577 DO jrow = j0, jcol+1, -nnb
578 ppw = pw + len
579 DO i = jrow, jrow+nnb-1
580 work( ppw ) = a( i, j+1 )
581 ppw = ppw + 1
582 END DO
583 ppw = pw
584 DO i = jrow+nnb, jrow+nnb+len-1
585 work( ppw ) = a( i, j+1 )
586 ppw = ppw + 1
587 END DO
588 CALL dtrmv( 'Upper', 'Transpose', 'Non-unit',
589 $ len,
590 $ work( ppwo + nnb ), 2*nnb, work( pw ),
591 $ 1 )
592 CALL dtrmv( 'Lower', 'Transpose', 'Non-unit',
593 $ nnb,
594 $ work( ppwo + 2*len*nnb ),
595 $ 2*nnb, work( pw + len ), 1 )
596 CALL dgemv( 'Transpose', nnb, len, one,
597 $ work( ppwo ), 2*nnb, a( jrow, j+1 ), 1,
598 $ one, work( pw ), 1 )
599 CALL dgemv( 'Transpose', len, nnb, one,
600 $ work( ppwo + 2*len*nnb + nnb ), 2*nnb,
601 $ a( jrow+nnb, j+1 ), 1, one,
602 $ work( pw+len ), 1 )
603 ppw = pw
604 DO i = jrow, jrow+len+nnb-1
605 a( i, j+1 ) = work( ppw )
606 ppw = ppw + 1
607 END DO
608 ppwo = ppwo + 4*nnb*nnb
609 END DO
610 END IF
611 END DO
612*
613* Apply accumulated orthogonal matrices to A.
614*
615 cola = n - jcol - nnb + 1
616 j = ihi - nblst + 1
617 CALL dgemm( 'Transpose', 'No Transpose', nblst,
618 $ cola, nblst, one, work, nblst,
619 $ a( j, jcol+nnb ), lda, zero, work( pw ),
620 $ nblst )
621 CALL dlacpy( 'All', nblst, cola, work( pw ), nblst,
622 $ a( j, jcol+nnb ), lda )
623 ppwo = nblst*nblst + 1
624 j0 = j - nnb
625 DO j = j0, jcol+1, -nnb
626 IF ( blk22 ) THEN
627*
628* Exploit the structure of
629*
630* [ U11 U12 ]
631* U = [ ]
632* [ U21 U22 ],
633*
634* where all blocks are NNB-by-NNB, U21 is upper
635* triangular and U12 is lower triangular.
636*
637 CALL dorm22( 'Left', 'Transpose', 2*nnb, cola, nnb,
638 $ nnb, work( ppwo ), 2*nnb,
639 $ a( j, jcol+nnb ), lda, work( pw ),
640 $ lwork-pw+1, ierr )
641 ELSE
642*
643* Ignore the structure of U.
644*
645 CALL dgemm( 'Transpose', 'No Transpose', 2*nnb,
646 $ cola, 2*nnb, one, work( ppwo ), 2*nnb,
647 $ a( j, jcol+nnb ), lda, zero, work( pw ),
648 $ 2*nnb )
649 CALL dlacpy( 'All', 2*nnb, cola, work( pw ), 2*nnb,
650 $ a( j, jcol+nnb ), lda )
651 END IF
652 ppwo = ppwo + 4*nnb*nnb
653 END DO
654*
655* Apply accumulated orthogonal matrices to Q.
656*
657 IF( wantq ) THEN
658 j = ihi - nblst + 1
659 IF ( initq ) THEN
660 topq = max( 2, j - jcol + 1 )
661 nh = ihi - topq + 1
662 ELSE
663 topq = 1
664 nh = n
665 END IF
666 CALL dgemm( 'No Transpose', 'No Transpose', nh,
667 $ nblst, nblst, one, q( topq, j ), ldq,
668 $ work, nblst, zero, work( pw ), nh )
669 CALL dlacpy( 'All', nh, nblst, work( pw ), nh,
670 $ q( topq, j ), ldq )
671 ppwo = nblst*nblst + 1
672 j0 = j - nnb
673 DO j = j0, jcol+1, -nnb
674 IF ( initq ) THEN
675 topq = max( 2, j - jcol + 1 )
676 nh = ihi - topq + 1
677 END IF
678 IF ( blk22 ) THEN
679*
680* Exploit the structure of U.
681*
682 CALL dorm22( 'Right', 'No Transpose', nh, 2*nnb,
683 $ nnb, nnb, work( ppwo ), 2*nnb,
684 $ q( topq, j ), ldq, work( pw ),
685 $ lwork-pw+1, ierr )
686 ELSE
687*
688* Ignore the structure of U.
689*
690 CALL dgemm( 'No Transpose', 'No Transpose', nh,
691 $ 2*nnb, 2*nnb, one, q( topq, j ), ldq,
692 $ work( ppwo ), 2*nnb, zero, work( pw ),
693 $ nh )
694 CALL dlacpy( 'All', nh, 2*nnb, work( pw ), nh,
695 $ q( topq, j ), ldq )
696 END IF
697 ppwo = ppwo + 4*nnb*nnb
698 END DO
699 END IF
700*
701* Accumulate right Givens rotations if required.
702*
703 IF ( wantz .OR. top.GT.0 ) THEN
704*
705* Initialize small orthogonal factors that will hold the
706* accumulated Givens rotations in workspace.
707*
708 CALL dlaset( 'All', nblst, nblst, zero, one, work,
709 $ nblst )
710 pw = nblst * nblst + 1
711 DO i = 1, n2nb
712 CALL dlaset( 'All', 2*nnb, 2*nnb, zero, one,
713 $ work( pw ), 2*nnb )
714 pw = pw + 4*nnb*nnb
715 END DO
716*
717* Accumulate Givens rotations into workspace array.
718*
719 DO j = jcol, jcol+nnb-1
720 ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1
721 len = 2 + j - jcol
722 jrow = j + n2nb*nnb + 2
723 DO i = ihi, jrow, -1
724 c = a( i, j )
725 a( i, j ) = zero
726 s = b( i, j )
727 b( i, j ) = zero
728 DO jj = ppw, ppw+len-1
729 temp = work( jj + nblst )
730 work( jj + nblst ) = c*temp - s*work( jj )
731 work( jj ) = s*temp + c*work( jj )
732 END DO
733 len = len + 1
734 ppw = ppw - nblst - 1
735 END DO
736*
737 ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb
738 j0 = jrow - nnb
739 DO jrow = j0, j+2, -nnb
740 ppw = ppwo
741 len = 2 + j - jcol
742 DO i = jrow+nnb-1, jrow, -1
743 c = a( i, j )
744 a( i, j ) = zero
745 s = b( i, j )
746 b( i, j ) = zero
747 DO jj = ppw, ppw+len-1
748 temp = work( jj + 2*nnb )
749 work( jj + 2*nnb ) = c*temp - s*work( jj )
750 work( jj ) = s*temp + c*work( jj )
751 END DO
752 len = len + 1
753 ppw = ppw - 2*nnb - 1
754 END DO
755 ppwo = ppwo + 4*nnb*nnb
756 END DO
757 END DO
758 ELSE
759*
760 CALL dlaset( 'Lower', ihi - jcol - 1, nnb, zero, zero,
761 $ a( jcol + 2, jcol ), lda )
762 CALL dlaset( 'Lower', ihi - jcol - 1, nnb, zero, zero,
763 $ b( jcol + 2, jcol ), ldb )
764 END IF
765*
766* Apply accumulated orthogonal matrices to A and B.
767*
768 IF ( top.GT.0 ) THEN
769 j = ihi - nblst + 1
770 CALL dgemm( 'No Transpose', 'No Transpose', top,
771 $ nblst, nblst, one, a( 1, j ), lda,
772 $ work, nblst, zero, work( pw ), top )
773 CALL dlacpy( 'All', top, nblst, work( pw ), top,
774 $ a( 1, j ), lda )
775 ppwo = nblst*nblst + 1
776 j0 = j - nnb
777 DO j = j0, jcol+1, -nnb
778 IF ( blk22 ) THEN
779*
780* Exploit the structure of U.
781*
782 CALL dorm22( 'Right', 'No Transpose', top,
783 $ 2*nnb,
784 $ nnb, nnb, work( ppwo ), 2*nnb,
785 $ a( 1, j ), lda, work( pw ),
786 $ lwork-pw+1, ierr )
787 ELSE
788*
789* Ignore the structure of U.
790*
791 CALL dgemm( 'No Transpose', 'No Transpose', top,
792 $ 2*nnb, 2*nnb, one, a( 1, j ), lda,
793 $ work( ppwo ), 2*nnb, zero,
794 $ work( pw ), top )
795 CALL dlacpy( 'All', top, 2*nnb, work( pw ), top,
796 $ a( 1, j ), lda )
797 END IF
798 ppwo = ppwo + 4*nnb*nnb
799 END DO
800*
801 j = ihi - nblst + 1
802 CALL dgemm( 'No Transpose', 'No Transpose', top,
803 $ nblst, nblst, one, b( 1, j ), ldb,
804 $ work, nblst, zero, work( pw ), top )
805 CALL dlacpy( 'All', top, nblst, work( pw ), top,
806 $ b( 1, j ), ldb )
807 ppwo = nblst*nblst + 1
808 j0 = j - nnb
809 DO j = j0, jcol+1, -nnb
810 IF ( blk22 ) THEN
811*
812* Exploit the structure of U.
813*
814 CALL dorm22( 'Right', 'No Transpose', top,
815 $ 2*nnb,
816 $ nnb, nnb, work( ppwo ), 2*nnb,
817 $ b( 1, j ), ldb, work( pw ),
818 $ lwork-pw+1, ierr )
819 ELSE
820*
821* Ignore the structure of U.
822*
823 CALL dgemm( 'No Transpose', 'No Transpose', top,
824 $ 2*nnb, 2*nnb, one, b( 1, j ), ldb,
825 $ work( ppwo ), 2*nnb, zero,
826 $ work( pw ), top )
827 CALL dlacpy( 'All', top, 2*nnb, work( pw ), top,
828 $ b( 1, j ), ldb )
829 END IF
830 ppwo = ppwo + 4*nnb*nnb
831 END DO
832 END IF
833*
834* Apply accumulated orthogonal matrices to Z.
835*
836 IF( wantz ) THEN
837 j = ihi - nblst + 1
838 IF ( initq ) THEN
839 topq = max( 2, j - jcol + 1 )
840 nh = ihi - topq + 1
841 ELSE
842 topq = 1
843 nh = n
844 END IF
845 CALL dgemm( 'No Transpose', 'No Transpose', nh,
846 $ nblst, nblst, one, z( topq, j ), ldz,
847 $ work, nblst, zero, work( pw ), nh )
848 CALL dlacpy( 'All', nh, nblst, work( pw ), nh,
849 $ z( topq, j ), ldz )
850 ppwo = nblst*nblst + 1
851 j0 = j - nnb
852 DO j = j0, jcol+1, -nnb
853 IF ( initq ) THEN
854 topq = max( 2, j - jcol + 1 )
855 nh = ihi - topq + 1
856 END IF
857 IF ( blk22 ) THEN
858*
859* Exploit the structure of U.
860*
861 CALL dorm22( 'Right', 'No Transpose', nh, 2*nnb,
862 $ nnb, nnb, work( ppwo ), 2*nnb,
863 $ z( topq, j ), ldz, work( pw ),
864 $ lwork-pw+1, ierr )
865 ELSE
866*
867* Ignore the structure of U.
868*
869 CALL dgemm( 'No Transpose', 'No Transpose', nh,
870 $ 2*nnb, 2*nnb, one, z( topq, j ), ldz,
871 $ work( ppwo ), 2*nnb, zero, work( pw ),
872 $ nh )
873 CALL dlacpy( 'All', nh, 2*nnb, work( pw ), nh,
874 $ z( topq, j ), ldz )
875 END IF
876 ppwo = ppwo + 4*nnb*nnb
877 END DO
878 END IF
879 END DO
880 END IF
881*
882* Use unblocked code to reduce the rest of the matrix
883* Avoid re-initialization of modified Q and Z.
884*
885 compq2 = compq
886 compz2 = compz
887 IF ( jcol.NE.ilo ) THEN
888 IF ( wantq )
889 $ compq2 = 'V'
890 IF ( wantz )
891 $ compz2 = 'V'
892 END IF
893*
894 IF ( jcol.LT.ihi )
895 $ CALL dgghrd( compq2, compz2, n, jcol, ihi, a, lda, b, ldb,
896 $ q,
897 $ ldq, z, ldz, ierr )
898*
899 work( 1 ) = dble( lwkopt )
900*
901 RETURN
902*
903* End of DGGHD3
904*
905 END
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
DGEMM
Definition dgemm.f:188
subroutine dgemv(trans, m, n, alpha, a, lda, x, incx, beta, y, incy)
DGEMV
Definition dgemv.f:158
subroutine dgghd3(compq, compz, n, ilo, ihi, a, lda, b, ldb, q, ldq, z, ldz, work, lwork, info)
DGGHD3
Definition dgghd3.f:229
subroutine dgghrd(compq, compz, n, ilo, ihi, a, lda, b, ldb, q, ldq, z, ldz, info)
DGGHRD
Definition dgghrd.f:206
subroutine dlacpy(uplo, m, n, a, lda, b, ldb)
DLACPY copies all or part of one two-dimensional array to another.
Definition dlacpy.f:101
subroutine dlartg(f, g, c, s, r)
DLARTG generates a plane rotation with real cosine and real sine.
Definition dlartg.f90:111
subroutine dlaset(uplo, m, n, alpha, beta, a, lda)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition dlaset.f:108
subroutine drot(n, dx, incx, dy, incy, c, s)
DROT
Definition drot.f:92
subroutine dtrmv(uplo, trans, diag, n, a, lda, x, incx)
DTRMV
Definition dtrmv.f:147
subroutine dorm22(side, trans, m, n, n1, n2, q, ldq, c, ldc, work, lwork, info)
DORM22 multiplies a general matrix by a banded orthogonal matrix.
Definition dorm22.f:161