LAPACK  3.6.1
LAPACK: Linear Algebra PACKage
dgghd3.f
Go to the documentation of this file.
1 *> \brief \b DGGHD3
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download DGGHD3 + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgghd3.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgghd3.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgghd3.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE DGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q,
22 * LDQ, Z, LDZ, WORK, LWORK, INFO )
23 *
24 * .. Scalar Arguments ..
25 * CHARACTER COMPQ, COMPZ
26 * INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK
27 * ..
28 * .. Array Arguments ..
29 * DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
30 * $ Z( LDZ, * ), WORK( * )
31 * ..
32 *
33 *
34 *> \par Purpose:
35 * =============
36 *>
37 *> \verbatim
38 *>
39 *> DGGHD3 reduces a pair of real matrices (A,B) to generalized upper
40 *> Hessenberg form using orthogonal transformations, where A is a
41 *> general matrix and B is upper triangular. The form of the
42 *> generalized eigenvalue problem is
43 *> A*x = lambda*B*x,
44 *> and B is typically made upper triangular by computing its QR
45 *> factorization and moving the orthogonal matrix Q to the left side
46 *> of the equation.
47 *>
48 *> This subroutine simultaneously reduces A to a Hessenberg matrix H:
49 *> Q**T*A*Z = H
50 *> and transforms B to another upper triangular matrix T:
51 *> Q**T*B*Z = T
52 *> in order to reduce the problem to its standard form
53 *> H*y = lambda*T*y
54 *> where y = Z**T*x.
55 *>
56 *> The orthogonal matrices Q and Z are determined as products of Givens
57 *> rotations. They may either be formed explicitly, or they may be
58 *> postmultiplied into input matrices Q1 and Z1, so that
59 *>
60 *> Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T
61 *>
62 *> Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T
63 *>
64 *> If Q1 is the orthogonal matrix from the QR factorization of B in the
65 *> original equation A*x = lambda*B*x, then DGGHD3 reduces the original
66 *> problem to generalized Hessenberg form.
67 *>
68 *> This is a blocked variant of DGGHRD, using matrix-matrix
69 *> multiplications for parts of the computation to enhance performance.
70 *> \endverbatim
71 *
72 * Arguments:
73 * ==========
74 *
75 *> \param[in] COMPQ
76 *> \verbatim
77 *> COMPQ is CHARACTER*1
78 *> = 'N': do not compute Q;
79 *> = 'I': Q is initialized to the unit matrix, and the
80 *> orthogonal matrix Q is returned;
81 *> = 'V': Q must contain an orthogonal matrix Q1 on entry,
82 *> and the product Q1*Q is returned.
83 *> \endverbatim
84 *>
85 *> \param[in] COMPZ
86 *> \verbatim
87 *> COMPZ is CHARACTER*1
88 *> = 'N': do not compute Z;
89 *> = 'I': Z is initialized to the unit matrix, and the
90 *> orthogonal matrix Z is returned;
91 *> = 'V': Z must contain an orthogonal matrix Z1 on entry,
92 *> and the product Z1*Z is returned.
93 *> \endverbatim
94 *>
95 *> \param[in] N
96 *> \verbatim
97 *> N is INTEGER
98 *> The order of the matrices A and B. N >= 0.
99 *> \endverbatim
100 *>
101 *> \param[in] ILO
102 *> \verbatim
103 *> ILO is INTEGER
104 *> \endverbatim
105 *>
106 *> \param[in] IHI
107 *> \verbatim
108 *> IHI is INTEGER
109 *>
110 *> ILO and IHI mark the rows and columns of A which are to be
111 *> reduced. It is assumed that A is already upper triangular
112 *> in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are
113 *> normally set by a previous call to DGGBAL; otherwise they
114 *> should be set to 1 and N respectively.
115 *> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
116 *> \endverbatim
117 *>
118 *> \param[in,out] A
119 *> \verbatim
120 *> A is DOUBLE PRECISION array, dimension (LDA, N)
121 *> On entry, the N-by-N general matrix to be reduced.
122 *> On exit, the upper triangle and the first subdiagonal of A
123 *> are overwritten with the upper Hessenberg matrix H, and the
124 *> rest is set to zero.
125 *> \endverbatim
126 *>
127 *> \param[in] LDA
128 *> \verbatim
129 *> LDA is INTEGER
130 *> The leading dimension of the array A. LDA >= max(1,N).
131 *> \endverbatim
132 *>
133 *> \param[in,out] B
134 *> \verbatim
135 *> B is DOUBLE PRECISION array, dimension (LDB, N)
136 *> On entry, the N-by-N upper triangular matrix B.
137 *> On exit, the upper triangular matrix T = Q**T B Z. The
138 *> elements below the diagonal are set to zero.
139 *> \endverbatim
140 *>
141 *> \param[in] LDB
142 *> \verbatim
143 *> LDB is INTEGER
144 *> The leading dimension of the array B. LDB >= max(1,N).
145 *> \endverbatim
146 *>
147 *> \param[in,out] Q
148 *> \verbatim
149 *> Q is DOUBLE PRECISION array, dimension (LDQ, N)
150 *> On entry, if COMPQ = 'V', the orthogonal matrix Q1,
151 *> typically from the QR factorization of B.
152 *> On exit, if COMPQ='I', the orthogonal matrix Q, and if
153 *> COMPQ = 'V', the product Q1*Q.
154 *> Not referenced if COMPQ='N'.
155 *> \endverbatim
156 *>
157 *> \param[in] LDQ
158 *> \verbatim
159 *> LDQ is INTEGER
160 *> The leading dimension of the array Q.
161 *> LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise.
162 *> \endverbatim
163 *>
164 *> \param[in,out] Z
165 *> \verbatim
166 *> Z is DOUBLE PRECISION array, dimension (LDZ, N)
167 *> On entry, if COMPZ = 'V', the orthogonal matrix Z1.
168 *> On exit, if COMPZ='I', the orthogonal matrix Z, and if
169 *> COMPZ = 'V', the product Z1*Z.
170 *> Not referenced if COMPZ='N'.
171 *> \endverbatim
172 *>
173 *> \param[in] LDZ
174 *> \verbatim
175 *> LDZ is INTEGER
176 *> The leading dimension of the array Z.
177 *> LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise.
178 *> \endverbatim
179 *>
180 *> \param[out] WORK
181 *> \verbatim
182 *> WORK is DOUBLE PRECISION array, dimension (LWORK)
183 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
184 *> \endverbatim
185 *>
186 *> \param[in] LWORK
187 *> \verbatim
188 *> LWORK is INTEGER
189 *> The length of the array WORK. LWORK >= 1.
190 *> For optimum performance LWORK >= 6*N*NB, where NB is the
191 *> optimal blocksize.
192 *>
193 *> If LWORK = -1, then a workspace query is assumed; the routine
194 *> only calculates the optimal size of the WORK array, returns
195 *> this value as the first entry of the WORK array, and no error
196 *> message related to LWORK is issued by XERBLA.
197 *> \endverbatim
198 *>
199 *> \param[out] INFO
200 *> \verbatim
201 *> INFO is INTEGER
202 *> = 0: successful exit.
203 *> < 0: if INFO = -i, the i-th argument had an illegal value.
204 *> \endverbatim
205 *
206 * Authors:
207 * ========
208 *
209 *> \author Univ. of Tennessee
210 *> \author Univ. of California Berkeley
211 *> \author Univ. of Colorado Denver
212 *> \author NAG Ltd.
213 *
214 *> \date January 2015
215 *
216 *> \ingroup doubleOTHERcomputational
217 *
218 *> \par Further Details:
219 * =====================
220 *>
221 *> \verbatim
222 *>
223 *> This routine reduces A to Hessenberg form and maintains B in
224 *> using a blocked variant of Moler and Stewart's original algorithm,
225 *> as described by Kagstrom, Kressner, Quintana-Orti, and Quintana-Orti
226 *> (BIT 2008).
227 *> \endverbatim
228 *>
229 * =====================================================================
230  SUBROUTINE dgghd3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q,
231  $ ldq, z, ldz, work, lwork, info )
232 *
233 * -- LAPACK computational routine (version 3.6.1) --
234 * -- LAPACK is a software package provided by Univ. of Tennessee, --
235 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
236 * January 2015
237 *
238  IMPLICIT NONE
239 *
240 * .. Scalar Arguments ..
241  CHARACTER COMPQ, COMPZ
242  INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK
243 * ..
244 * .. Array Arguments ..
245  DOUBLE PRECISION A( lda, * ), B( ldb, * ), Q( ldq, * ),
246  $ z( ldz, * ), work( * )
247 * ..
248 *
249 * =====================================================================
250 *
251 * .. Parameters ..
252  DOUBLE PRECISION ZERO, ONE
253  parameter ( zero = 0.0d+0, one = 1.0d+0 )
254 * ..
255 * .. Local Scalars ..
256  LOGICAL BLK22, INITQ, INITZ, LQUERY, WANTQ, WANTZ
257  CHARACTER*1 COMPQ2, COMPZ2
258  INTEGER COLA, I, IERR, J, J0, JCOL, JJ, JROW, K,
259  $ kacc22, len, lwkopt, n2nb, nb, nblst, nbmin,
260  $ nh, nnb, nx, ppw, ppwo, pw, top, topq
261  DOUBLE PRECISION C, C1, C2, S, S1, S2, TEMP, TEMP1, TEMP2, TEMP3
262 * ..
263 * .. External Functions ..
264  LOGICAL LSAME
265  INTEGER ILAENV
266  EXTERNAL ilaenv, lsame
267 * ..
268 * .. External Subroutines ..
269  EXTERNAL dgghrd, dlartg, dlaset, dorm22, drot, xerbla
270 * ..
271 * .. Intrinsic Functions ..
272  INTRINSIC dble, max
273 * ..
274 * .. Executable Statements ..
275 *
276 * Decode and test the input parameters.
277 *
278  info = 0
279  nb = ilaenv( 1, 'DGGHD3', ' ', n, ilo, ihi, -1 )
280  lwkopt = max( 6*n*nb, 1 )
281  work( 1 ) = dble( lwkopt )
282  initq = lsame( compq, 'I' )
283  wantq = initq .OR. lsame( compq, 'V' )
284  initz = lsame( compz, 'I' )
285  wantz = initz .OR. lsame( compz, 'V' )
286  lquery = ( lwork.EQ.-1 )
287 *
288  IF( .NOT.lsame( compq, 'N' ) .AND. .NOT.wantq ) THEN
289  info = -1
290  ELSE IF( .NOT.lsame( compz, 'N' ) .AND. .NOT.wantz ) THEN
291  info = -2
292  ELSE IF( n.LT.0 ) THEN
293  info = -3
294  ELSE IF( ilo.LT.1 ) THEN
295  info = -4
296  ELSE IF( ihi.GT.n .OR. ihi.LT.ilo-1 ) THEN
297  info = -5
298  ELSE IF( lda.LT.max( 1, n ) ) THEN
299  info = -7
300  ELSE IF( ldb.LT.max( 1, n ) ) THEN
301  info = -9
302  ELSE IF( ( wantq .AND. ldq.LT.n ) .OR. ldq.LT.1 ) THEN
303  info = -11
304  ELSE IF( ( wantz .AND. ldz.LT.n ) .OR. ldz.LT.1 ) THEN
305  info = -13
306  ELSE IF( lwork.LT.1 .AND. .NOT.lquery ) THEN
307  info = -15
308  END IF
309  IF( info.NE.0 ) THEN
310  CALL xerbla( 'DGGHD3', -info )
311  RETURN
312  ELSE IF( lquery ) THEN
313  RETURN
314  END IF
315 *
316 * Initialize Q and Z if desired.
317 *
318  IF( initq )
319  $ CALL dlaset( 'All', n, n, zero, one, q, ldq )
320  IF( initz )
321  $ CALL dlaset( 'All', n, n, zero, one, z, ldz )
322 *
323 * Zero out lower triangle of B.
324 *
325  IF( n.GT.1 )
326  $ CALL dlaset( 'Lower', n-1, n-1, zero, zero, b(2, 1), ldb )
327 *
328 * Quick return if possible
329 *
330  nh = ihi - ilo + 1
331  IF( nh.LE.1 ) THEN
332  work( 1 ) = one
333  RETURN
334  END IF
335 *
336 * Determine the blocksize.
337 *
338  nbmin = ilaenv( 2, 'DGGHD3', ' ', n, ilo, ihi, -1 )
339  IF( nb.GT.1 .AND. nb.LT.nh ) THEN
340 *
341 * Determine when to use unblocked instead of blocked code.
342 *
343  nx = max( nb, ilaenv( 3, 'DGGHD3', ' ', n, ilo, ihi, -1 ) )
344  IF( nx.LT.nh ) THEN
345 *
346 * Determine if workspace is large enough for blocked code.
347 *
348  IF( lwork.LT.lwkopt ) THEN
349 *
350 * Not enough workspace to use optimal NB: determine the
351 * minimum value of NB, and reduce NB or force use of
352 * unblocked code.
353 *
354  nbmin = max( 2, ilaenv( 2, 'DGGHD3', ' ', n, ilo, ihi,
355  $ -1 ) )
356  IF( lwork.GE.6*n*nbmin ) THEN
357  nb = lwork / ( 6*n )
358  ELSE
359  nb = 1
360  END IF
361  END IF
362  END IF
363  END IF
364 *
365  IF( nb.LT.nbmin .OR. nb.GE.nh ) THEN
366 *
367 * Use unblocked code below
368 *
369  jcol = ilo
370 *
371  ELSE
372 *
373 * Use blocked code
374 *
375  kacc22 = ilaenv( 16, 'DGGHD3', ' ', n, ilo, ihi, -1 )
376  blk22 = kacc22.EQ.2
377  DO jcol = ilo, ihi-2, nb
378  nnb = min( nb, ihi-jcol-1 )
379 *
380 * Initialize small orthogonal factors that will hold the
381 * accumulated Givens rotations in workspace.
382 * N2NB denotes the number of 2*NNB-by-2*NNB factors
383 * NBLST denotes the (possibly smaller) order of the last
384 * factor.
385 *
386  n2nb = ( ihi-jcol-1 ) / nnb - 1
387  nblst = ihi - jcol - n2nb*nnb
388  CALL dlaset( 'All', nblst, nblst, zero, one, work, nblst )
389  pw = nblst * nblst + 1
390  DO i = 1, n2nb
391  CALL dlaset( 'All', 2*nnb, 2*nnb, zero, one,
392  $ work( pw ), 2*nnb )
393  pw = pw + 4*nnb*nnb
394  END DO
395 *
396 * Reduce columns JCOL:JCOL+NNB-1 of A to Hessenberg form.
397 *
398  DO j = jcol, jcol+nnb-1
399 *
400 * Reduce Jth column of A. Store cosines and sines in Jth
401 * column of A and B, respectively.
402 *
403  DO i = ihi, j+2, -1
404  temp = a( i-1, j )
405  CALL dlartg( temp, a( i, j ), c, s, a( i-1, j ) )
406  a( i, j ) = c
407  b( i, j ) = s
408  END DO
409 *
410 * Accumulate Givens rotations into workspace array.
411 *
412  ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1
413  len = 2 + j - jcol
414  jrow = j + n2nb*nnb + 2
415  DO i = ihi, jrow, -1
416  c = a( i, j )
417  s = b( i, j )
418  DO jj = ppw, ppw+len-1
419  temp = work( jj + nblst )
420  work( jj + nblst ) = c*temp - s*work( jj )
421  work( jj ) = s*temp + c*work( jj )
422  END DO
423  len = len + 1
424  ppw = ppw - nblst - 1
425  END DO
426 *
427  ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb
428  j0 = jrow - nnb
429  DO jrow = j0, j+2, -nnb
430  ppw = ppwo
431  len = 2 + j - jcol
432  DO i = jrow+nnb-1, jrow, -1
433  c = a( i, j )
434  s = b( i, j )
435  DO jj = ppw, ppw+len-1
436  temp = work( jj + 2*nnb )
437  work( jj + 2*nnb ) = c*temp - s*work( jj )
438  work( jj ) = s*temp + c*work( jj )
439  END DO
440  len = len + 1
441  ppw = ppw - 2*nnb - 1
442  END DO
443  ppwo = ppwo + 4*nnb*nnb
444  END DO
445 *
446 * TOP denotes the number of top rows in A and B that will
447 * not be updated during the next steps.
448 *
449  IF( jcol.LE.2 ) THEN
450  top = 0
451  ELSE
452  top = jcol
453  END IF
454 *
455 * Propagate transformations through B and replace stored
456 * left sines/cosines by right sines/cosines.
457 *
458  DO jj = n, j+1, -1
459 *
460 * Update JJth column of B.
461 *
462  DO i = min( jj+1, ihi ), j+2, -1
463  c = a( i, j )
464  s = b( i, j )
465  temp = b( i, jj )
466  b( i, jj ) = c*temp - s*b( i-1, jj )
467  b( i-1, jj ) = s*temp + c*b( i-1, jj )
468  END DO
469 *
470 * Annihilate B( JJ+1, JJ ).
471 *
472  IF( jj.LT.ihi ) THEN
473  temp = b( jj+1, jj+1 )
474  CALL dlartg( temp, b( jj+1, jj ), c, s,
475  $ b( jj+1, jj+1 ) )
476  b( jj+1, jj ) = zero
477  CALL drot( jj-top, b( top+1, jj+1 ), 1,
478  $ b( top+1, jj ), 1, c, s )
479  a( jj+1, j ) = c
480  b( jj+1, j ) = -s
481  END IF
482  END DO
483 *
484 * Update A by transformations from right.
485 * Explicit loop unrolling provides better performance
486 * compared to DLASR.
487 * CALL DLASR( 'Right', 'Variable', 'Backward', IHI-TOP,
488 * $ IHI-J, A( J+2, J ), B( J+2, J ),
489 * $ A( TOP+1, J+1 ), LDA )
490 *
491  jj = mod( ihi-j-1, 3 )
492  DO i = ihi-j-3, jj+1, -3
493  c = a( j+1+i, j )
494  s = -b( j+1+i, j )
495  c1 = a( j+2+i, j )
496  s1 = -b( j+2+i, j )
497  c2 = a( j+3+i, j )
498  s2 = -b( j+3+i, j )
499 *
500  DO k = top+1, ihi
501  temp = a( k, j+i )
502  temp1 = a( k, j+i+1 )
503  temp2 = a( k, j+i+2 )
504  temp3 = a( k, j+i+3 )
505  a( k, j+i+3 ) = c2*temp3 + s2*temp2
506  temp2 = -s2*temp3 + c2*temp2
507  a( k, j+i+2 ) = c1*temp2 + s1*temp1
508  temp1 = -s1*temp2 + c1*temp1
509  a( k, j+i+1 ) = c*temp1 + s*temp
510  a( k, j+i ) = -s*temp1 + c*temp
511  END DO
512  END DO
513 *
514  IF( jj.GT.0 ) THEN
515  DO i = jj, 1, -1
516  CALL drot( ihi-top, a( top+1, j+i+1 ), 1,
517  $ a( top+1, j+i ), 1, a( j+1+i, j ),
518  $ -b( j+1+i, j ) )
519  END DO
520  END IF
521 *
522 * Update (J+1)th column of A by transformations from left.
523 *
524  IF ( j .LT. jcol + nnb - 1 ) THEN
525  len = 1 + j - jcol
526 *
527 * Multiply with the trailing accumulated orthogonal
528 * matrix, which takes the form
529 *
530 * [ U11 U12 ]
531 * U = [ ],
532 * [ U21 U22 ]
533 *
534 * where U21 is a LEN-by-LEN matrix and U12 is lower
535 * triangular.
536 *
537  jrow = ihi - nblst + 1
538  CALL dgemv( 'Transpose', nblst, len, one, work,
539  $ nblst, a( jrow, j+1 ), 1, zero,
540  $ work( pw ), 1 )
541  ppw = pw + len
542  DO i = jrow, jrow+nblst-len-1
543  work( ppw ) = a( i, j+1 )
544  ppw = ppw + 1
545  END DO
546  CALL dtrmv( 'Lower', 'Transpose', 'Non-unit',
547  $ nblst-len, work( len*nblst + 1 ), nblst,
548  $ work( pw+len ), 1 )
549  CALL dgemv( 'Transpose', len, nblst-len, one,
550  $ work( (len+1)*nblst - len + 1 ), nblst,
551  $ a( jrow+nblst-len, j+1 ), 1, one,
552  $ work( pw+len ), 1 )
553  ppw = pw
554  DO i = jrow, jrow+nblst-1
555  a( i, j+1 ) = work( ppw )
556  ppw = ppw + 1
557  END DO
558 *
559 * Multiply with the other accumulated orthogonal
560 * matrices, which take the form
561 *
562 * [ U11 U12 0 ]
563 * [ ]
564 * U = [ U21 U22 0 ],
565 * [ ]
566 * [ 0 0 I ]
567 *
568 * where I denotes the (NNB-LEN)-by-(NNB-LEN) identity
569 * matrix, U21 is a LEN-by-LEN upper triangular matrix
570 * and U12 is an NNB-by-NNB lower triangular matrix.
571 *
572  ppwo = 1 + nblst*nblst
573  j0 = jrow - nnb
574  DO jrow = j0, jcol+1, -nnb
575  ppw = pw + len
576  DO i = jrow, jrow+nnb-1
577  work( ppw ) = a( i, j+1 )
578  ppw = ppw + 1
579  END DO
580  ppw = pw
581  DO i = jrow+nnb, jrow+nnb+len-1
582  work( ppw ) = a( i, j+1 )
583  ppw = ppw + 1
584  END DO
585  CALL dtrmv( 'Upper', 'Transpose', 'Non-unit', len,
586  $ work( ppwo + nnb ), 2*nnb, work( pw ),
587  $ 1 )
588  CALL dtrmv( 'Lower', 'Transpose', 'Non-unit', nnb,
589  $ work( ppwo + 2*len*nnb ),
590  $ 2*nnb, work( pw + len ), 1 )
591  CALL dgemv( 'Transpose', nnb, len, one,
592  $ work( ppwo ), 2*nnb, a( jrow, j+1 ), 1,
593  $ one, work( pw ), 1 )
594  CALL dgemv( 'Transpose', len, nnb, one,
595  $ work( ppwo + 2*len*nnb + nnb ), 2*nnb,
596  $ a( jrow+nnb, j+1 ), 1, one,
597  $ work( pw+len ), 1 )
598  ppw = pw
599  DO i = jrow, jrow+len+nnb-1
600  a( i, j+1 ) = work( ppw )
601  ppw = ppw + 1
602  END DO
603  ppwo = ppwo + 4*nnb*nnb
604  END DO
605  END IF
606  END DO
607 *
608 * Apply accumulated orthogonal matrices to A.
609 *
610  cola = n - jcol - nnb + 1
611  j = ihi - nblst + 1
612  CALL dgemm( 'Transpose', 'No Transpose', nblst,
613  $ cola, nblst, one, work, nblst,
614  $ a( j, jcol+nnb ), lda, zero, work( pw ),
615  $ nblst )
616  CALL dlacpy( 'All', nblst, cola, work( pw ), nblst,
617  $ a( j, jcol+nnb ), lda )
618  ppwo = nblst*nblst + 1
619  j0 = j - nnb
620  DO j = j0, jcol+1, -nnb
621  IF ( blk22 ) THEN
622 *
623 * Exploit the structure of
624 *
625 * [ U11 U12 ]
626 * U = [ ]
627 * [ U21 U22 ],
628 *
629 * where all blocks are NNB-by-NNB, U21 is upper
630 * triangular and U12 is lower triangular.
631 *
632  CALL dorm22( 'Left', 'Transpose', 2*nnb, cola, nnb,
633  $ nnb, work( ppwo ), 2*nnb,
634  $ a( j, jcol+nnb ), lda, work( pw ),
635  $ lwork-pw+1, ierr )
636  ELSE
637 *
638 * Ignore the structure of U.
639 *
640  CALL dgemm( 'Transpose', 'No Transpose', 2*nnb,
641  $ cola, 2*nnb, one, work( ppwo ), 2*nnb,
642  $ a( j, jcol+nnb ), lda, zero, work( pw ),
643  $ 2*nnb )
644  CALL dlacpy( 'All', 2*nnb, cola, work( pw ), 2*nnb,
645  $ a( j, jcol+nnb ), lda )
646  END IF
647  ppwo = ppwo + 4*nnb*nnb
648  END DO
649 *
650 * Apply accumulated orthogonal matrices to Q.
651 *
652  IF( wantq ) THEN
653  j = ihi - nblst + 1
654  IF ( initq ) THEN
655  topq = max( 2, j - jcol + 1 )
656  nh = ihi - topq + 1
657  ELSE
658  topq = 1
659  nh = n
660  END IF
661  CALL dgemm( 'No Transpose', 'No Transpose', nh,
662  $ nblst, nblst, one, q( topq, j ), ldq,
663  $ work, nblst, zero, work( pw ), nh )
664  CALL dlacpy( 'All', nh, nblst, work( pw ), nh,
665  $ q( topq, j ), ldq )
666  ppwo = nblst*nblst + 1
667  j0 = j - nnb
668  DO j = j0, jcol+1, -nnb
669  IF ( initq ) THEN
670  topq = max( 2, j - jcol + 1 )
671  nh = ihi - topq + 1
672  END IF
673  IF ( blk22 ) THEN
674 *
675 * Exploit the structure of U.
676 *
677  CALL dorm22( 'Right', 'No Transpose', nh, 2*nnb,
678  $ nnb, nnb, work( ppwo ), 2*nnb,
679  $ q( topq, j ), ldq, work( pw ),
680  $ lwork-pw+1, ierr )
681  ELSE
682 *
683 * Ignore the structure of U.
684 *
685  CALL dgemm( 'No Transpose', 'No Transpose', nh,
686  $ 2*nnb, 2*nnb, one, q( topq, j ), ldq,
687  $ work( ppwo ), 2*nnb, zero, work( pw ),
688  $ nh )
689  CALL dlacpy( 'All', nh, 2*nnb, work( pw ), nh,
690  $ q( topq, j ), ldq )
691  END IF
692  ppwo = ppwo + 4*nnb*nnb
693  END DO
694  END IF
695 *
696 * Accumulate right Givens rotations if required.
697 *
698  IF ( wantz .OR. top.GT.0 ) THEN
699 *
700 * Initialize small orthogonal factors that will hold the
701 * accumulated Givens rotations in workspace.
702 *
703  CALL dlaset( 'All', nblst, nblst, zero, one, work,
704  $ nblst )
705  pw = nblst * nblst + 1
706  DO i = 1, n2nb
707  CALL dlaset( 'All', 2*nnb, 2*nnb, zero, one,
708  $ work( pw ), 2*nnb )
709  pw = pw + 4*nnb*nnb
710  END DO
711 *
712 * Accumulate Givens rotations into workspace array.
713 *
714  DO j = jcol, jcol+nnb-1
715  ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1
716  len = 2 + j - jcol
717  jrow = j + n2nb*nnb + 2
718  DO i = ihi, jrow, -1
719  c = a( i, j )
720  a( i, j ) = zero
721  s = b( i, j )
722  b( i, j ) = zero
723  DO jj = ppw, ppw+len-1
724  temp = work( jj + nblst )
725  work( jj + nblst ) = c*temp - s*work( jj )
726  work( jj ) = s*temp + c*work( jj )
727  END DO
728  len = len + 1
729  ppw = ppw - nblst - 1
730  END DO
731 *
732  ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb
733  j0 = jrow - nnb
734  DO jrow = j0, j+2, -nnb
735  ppw = ppwo
736  len = 2 + j - jcol
737  DO i = jrow+nnb-1, jrow, -1
738  c = a( i, j )
739  a( i, j ) = zero
740  s = b( i, j )
741  b( i, j ) = zero
742  DO jj = ppw, ppw+len-1
743  temp = work( jj + 2*nnb )
744  work( jj + 2*nnb ) = c*temp - s*work( jj )
745  work( jj ) = s*temp + c*work( jj )
746  END DO
747  len = len + 1
748  ppw = ppw - 2*nnb - 1
749  END DO
750  ppwo = ppwo + 4*nnb*nnb
751  END DO
752  END DO
753  ELSE
754 *
755  CALL dlaset( 'Lower', ihi - jcol - 1, nnb, zero, zero,
756  $ a( jcol + 2, jcol ), lda )
757  CALL dlaset( 'Lower', ihi - jcol - 1, nnb, zero, zero,
758  $ b( jcol + 2, jcol ), ldb )
759  END IF
760 *
761 * Apply accumulated orthogonal matrices to A and B.
762 *
763  IF ( top.GT.0 ) THEN
764  j = ihi - nblst + 1
765  CALL dgemm( 'No Transpose', 'No Transpose', top,
766  $ nblst, nblst, one, a( 1, j ), lda,
767  $ work, nblst, zero, work( pw ), top )
768  CALL dlacpy( 'All', top, nblst, work( pw ), top,
769  $ a( 1, j ), lda )
770  ppwo = nblst*nblst + 1
771  j0 = j - nnb
772  DO j = j0, jcol+1, -nnb
773  IF ( blk22 ) THEN
774 *
775 * Exploit the structure of U.
776 *
777  CALL dorm22( 'Right', 'No Transpose', top, 2*nnb,
778  $ nnb, nnb, work( ppwo ), 2*nnb,
779  $ a( 1, j ), lda, work( pw ),
780  $ lwork-pw+1, ierr )
781  ELSE
782 *
783 * Ignore the structure of U.
784 *
785  CALL dgemm( 'No Transpose', 'No Transpose', top,
786  $ 2*nnb, 2*nnb, one, a( 1, j ), lda,
787  $ work( ppwo ), 2*nnb, zero,
788  $ work( pw ), top )
789  CALL dlacpy( 'All', top, 2*nnb, work( pw ), top,
790  $ a( 1, j ), lda )
791  END IF
792  ppwo = ppwo + 4*nnb*nnb
793  END DO
794 *
795  j = ihi - nblst + 1
796  CALL dgemm( 'No Transpose', 'No Transpose', top,
797  $ nblst, nblst, one, b( 1, j ), ldb,
798  $ work, nblst, zero, work( pw ), top )
799  CALL dlacpy( 'All', top, nblst, work( pw ), top,
800  $ b( 1, j ), ldb )
801  ppwo = nblst*nblst + 1
802  j0 = j - nnb
803  DO j = j0, jcol+1, -nnb
804  IF ( blk22 ) THEN
805 *
806 * Exploit the structure of U.
807 *
808  CALL dorm22( 'Right', 'No Transpose', top, 2*nnb,
809  $ nnb, nnb, work( ppwo ), 2*nnb,
810  $ b( 1, j ), ldb, work( pw ),
811  $ lwork-pw+1, ierr )
812  ELSE
813 *
814 * Ignore the structure of U.
815 *
816  CALL dgemm( 'No Transpose', 'No Transpose', top,
817  $ 2*nnb, 2*nnb, one, b( 1, j ), ldb,
818  $ work( ppwo ), 2*nnb, zero,
819  $ work( pw ), top )
820  CALL dlacpy( 'All', top, 2*nnb, work( pw ), top,
821  $ b( 1, j ), ldb )
822  END IF
823  ppwo = ppwo + 4*nnb*nnb
824  END DO
825  END IF
826 *
827 * Apply accumulated orthogonal matrices to Z.
828 *
829  IF( wantz ) THEN
830  j = ihi - nblst + 1
831  IF ( initq ) THEN
832  topq = max( 2, j - jcol + 1 )
833  nh = ihi - topq + 1
834  ELSE
835  topq = 1
836  nh = n
837  END IF
838  CALL dgemm( 'No Transpose', 'No Transpose', nh,
839  $ nblst, nblst, one, z( topq, j ), ldz,
840  $ work, nblst, zero, work( pw ), nh )
841  CALL dlacpy( 'All', nh, nblst, work( pw ), nh,
842  $ z( topq, j ), ldz )
843  ppwo = nblst*nblst + 1
844  j0 = j - nnb
845  DO j = j0, jcol+1, -nnb
846  IF ( initq ) THEN
847  topq = max( 2, j - jcol + 1 )
848  nh = ihi - topq + 1
849  END IF
850  IF ( blk22 ) THEN
851 *
852 * Exploit the structure of U.
853 *
854  CALL dorm22( 'Right', 'No Transpose', nh, 2*nnb,
855  $ nnb, nnb, work( ppwo ), 2*nnb,
856  $ z( topq, j ), ldz, work( pw ),
857  $ lwork-pw+1, ierr )
858  ELSE
859 *
860 * Ignore the structure of U.
861 *
862  CALL dgemm( 'No Transpose', 'No Transpose', nh,
863  $ 2*nnb, 2*nnb, one, z( topq, j ), ldz,
864  $ work( ppwo ), 2*nnb, zero, work( pw ),
865  $ nh )
866  CALL dlacpy( 'All', nh, 2*nnb, work( pw ), nh,
867  $ z( topq, j ), ldz )
868  END IF
869  ppwo = ppwo + 4*nnb*nnb
870  END DO
871  END IF
872  END DO
873  END IF
874 *
875 * Use unblocked code to reduce the rest of the matrix
876 * Avoid re-initialization of modified Q and Z.
877 *
878  compq2 = compq
879  compz2 = compz
880  IF ( jcol.NE.ilo ) THEN
881  IF ( wantq )
882  $ compq2 = 'V'
883  IF ( wantz )
884  $ compz2 = 'V'
885  END IF
886 *
887  IF ( jcol.LT.ihi )
888  $ CALL dgghrd( compq2, compz2, n, jcol, ihi, a, lda, b, ldb, q,
889  $ ldq, z, ldz, ierr )
890  work( 1 ) = dble( lwkopt )
891 *
892  RETURN
893 *
894 * End of DGGHD3
895 *
896  END
subroutine dgghrd(COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, LDQ, Z, LDZ, INFO)
DGGHRD
Definition: dgghrd.f:209
subroutine dlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values...
Definition: dlaset.f:112
subroutine dgghd3(COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, LDQ, Z, LDZ, WORK, LWORK, INFO)
DGGHD3
Definition: dgghd3.f:232
subroutine dgemv(TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY)
DGEMV
Definition: dgemv.f:158
subroutine dorm22(SIDE, TRANS, M, N, N1, N2, Q, LDQ, C, LDC, WORK, LWORK, INFO)
DORM22 multiplies a general matrix by a banded orthogonal matrix.
Definition: dorm22.f:165
subroutine dlacpy(UPLO, M, N, A, LDA, B, LDB)
DLACPY copies all or part of one two-dimensional array to another.
Definition: dlacpy.f:105
subroutine drot(N, DX, INCX, DY, INCY, C, S)
DROT
Definition: drot.f:53
subroutine dgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
DGEMM
Definition: dgemm.f:189
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
subroutine dtrmv(UPLO, TRANS, DIAG, N, A, LDA, X, INCX)
DTRMV
Definition: dtrmv.f:149
subroutine dlartg(F, G, CS, SN, R)
DLARTG generates a plane rotation with real cosine and real sine.
Definition: dlartg.f:99