LAPACK  3.4.2
LAPACK: Linear Algebra PACKage
 All Files Functions Groups
slaqr5.f
Go to the documentation of this file.
1 *> \brief \b SLAQR5 performs a single small-bulge multi-shift QR sweep.
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download SLAQR5 + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slaqr5.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slaqr5.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slaqr5.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE SLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
22 * SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
23 * LDU, NV, WV, LDWV, NH, WH, LDWH )
24 *
25 * .. Scalar Arguments ..
26 * INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
27 * $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
28 * LOGICAL WANTT, WANTZ
29 * ..
30 * .. Array Arguments ..
31 * REAL H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
32 * $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
33 * $ Z( LDZ, * )
34 * ..
35 *
36 *
37 *> \par Purpose:
38 * =============
39 *>
40 *> \verbatim
41 *>
42 *> SLAQR5, called by SLAQR0, performs a
43 *> single small-bulge multi-shift QR sweep.
44 *> \endverbatim
45 *
46 * Arguments:
47 * ==========
48 *
49 *> \param[in] WANTT
50 *> \verbatim
51 *> WANTT is logical scalar
52 *> WANTT = .true. if the quasi-triangular Schur factor
53 *> is being computed. WANTT is set to .false. otherwise.
54 *> \endverbatim
55 *>
56 *> \param[in] WANTZ
57 *> \verbatim
58 *> WANTZ is logical scalar
59 *> WANTZ = .true. if the orthogonal Schur factor is being
60 *> computed. WANTZ is set to .false. otherwise.
61 *> \endverbatim
62 *>
63 *> \param[in] KACC22
64 *> \verbatim
65 *> KACC22 is integer with value 0, 1, or 2.
66 *> Specifies the computation mode of far-from-diagonal
67 *> orthogonal updates.
68 *> = 0: SLAQR5 does not accumulate reflections and does not
69 *> use matrix-matrix multiply to update far-from-diagonal
70 *> matrix entries.
71 *> = 1: SLAQR5 accumulates reflections and uses matrix-matrix
72 *> multiply to update the far-from-diagonal matrix entries.
73 *> = 2: SLAQR5 accumulates reflections, uses matrix-matrix
74 *> multiply to update the far-from-diagonal matrix entries,
75 *> and takes advantage of 2-by-2 block structure during
76 *> matrix multiplies.
77 *> \endverbatim
78 *>
79 *> \param[in] N
80 *> \verbatim
81 *> N is integer scalar
82 *> N is the order of the Hessenberg matrix H upon which this
83 *> subroutine operates.
84 *> \endverbatim
85 *>
86 *> \param[in] KTOP
87 *> \verbatim
88 *> KTOP is integer scalar
89 *> \endverbatim
90 *>
91 *> \param[in] KBOT
92 *> \verbatim
93 *> KBOT is integer scalar
94 *> These are the first and last rows and columns of an
95 *> isolated diagonal block upon which the QR sweep is to be
96 *> applied. It is assumed without a check that
97 *> either KTOP = 1 or H(KTOP,KTOP-1) = 0
98 *> and
99 *> either KBOT = N or H(KBOT+1,KBOT) = 0.
100 *> \endverbatim
101 *>
102 *> \param[in] NSHFTS
103 *> \verbatim
104 *> NSHFTS is integer scalar
105 *> NSHFTS gives the number of simultaneous shifts. NSHFTS
106 *> must be positive and even.
107 *> \endverbatim
108 *>
109 *> \param[in,out] SR
110 *> \verbatim
111 *> SR is REAL array of size (NSHFTS)
112 *> \endverbatim
113 *>
114 *> \param[in,out] SI
115 *> \verbatim
116 *> SI is REAL array of size (NSHFTS)
117 *> SR contains the real parts and SI contains the imaginary
118 *> parts of the NSHFTS shifts of origin that define the
119 *> multi-shift QR sweep. On output SR and SI may be
120 *> reordered.
121 *> \endverbatim
122 *>
123 *> \param[in,out] H
124 *> \verbatim
125 *> H is REAL array of size (LDH,N)
126 *> On input H contains a Hessenberg matrix. On output a
127 *> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
128 *> to the isolated diagonal block in rows and columns KTOP
129 *> through KBOT.
130 *> \endverbatim
131 *>
132 *> \param[in] LDH
133 *> \verbatim
134 *> LDH is integer scalar
135 *> LDH is the leading dimension of H just as declared in the
136 *> calling procedure. LDH.GE.MAX(1,N).
137 *> \endverbatim
138 *>
139 *> \param[in] ILOZ
140 *> \verbatim
141 *> ILOZ is INTEGER
142 *> \endverbatim
143 *>
144 *> \param[in] IHIZ
145 *> \verbatim
146 *> IHIZ is INTEGER
147 *> Specify the rows of Z to which transformations must be
148 *> applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
149 *> \endverbatim
150 *>
151 *> \param[in,out] Z
152 *> \verbatim
153 *> Z is REAL array of size (LDZ,IHIZ)
154 *> If WANTZ = .TRUE., then the QR Sweep orthogonal
155 *> similarity transformation is accumulated into
156 *> Z(ILOZ:IHIZ,KTOP:KBOT) from the right.
157 *> If WANTZ = .FALSE., then Z is unreferenced.
158 *> \endverbatim
159 *>
160 *> \param[in] LDZ
161 *> \verbatim
162 *> LDZ is integer scalar
163 *> LDZ is the leading dimension of Z just as declared in
164 *> the calling procedure. LDZ.GE.N.
165 *> \endverbatim
166 *>
167 *> \param[out] V
168 *> \verbatim
169 *> V is REAL array of size (LDV,NSHFTS/2)
170 *> \endverbatim
171 *>
172 *> \param[in] LDV
173 *> \verbatim
174 *> LDV is integer scalar
175 *> LDV is the leading dimension of V as declared in the
176 *> calling procedure. LDV.GE.3.
177 *> \endverbatim
178 *>
179 *> \param[out] U
180 *> \verbatim
181 *> U is REAL array of size
182 *> (LDU,3*NSHFTS-3)
183 *> \endverbatim
184 *>
185 *> \param[in] LDU
186 *> \verbatim
187 *> LDU is integer scalar
188 *> LDU is the leading dimension of U just as declared in the
189 *> calling subroutine. LDU.GE.3*NSHFTS-3.
190 *> \endverbatim
191 *>
192 *> \param[in] NH
193 *> \verbatim
194 *> NH is integer scalar
195 *> NH is the number of columns in array WH available for
196 *> workspace. NH.GE.1.
197 *> \endverbatim
198 *>
199 *> \param[out] WH
200 *> \verbatim
201 *> WH is REAL array of size (LDWH,NH)
202 *> \endverbatim
203 *>
204 *> \param[in] LDWH
205 *> \verbatim
206 *> LDWH is integer scalar
207 *> Leading dimension of WH just as declared in the
208 *> calling procedure. LDWH.GE.3*NSHFTS-3.
209 *> \endverbatim
210 *>
211 *> \param[in] NV
212 *> \verbatim
213 *> NV is integer scalar
214 *> NV is the number of rows in WV available for workspace.
215 *> NV.GE.1.
216 *> \endverbatim
217 *>
218 *> \param[out] WV
219 *> \verbatim
220 *> WV is REAL array of size
221 *> (LDWV,3*NSHFTS-3)
222 *> \endverbatim
223 *>
224 *> \param[in] LDWV
225 *> \verbatim
226 *> LDWV is integer scalar
227 *> LDWV is the leading dimension of WV as declared in the
228 *> calling subroutine. LDWV.GE.NV.
229 *> \endverbatim
230 *
231 * Authors:
232 * ========
233 *
234 *> \author Univ. of Tennessee
235 *> \author Univ. of California Berkeley
236 *> \author Univ. of Colorado Denver
237 *> \author NAG Ltd.
238 *
239 *> \date September 2012
240 *
241 *> \ingroup realOTHERauxiliary
242 *
243 *> \par Contributors:
244 * ==================
245 *>
246 *> Karen Braman and Ralph Byers, Department of Mathematics,
247 *> University of Kansas, USA
248 *
249 *> \par References:
250 * ================
251 *>
252 *> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
253 *> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3
254 *> Performance, SIAM Journal of Matrix Analysis, volume 23, pages
255 *> 929--947, 2002.
256 *>
257 * =====================================================================
258  SUBROUTINE slaqr5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
259  $ sr, si, h, ldh, iloz, ihiz, z, ldz, v, ldv, u,
260  $ ldu, nv, wv, ldwv, nh, wh, ldwh )
261 *
262 * -- LAPACK auxiliary routine (version 3.4.2) --
263 * -- LAPACK is a software package provided by Univ. of Tennessee, --
264 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
265 * September 2012
266 *
267 * .. Scalar Arguments ..
268  INTEGER ihiz, iloz, kacc22, kbot, ktop, ldh, ldu, ldv,
269  $ ldwh, ldwv, ldz, n, nh, nshfts, nv
270  LOGICAL wantt, wantz
271 * ..
272 * .. Array Arguments ..
273  REAL h( ldh, * ), si( * ), sr( * ), u( ldu, * ),
274  $ v( ldv, * ), wh( ldwh, * ), wv( ldwv, * ),
275  $ z( ldz, * )
276 * ..
277 *
278 * ================================================================
279 * .. Parameters ..
280  REAL zero, one
281  parameter( zero = 0.0e0, one = 1.0e0 )
282 * ..
283 * .. Local Scalars ..
284  REAL alpha, beta, h11, h12, h21, h22, refsum,
285  $ safmax, safmin, scl, smlnum, swap, tst1, tst2,
286  $ ulp
287  INTEGER i, i2, i4, incol, j, j2, j4, jbot, jcol, jlen,
288  $ jrow, jtop, k, k1, kdu, kms, knz, krcol, kzs,
289  $ m, m22, mbot, mend, mstart, mtop, nbmps, ndcol,
290  $ ns, nu
291  LOGICAL accum, blk22, bmp22
292 * ..
293 * .. External Functions ..
294  REAL slamch
295  EXTERNAL slamch
296 * ..
297 * .. Intrinsic Functions ..
298 *
299  INTRINSIC abs, max, min, mod, real
300 * ..
301 * .. Local Arrays ..
302  REAL vt( 3 )
303 * ..
304 * .. External Subroutines ..
305  EXTERNAL sgemm, slabad, slacpy, slaqr1, slarfg, slaset,
306  $ strmm
307 * ..
308 * .. Executable Statements ..
309 *
310 * ==== If there are no shifts, then there is nothing to do. ====
311 *
312  IF( nshfts.LT.2 )
313  $ return
314 *
315 * ==== If the active block is empty or 1-by-1, then there
316 * . is nothing to do. ====
317 *
318  IF( ktop.GE.kbot )
319  $ return
320 *
321 * ==== Shuffle shifts into pairs of real shifts and pairs
322 * . of complex conjugate shifts assuming complex
323 * . conjugate shifts are already adjacent to one
324 * . another. ====
325 *
326  DO 10 i = 1, nshfts - 2, 2
327  IF( si( i ).NE.-si( i+1 ) ) THEN
328 *
329  swap = sr( i )
330  sr( i ) = sr( i+1 )
331  sr( i+1 ) = sr( i+2 )
332  sr( i+2 ) = swap
333 *
334  swap = si( i )
335  si( i ) = si( i+1 )
336  si( i+1 ) = si( i+2 )
337  si( i+2 ) = swap
338  END IF
339  10 continue
340 *
341 * ==== NSHFTS is supposed to be even, but if it is odd,
342 * . then simply reduce it by one. The shuffle above
343 * . ensures that the dropped shift is real and that
344 * . the remaining shifts are paired. ====
345 *
346  ns = nshfts - mod( nshfts, 2 )
347 *
348 * ==== Machine constants for deflation ====
349 *
350  safmin = slamch( 'SAFE MINIMUM' )
351  safmax = one / safmin
352  CALL slabad( safmin, safmax )
353  ulp = slamch( 'PRECISION' )
354  smlnum = safmin*( REAL( N ) / ulp )
355 *
356 * ==== Use accumulated reflections to update far-from-diagonal
357 * . entries ? ====
358 *
359  accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
360 *
361 * ==== If so, exploit the 2-by-2 block structure? ====
362 *
363  blk22 = ( ns.GT.2 ) .AND. ( kacc22.EQ.2 )
364 *
365 * ==== clear trash ====
366 *
367  IF( ktop+2.LE.kbot )
368  $ h( ktop+2, ktop ) = zero
369 *
370 * ==== NBMPS = number of 2-shift bulges in the chain ====
371 *
372  nbmps = ns / 2
373 *
374 * ==== KDU = width of slab ====
375 *
376  kdu = 6*nbmps - 3
377 *
378 * ==== Create and chase chains of NBMPS bulges ====
379 *
380  DO 220 incol = 3*( 1-nbmps ) + ktop - 1, kbot - 2, 3*nbmps - 2
381  ndcol = incol + kdu
382  IF( accum )
383  $ CALL slaset( 'ALL', kdu, kdu, zero, one, u, ldu )
384 *
385 * ==== Near-the-diagonal bulge chase. The following loop
386 * . performs the near-the-diagonal part of a small bulge
387 * . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal
388 * . chunk extends from column INCOL to column NDCOL
389 * . (including both column INCOL and column NDCOL). The
390 * . following loop chases a 3*NBMPS column long chain of
391 * . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL
392 * . may be less than KTOP and NDCOL may be greater than
393 * . KBOT indicating phantom columns from which to chase
394 * . bulges before they are actually introduced or to which
395 * . to chase bulges beyond column KBOT.) ====
396 *
397  DO 150 krcol = incol, min( incol+3*nbmps-3, kbot-2 )
398 *
399 * ==== Bulges number MTOP to MBOT are active double implicit
400 * . shift bulges. There may or may not also be small
401 * . 2-by-2 bulge, if there is room. The inactive bulges
402 * . (if any) must wait until the active bulges have moved
403 * . down the diagonal to make room. The phantom matrix
404 * . paradigm described above helps keep track. ====
405 *
406  mtop = max( 1, ( ( ktop-1 )-krcol+2 ) / 3+1 )
407  mbot = min( nbmps, ( kbot-krcol ) / 3 )
408  m22 = mbot + 1
409  bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+3*( m22-1 ) ).EQ.
410  $ ( kbot-2 )
411 *
412 * ==== Generate reflections to chase the chain right
413 * . one column. (The minimum value of K is KTOP-1.) ====
414 *
415  DO 20 m = mtop, mbot
416  k = krcol + 3*( m-1 )
417  IF( k.EQ.ktop-1 ) THEN
418  CALL slaqr1( 3, h( ktop, ktop ), ldh, sr( 2*m-1 ),
419  $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
420  $ v( 1, m ) )
421  alpha = v( 1, m )
422  CALL slarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
423  ELSE
424  beta = h( k+1, k )
425  v( 2, m ) = h( k+2, k )
426  v( 3, m ) = h( k+3, k )
427  CALL slarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
428 *
429 * ==== A bulge may collapse because of vigilant
430 * . deflation or destructive underflow. In the
431 * . underflow case, try the two-small-subdiagonals
432 * . trick to try to reinflate the bulge. ====
433 *
434  IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
435  $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
436 *
437 * ==== Typical case: not collapsed (yet). ====
438 *
439  h( k+1, k ) = beta
440  h( k+2, k ) = zero
441  h( k+3, k ) = zero
442  ELSE
443 *
444 * ==== Atypical case: collapsed. Attempt to
445 * . reintroduce ignoring H(K+1,K) and H(K+2,K).
446 * . If the fill resulting from the new
447 * . reflector is too large, then abandon it.
448 * . Otherwise, use the new one. ====
449 *
450  CALL slaqr1( 3, h( k+1, k+1 ), ldh, sr( 2*m-1 ),
451  $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
452  $ vt )
453  alpha = vt( 1 )
454  CALL slarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
455  refsum = vt( 1 )*( h( k+1, k )+vt( 2 )*
456  $ h( k+2, k ) )
457 *
458  IF( abs( h( k+2, k )-refsum*vt( 2 ) )+
459  $ abs( refsum*vt( 3 ) ).GT.ulp*
460  $ ( abs( h( k, k ) )+abs( h( k+1,
461  $ k+1 ) )+abs( h( k+2, k+2 ) ) ) ) THEN
462 *
463 * ==== Starting a new bulge here would
464 * . create non-negligible fill. Use
465 * . the old one with trepidation. ====
466 *
467  h( k+1, k ) = beta
468  h( k+2, k ) = zero
469  h( k+3, k ) = zero
470  ELSE
471 *
472 * ==== Starting a new bulge here would
473 * . create only negligible fill.
474 * . Replace the old reflector with
475 * . the new one. ====
476 *
477  h( k+1, k ) = h( k+1, k ) - refsum
478  h( k+2, k ) = zero
479  h( k+3, k ) = zero
480  v( 1, m ) = vt( 1 )
481  v( 2, m ) = vt( 2 )
482  v( 3, m ) = vt( 3 )
483  END IF
484  END IF
485  END IF
486  20 continue
487 *
488 * ==== Generate a 2-by-2 reflection, if needed. ====
489 *
490  k = krcol + 3*( m22-1 )
491  IF( bmp22 ) THEN
492  IF( k.EQ.ktop-1 ) THEN
493  CALL slaqr1( 2, h( k+1, k+1 ), ldh, sr( 2*m22-1 ),
494  $ si( 2*m22-1 ), sr( 2*m22 ), si( 2*m22 ),
495  $ v( 1, m22 ) )
496  beta = v( 1, m22 )
497  CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
498  ELSE
499  beta = h( k+1, k )
500  v( 2, m22 ) = h( k+2, k )
501  CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
502  h( k+1, k ) = beta
503  h( k+2, k ) = zero
504  END IF
505  END IF
506 *
507 * ==== Multiply H by reflections from the left ====
508 *
509  IF( accum ) THEN
510  jbot = min( ndcol, kbot )
511  ELSE IF( wantt ) THEN
512  jbot = n
513  ELSE
514  jbot = kbot
515  END IF
516  DO 40 j = max( ktop, krcol ), jbot
517  mend = min( mbot, ( j-krcol+2 ) / 3 )
518  DO 30 m = mtop, mend
519  k = krcol + 3*( m-1 )
520  refsum = v( 1, m )*( h( k+1, j )+v( 2, m )*
521  $ h( k+2, j )+v( 3, m )*h( k+3, j ) )
522  h( k+1, j ) = h( k+1, j ) - refsum
523  h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m )
524  h( k+3, j ) = h( k+3, j ) - refsum*v( 3, m )
525  30 continue
526  40 continue
527  IF( bmp22 ) THEN
528  k = krcol + 3*( m22-1 )
529  DO 50 j = max( k+1, ktop ), jbot
530  refsum = v( 1, m22 )*( h( k+1, j )+v( 2, m22 )*
531  $ h( k+2, j ) )
532  h( k+1, j ) = h( k+1, j ) - refsum
533  h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m22 )
534  50 continue
535  END IF
536 *
537 * ==== Multiply H by reflections from the right.
538 * . Delay filling in the last row until the
539 * . vigilant deflation check is complete. ====
540 *
541  IF( accum ) THEN
542  jtop = max( ktop, incol )
543  ELSE IF( wantt ) THEN
544  jtop = 1
545  ELSE
546  jtop = ktop
547  END IF
548  DO 90 m = mtop, mbot
549  IF( v( 1, m ).NE.zero ) THEN
550  k = krcol + 3*( m-1 )
551  DO 60 j = jtop, min( kbot, k+3 )
552  refsum = v( 1, m )*( h( j, k+1 )+v( 2, m )*
553  $ h( j, k+2 )+v( 3, m )*h( j, k+3 ) )
554  h( j, k+1 ) = h( j, k+1 ) - refsum
555  h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2, m )
556  h( j, k+3 ) = h( j, k+3 ) - refsum*v( 3, m )
557  60 continue
558 *
559  IF( accum ) THEN
560 *
561 * ==== Accumulate U. (If necessary, update Z later
562 * . with an efficient matrix-matrix
563 * . multiply.) ====
564 *
565  kms = k - incol
566  DO 70 j = max( 1, ktop-incol ), kdu
567  refsum = v( 1, m )*( u( j, kms+1 )+v( 2, m )*
568  $ u( j, kms+2 )+v( 3, m )*u( j, kms+3 ) )
569  u( j, kms+1 ) = u( j, kms+1 ) - refsum
570  u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2, m )
571  u( j, kms+3 ) = u( j, kms+3 ) - refsum*v( 3, m )
572  70 continue
573  ELSE IF( wantz ) THEN
574 *
575 * ==== U is not accumulated, so update Z
576 * . now by multiplying by reflections
577 * . from the right. ====
578 *
579  DO 80 j = iloz, ihiz
580  refsum = v( 1, m )*( z( j, k+1 )+v( 2, m )*
581  $ z( j, k+2 )+v( 3, m )*z( j, k+3 ) )
582  z( j, k+1 ) = z( j, k+1 ) - refsum
583  z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m )
584  z( j, k+3 ) = z( j, k+3 ) - refsum*v( 3, m )
585  80 continue
586  END IF
587  END IF
588  90 continue
589 *
590 * ==== Special case: 2-by-2 reflection (if needed) ====
591 *
592  k = krcol + 3*( m22-1 )
593  IF( bmp22 ) THEN
594  IF ( v( 1, m22 ).NE.zero ) THEN
595  DO 100 j = jtop, min( kbot, k+3 )
596  refsum = v( 1, m22 )*( h( j, k+1 )+v( 2, m22 )*
597  $ h( j, k+2 ) )
598  h( j, k+1 ) = h( j, k+1 ) - refsum
599  h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2, m22 )
600  100 continue
601 *
602  IF( accum ) THEN
603  kms = k - incol
604  DO 110 j = max( 1, ktop-incol ), kdu
605  refsum = v( 1, m22 )*( u( j, kms+1 )+
606  $ v( 2, m22 )*u( j, kms+2 ) )
607  u( j, kms+1 ) = u( j, kms+1 ) - refsum
608  u( j, kms+2 ) = u( j, kms+2 ) - refsum*
609  $ v( 2, m22 )
610  110 continue
611  ELSE IF( wantz ) THEN
612  DO 120 j = iloz, ihiz
613  refsum = v( 1, m22 )*( z( j, k+1 )+v( 2, m22 )*
614  $ z( j, k+2 ) )
615  z( j, k+1 ) = z( j, k+1 ) - refsum
616  z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m22 )
617  120 continue
618  END IF
619  END IF
620  END IF
621 *
622 * ==== Vigilant deflation check ====
623 *
624  mstart = mtop
625  IF( krcol+3*( mstart-1 ).LT.ktop )
626  $ mstart = mstart + 1
627  mend = mbot
628  IF( bmp22 )
629  $ mend = mend + 1
630  IF( krcol.EQ.kbot-2 )
631  $ mend = mend + 1
632  DO 130 m = mstart, mend
633  k = min( kbot-1, krcol+3*( m-1 ) )
634 *
635 * ==== The following convergence test requires that
636 * . the traditional small-compared-to-nearby-diagonals
637 * . criterion and the Ahues & Tisseur (LAWN 122, 1997)
638 * . criteria both be satisfied. The latter improves
639 * . accuracy in some examples. Falling back on an
640 * . alternate convergence criterion when TST1 or TST2
641 * . is zero (as done here) is traditional but probably
642 * . unnecessary. ====
643 *
644  IF( h( k+1, k ).NE.zero ) THEN
645  tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
646  IF( tst1.EQ.zero ) THEN
647  IF( k.GE.ktop+1 )
648  $ tst1 = tst1 + abs( h( k, k-1 ) )
649  IF( k.GE.ktop+2 )
650  $ tst1 = tst1 + abs( h( k, k-2 ) )
651  IF( k.GE.ktop+3 )
652  $ tst1 = tst1 + abs( h( k, k-3 ) )
653  IF( k.LE.kbot-2 )
654  $ tst1 = tst1 + abs( h( k+2, k+1 ) )
655  IF( k.LE.kbot-3 )
656  $ tst1 = tst1 + abs( h( k+3, k+1 ) )
657  IF( k.LE.kbot-4 )
658  $ tst1 = tst1 + abs( h( k+4, k+1 ) )
659  END IF
660  IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
661  $ THEN
662  h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
663  h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
664  h11 = max( abs( h( k+1, k+1 ) ),
665  $ abs( h( k, k )-h( k+1, k+1 ) ) )
666  h22 = min( abs( h( k+1, k+1 ) ),
667  $ abs( h( k, k )-h( k+1, k+1 ) ) )
668  scl = h11 + h12
669  tst2 = h22*( h11 / scl )
670 *
671  IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
672  $ max( smlnum, ulp*tst2 ) )h( k+1, k ) = zero
673  END IF
674  END IF
675  130 continue
676 *
677 * ==== Fill in the last row of each bulge. ====
678 *
679  mend = min( nbmps, ( kbot-krcol-1 ) / 3 )
680  DO 140 m = mtop, mend
681  k = krcol + 3*( m-1 )
682  refsum = v( 1, m )*v( 3, m )*h( k+4, k+3 )
683  h( k+4, k+1 ) = -refsum
684  h( k+4, k+2 ) = -refsum*v( 2, m )
685  h( k+4, k+3 ) = h( k+4, k+3 ) - refsum*v( 3, m )
686  140 continue
687 *
688 * ==== End of near-the-diagonal bulge chase. ====
689 *
690  150 continue
691 *
692 * ==== Use U (if accumulated) to update far-from-diagonal
693 * . entries in H. If required, use U to update Z as
694 * . well. ====
695 *
696  IF( accum ) THEN
697  IF( wantt ) THEN
698  jtop = 1
699  jbot = n
700  ELSE
701  jtop = ktop
702  jbot = kbot
703  END IF
704  IF( ( .NOT.blk22 ) .OR. ( incol.LT.ktop ) .OR.
705  $ ( ndcol.GT.kbot ) .OR. ( ns.LE.2 ) ) THEN
706 *
707 * ==== Updates not exploiting the 2-by-2 block
708 * . structure of U. K1 and NU keep track of
709 * . the location and size of U in the special
710 * . cases of introducing bulges and chasing
711 * . bulges off the bottom. In these special
712 * . cases and in case the number of shifts
713 * . is NS = 2, there is no 2-by-2 block
714 * . structure to exploit. ====
715 *
716  k1 = max( 1, ktop-incol )
717  nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
718 *
719 * ==== Horizontal Multiply ====
720 *
721  DO 160 jcol = min( ndcol, kbot ) + 1, jbot, nh
722  jlen = min( nh, jbot-jcol+1 )
723  CALL sgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),
724  $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
725  $ ldwh )
726  CALL slacpy( 'ALL', nu, jlen, wh, ldwh,
727  $ h( incol+k1, jcol ), ldh )
728  160 continue
729 *
730 * ==== Vertical multiply ====
731 *
732  DO 170 jrow = jtop, max( ktop, incol ) - 1, nv
733  jlen = min( nv, max( ktop, incol )-jrow )
734  CALL sgemm( 'N', 'N', jlen, nu, nu, one,
735  $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
736  $ ldu, zero, wv, ldwv )
737  CALL slacpy( 'ALL', jlen, nu, wv, ldwv,
738  $ h( jrow, incol+k1 ), ldh )
739  170 continue
740 *
741 * ==== Z multiply (also vertical) ====
742 *
743  IF( wantz ) THEN
744  DO 180 jrow = iloz, ihiz, nv
745  jlen = min( nv, ihiz-jrow+1 )
746  CALL sgemm( 'N', 'N', jlen, nu, nu, one,
747  $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
748  $ ldu, zero, wv, ldwv )
749  CALL slacpy( 'ALL', jlen, nu, wv, ldwv,
750  $ z( jrow, incol+k1 ), ldz )
751  180 continue
752  END IF
753  ELSE
754 *
755 * ==== Updates exploiting U's 2-by-2 block structure.
756 * . (I2, I4, J2, J4 are the last rows and columns
757 * . of the blocks.) ====
758 *
759  i2 = ( kdu+1 ) / 2
760  i4 = kdu
761  j2 = i4 - i2
762  j4 = kdu
763 *
764 * ==== KZS and KNZ deal with the band of zeros
765 * . along the diagonal of one of the triangular
766 * . blocks. ====
767 *
768  kzs = ( j4-j2 ) - ( ns+1 )
769  knz = ns + 1
770 *
771 * ==== Horizontal multiply ====
772 *
773  DO 190 jcol = min( ndcol, kbot ) + 1, jbot, nh
774  jlen = min( nh, jbot-jcol+1 )
775 *
776 * ==== Copy bottom of H to top+KZS of scratch ====
777 * (The first KZS rows get multiplied by zero.) ====
778 *
779  CALL slacpy( 'ALL', knz, jlen, h( incol+1+j2, jcol ),
780  $ ldh, wh( kzs+1, 1 ), ldwh )
781 *
782 * ==== Multiply by U21**T ====
783 *
784  CALL slaset( 'ALL', kzs, jlen, zero, zero, wh, ldwh )
785  CALL strmm( 'L', 'U', 'C', 'N', knz, jlen, one,
786  $ u( j2+1, 1+kzs ), ldu, wh( kzs+1, 1 ),
787  $ ldwh )
788 *
789 * ==== Multiply top of H by U11**T ====
790 *
791  CALL sgemm( 'C', 'N', i2, jlen, j2, one, u, ldu,
792  $ h( incol+1, jcol ), ldh, one, wh, ldwh )
793 *
794 * ==== Copy top of H to bottom of WH ====
795 *
796  CALL slacpy( 'ALL', j2, jlen, h( incol+1, jcol ), ldh,
797  $ wh( i2+1, 1 ), ldwh )
798 *
799 * ==== Multiply by U21**T ====
800 *
801  CALL strmm( 'L', 'L', 'C', 'N', j2, jlen, one,
802  $ u( 1, i2+1 ), ldu, wh( i2+1, 1 ), ldwh )
803 *
804 * ==== Multiply by U22 ====
805 *
806  CALL sgemm( 'C', 'N', i4-i2, jlen, j4-j2, one,
807  $ u( j2+1, i2+1 ), ldu,
808  $ h( incol+1+j2, jcol ), ldh, one,
809  $ wh( i2+1, 1 ), ldwh )
810 *
811 * ==== Copy it back ====
812 *
813  CALL slacpy( 'ALL', kdu, jlen, wh, ldwh,
814  $ h( incol+1, jcol ), ldh )
815  190 continue
816 *
817 * ==== Vertical multiply ====
818 *
819  DO 200 jrow = jtop, max( incol, ktop ) - 1, nv
820  jlen = min( nv, max( incol, ktop )-jrow )
821 *
822 * ==== Copy right of H to scratch (the first KZS
823 * . columns get multiplied by zero) ====
824 *
825  CALL slacpy( 'ALL', jlen, knz, h( jrow, incol+1+j2 ),
826  $ ldh, wv( 1, 1+kzs ), ldwv )
827 *
828 * ==== Multiply by U21 ====
829 *
830  CALL slaset( 'ALL', jlen, kzs, zero, zero, wv, ldwv )
831  CALL strmm( 'R', 'U', 'N', 'N', jlen, knz, one,
832  $ u( j2+1, 1+kzs ), ldu, wv( 1, 1+kzs ),
833  $ ldwv )
834 *
835 * ==== Multiply by U11 ====
836 *
837  CALL sgemm( 'N', 'N', jlen, i2, j2, one,
838  $ h( jrow, incol+1 ), ldh, u, ldu, one, wv,
839  $ ldwv )
840 *
841 * ==== Copy left of H to right of scratch ====
842 *
843  CALL slacpy( 'ALL', jlen, j2, h( jrow, incol+1 ), ldh,
844  $ wv( 1, 1+i2 ), ldwv )
845 *
846 * ==== Multiply by U21 ====
847 *
848  CALL strmm( 'R', 'L', 'N', 'N', jlen, i4-i2, one,
849  $ u( 1, i2+1 ), ldu, wv( 1, 1+i2 ), ldwv )
850 *
851 * ==== Multiply by U22 ====
852 *
853  CALL sgemm( 'N', 'N', jlen, i4-i2, j4-j2, one,
854  $ h( jrow, incol+1+j2 ), ldh,
855  $ u( j2+1, i2+1 ), ldu, one, wv( 1, 1+i2 ),
856  $ ldwv )
857 *
858 * ==== Copy it back ====
859 *
860  CALL slacpy( 'ALL', jlen, kdu, wv, ldwv,
861  $ h( jrow, incol+1 ), ldh )
862  200 continue
863 *
864 * ==== Multiply Z (also vertical) ====
865 *
866  IF( wantz ) THEN
867  DO 210 jrow = iloz, ihiz, nv
868  jlen = min( nv, ihiz-jrow+1 )
869 *
870 * ==== Copy right of Z to left of scratch (first
871 * . KZS columns get multiplied by zero) ====
872 *
873  CALL slacpy( 'ALL', jlen, knz,
874  $ z( jrow, incol+1+j2 ), ldz,
875  $ wv( 1, 1+kzs ), ldwv )
876 *
877 * ==== Multiply by U12 ====
878 *
879  CALL slaset( 'ALL', jlen, kzs, zero, zero, wv,
880  $ ldwv )
881  CALL strmm( 'R', 'U', 'N', 'N', jlen, knz, one,
882  $ u( j2+1, 1+kzs ), ldu, wv( 1, 1+kzs ),
883  $ ldwv )
884 *
885 * ==== Multiply by U11 ====
886 *
887  CALL sgemm( 'N', 'N', jlen, i2, j2, one,
888  $ z( jrow, incol+1 ), ldz, u, ldu, one,
889  $ wv, ldwv )
890 *
891 * ==== Copy left of Z to right of scratch ====
892 *
893  CALL slacpy( 'ALL', jlen, j2, z( jrow, incol+1 ),
894  $ ldz, wv( 1, 1+i2 ), ldwv )
895 *
896 * ==== Multiply by U21 ====
897 *
898  CALL strmm( 'R', 'L', 'N', 'N', jlen, i4-i2, one,
899  $ u( 1, i2+1 ), ldu, wv( 1, 1+i2 ),
900  $ ldwv )
901 *
902 * ==== Multiply by U22 ====
903 *
904  CALL sgemm( 'N', 'N', jlen, i4-i2, j4-j2, one,
905  $ z( jrow, incol+1+j2 ), ldz,
906  $ u( j2+1, i2+1 ), ldu, one,
907  $ wv( 1, 1+i2 ), ldwv )
908 *
909 * ==== Copy the result back to Z ====
910 *
911  CALL slacpy( 'ALL', jlen, kdu, wv, ldwv,
912  $ z( jrow, incol+1 ), ldz )
913  210 continue
914  END IF
915  END IF
916  END IF
917  220 continue
918 *
919 * ==== End of SLAQR5 ====
920 *
921  END