ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
zlamsh.f
Go to the documentation of this file.
1  SUBROUTINE zlamsh( S, LDS, NBULGE, JBLK, H, LDH, N, ULP )
2 *
3 * -- ScaLAPACK routine (version 1.7) --
4 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
5 * Courant Institute, Argonne National Lab, and Rice University
6 * May 28, 1999
7 *
8 * .. Scalar Arguments ..
9  INTEGER JBLK, LDH, LDS, N, NBULGE
10  DOUBLE PRECISION ULP
11 * ..
12 * .. Array Arguments ..
13  COMPLEX*16 H( LDH, * ), S( LDS, * )
14 * ..
15 *
16 * Purpose
17 * =======
18 *
19 * ZLAMSH sends multiple shifts through a small (single node) matrix to
20 * see how consecutive small subdiagonal elements are modified by
21 * subsequent shifts in an effort to maximize the number of bulges
22 * that can be sent through.
23 * ZLAMSH should only be called when there are multiple shifts/bulges
24 * (NBULGE > 1) and the first shift is starting in the middle of an
25 * unreduced Hessenberg matrix because of two or more consecutive
26 * small subdiagonal elements.
27 *
28 * Arguments
29 * =========
30 *
31 * S (local input/output) COMPLEX*16 array, ( LDS,* )
32 * On entry, the matrix of shifts. Only the 2x2 diagonal of S
33 * is referenced. It is assumed that S has JBLK double shifts
34 * (size 2).
35 * On exit, the data is rearranged in the best order for
36 * applying.
37 *
38 * LDS (local input) INTEGER
39 * On entry, the leading dimension of S. Unchanged on exit.
40 * 1 < NBULGE <= JBLK <= LDS/2
41 *
42 * NBULGE (local input/output) INTEGER
43 * On entry, the number of bulges to send through H ( >1 ).
44 * NBULGE should be less than the maximum determined (JBLK).
45 * 1 < NBULGE <= JBLK <= LDS/2
46 * On exit, the maximum number of bulges that can be sent
47 * through.
48 *
49 * JBLK (local input) INTEGER
50 * On entry, the number of shifts determined for S.
51 * Unchanged on exit.
52 *
53 * H (local input/output) COMPLEX*16 array ( LDH,N )
54 * On entry, the local matrix to apply the shifts on.
55 * H should be aligned so that the starting row is 2.
56 * On exit, the data is destroyed.
57 *
58 * LDH (local input) INTEGER
59 * On entry, the leading dimension of H. Unchanged on exit.
60 *
61 * N (local input) INTEGER
62 * On entry, the size of H. If all the bulges are expected to
63 * go through, N should be at least 4*NBULGE+2.
64 * Otherwise, NBULGE may be reduced by this routine.
65 *
66 * ULP (local input) DOUBLE PRECISION
67 * On entry, machine precision
68 * Unchanged on exit.
69 *
70 * Further Details
71 * ===============
72 *
73 * Implemented by: M. Fahey, May 28, 1999
74 *
75 * =====================================================================
76 *
77 * .. Parameters ..
78  DOUBLE PRECISION RONE, TEN
79  parameter( rone = 1.0d+0, ten = 10.0d+0 )
80  COMPLEX*16 ZERO
81  parameter( zero = ( 0.0d+0, 0.0d+0 ) )
82 * ..
83 * .. Local Scalars ..
84  INTEGER I, IBULGE, IVAL, J, K, M, NR
85  DOUBLE PRECISION DVAL, S1, TST1
86  COMPLEX*16 CDUM, H00, H10, H11, H12, H21, H22, H33, H33S,
87  $ H43H34, H44, H44S, SUM, T1, T2, T3, V1, V2, V3
88 * ..
89 * .. Local Arrays ..
90  COMPLEX*16 V( 3 )
91 * ..
92 * .. External Subroutines ..
93  EXTERNAL zcopy, zlarfg
94 * ..
95 * .. Intrinsic Functions ..
96  INTRINSIC abs, dble, dconjg, dimag, max, min
97 * ..
98 * .. Statement Functions ..
99  DOUBLE PRECISION CABS1
100 * ..
101 * .. Statement Function definitions ..
102  cabs1( cdum ) = abs( dble( cdum ) ) + abs( dimag( cdum ) )
103 * ..
104 * .. Executable Statements ..
105 *
106  m = 2
107  DO 50 ibulge = 1, nbulge
108  h44 = s( 2*jblk-2*ibulge+2, 2*jblk-2*ibulge+2 )
109  h33 = s( 2*jblk-2*ibulge+1, 2*jblk-2*ibulge+1 )
110  h43h34 = s( 2*jblk-2*ibulge+1, 2*jblk-2*ibulge+2 )*
111  $ s( 2*jblk-2*ibulge+2, 2*jblk-2*ibulge+1 )
112  h11 = h( m, m )
113  h22 = h( m+1, m+1 )
114  h21 = h( m+1, m )
115  h12 = h( m, m+1 )
116  h44s = h44 - h11
117  h33s = h33 - h11
118  v1 = ( h33s*h44s-h43h34 ) / h21 + h12
119  v2 = h22 - h11 - h33s - h44s
120  v3 = h( m+2, m+1 )
121  s1 = cabs1( v1 ) + cabs1( v2 ) + cabs1( v3 )
122  v1 = v1 / s1
123  v2 = v2 / s1
124  v3 = v3 / s1
125  v( 1 ) = v1
126  v( 2 ) = v2
127  v( 3 ) = v3
128  h00 = h( m-1, m-1 )
129  h10 = h( m, m-1 )
130  tst1 = cabs1( v1 )*( cabs1( h00 )+cabs1( h11 )+cabs1( h22 ) )
131  IF( cabs1( h10 )*( cabs1( v2 )+cabs1( v3 ) ).GT.ulp*tst1 ) THEN
132 * Find minimum
133  dval = ( cabs1( h10 )*( cabs1( v2 )+cabs1( v3 ) ) ) /
134  $ ( ulp*tst1 )
135  ival = ibulge
136  DO 10 i = ibulge + 1, nbulge
137  h44 = s( 2*jblk-2*i+2, 2*jblk-2*i+2 )
138  h33 = s( 2*jblk-2*i+1, 2*jblk-2*i+1 )
139  h43h34 = s( 2*jblk-2*i+1, 2*jblk-2*i+2 )*
140  $ s( 2*jblk-2*i+2, 2*jblk-2*i+1 )
141  h11 = h( m, m )
142  h22 = h( m+1, m+1 )
143  h21 = h( m+1, m )
144  h12 = h( m, m+1 )
145  h44s = h44 - h11
146  h33s = h33 - h11
147  v1 = ( h33s*h44s-h43h34 ) / h21 + h12
148  v2 = h22 - h11 - h33s - h44s
149  v3 = h( m+2, m+1 )
150  s1 = cabs1( v1 ) + cabs1( v2 ) + cabs1( v3 )
151  v1 = v1 / s1
152  v2 = v2 / s1
153  v3 = v3 / s1
154  v( 1 ) = v1
155  v( 2 ) = v2
156  v( 3 ) = v3
157  h00 = h( m-1, m-1 )
158  h10 = h( m, m-1 )
159  tst1 = cabs1( v1 )*( cabs1( h00 )+cabs1( h11 )+
160  $ cabs1( h22 ) )
161  IF( ( dval.GT.( cabs1( h10 )*( cabs1( v2 )+
162  $ cabs1( v3 ) ) ) / ( ulp*tst1 ) ) .AND.
163  $ ( dval.GT.rone ) ) THEN
164  dval = ( cabs1( h10 )*( cabs1( v2 )+cabs1( v3 ) ) ) /
165  $ ( ulp*tst1 )
166  ival = i
167  END IF
168  10 CONTINUE
169  IF( ( dval.LT.ten ) .AND. ( ival.NE.ibulge ) ) THEN
170  h44 = s( 2*jblk-2*ival+2, 2*jblk-2*ival+2 )
171  h33 = s( 2*jblk-2*ival+1, 2*jblk-2*ival+1 )
172  h43h34 = s( 2*jblk-2*ival+1, 2*jblk-2*ival+2 )
173  h10 = s( 2*jblk-2*ival+2, 2*jblk-2*ival+1 )
174  s( 2*jblk-2*ival+2, 2*jblk-2*ival+2 ) = s( 2*jblk-2*
175  $ ibulge+2, 2*jblk-2*ibulge+2 )
176  s( 2*jblk-2*ival+1, 2*jblk-2*ival+1 ) = s( 2*jblk-2*
177  $ ibulge+1, 2*jblk-2*ibulge+1 )
178  s( 2*jblk-2*ival+1, 2*jblk-2*ival+2 ) = s( 2*jblk-2*
179  $ ibulge+1, 2*jblk-2*ibulge+2 )
180  s( 2*jblk-2*ival+2, 2*jblk-2*ival+1 ) = s( 2*jblk-2*
181  $ ibulge+2, 2*jblk-2*ibulge+1 )
182  s( 2*jblk-2*ibulge+2, 2*jblk-2*ibulge+2 ) = h44
183  s( 2*jblk-2*ibulge+1, 2*jblk-2*ibulge+1 ) = h33
184  s( 2*jblk-2*ibulge+1, 2*jblk-2*ibulge+2 ) = h43h34
185  s( 2*jblk-2*ibulge+2, 2*jblk-2*ibulge+1 ) = h10
186  END IF
187  h44 = s( 2*jblk-2*ibulge+2, 2*jblk-2*ibulge+2 )
188  h33 = s( 2*jblk-2*ibulge+1, 2*jblk-2*ibulge+1 )
189  h43h34 = s( 2*jblk-2*ibulge+1, 2*jblk-2*ibulge+2 )*
190  $ s( 2*jblk-2*ibulge+2, 2*jblk-2*ibulge+1 )
191  h11 = h( m, m )
192  h22 = h( m+1, m+1 )
193  h21 = h( m+1, m )
194  h12 = h( m, m+1 )
195  h44s = h44 - h11
196  h33s = h33 - h11
197  v1 = ( h33s*h44s-h43h34 ) / h21 + h12
198  v2 = h22 - h11 - h33s - h44s
199  v3 = h( m+2, m+1 )
200  s1 = cabs1( v1 ) + cabs1( v2 ) + cabs1( v3 )
201  v1 = v1 / s1
202  v2 = v2 / s1
203  v3 = v3 / s1
204  v( 1 ) = v1
205  v( 2 ) = v2
206  v( 3 ) = v3
207  h00 = h( m-1, m-1 )
208  h10 = h( m, m-1 )
209  tst1 = cabs1( v1 )*( cabs1( h00 )+cabs1( h11 )+
210  $ cabs1( h22 ) )
211  END IF
212  IF( cabs1( h10 )*( cabs1( v2 )+cabs1( v3 ) ).GT.ten*ulp*tst1 )
213  $ THEN
214 * IBULGE better not be 1 here or we have a bug!
215  nbulge = max( ibulge-1, 1 )
216  RETURN
217  END IF
218  DO 40 k = m, n - 1
219  nr = min( 3, n-k+1 )
220  IF( k.GT.m )
221  $ CALL zcopy( nr, h( k, k-1 ), 1, v, 1 )
222  CALL zlarfg( nr, v( 1 ), v( 2 ), 1, t1 )
223  IF( k.GT.m ) THEN
224  h( k, k-1 ) = v( 1 )
225  h( k+1, k-1 ) = zero
226  IF( k.LT.n-1 )
227  $ h( k+2, k-1 ) = zero
228  ELSE
229 * H(m,m-1) must be updated,
230 *
231  h( k, k-1 ) = h( k, k-1 ) - dconjg( t1 )*h( k, k-1 )
232  END IF
233  v2 = v( 2 )
234  t2 = t1*v2
235  IF( nr.EQ.3 ) THEN
236  v3 = v( 3 )
237  t3 = t1*v3
238  DO 20 j = k, n
239  sum = dconjg( t1 )*h( k, j ) +
240  $ dconjg( t2 )*h( k+1, j ) +
241  $ dconjg( t3 )*h( k+2, j )
242  h( k, j ) = h( k, j ) - sum
243  h( k+1, j ) = h( k+1, j ) - sum*v2
244  h( k+2, j ) = h( k+2, j ) - sum*v3
245  20 CONTINUE
246  DO 30 j = 1, min( k+3, n )
247  sum = t1*h( j, k ) + t2*h( j, k+1 ) + t3*h( j, k+2 )
248  h( j, k ) = h( j, k ) - sum
249  h( j, k+1 ) = h( j, k+1 ) - sum*dconjg( v2 )
250  h( j, k+2 ) = h( j, k+2 ) - sum*dconjg( v3 )
251  30 CONTINUE
252  END IF
253  40 CONTINUE
254  50 CONTINUE
255 *
256  RETURN
257 *
258 * End of ZLAMSH
259 *
260  END
max
#define max(A, B)
Definition: pcgemr.c:180
zlamsh
subroutine zlamsh(S, LDS, NBULGE, JBLK, H, LDH, N, ULP)
Definition: zlamsh.f:2
min
#define min(A, B)
Definition: pcgemr.c:181