SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
dlamsh.f
Go to the documentation of this file.
1 SUBROUTINE dlamsh ( S, LDS, NBULGE, JBLK, H, LDH, N, ULP )
2*
3* -- ScaLAPACK auxiliary routine (version 1.7) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* May 1, 1997
7*
8* .. Scalar Arguments ..
9 INTEGER LDS, NBULGE, JBLK, LDH, N
10 DOUBLE PRECISION ULP
11* ..
12* .. Array Arguments ..
13 DOUBLE PRECISION S(LDS,*), H(LDH,*)
14* ..
15*
16* Purpose
17* =======
18*
19* DLAMSH sends multiple shifts through a small (single node) matrix to
20* see how consecutive small subdiagonal elements are modified by
21* subsequent shifts in an effort to maximize the number of bulges
22* that can be sent through.
23* DLAMSH should only be called when there are multiple shifts/bulges
24* (NBULGE > 1) and the first shift is starting in the middle of an
25* unreduced Hessenberg matrix because of two or more consecutive small
26* subdiagonal elements.
27*
28* Arguments
29* =========
30*
31* S (local input/output) DOUBLE PRECISION array, (LDS,*)
32* On entry, the matrix of shifts. Only the 2x2 diagonal of S is
33* referenced. It is assumed that S has JBLK double shifts
34* (size 2).
35* On exit, the data is rearranged in the best order for
36* applying.
37*
38* LDS (local input) INTEGER
39* On entry, the leading dimension of S. Unchanged on exit.
40* 1 < NBULGE <= JBLK <= LDS/2
41*
42* NBULGE (local input/output) INTEGER
43* On entry, the number of bulges to send through H ( >1 ).
44* NBULGE should be less than the maximum determined (JBLK).
45* 1 < NBULGE <= JBLK <= LDS/2
46* On exit, the maximum number of bulges that can be sent
47* through.
48*
49* JBLK (local input) INTEGER
50* On entry, the number of shifts determined for S.
51* Unchanged on exit.
52*
53* H (local input/output) DOUBLE PRECISION array (LDH,N)
54* On entry, the local matrix to apply the shifts on.
55* H should be aligned so that the starting row is 2.
56* On exit, the data is destroyed.
57*
58* LDS (local input) INTEGER
59* On entry, the leading dimension of S. Unchanged on exit.
60*
61* N (local input) INTEGER
62* On entry, the size of H. If all the bulges are expected to
63* go through, N should be at least 4*NBULGE+2.
64* Otherwise, NBULGE may be reduced by this routine.
65*
66* ULP (local input) DOUBLE PRECISION
67* On entry, machine precision
68* Unchanged on exit.
69*
70* Implemented by: G. Henry, May 1, 1997
71*
72* =====================================================================
73*
74* .. Parameters ..
75 DOUBLE PRECISION ZERO, TEN
76 parameter( zero = 0.0d+0, ten = 10.0d+0 )
77* ..
78* .. Local Scalars ..
79 INTEGER K, IBULGE, M, NR, J, IVAL, I
80 DOUBLE PRECISION H44, H33, H43H34, H11, H22, H21, H12, H44S,
81 $ H33S, V1, V2, V3, H00, H10, TST1, T1, T2, T3,
82 $ SUM, S1, DVAL
83* ..
84* .. Local Arrays ..
85 DOUBLE PRECISION V(3)
86* ..
87* .. External Subroutines ..
88 EXTERNAL dlarfg, dcopy
89* ..
90* .. Intrinsic Functions ..
91 INTRINSIC max, abs
92* ..
93* .. Executable Statements ..
94*
95 m = 2
96 DO 10 ibulge = 1, nbulge
97 h44 = s(2*jblk-2*ibulge+2, 2*jblk-2*ibulge+2)
98 h33 = s(2*jblk-2*ibulge+1,2*jblk-2*ibulge+1)
99 h43h34 = s(2*jblk-2*ibulge+1,2*jblk-2*ibulge+2)*
100 $ s(2*jblk-2*ibulge+2, 2*jblk-2*ibulge+1)
101 h11 = h( m, m )
102 h22 = h( m+1, m+1 )
103 h21 = h( m+1, m )
104 h12 = h( m, m+1 )
105 h44s = h44 - h11
106 h33s = h33 - h11
107 v1 = ( h33s*h44s-h43h34 ) / h21 + h12
108 v2 = h22 - h11 - h33s - h44s
109 v3 = h( m+2, m+1 )
110 s1 = abs( v1 ) + abs( v2 ) + abs( v3 )
111 v1 = v1 / s1
112 v2 = v2 / s1
113 v3 = v3 / s1
114 v( 1 ) = v1
115 v( 2 ) = v2
116 v( 3 ) = v3
117 h00 = h( m-1, m-1 )
118 h10 = h( m, m-1 )
119 tst1 = abs( v1 )*( abs( h00 )+abs( h11 )+abs( h22 ) )
120 IF( abs( h10 )*( abs( v2 )+abs( v3 ) ).GT.ulp*tst1 ) THEN
121* Find minimum
122 dval = (abs(h10)*(abs(v2)+abs(v3))) / (ulp*tst1)
123 ival = ibulge
124 DO 15 i = ibulge+1, nbulge
125 h44 = s(2*jblk-2*i+2, 2*jblk-2*i+2)
126 h33 = s(2*jblk-2*i+1,2*jblk-2*i+1)
127 h43h34 = s(2*jblk-2*i+1,2*jblk-2*i+2)*
128 $ s(2*jblk-2*i+2, 2*jblk-2*i+1)
129 h11 = h( m, m )
130 h22 = h( m+1, m+1 )
131 h21 = h( m+1, m )
132 h12 = h( m, m+1 )
133 h44s = h44 - h11
134 h33s = h33 - h11
135 v1 = ( h33s*h44s-h43h34 ) / h21 + h12
136 v2 = h22 - h11 - h33s - h44s
137 v3 = h( m+2, m+1 )
138 s1 = abs( v1 ) + abs( v2 ) + abs( v3 )
139 v1 = v1 / s1
140 v2 = v2 / s1
141 v3 = v3 / s1
142 v( 1 ) = v1
143 v( 2 ) = v2
144 v( 3 ) = v3
145 h00 = h( m-1, m-1 )
146 h10 = h( m, m-1 )
147 tst1 = abs( v1 )*( abs( h00 )+abs( h11 )+abs( h22 ) )
148 IF ( (dval.GT.(abs(h10)*(abs(v2)+abs(v3)))/(ulp*tst1))
149 $ .AND. ( dval .GT. 1.d0 ) ) THEN
150 dval = (abs(h10)*(abs(v2)+abs(v3))) / (ulp*tst1)
151 ival = i
152 END IF
153 15 CONTINUE
154 IF ( (dval .LT. ten) .AND. (ival .NE. ibulge) ) THEN
155 h44 = s(2*jblk-2*ival+2, 2*jblk-2*ival+2)
156 h33 = s(2*jblk-2*ival+1,2*jblk-2*ival+1)
157 h43h34 = s(2*jblk-2*ival+1,2*jblk-2*ival+2)
158 h10 = s(2*jblk-2*ival+2, 2*jblk-2*ival+1)
159 s(2*jblk-2*ival+2,2*jblk-2*ival+2) =
160 $ s(2*jblk-2*ibulge+2,2*jblk-2*ibulge+2)
161 s(2*jblk-2*ival+1,2*jblk-2*ival+1) =
162 $ s(2*jblk-2*ibulge+1,2*jblk-2*ibulge+1)
163 s(2*jblk-2*ival+1,2*jblk-2*ival+2) =
164 $ s(2*jblk-2*ibulge+1,2*jblk-2*ibulge+2)
165 s(2*jblk-2*ival+2, 2*jblk-2*ival+1) =
166 $ s(2*jblk-2*ibulge+2, 2*jblk-2*ibulge+1)
167 s(2*jblk-2*ibulge+2, 2*jblk-2*ibulge+2) = h44
168 s(2*jblk-2*ibulge+1,2*jblk-2*ibulge+1) = h33
169 s(2*jblk-2*ibulge+1,2*jblk-2*ibulge+2) = h43h34
170 s(2*jblk-2*ibulge+2, 2*jblk-2*ibulge+1) = h10
171 END IF
172 h44 = s(2*jblk-2*ibulge+2, 2*jblk-2*ibulge+2)
173 h33 = s(2*jblk-2*ibulge+1,2*jblk-2*ibulge+1)
174 h43h34 = s(2*jblk-2*ibulge+1,2*jblk-2*ibulge+2)*
175 $ s(2*jblk-2*ibulge+2, 2*jblk-2*ibulge+1)
176 h11 = h( m, m )
177 h22 = h( m+1, m+1 )
178 h21 = h( m+1, m )
179 h12 = h( m, m+1 )
180 h44s = h44 - h11
181 h33s = h33 - h11
182 v1 = ( h33s*h44s-h43h34 ) / h21 + h12
183 v2 = h22 - h11 - h33s - h44s
184 v3 = h( m+2, m+1 )
185 s1 = abs( v1 ) + abs( v2 ) + abs( v3 )
186 v1 = v1 / s1
187 v2 = v2 / s1
188 v3 = v3 / s1
189 v( 1 ) = v1
190 v( 2 ) = v2
191 v( 3 ) = v3
192 h00 = h( m-1, m-1 )
193 h10 = h( m, m-1 )
194 tst1 = abs( v1 )*( abs( h00 )+abs( h11 )+abs( h22 ) )
195 END IF
196 IF( abs( h10 )*( abs( v2 )+abs( v3 ) ).GT.ten*ulp*tst1 ) THEN
197* IBULGE better not be 1 here or we have a bug!
198 nbulge = max(ibulge -1,1)
199 RETURN
200 END IF
201 DO 120 k = m, n - 1
202 nr = min( 3, n-k+1 )
203 IF( k.GT.m )
204 $ CALL dcopy( nr, h( k, k-1 ), 1, v, 1 )
205 CALL dlarfg( nr, v( 1 ), v( 2 ), 1, t1 )
206 IF( k.GT.m ) THEN
207 h( k, k-1 ) = v( 1 )
208 h( k+1, k-1 ) = zero
209 IF( k.LT.n-1 )
210 $ h( k+2, k-1 ) = zero
211 ELSE
212 h( k, k-1 ) = -h( k, k-1 )
213 END IF
214 v2 = v( 2 )
215 t2 = t1*v2
216 IF( nr.EQ.3 ) THEN
217 v3 = v( 3 )
218 t3 = t1*v3
219 DO 60 j = k, n
220 sum = h( k, j ) + v2*h( k+1, j ) + v3*h( k+2, j )
221 h( k, j ) = h( k, j ) - sum*t1
222 h( k+1, j ) = h( k+1, j ) - sum*t2
223 h( k+2, j ) = h( k+2, j ) - sum*t3
224 60 CONTINUE
225 DO 70 j = 1, min( k+3, n )
226 sum = h( j, k ) + v2*h( j, k+1 ) + v3*h( j, k+2 )
227 h( j, k ) = h( j, k ) - sum*t1
228 h( j, k+1 ) = h( j, k+1 ) - sum*t2
229 h( j, k+2 ) = h( j, k+2 ) - sum*t3
230 70 CONTINUE
231 END IF
232 120 CONTINUE
233 10 CONTINUE
234*
235 RETURN
236 END
subroutine dlamsh(s, lds, nbulge, jblk, h, ldh, n, ulp)
Definition dlamsh.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181