ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
slamsh.f
Go to the documentation of this file.
1  SUBROUTINE slamsh ( S, LDS, NBULGE, JBLK, H, LDH, N, ULP )
2 *
3 * -- ScaLAPACK auxiliary routine (version 1.7) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * May 1, 1997
7 *
8 * .. Scalar Arguments ..
9  INTEGER LDS, NBULGE, JBLK, LDH, N
10  REAL ULP
11 * ..
12 * .. Array Arguments ..
13  REAL S(LDS,*), H(LDH,*)
14 * ..
15 *
16 * Purpose
17 * =======
18 *
19 * SLAMSH sends multiple shifts through a small (single node) matrix to
20 * see how consecutive small subdiagonal elements are modified by
21 * subsequent shifts in an effort to maximize the number of bulges
22 * that can be sent through.
23 * SLAMSH should only be called when there are multiple shifts/bulges
24 * (NBULGE > 1) and the first shift is starting in the middle of an
25 * unreduced Hessenberg matrix because of two or more consecutive small
26 * subdiagonal elements.
27 *
28 * Arguments
29 * =========
30 *
31 * S (local input/output) REAL array, (LDS,*)
32 * On entry, the matrix of shifts. Only the 2x2 diagonal of S is
33 * referenced. It is assumed that S has JBLK double shifts
34 * (size 2).
35 * On exit, the data is rearranged in the best order for
36 * applying.
37 *
38 * LDS (local input) INTEGER
39 * On entry, the leading dimension of S. Unchanged on exit.
40 * 1 < NBULGE <= JBLK <= LDS/2
41 *
42 * NBULGE (local input/output) INTEGER
43 * On entry, the number of bulges to send through H ( >1 ).
44 * NBULGE should be less than the maximum determined (JBLK).
45 * 1 < NBULGE <= JBLK <= LDS/2
46 * On exit, the maximum number of bulges that can be sent
47 * through.
48 *
49 * JBLK (local input) INTEGER
50 * On entry, the number of shifts determined for S.
51 * Unchanged on exit.
52 *
53 * H (local input/output) REAL array (LDH,N)
54 * On entry, the local matrix to apply the shifts on.
55 * H should be aligned so that the starting row is 2.
56 * On exit, the data is destroyed.
57 *
58 * LDS (local input) INTEGER
59 * On entry, the leading dimension of S. Unchanged on exit.
60 *
61 * N (local input) INTEGER
62 * On entry, the size of H. If all the bulges are expected to
63 * go through, N should be at least 4*NBULGE+2.
64 * Otherwise, NBULGE may be reduced by this routine.
65 *
66 * ULP (local input) REAL
67 * On entry, machine precision
68 * Unchanged on exit.
69 *
70 * Implemented by: G. Henry, May 1, 1997
71 *
72 * =====================================================================
73 *
74 * .. Parameters ..
75  REAL ZERO, TEN
76  parameter( zero = 0.0e+0, ten = 10.0e+0 )
77 * ..
78 * .. Local Scalars ..
79  INTEGER K, IBULGE, M, NR, J, IVAL, I
80  REAL H44, H33, H43H34, H11, H22, H21, H12, H44S,
81  $ H33S, V1, V2, V3, H00, H10, TST1, T1, T2, T3,
82  $ SUM, S1, DVAL
83 * ..
84 * .. Local Arrays ..
85  REAL V(3)
86 * ..
87 * .. External Subroutines ..
88  EXTERNAL slarfg, scopy
89 * ..
90 * .. Intrinsic Functions ..
91  INTRINSIC max, abs
92 * ..
93 * .. Executable Statements ..
94 *
95  m = 2
96  DO 10 ibulge = 1, nbulge
97  h44 = s(2*jblk-2*ibulge+2, 2*jblk-2*ibulge+2)
98  h33 = s(2*jblk-2*ibulge+1,2*jblk-2*ibulge+1)
99  h43h34 = s(2*jblk-2*ibulge+1,2*jblk-2*ibulge+2)*
100  $ s(2*jblk-2*ibulge+2, 2*jblk-2*ibulge+1)
101  h11 = h( m, m )
102  h22 = h( m+1, m+1 )
103  h21 = h( m+1, m )
104  h12 = h( m, m+1 )
105  h44s = h44 - h11
106  h33s = h33 - h11
107  v1 = ( h33s*h44s-h43h34 ) / h21 + h12
108  v2 = h22 - h11 - h33s - h44s
109  v3 = h( m+2, m+1 )
110  s1 = abs( v1 ) + abs( v2 ) + abs( v3 )
111  v1 = v1 / s1
112  v2 = v2 / s1
113  v3 = v3 / s1
114  v( 1 ) = v1
115  v( 2 ) = v2
116  v( 3 ) = v3
117  h00 = h( m-1, m-1 )
118  h10 = h( m, m-1 )
119  tst1 = abs( v1 )*( abs( h00 )+abs( h11 )+abs( h22 ) )
120  IF( abs( h10 )*( abs( v2 )+abs( v3 ) ).GT.ulp*tst1 ) THEN
121 * Find minimum
122  dval = (abs(h10)*(abs(v2)+abs(v3))) / (ulp*tst1)
123  ival = ibulge
124  DO 15 i = ibulge+1, nbulge
125  h44 = s(2*jblk-2*i+2, 2*jblk-2*i+2)
126  h33 = s(2*jblk-2*i+1,2*jblk-2*i+1)
127  h43h34 = s(2*jblk-2*i+1,2*jblk-2*i+2)*
128  $ s(2*jblk-2*i+2, 2*jblk-2*i+1)
129  h11 = h( m, m )
130  h22 = h( m+1, m+1 )
131  h21 = h( m+1, m )
132  h12 = h( m, m+1 )
133  h44s = h44 - h11
134  h33s = h33 - h11
135  v1 = ( h33s*h44s-h43h34 ) / h21 + h12
136  v2 = h22 - h11 - h33s - h44s
137  v3 = h( m+2, m+1 )
138  s1 = abs( v1 ) + abs( v2 ) + abs( v3 )
139  v1 = v1 / s1
140  v2 = v2 / s1
141  v3 = v3 / s1
142  v( 1 ) = v1
143  v( 2 ) = v2
144  v( 3 ) = v3
145  h00 = h( m-1, m-1 )
146  h10 = h( m, m-1 )
147  tst1 = abs( v1 )*( abs( h00 )+abs( h11 )+abs( h22 ) )
148  IF ( (dval.GT.(abs(h10)*(abs(v2)+abs(v3)))/(ulp*tst1))
149  $ .AND. ( dval .GT. 1.d0 ) ) THEN
150  dval = (abs(h10)*(abs(v2)+abs(v3))) / (ulp*tst1)
151  ival = i
152  END IF
153  15 CONTINUE
154  IF ( (dval .LT. ten) .AND. (ival .NE. ibulge) ) THEN
155  h44 = s(2*jblk-2*ival+2, 2*jblk-2*ival+2)
156  h33 = s(2*jblk-2*ival+1,2*jblk-2*ival+1)
157  h43h34 = s(2*jblk-2*ival+1,2*jblk-2*ival+2)
158  h10 = s(2*jblk-2*ival+2, 2*jblk-2*ival+1)
159  s(2*jblk-2*ival+2,2*jblk-2*ival+2) =
160  $ s(2*jblk-2*ibulge+2,2*jblk-2*ibulge+2)
161  s(2*jblk-2*ival+1,2*jblk-2*ival+1) =
162  $ s(2*jblk-2*ibulge+1,2*jblk-2*ibulge+1)
163  s(2*jblk-2*ival+1,2*jblk-2*ival+2) =
164  $ s(2*jblk-2*ibulge+1,2*jblk-2*ibulge+2)
165  s(2*jblk-2*ival+2, 2*jblk-2*ival+1) =
166  $ s(2*jblk-2*ibulge+2, 2*jblk-2*ibulge+1)
167  s(2*jblk-2*ibulge+2, 2*jblk-2*ibulge+2) = h44
168  s(2*jblk-2*ibulge+1,2*jblk-2*ibulge+1) = h33
169  s(2*jblk-2*ibulge+1,2*jblk-2*ibulge+2) = h43h34
170  s(2*jblk-2*ibulge+2, 2*jblk-2*ibulge+1) = h10
171  END IF
172  h44 = s(2*jblk-2*ibulge+2, 2*jblk-2*ibulge+2)
173  h33 = s(2*jblk-2*ibulge+1,2*jblk-2*ibulge+1)
174  h43h34 = s(2*jblk-2*ibulge+1,2*jblk-2*ibulge+2)*
175  $ s(2*jblk-2*ibulge+2, 2*jblk-2*ibulge+1)
176  h11 = h( m, m )
177  h22 = h( m+1, m+1 )
178  h21 = h( m+1, m )
179  h12 = h( m, m+1 )
180  h44s = h44 - h11
181  h33s = h33 - h11
182  v1 = ( h33s*h44s-h43h34 ) / h21 + h12
183  v2 = h22 - h11 - h33s - h44s
184  v3 = h( m+2, m+1 )
185  s1 = abs( v1 ) + abs( v2 ) + abs( v3 )
186  v1 = v1 / s1
187  v2 = v2 / s1
188  v3 = v3 / s1
189  v( 1 ) = v1
190  v( 2 ) = v2
191  v( 3 ) = v3
192  h00 = h( m-1, m-1 )
193  h10 = h( m, m-1 )
194  tst1 = abs( v1 )*( abs( h00 )+abs( h11 )+abs( h22 ) )
195  END IF
196  IF( abs( h10 )*( abs( v2 )+abs( v3 ) ).GT.ten*ulp*tst1 ) THEN
197 * IBULGE better not be 1 here or we have a bug!
198  nbulge = max(ibulge -1,1)
199  RETURN
200  END IF
201  DO 120 k = m, n - 1
202  nr = min( 3, n-k+1 )
203  IF( k.GT.m )
204  $ CALL scopy( nr, h( k, k-1 ), 1, v, 1 )
205  CALL slarfg( nr, v( 1 ), v( 2 ), 1, t1 )
206  IF( k.GT.m ) THEN
207  h( k, k-1 ) = v( 1 )
208  h( k+1, k-1 ) = zero
209  IF( k.LT.n-1 )
210  $ h( k+2, k-1 ) = zero
211  ELSE
212  h( k, k-1 ) = -h( k, k-1 )
213  END IF
214  v2 = v( 2 )
215  t2 = t1*v2
216  IF( nr.EQ.3 ) THEN
217  v3 = v( 3 )
218  t3 = t1*v3
219  DO 60 j = k, n
220  sum = h( k, j ) + v2*h( k+1, j ) + v3*h( k+2, j )
221  h( k, j ) = h( k, j ) - sum*t1
222  h( k+1, j ) = h( k+1, j ) - sum*t2
223  h( k+2, j ) = h( k+2, j ) - sum*t3
224  60 CONTINUE
225  DO 70 j = 1, min( k+3, n )
226  sum = h( j, k ) + v2*h( j, k+1 ) + v3*h( j, k+2 )
227  h( j, k ) = h( j, k ) - sum*t1
228  h( j, k+1 ) = h( j, k+1 ) - sum*t2
229  h( j, k+2 ) = h( j, k+2 ) - sum*t3
230  70 CONTINUE
231  END IF
232  120 CONTINUE
233  10 CONTINUE
234 *
235  RETURN
236  END
max
#define max(A, B)
Definition: pcgemr.c:180
slamsh
subroutine slamsh(S, LDS, NBULGE, JBLK, H, LDH, N, ULP)
Definition: slamsh.f:2
min
#define min(A, B)
Definition: pcgemr.c:181