ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
sdbtrf.f
Go to the documentation of this file.
1  SUBROUTINE sdbtrf( M, N, KL, KU, AB, LDAB, INFO )
2 *
3 * -- ScaLAPACK auxiliary routine (version 2.0) --
4 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
5 *
6 * Written by Andrew J. Cleary, University of Tennessee.
7 * August, 1996.
8 * Modified from SGBTRF:
9 * -- LAPACK routine (preliminary version) --
10 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
11 * Courant Institute, Argonne National Lab, and Rice University
12 * August 6, 1991
13 *
14 * .. Scalar Arguments ..
15  INTEGER INFO, KL, KU, LDAB, M, N
16 * ..
17 * .. Array Arguments ..
18  REAL AB( LDAB, * )
19 * ..
20 *
21 * Purpose
22 * =======
23 *
24 * Sdbtrf computes an LU factorization of a real m-by-n band matrix A
25 * without using partial pivoting or row interchanges.
26 *
27 * This is the blocked version of the algorithm, calling Level 3 BLAS.
28 *
29 * Arguments
30 * =========
31 *
32 * M (input) INTEGER
33 * The number of rows of the matrix A. M >= 0.
34 *
35 * N (input) INTEGER
36 * The number of columns of the matrix A. N >= 0.
37 *
38 * KL (input) INTEGER
39 * The number of subdiagonals within the band of A. KL >= 0.
40 *
41 * KU (input) INTEGER
42 * The number of superdiagonals within the band of A. KU >= 0.
43 *
44 * AB (input/output) REAL array, dimension (LDAB,N)
45 * On entry, the matrix A in band storage, in rows KL+1 to
46 * 2*KL+KU+1; rows 1 to KL of the array need not be set.
47 * The j-th column of A is stored in the j-th column of the
48 * array AB as follows:
49 * AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
50 *
51 * On exit, details of the factorization: U is stored as an
52 * upper triangular band matrix with KL+KU superdiagonals in
53 * rows 1 to KL+KU+1, and the multipliers used during the
54 * factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
55 * See below for further details.
56 *
57 * LDAB (input) INTEGER
58 * The leading dimension of the array AB. LDAB >= 2*KL+KU+1.
59 *
60 * INFO (output) INTEGER
61 * = 0: successful exit
62 * < 0: if INFO = -i, the i-th argument had an illegal value
63 * > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
64 * has been completed, but the factor U is exactly
65 * singular, and division by zero will occur if it is used
66 * to solve a system of equations.
67 *
68 * Further Details
69 * ===============
70 *
71 * The band storage scheme is illustrated by the following example, when
72 * M = N = 6, KL = 2, KU = 1:
73 *
74 * On entry: On exit:
75 *
76 * * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56
77 * a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66
78 * a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 *
79 * a31 a42 a53 a64 * * m31 m42 m53 m64 * *
80 *
81 * Array elements marked * are not used by the routine.
82 *
83 * =====================================================================
84 *
85 * .. Parameters ..
86  REAL ONE, ZERO
87  parameter( one = 1.0e+0 )
88  parameter( zero = 0.0e+0 )
89  INTEGER NBMAX, LDWORK
90  parameter( nbmax = 64, ldwork = nbmax+1 )
91 * ..
92 * .. Local Scalars ..
93  INTEGER I, I2, I3, II, J, J2, J3, JB, JJ, JM, JP,
94  $ JU, KM, KV, NB, NW
95 * ..
96 * .. Local Arrays ..
97  REAL WORK13( LDWORK, NBMAX ),
98  $ WORK31( LDWORK, NBMAX )
99 * ..
100 * .. External Functions ..
101  INTEGER ILAENV, ISAMAX
102  EXTERNAL ilaenv, isamax
103 * ..
104 * .. External Subroutines ..
105  EXTERNAL scopy, sdbtf2, sgemm, sger, sscal,
106  $ sswap, strsm, xerbla
107 * ..
108 * .. Intrinsic Functions ..
109  INTRINSIC max, min
110 * ..
111 * .. Executable Statements ..
112 *
113 * KV is the number of superdiagonals in the factor U
114 *
115  kv = ku
116 *
117 * Test the input parameters.
118 *
119  info = 0
120  IF( m.LT.0 ) THEN
121  info = -1
122  ELSE IF( n.LT.0 ) THEN
123  info = -2
124  ELSE IF( kl.LT.0 ) THEN
125  info = -3
126  ELSE IF( ku.LT.0 ) THEN
127  info = -4
128  ELSE IF( ldab.LT.min( min( kl+kv+1,m ),n ) ) THEN
129  info = -6
130  END IF
131  IF( info.NE.0 ) THEN
132  CALL xerbla( 'SDBTRF', -info )
133  RETURN
134  END IF
135 *
136 * Quick return if possible
137 *
138  IF( m.EQ.0 .OR. n.EQ.0 )
139  $ RETURN
140 *
141 * Determine the block size for this environment
142 *
143  nb = ilaenv( 1, 'SDBTRF', ' ', m, n, kl, ku )
144 *
145 * The block size must not exceed the limit set by the size of the
146 * local arrays WORK13 and WORK31.
147 *
148  nb = min( nb, nbmax )
149 *
150  IF( nb.LE.1 .OR. nb.GT.kl ) THEN
151 *
152 * Use unblocked code
153 *
154  CALL sdbtf2( m, n, kl, ku, ab, ldab, info )
155  ELSE
156 *
157 * Use blocked code
158 *
159 * Zero the superdiagonal elements of the work array WORK13
160 *
161  DO 20 j = 1, nb
162  DO 10 i = 1, j - 1
163  work13( i, j ) = zero
164  10 CONTINUE
165  20 CONTINUE
166 *
167 * Zero the subdiagonal elements of the work array WORK31
168 *
169  DO 40 j = 1, nb
170  DO 30 i = j + 1, nb
171  work31( i, j ) = zero
172  30 CONTINUE
173  40 CONTINUE
174 *
175 * JU is the index of the last column affected by the current
176 * stage of the factorization
177 *
178  ju = 1
179 *
180  DO 180 j = 1, min( m, n ), nb
181  jb = min( nb, min( m, n )-j+1 )
182 *
183 * The active part of the matrix is partitioned
184 *
185 * A11 A12 A13
186 * A21 A22 A23
187 * A31 A32 A33
188 *
189 * Here A11, A21 and A31 denote the current block of JB columns
190 * which is about to be factorized. The number of rows in the
191 * partitioning are JB, I2, I3 respectively, and the numbers
192 * of columns are JB, J2, J3. The superdiagonal elements of A13
193 * and the subdiagonal elements of A31 lie outside the band.
194 *
195  i2 = min( kl-jb, m-j-jb+1 )
196  i3 = min( jb, m-j-kl+1 )
197 *
198 * J2 and J3 are computed after JU has been updated.
199 *
200 * Factorize the current block of JB columns
201 *
202  DO 80 jj = j, j + jb - 1
203 *
204 * Find pivot and test for singularity. KM is the number of
205 * subdiagonal elements in the current column.
206 *
207  km = min( kl, m-jj )
208  jp = 1
209  IF( ab( kv+jp, jj ).NE.zero ) THEN
210  ju = max( ju, min( jj+ku+jp-1, n ) )
211 *
212 * Compute multipliers
213 *
214  CALL sscal( km, one / ab( kv+1, jj ), ab( kv+2, jj ),
215  $ 1 )
216 *
217 * Update trailing submatrix within the band and within
218 * the current block. JM is the index of the last column
219 * which needs to be updated.
220 *
221  jm = min( ju, j+jb-1 )
222  IF( jm.GT.jj ) THEN
223  CALL sger( km, jm-jj, -one, ab( kv+2, jj ), 1,
224  $ ab( kv, jj+1 ), ldab-1,
225  $ ab( kv+1, jj+1 ), ldab-1 )
226  END IF
227  END IF
228 *
229 * Copy current column of A31 into the work array WORK31
230 *
231  nw = min( jj-j+1, i3 )
232  IF( nw.GT.0 )
233  $ CALL scopy( nw, ab( kv+kl+1-jj+j, jj ), 1,
234  $ work31( 1, jj-j+1 ), 1 )
235  80 CONTINUE
236  IF( j+jb.LE.n ) THEN
237 *
238 * Apply the row interchanges to the other blocks.
239 *
240  j2 = min( ju-j+1, kv ) - jb
241  j3 = max( 0, ju-j-kv+1 )
242 *
243 * Update the relevant part of the trailing submatrix
244 *
245  IF( j2.GT.0 ) THEN
246 *
247 * Update A12
248 *
249  CALL strsm( 'Left', 'Lower', 'No transpose', 'Unit',
250  $ jb, j2, one, ab( kv+1, j ), ldab-1,
251  $ ab( kv+1-jb, j+jb ), ldab-1 )
252 *
253  IF( i2.GT.0 ) THEN
254 *
255 * Update A22
256 *
257  CALL sgemm( 'No transpose', 'No transpose', i2, j2,
258  $ jb, -one, ab( kv+1+jb, j ), ldab-1,
259  $ ab( kv+1-jb, j+jb ), ldab-1, one,
260  $ ab( kv+1, j+jb ), ldab-1 )
261  END IF
262 *
263  IF( i3.GT.0 ) THEN
264 *
265 * Update A32
266 *
267  CALL sgemm( 'No transpose', 'No transpose', i3, j2,
268  $ jb, -one, work31, ldwork,
269  $ ab( kv+1-jb, j+jb ), ldab-1, one,
270  $ ab( kv+kl+1-jb, j+jb ), ldab-1 )
271  END IF
272  END IF
273 *
274  IF( j3.GT.0 ) THEN
275 *
276 * Copy the lower triangle of A13 into the work array
277 * WORK13
278 *
279  DO 130 jj = 1, j3
280  DO 120 ii = jj, jb
281  work13( ii, jj ) = ab( ii-jj+1, jj+j+kv-1 )
282  120 CONTINUE
283  130 CONTINUE
284 *
285 * Update A13 in the work array
286 *
287  CALL strsm( 'Left', 'Lower', 'No transpose', 'Unit',
288  $ jb, j3, one, ab( kv+1, j ), ldab-1,
289  $ work13, ldwork )
290 *
291  IF( i2.GT.0 ) THEN
292 *
293 * Update A23
294 *
295  CALL sgemm( 'No transpose', 'No transpose', i2, j3,
296  $ jb, -one, ab( kv+1+jb, j ), ldab-1,
297  $ work13, ldwork, one, ab( 1+jb, j+kv ),
298  $ ldab-1 )
299  END IF
300 *
301  IF( i3.GT.0 ) THEN
302 *
303 * Update A33
304 *
305  CALL sgemm( 'No transpose', 'No transpose', i3, j3,
306  $ jb, -one, work31, ldwork, work13,
307  $ ldwork, one, ab( 1+kl, j+kv ), ldab-1 )
308  END IF
309 *
310 * Copy the lower triangle of A13 back into place
311 *
312  DO 150 jj = 1, j3
313  DO 140 ii = jj, jb
314  ab( ii-jj+1, jj+j+kv-1 ) = work13( ii, jj )
315  140 CONTINUE
316  150 CONTINUE
317  END IF
318  ELSE
319  END IF
320 *
321 * copy the upper triangle of A31 back into place
322 *
323  DO 170 jj = j + jb - 1, j, -1
324 *
325 * Copy the current column of A31 back into place
326 *
327  nw = min( i3, jj-j+1 )
328  IF( nw.GT.0 )
329  $ CALL scopy( nw, work31( 1, jj-j+1 ), 1,
330  $ ab( kv+kl+1-jj+j, jj ), 1 )
331  170 CONTINUE
332  180 CONTINUE
333  END IF
334 *
335  RETURN
336 *
337 * End of SDBTRF
338 *
339  END
max
#define max(A, B)
Definition: pcgemr.c:180
sdbtf2
subroutine sdbtf2(M, N, KL, KU, AB, LDAB, INFO)
Definition: sdbtf2.f:2
sdbtrf
subroutine sdbtrf(M, N, KL, KU, AB, LDAB, INFO)
Definition: sdbtrf.f:2
min
#define min(A, B)
Definition: pcgemr.c:181