SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
sdbtrf.f
Go to the documentation of this file.
1 SUBROUTINE sdbtrf( M, N, KL, KU, AB, LDAB, INFO )
2*
3* -- ScaLAPACK auxiliary routine (version 2.0) --
4* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
5*
6* Written by Andrew J. Cleary, University of Tennessee.
7* August, 1996.
8* Modified from SGBTRF:
9* -- LAPACK routine (preliminary version) --
10* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
11* Courant Institute, Argonne National Lab, and Rice University
12* August 6, 1991
13*
14* .. Scalar Arguments ..
15 INTEGER INFO, KL, KU, LDAB, M, N
16* ..
17* .. Array Arguments ..
18 REAL AB( LDAB, * )
19* ..
20*
21* Purpose
22* =======
23*
24* Sdbtrf computes an LU factorization of a real m-by-n band matrix A
25* without using partial pivoting or row interchanges.
26*
27* This is the blocked version of the algorithm, calling Level 3 BLAS.
28*
29* Arguments
30* =========
31*
32* M (input) INTEGER
33* The number of rows of the matrix A. M >= 0.
34*
35* N (input) INTEGER
36* The number of columns of the matrix A. N >= 0.
37*
38* KL (input) INTEGER
39* The number of subdiagonals within the band of A. KL >= 0.
40*
41* KU (input) INTEGER
42* The number of superdiagonals within the band of A. KU >= 0.
43*
44* AB (input/output) REAL array, dimension (LDAB,N)
45* On entry, the matrix A in band storage, in rows KL+1 to
46* 2*KL+KU+1; rows 1 to KL of the array need not be set.
47* The j-th column of A is stored in the j-th column of the
48* array AB as follows:
49* AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
50*
51* On exit, details of the factorization: U is stored as an
52* upper triangular band matrix with KL+KU superdiagonals in
53* rows 1 to KL+KU+1, and the multipliers used during the
54* factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
55* See below for further details.
56*
57* LDAB (input) INTEGER
58* The leading dimension of the array AB. LDAB >= 2*KL+KU+1.
59*
60* INFO (output) INTEGER
61* = 0: successful exit
62* < 0: if INFO = -i, the i-th argument had an illegal value
63* > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
64* has been completed, but the factor U is exactly
65* singular, and division by zero will occur if it is used
66* to solve a system of equations.
67*
68* Further Details
69* ===============
70*
71* The band storage scheme is illustrated by the following example, when
72* M = N = 6, KL = 2, KU = 1:
73*
74* On entry: On exit:
75*
76* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56
77* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66
78* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 *
79* a31 a42 a53 a64 * * m31 m42 m53 m64 * *
80*
81* Array elements marked * are not used by the routine.
82*
83* =====================================================================
84*
85* .. Parameters ..
86 REAL ONE, ZERO
87 parameter( one = 1.0e+0 )
88 parameter( zero = 0.0e+0 )
89 INTEGER NBMAX, LDWORK
90 parameter( nbmax = 64, ldwork = nbmax+1 )
91* ..
92* .. Local Scalars ..
93 INTEGER I, I2, I3, II, J, J2, J3, JB, JJ, JM, JP,
94 $ JU, KM, KV, NB, NW
95* ..
96* .. Local Arrays ..
97 REAL WORK13( LDWORK, NBMAX ),
98 $ WORK31( LDWORK, NBMAX )
99* ..
100* .. External Functions ..
101 INTEGER ILAENV, ISAMAX
102 EXTERNAL ilaenv, isamax
103* ..
104* .. External Subroutines ..
105 EXTERNAL scopy, sdbtf2, sgemm, sger, sscal,
106 $ sswap, strsm, xerbla
107* ..
108* .. Intrinsic Functions ..
109 INTRINSIC max, min
110* ..
111* .. Executable Statements ..
112*
113* KV is the number of superdiagonals in the factor U
114*
115 kv = ku
116*
117* Test the input parameters.
118*
119 info = 0
120 IF( m.LT.0 ) THEN
121 info = -1
122 ELSE IF( n.LT.0 ) THEN
123 info = -2
124 ELSE IF( kl.LT.0 ) THEN
125 info = -3
126 ELSE IF( ku.LT.0 ) THEN
127 info = -4
128 ELSE IF( ldab.LT.min( min( kl+kv+1,m ),n ) ) THEN
129 info = -6
130 END IF
131 IF( info.NE.0 ) THEN
132 CALL xerbla( 'SDBTRF', -info )
133 RETURN
134 END IF
135*
136* Quick return if possible
137*
138 IF( m.EQ.0 .OR. n.EQ.0 )
139 $ RETURN
140*
141* Determine the block size for this environment
142*
143 nb = ilaenv( 1, 'SDBTRF', ' ', m, n, kl, ku )
144*
145* The block size must not exceed the limit set by the size of the
146* local arrays WORK13 and WORK31.
147*
148 nb = min( nb, nbmax )
149*
150 IF( nb.LE.1 .OR. nb.GT.kl ) THEN
151*
152* Use unblocked code
153*
154 CALL sdbtf2( m, n, kl, ku, ab, ldab, info )
155 ELSE
156*
157* Use blocked code
158*
159* Zero the superdiagonal elements of the work array WORK13
160*
161 DO 20 j = 1, nb
162 DO 10 i = 1, j - 1
163 work13( i, j ) = zero
164 10 CONTINUE
165 20 CONTINUE
166*
167* Zero the subdiagonal elements of the work array WORK31
168*
169 DO 40 j = 1, nb
170 DO 30 i = j + 1, nb
171 work31( i, j ) = zero
172 30 CONTINUE
173 40 CONTINUE
174*
175* JU is the index of the last column affected by the current
176* stage of the factorization
177*
178 ju = 1
179*
180 DO 180 j = 1, min( m, n ), nb
181 jb = min( nb, min( m, n )-j+1 )
182*
183* The active part of the matrix is partitioned
184*
185* A11 A12 A13
186* A21 A22 A23
187* A31 A32 A33
188*
189* Here A11, A21 and A31 denote the current block of JB columns
190* which is about to be factorized. The number of rows in the
191* partitioning are JB, I2, I3 respectively, and the numbers
192* of columns are JB, J2, J3. The superdiagonal elements of A13
193* and the subdiagonal elements of A31 lie outside the band.
194*
195 i2 = min( kl-jb, m-j-jb+1 )
196 i3 = min( jb, m-j-kl+1 )
197*
198* J2 and J3 are computed after JU has been updated.
199*
200* Factorize the current block of JB columns
201*
202 DO 80 jj = j, j + jb - 1
203*
204* Find pivot and test for singularity. KM is the number of
205* subdiagonal elements in the current column.
206*
207 km = min( kl, m-jj )
208 jp = 1
209 IF( ab( kv+jp, jj ).NE.zero ) THEN
210 ju = max( ju, min( jj+ku+jp-1, n ) )
211*
212* Compute multipliers
213*
214 CALL sscal( km, one / ab( kv+1, jj ), ab( kv+2, jj ),
215 $ 1 )
216*
217* Update trailing submatrix within the band and within
218* the current block. JM is the index of the last column
219* which needs to be updated.
220*
221 jm = min( ju, j+jb-1 )
222 IF( jm.GT.jj ) THEN
223 CALL sger( km, jm-jj, -one, ab( kv+2, jj ), 1,
224 $ ab( kv, jj+1 ), ldab-1,
225 $ ab( kv+1, jj+1 ), ldab-1 )
226 END IF
227 END IF
228*
229* Copy current column of A31 into the work array WORK31
230*
231 nw = min( jj-j+1, i3 )
232 IF( nw.GT.0 )
233 $ CALL scopy( nw, ab( kv+kl+1-jj+j, jj ), 1,
234 $ work31( 1, jj-j+1 ), 1 )
235 80 CONTINUE
236 IF( j+jb.LE.n ) THEN
237*
238* Apply the row interchanges to the other blocks.
239*
240 j2 = min( ju-j+1, kv ) - jb
241 j3 = max( 0, ju-j-kv+1 )
242*
243* Update the relevant part of the trailing submatrix
244*
245 IF( j2.GT.0 ) THEN
246*
247* Update A12
248*
249 CALL strsm( 'Left', 'Lower', 'No transpose', 'Unit',
250 $ jb, j2, one, ab( kv+1, j ), ldab-1,
251 $ ab( kv+1-jb, j+jb ), ldab-1 )
252*
253 IF( i2.GT.0 ) THEN
254*
255* Update A22
256*
257 CALL sgemm( 'No transpose', 'No transpose', i2, j2,
258 $ jb, -one, ab( kv+1+jb, j ), ldab-1,
259 $ ab( kv+1-jb, j+jb ), ldab-1, one,
260 $ ab( kv+1, j+jb ), ldab-1 )
261 END IF
262*
263 IF( i3.GT.0 ) THEN
264*
265* Update A32
266*
267 CALL sgemm( 'No transpose', 'No transpose', i3, j2,
268 $ jb, -one, work31, ldwork,
269 $ ab( kv+1-jb, j+jb ), ldab-1, one,
270 $ ab( kv+kl+1-jb, j+jb ), ldab-1 )
271 END IF
272 END IF
273*
274 IF( j3.GT.0 ) THEN
275*
276* Copy the lower triangle of A13 into the work array
277* WORK13
278*
279 DO 130 jj = 1, j3
280 DO 120 ii = jj, jb
281 work13( ii, jj ) = ab( ii-jj+1, jj+j+kv-1 )
282 120 CONTINUE
283 130 CONTINUE
284*
285* Update A13 in the work array
286*
287 CALL strsm( 'Left', 'Lower', 'No transpose', 'Unit',
288 $ jb, j3, one, ab( kv+1, j ), ldab-1,
289 $ work13, ldwork )
290*
291 IF( i2.GT.0 ) THEN
292*
293* Update A23
294*
295 CALL sgemm( 'No transpose', 'No transpose', i2, j3,
296 $ jb, -one, ab( kv+1+jb, j ), ldab-1,
297 $ work13, ldwork, one, ab( 1+jb, j+kv ),
298 $ ldab-1 )
299 END IF
300*
301 IF( i3.GT.0 ) THEN
302*
303* Update A33
304*
305 CALL sgemm( 'No transpose', 'No transpose', i3, j3,
306 $ jb, -one, work31, ldwork, work13,
307 $ ldwork, one, ab( 1+kl, j+kv ), ldab-1 )
308 END IF
309*
310* Copy the lower triangle of A13 back into place
311*
312 DO 150 jj = 1, j3
313 DO 140 ii = jj, jb
314 ab( ii-jj+1, jj+j+kv-1 ) = work13( ii, jj )
315 140 CONTINUE
316 150 CONTINUE
317 END IF
318 ELSE
319 END IF
320*
321* copy the upper triangle of A31 back into place
322*
323 DO 170 jj = j + jb - 1, j, -1
324*
325* Copy the current column of A31 back into place
326*
327 nw = min( i3, jj-j+1 )
328 IF( nw.GT.0 )
329 $ CALL scopy( nw, work31( 1, jj-j+1 ), 1,
330 $ ab( kv+kl+1-jj+j, jj ), 1 )
331 170 CONTINUE
332 180 CONTINUE
333 END IF
334*
335 RETURN
336*
337* End of SDBTRF
338*
339 END
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine sdbtf2(m, n, kl, ku, ab, ldab, info)
Definition sdbtf2.f:2
subroutine sdbtrf(m, n, kl, ku, ab, ldab, info)
Definition sdbtrf.f:2