LAPACK 3.12.1
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ sgbtrf()

subroutine sgbtrf ( integer m,
integer n,
integer kl,
integer ku,
real, dimension( ldab, * ) ab,
integer ldab,
integer, dimension( * ) ipiv,
integer info )

SGBTRF

Download SGBTRF + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> SGBTRF computes an LU factorization of a real m-by-n band matrix A
!> using partial pivoting with row interchanges.
!>
!> This is the blocked version of the algorithm, calling Level 3 BLAS.
!> 
Parameters
[in] M
!>          M is INTEGER
!>          The number of rows of the matrix A.  M >= 0.
!> 
[in] N
!>          N is INTEGER
!>          The number of columns of the matrix A.  N >= 0.
!> 
[in] KL
!>          KL is INTEGER
!>          The number of subdiagonals within the band of A.  KL >= 0.
!> 
[in] KU
!>          KU is INTEGER
!>          The number of superdiagonals within the band of A.  KU >= 0.
!> 
[in,out] AB
!>          AB is REAL array, dimension (LDAB,N)
!>          On entry, the matrix A in band storage, in rows KL+1 to
!>          2*KL+KU+1; rows 1 to KL of the array need not be set.
!>          The j-th column of A is stored in the j-th column of the
!>          array AB as follows:
!>          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
!>
!>          On exit, details of the factorization: U is stored as an
!>          upper triangular band matrix with KL+KU superdiagonals in
!>          rows 1 to KL+KU+1, and the multipliers used during the
!>          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
!>          See below for further details.
!> 
[in] LDAB
!>          LDAB is INTEGER
!>          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
!> 
[out] IPIV
!>          IPIV is INTEGER array, dimension (min(M,N))
!>          The pivot indices; for 1 <= i <= min(M,N), row i of the
!>          matrix was interchanged with row IPIV(i).
!> 
[out] INFO
!>          INFO is INTEGER
!>          = 0: successful exit
!>          < 0: if INFO = -i, the i-th argument had an illegal value
!>          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
!>               has been completed, but the factor U is exactly
!>               singular, and division by zero will occur if it is used
!>               to solve a system of equations.
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Further Details:
!>
!>  The band storage scheme is illustrated by the following example, when
!>  M = N = 6, KL = 2, KU = 1:
!>
!>  On entry:                       On exit:
!>
!>      *    *    *    +    +    +       *    *    *   u14  u25  u36
!>      *    *    +    +    +    +       *    *   u13  u24  u35  u46
!>      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
!>     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
!>     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
!>     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
!>
!>  Array elements marked * are not used by the routine; elements marked
!>  + need not be set on entry, but are required by the routine to store
!>  elements of U because of fill-in resulting from the row interchanges.
!> 

Definition at line 141 of file sgbtrf.f.

142*
143* -- LAPACK computational routine --
144* -- LAPACK is a software package provided by Univ. of Tennessee, --
145* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. --
146*
147* .. Scalar Arguments ..
148 INTEGER INFO, KL, KU, LDAB, M, N
149* ..
150* .. Array Arguments ..
151 INTEGER IPIV( * )
152 REAL AB( LDAB, * )
153* ..
154*
155* =====================================================================
156*
157* .. Parameters ..
158 REAL ONE, ZERO
159 parameter( one = 1.0e+0, zero = 0.0e+0 )
160 INTEGER NBMAX, LDWORK
161 parameter( nbmax = 64, ldwork = nbmax+1 )
162* ..
163* .. Local Scalars ..
164 INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
165 $ JU, K2, KM, KV, NB, NW
166 REAL TEMP
167* ..
168* .. Local Arrays ..
169 REAL WORK13( LDWORK, NBMAX ),
170 $ WORK31( LDWORK, NBMAX )
171* ..
172* .. External Functions ..
173 INTEGER ILAENV, ISAMAX
174 EXTERNAL ilaenv, isamax
175* ..
176* .. External Subroutines ..
177 EXTERNAL scopy, sgbtf2, sgemm, sger, slaswp,
178 $ sscal,
179 $ sswap, strsm, xerbla
180* ..
181* .. Intrinsic Functions ..
182 INTRINSIC max, min
183* ..
184* .. Executable Statements ..
185*
186* KV is the number of superdiagonals in the factor U, allowing for
187* fill-in
188*
189 kv = ku + kl
190*
191* Test the input parameters.
192*
193 info = 0
194 IF( m.LT.0 ) THEN
195 info = -1
196 ELSE IF( n.LT.0 ) THEN
197 info = -2
198 ELSE IF( kl.LT.0 ) THEN
199 info = -3
200 ELSE IF( ku.LT.0 ) THEN
201 info = -4
202 ELSE IF( ldab.LT.kl+kv+1 ) THEN
203 info = -6
204 END IF
205 IF( info.NE.0 ) THEN
206 CALL xerbla( 'SGBTRF', -info )
207 RETURN
208 END IF
209*
210* Quick return if possible
211*
212 IF( m.EQ.0 .OR. n.EQ.0 )
213 $ RETURN
214*
215* Determine the block size for this environment
216*
217 nb = ilaenv( 1, 'SGBTRF', ' ', m, n, kl, ku )
218*
219* The block size must not exceed the limit set by the size of the
220* local arrays WORK13 and WORK31.
221*
222 nb = min( nb, nbmax )
223*
224 IF( nb.LE.1 .OR. nb.GT.kl ) THEN
225*
226* Use unblocked code
227*
228 CALL sgbtf2( m, n, kl, ku, ab, ldab, ipiv, info )
229 ELSE
230*
231* Use blocked code
232*
233* Zero the superdiagonal elements of the work array WORK13
234*
235 DO 20 j = 1, nb
236 DO 10 i = 1, j - 1
237 work13( i, j ) = zero
238 10 CONTINUE
239 20 CONTINUE
240*
241* Zero the subdiagonal elements of the work array WORK31
242*
243 DO 40 j = 1, nb
244 DO 30 i = j + 1, nb
245 work31( i, j ) = zero
246 30 CONTINUE
247 40 CONTINUE
248*
249* Gaussian elimination with partial pivoting
250*
251* Set fill-in elements in columns KU+2 to KV to zero
252*
253 DO 60 j = ku + 2, min( kv, n )
254 DO 50 i = kv - j + 2, kl
255 ab( i, j ) = zero
256 50 CONTINUE
257 60 CONTINUE
258*
259* JU is the index of the last column affected by the current
260* stage of the factorization
261*
262 ju = 1
263*
264 DO 180 j = 1, min( m, n ), nb
265 jb = min( nb, min( m, n )-j+1 )
266*
267* The active part of the matrix is partitioned
268*
269* A11 A12 A13
270* A21 A22 A23
271* A31 A32 A33
272*
273* Here A11, A21 and A31 denote the current block of JB columns
274* which is about to be factorized. The number of rows in the
275* partitioning are JB, I2, I3 respectively, and the numbers
276* of columns are JB, J2, J3. The superdiagonal elements of A13
277* and the subdiagonal elements of A31 lie outside the band.
278*
279 i2 = min( kl-jb, m-j-jb+1 )
280 i3 = min( jb, m-j-kl+1 )
281*
282* J2 and J3 are computed after JU has been updated.
283*
284* Factorize the current block of JB columns
285*
286 DO 80 jj = j, j + jb - 1
287*
288* Set fill-in elements in column JJ+KV to zero
289*
290 IF( jj+kv.LE.n ) THEN
291 DO 70 i = 1, kl
292 ab( i, jj+kv ) = zero
293 70 CONTINUE
294 END IF
295*
296* Find pivot and test for singularity. KM is the number of
297* subdiagonal elements in the current column.
298*
299 km = min( kl, m-jj )
300 jp = isamax( km+1, ab( kv+1, jj ), 1 )
301 ipiv( jj ) = jp + jj - j
302 IF( ab( kv+jp, jj ).NE.zero ) THEN
303 ju = max( ju, min( jj+ku+jp-1, n ) )
304 IF( jp.NE.1 ) THEN
305*
306* Apply interchange to columns J to J+JB-1
307*
308 IF( jp+jj-1.LT.j+kl ) THEN
309*
310 CALL sswap( jb, ab( kv+1+jj-j, j ), ldab-1,
311 $ ab( kv+jp+jj-j, j ), ldab-1 )
312 ELSE
313*
314* The interchange affects columns J to JJ-1 of A31
315* which are stored in the work array WORK31
316*
317 CALL sswap( jj-j, ab( kv+1+jj-j, j ), ldab-1,
318 $ work31( jp+jj-j-kl, 1 ), ldwork )
319 CALL sswap( j+jb-jj, ab( kv+1, jj ), ldab-1,
320 $ ab( kv+jp, jj ), ldab-1 )
321 END IF
322 END IF
323*
324* Compute multipliers
325*
326 CALL sscal( km, one / ab( kv+1, jj ), ab( kv+2,
327 $ jj ),
328 $ 1 )
329*
330* Update trailing submatrix within the band and within
331* the current block. JM is the index of the last column
332* which needs to be updated.
333*
334 jm = min( ju, j+jb-1 )
335 IF( jm.GT.jj )
336 $ CALL sger( km, jm-jj, -one, ab( kv+2, jj ), 1,
337 $ ab( kv, jj+1 ), ldab-1,
338 $ ab( kv+1, jj+1 ), ldab-1 )
339 ELSE
340*
341* If pivot is zero, set INFO to the index of the pivot
342* unless a zero pivot has already been found.
343*
344 IF( info.EQ.0 )
345 $ info = jj
346 END IF
347*
348* Copy current column of A31 into the work array WORK31
349*
350 nw = min( jj-j+1, i3 )
351 IF( nw.GT.0 )
352 $ CALL scopy( nw, ab( kv+kl+1-jj+j, jj ), 1,
353 $ work31( 1, jj-j+1 ), 1 )
354 80 CONTINUE
355 IF( j+jb.LE.n ) THEN
356*
357* Apply the row interchanges to the other blocks.
358*
359 j2 = min( ju-j+1, kv ) - jb
360 j3 = max( 0, ju-j-kv+1 )
361*
362* Use SLASWP to apply the row interchanges to A12, A22, and
363* A32.
364*
365 CALL slaswp( j2, ab( kv+1-jb, j+jb ), ldab-1, 1, jb,
366 $ ipiv( j ), 1 )
367*
368* Adjust the pivot indices.
369*
370 DO 90 i = j, j + jb - 1
371 ipiv( i ) = ipiv( i ) + j - 1
372 90 CONTINUE
373*
374* Apply the row interchanges to A13, A23, and A33
375* columnwise.
376*
377 k2 = j - 1 + jb + j2
378 DO 110 i = 1, j3
379 jj = k2 + i
380 DO 100 ii = j + i - 1, j + jb - 1
381 ip = ipiv( ii )
382 IF( ip.NE.ii ) THEN
383 temp = ab( kv+1+ii-jj, jj )
384 ab( kv+1+ii-jj, jj ) = ab( kv+1+ip-jj, jj )
385 ab( kv+1+ip-jj, jj ) = temp
386 END IF
387 100 CONTINUE
388 110 CONTINUE
389*
390* Update the relevant part of the trailing submatrix
391*
392 IF( j2.GT.0 ) THEN
393*
394* Update A12
395*
396 CALL strsm( 'Left', 'Lower', 'No transpose',
397 $ 'Unit',
398 $ jb, j2, one, ab( kv+1, j ), ldab-1,
399 $ ab( kv+1-jb, j+jb ), ldab-1 )
400*
401 IF( i2.GT.0 ) THEN
402*
403* Update A22
404*
405 CALL sgemm( 'No transpose', 'No transpose', i2,
406 $ j2,
407 $ jb, -one, ab( kv+1+jb, j ), ldab-1,
408 $ ab( kv+1-jb, j+jb ), ldab-1, one,
409 $ ab( kv+1, j+jb ), ldab-1 )
410 END IF
411*
412 IF( i3.GT.0 ) THEN
413*
414* Update A32
415*
416 CALL sgemm( 'No transpose', 'No transpose', i3,
417 $ j2,
418 $ jb, -one, work31, ldwork,
419 $ ab( kv+1-jb, j+jb ), ldab-1, one,
420 $ ab( kv+kl+1-jb, j+jb ), ldab-1 )
421 END IF
422 END IF
423*
424 IF( j3.GT.0 ) THEN
425*
426* Copy the lower triangle of A13 into the work array
427* WORK13
428*
429 DO 130 jj = 1, j3
430 DO 120 ii = jj, jb
431 work13( ii, jj ) = ab( ii-jj+1, jj+j+kv-1 )
432 120 CONTINUE
433 130 CONTINUE
434*
435* Update A13 in the work array
436*
437 CALL strsm( 'Left', 'Lower', 'No transpose',
438 $ 'Unit',
439 $ jb, j3, one, ab( kv+1, j ), ldab-1,
440 $ work13, ldwork )
441*
442 IF( i2.GT.0 ) THEN
443*
444* Update A23
445*
446 CALL sgemm( 'No transpose', 'No transpose', i2,
447 $ j3,
448 $ jb, -one, ab( kv+1+jb, j ), ldab-1,
449 $ work13, ldwork, one, ab( 1+jb, j+kv ),
450 $ ldab-1 )
451 END IF
452*
453 IF( i3.GT.0 ) THEN
454*
455* Update A33
456*
457 CALL sgemm( 'No transpose', 'No transpose', i3,
458 $ j3,
459 $ jb, -one, work31, ldwork, work13,
460 $ ldwork, one, ab( 1+kl, j+kv ), ldab-1 )
461 END IF
462*
463* Copy the lower triangle of A13 back into place
464*
465 DO 150 jj = 1, j3
466 DO 140 ii = jj, jb
467 ab( ii-jj+1, jj+j+kv-1 ) = work13( ii, jj )
468 140 CONTINUE
469 150 CONTINUE
470 END IF
471 ELSE
472*
473* Adjust the pivot indices.
474*
475 DO 160 i = j, j + jb - 1
476 ipiv( i ) = ipiv( i ) + j - 1
477 160 CONTINUE
478 END IF
479*
480* Partially undo the interchanges in the current block to
481* restore the upper triangular form of A31 and copy the upper
482* triangle of A31 back into place
483*
484 DO 170 jj = j + jb - 1, j, -1
485 jp = ipiv( jj ) - jj + 1
486 IF( jp.NE.1 ) THEN
487*
488* Apply interchange to columns J to JJ-1
489*
490 IF( jp+jj-1.LT.j+kl ) THEN
491*
492* The interchange does not affect A31
493*
494 CALL sswap( jj-j, ab( kv+1+jj-j, j ), ldab-1,
495 $ ab( kv+jp+jj-j, j ), ldab-1 )
496 ELSE
497*
498* The interchange does affect A31
499*
500 CALL sswap( jj-j, ab( kv+1+jj-j, j ), ldab-1,
501 $ work31( jp+jj-j-kl, 1 ), ldwork )
502 END IF
503 END IF
504*
505* Copy the current column of A31 back into place
506*
507 nw = min( i3, jj-j+1 )
508 IF( nw.GT.0 )
509 $ CALL scopy( nw, work31( 1, jj-j+1 ), 1,
510 $ ab( kv+kl+1-jj+j, jj ), 1 )
511 170 CONTINUE
512 180 CONTINUE
513 END IF
514*
515 RETURN
516*
517* End of SGBTRF
518*
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine scopy(n, sx, incx, sy, incy)
SCOPY
Definition scopy.f:82
subroutine sgbtf2(m, n, kl, ku, ab, ldab, ipiv, info)
SGBTF2 computes the LU factorization of a general band matrix using the unblocked version of the algorithm.
Definition sgbtf2.f:143
subroutine sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
SGEMM
Definition sgemm.f:188
subroutine sger(m, n, alpha, x, incx, y, incy, a, lda)
SGER
Definition sger.f:130
integer function isamax(n, sx, incx)
ISAMAX
Definition isamax.f:71
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
Definition ilaenv.f:160
subroutine slaswp(n, a, lda, k1, k2, ipiv, incx)
SLASWP performs a series of row interchanges on a general rectangular matrix.
Definition slaswp.f:113
subroutine sscal(n, sa, sx, incx)
SSCAL
Definition sscal.f:79
subroutine sswap(n, sx, incx, sy, incy)
SSWAP
Definition sswap.f:82
subroutine strsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
STRSM
Definition strsm.f:181
Here is the call graph for this function:
Here is the caller graph for this function: