LAPACK  3.6.1
LAPACK: Linear Algebra PACKage
subroutine sgbtrf ( integer  M,
integer  N,
integer  KL,
integer  KU,
real, dimension( ldab, * )  AB,
integer  LDAB,
integer, dimension( * )  IPIV,
integer  INFO 
)

SGBTRF

Download SGBTRF + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 SGBTRF computes an LU factorization of a real m-by-n band matrix A
 using partial pivoting with row interchanges.

 This is the blocked version of the algorithm, calling Level 3 BLAS.
Parameters
[in] M
          M is INTEGER
          The number of rows of the matrix A.  M >= 0.
[in] N
          N is INTEGER
          The number of columns of the matrix A.  N >= 0.
[in] KL
          KL is INTEGER
          The number of subdiagonals within the band of A.  KL >= 0.
[in] KU
          KU is INTEGER
          The number of superdiagonals within the band of A.  KU >= 0.
[in,out] AB
          AB is REAL array, dimension (LDAB,N)
          On entry, the matrix A in band storage, in rows KL+1 to
          2*KL+KU+1; rows 1 to KL of the array need not be set.
          The j-th column of A is stored in the j-th column of the
          array AB as follows:
          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)

          On exit, details of the factorization: U is stored as an
          upper triangular band matrix with KL+KU superdiagonals in
          rows 1 to KL+KU+1, and the multipliers used during the
          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
          See below for further details.
[in] LDAB
          LDAB is INTEGER
          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
[out] IPIV
          IPIV is INTEGER array, dimension (min(M,N))
          The pivot indices; for 1 <= i <= min(M,N), row i of the
          matrix was interchanged with row IPIV(i).
[out] INFO
          INFO is INTEGER
          = 0: successful exit
          < 0: if INFO = -i, the i-th argument had an illegal value
          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
               has been completed, but the factor U is exactly
               singular, and division by zero will occur if it is used
               to solve a system of equations.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Date
November 2011
Further Details:
  The band storage scheme is illustrated by the following example, when
  M = N = 6, KL = 2, KU = 1:

  On entry:                       On exit:

      *    *    *    +    +    +       *    *    *   u14  u25  u36
      *    *    +    +    +    +       *    *   u13  u24  u35  u46
      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *

  Array elements marked * are not used by the routine; elements marked
  + need not be set on entry, but are required by the routine to store
  elements of U because of fill-in resulting from the row interchanges.

Definition at line 146 of file sgbtrf.f.

146 *
147 * -- LAPACK computational routine (version 3.4.0) --
148 * -- LAPACK is a software package provided by Univ. of Tennessee, --
149 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. --
150 * November 2011
151 *
152 * .. Scalar Arguments ..
153  INTEGER info, kl, ku, ldab, m, n
154 * ..
155 * .. Array Arguments ..
156  INTEGER ipiv( * )
157  REAL ab( ldab, * )
158 * ..
159 *
160 * =====================================================================
161 *
162 * .. Parameters ..
163  REAL one, zero
164  parameter ( one = 1.0e+0, zero = 0.0e+0 )
165  INTEGER nbmax, ldwork
166  parameter ( nbmax = 64, ldwork = nbmax+1 )
167 * ..
168 * .. Local Scalars ..
169  INTEGER i, i2, i3, ii, ip, j, j2, j3, jb, jj, jm, jp,
170  $ ju, k2, km, kv, nb, nw
171  REAL temp
172 * ..
173 * .. Local Arrays ..
174  REAL work13( ldwork, nbmax ),
175  $ work31( ldwork, nbmax )
176 * ..
177 * .. External Functions ..
178  INTEGER ilaenv, isamax
179  EXTERNAL ilaenv, isamax
180 * ..
181 * .. External Subroutines ..
182  EXTERNAL scopy, sgbtf2, sgemm, sger, slaswp, sscal,
183  $ sswap, strsm, xerbla
184 * ..
185 * .. Intrinsic Functions ..
186  INTRINSIC max, min
187 * ..
188 * .. Executable Statements ..
189 *
190 * KV is the number of superdiagonals in the factor U, allowing for
191 * fill-in
192 *
193  kv = ku + kl
194 *
195 * Test the input parameters.
196 *
197  info = 0
198  IF( m.LT.0 ) THEN
199  info = -1
200  ELSE IF( n.LT.0 ) THEN
201  info = -2
202  ELSE IF( kl.LT.0 ) THEN
203  info = -3
204  ELSE IF( ku.LT.0 ) THEN
205  info = -4
206  ELSE IF( ldab.LT.kl+kv+1 ) THEN
207  info = -6
208  END IF
209  IF( info.NE.0 ) THEN
210  CALL xerbla( 'SGBTRF', -info )
211  RETURN
212  END IF
213 *
214 * Quick return if possible
215 *
216  IF( m.EQ.0 .OR. n.EQ.0 )
217  $ RETURN
218 *
219 * Determine the block size for this environment
220 *
221  nb = ilaenv( 1, 'SGBTRF', ' ', m, n, kl, ku )
222 *
223 * The block size must not exceed the limit set by the size of the
224 * local arrays WORK13 and WORK31.
225 *
226  nb = min( nb, nbmax )
227 *
228  IF( nb.LE.1 .OR. nb.GT.kl ) THEN
229 *
230 * Use unblocked code
231 *
232  CALL sgbtf2( m, n, kl, ku, ab, ldab, ipiv, info )
233  ELSE
234 *
235 * Use blocked code
236 *
237 * Zero the superdiagonal elements of the work array WORK13
238 *
239  DO 20 j = 1, nb
240  DO 10 i = 1, j - 1
241  work13( i, j ) = zero
242  10 CONTINUE
243  20 CONTINUE
244 *
245 * Zero the subdiagonal elements of the work array WORK31
246 *
247  DO 40 j = 1, nb
248  DO 30 i = j + 1, nb
249  work31( i, j ) = zero
250  30 CONTINUE
251  40 CONTINUE
252 *
253 * Gaussian elimination with partial pivoting
254 *
255 * Set fill-in elements in columns KU+2 to KV to zero
256 *
257  DO 60 j = ku + 2, min( kv, n )
258  DO 50 i = kv - j + 2, kl
259  ab( i, j ) = zero
260  50 CONTINUE
261  60 CONTINUE
262 *
263 * JU is the index of the last column affected by the current
264 * stage of the factorization
265 *
266  ju = 1
267 *
268  DO 180 j = 1, min( m, n ), nb
269  jb = min( nb, min( m, n )-j+1 )
270 *
271 * The active part of the matrix is partitioned
272 *
273 * A11 A12 A13
274 * A21 A22 A23
275 * A31 A32 A33
276 *
277 * Here A11, A21 and A31 denote the current block of JB columns
278 * which is about to be factorized. The numbers of rows in the
279 * partitioning are JB, I2, I3 respectively, and the numbers
280 * of columns are JB, J2, J3. The superdiagonal elements of A13
281 * and the subdiagonal elements of A31 lie outside the band.
282 *
283  i2 = min( kl-jb, m-j-jb+1 )
284  i3 = min( jb, m-j-kl+1 )
285 *
286 * J2 and J3 are computed after JU has been updated.
287 *
288 * Factorize the current block of JB columns
289 *
290  DO 80 jj = j, j + jb - 1
291 *
292 * Set fill-in elements in column JJ+KV to zero
293 *
294  IF( jj+kv.LE.n ) THEN
295  DO 70 i = 1, kl
296  ab( i, jj+kv ) = zero
297  70 CONTINUE
298  END IF
299 *
300 * Find pivot and test for singularity. KM is the number of
301 * subdiagonal elements in the current column.
302 *
303  km = min( kl, m-jj )
304  jp = isamax( km+1, ab( kv+1, jj ), 1 )
305  ipiv( jj ) = jp + jj - j
306  IF( ab( kv+jp, jj ).NE.zero ) THEN
307  ju = max( ju, min( jj+ku+jp-1, n ) )
308  IF( jp.NE.1 ) THEN
309 *
310 * Apply interchange to columns J to J+JB-1
311 *
312  IF( jp+jj-1.LT.j+kl ) THEN
313 *
314  CALL sswap( jb, ab( kv+1+jj-j, j ), ldab-1,
315  $ ab( kv+jp+jj-j, j ), ldab-1 )
316  ELSE
317 *
318 * The interchange affects columns J to JJ-1 of A31
319 * which are stored in the work array WORK31
320 *
321  CALL sswap( jj-j, ab( kv+1+jj-j, j ), ldab-1,
322  $ work31( jp+jj-j-kl, 1 ), ldwork )
323  CALL sswap( j+jb-jj, ab( kv+1, jj ), ldab-1,
324  $ ab( kv+jp, jj ), ldab-1 )
325  END IF
326  END IF
327 *
328 * Compute multipliers
329 *
330  CALL sscal( km, one / ab( kv+1, jj ), ab( kv+2, jj ),
331  $ 1 )
332 *
333 * Update trailing submatrix within the band and within
334 * the current block. JM is the index of the last column
335 * which needs to be updated.
336 *
337  jm = min( ju, j+jb-1 )
338  IF( jm.GT.jj )
339  $ CALL sger( km, jm-jj, -one, ab( kv+2, jj ), 1,
340  $ ab( kv, jj+1 ), ldab-1,
341  $ ab( kv+1, jj+1 ), ldab-1 )
342  ELSE
343 *
344 * If pivot is zero, set INFO to the index of the pivot
345 * unless a zero pivot has already been found.
346 *
347  IF( info.EQ.0 )
348  $ info = jj
349  END IF
350 *
351 * Copy current column of A31 into the work array WORK31
352 *
353  nw = min( jj-j+1, i3 )
354  IF( nw.GT.0 )
355  $ CALL scopy( nw, ab( kv+kl+1-jj+j, jj ), 1,
356  $ work31( 1, jj-j+1 ), 1 )
357  80 CONTINUE
358  IF( j+jb.LE.n ) THEN
359 *
360 * Apply the row interchanges to the other blocks.
361 *
362  j2 = min( ju-j+1, kv ) - jb
363  j3 = max( 0, ju-j-kv+1 )
364 *
365 * Use SLASWP to apply the row interchanges to A12, A22, and
366 * A32.
367 *
368  CALL slaswp( j2, ab( kv+1-jb, j+jb ), ldab-1, 1, jb,
369  $ ipiv( j ), 1 )
370 *
371 * Adjust the pivot indices.
372 *
373  DO 90 i = j, j + jb - 1
374  ipiv( i ) = ipiv( i ) + j - 1
375  90 CONTINUE
376 *
377 * Apply the row interchanges to A13, A23, and A33
378 * columnwise.
379 *
380  k2 = j - 1 + jb + j2
381  DO 110 i = 1, j3
382  jj = k2 + i
383  DO 100 ii = j + i - 1, j + jb - 1
384  ip = ipiv( ii )
385  IF( ip.NE.ii ) THEN
386  temp = ab( kv+1+ii-jj, jj )
387  ab( kv+1+ii-jj, jj ) = ab( kv+1+ip-jj, jj )
388  ab( kv+1+ip-jj, jj ) = temp
389  END IF
390  100 CONTINUE
391  110 CONTINUE
392 *
393 * Update the relevant part of the trailing submatrix
394 *
395  IF( j2.GT.0 ) THEN
396 *
397 * Update A12
398 *
399  CALL strsm( 'Left', 'Lower', 'No transpose', 'Unit',
400  $ jb, j2, one, ab( kv+1, j ), ldab-1,
401  $ ab( kv+1-jb, j+jb ), ldab-1 )
402 *
403  IF( i2.GT.0 ) THEN
404 *
405 * Update A22
406 *
407  CALL sgemm( 'No transpose', 'No transpose', i2, j2,
408  $ jb, -one, ab( kv+1+jb, j ), ldab-1,
409  $ ab( kv+1-jb, j+jb ), ldab-1, one,
410  $ ab( kv+1, j+jb ), ldab-1 )
411  END IF
412 *
413  IF( i3.GT.0 ) THEN
414 *
415 * Update A32
416 *
417  CALL sgemm( 'No transpose', 'No transpose', i3, j2,
418  $ jb, -one, work31, ldwork,
419  $ ab( kv+1-jb, j+jb ), ldab-1, one,
420  $ ab( kv+kl+1-jb, j+jb ), ldab-1 )
421  END IF
422  END IF
423 *
424  IF( j3.GT.0 ) THEN
425 *
426 * Copy the lower triangle of A13 into the work array
427 * WORK13
428 *
429  DO 130 jj = 1, j3
430  DO 120 ii = jj, jb
431  work13( ii, jj ) = ab( ii-jj+1, jj+j+kv-1 )
432  120 CONTINUE
433  130 CONTINUE
434 *
435 * Update A13 in the work array
436 *
437  CALL strsm( 'Left', 'Lower', 'No transpose', 'Unit',
438  $ jb, j3, one, ab( kv+1, j ), ldab-1,
439  $ work13, ldwork )
440 *
441  IF( i2.GT.0 ) THEN
442 *
443 * Update A23
444 *
445  CALL sgemm( 'No transpose', 'No transpose', i2, j3,
446  $ jb, -one, ab( kv+1+jb, j ), ldab-1,
447  $ work13, ldwork, one, ab( 1+jb, j+kv ),
448  $ ldab-1 )
449  END IF
450 *
451  IF( i3.GT.0 ) THEN
452 *
453 * Update A33
454 *
455  CALL sgemm( 'No transpose', 'No transpose', i3, j3,
456  $ jb, -one, work31, ldwork, work13,
457  $ ldwork, one, ab( 1+kl, j+kv ), ldab-1 )
458  END IF
459 *
460 * Copy the lower triangle of A13 back into place
461 *
462  DO 150 jj = 1, j3
463  DO 140 ii = jj, jb
464  ab( ii-jj+1, jj+j+kv-1 ) = work13( ii, jj )
465  140 CONTINUE
466  150 CONTINUE
467  END IF
468  ELSE
469 *
470 * Adjust the pivot indices.
471 *
472  DO 160 i = j, j + jb - 1
473  ipiv( i ) = ipiv( i ) + j - 1
474  160 CONTINUE
475  END IF
476 *
477 * Partially undo the interchanges in the current block to
478 * restore the upper triangular form of A31 and copy the upper
479 * triangle of A31 back into place
480 *
481  DO 170 jj = j + jb - 1, j, -1
482  jp = ipiv( jj ) - jj + 1
483  IF( jp.NE.1 ) THEN
484 *
485 * Apply interchange to columns J to JJ-1
486 *
487  IF( jp+jj-1.LT.j+kl ) THEN
488 *
489 * The interchange does not affect A31
490 *
491  CALL sswap( jj-j, ab( kv+1+jj-j, j ), ldab-1,
492  $ ab( kv+jp+jj-j, j ), ldab-1 )
493  ELSE
494 *
495 * The interchange does affect A31
496 *
497  CALL sswap( jj-j, ab( kv+1+jj-j, j ), ldab-1,
498  $ work31( jp+jj-j-kl, 1 ), ldwork )
499  END IF
500  END IF
501 *
502 * Copy the current column of A31 back into place
503 *
504  nw = min( i3, jj-j+1 )
505  IF( nw.GT.0 )
506  $ CALL scopy( nw, work31( 1, jj-j+1 ), 1,
507  $ ab( kv+kl+1-jj+j, jj ), 1 )
508  170 CONTINUE
509  180 CONTINUE
510  END IF
511 *
512  RETURN
513 *
514 * End of SGBTRF
515 *
subroutine sger(M, N, ALPHA, X, INCX, Y, INCY, A, LDA)
SGER
Definition: sger.f:132
subroutine strsm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
STRSM
Definition: strsm.f:183
integer function isamax(N, SX, INCX)
ISAMAX
Definition: isamax.f:53
subroutine sgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
SGEMM
Definition: sgemm.f:189
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
subroutine slaswp(N, A, LDA, K1, K2, IPIV, INCX)
SLASWP performs a series of row interchanges on a general rectangular matrix.
Definition: slaswp.f:116
subroutine sgbtf2(M, N, KL, KU, AB, LDAB, IPIV, INFO)
SGBTF2 computes the LU factorization of a general band matrix using the unblocked version of the algorithm.
Definition: sgbtf2.f:147
integer function ilaenv(ISPEC, NAME, OPTS, N1, N2, N3, N4)
ILAENV
Definition: tstiee.f:83
subroutine sscal(N, SA, SX, INCX)
SSCAL
Definition: sscal.f:55
subroutine sswap(N, SX, INCX, SY, INCY)
SSWAP
Definition: sswap.f:53
subroutine scopy(N, SX, INCX, SY, INCY)
SCOPY
Definition: scopy.f:53

Here is the call graph for this function:

Here is the caller graph for this function: