◆ sgbtrf()

subroutine sgbtrf	(	integer	m,
		integer	n,
		integer	kl,
		integer	ku,
		real, dimension( ldab, * )	ab,
		integer	ldab,
		integer, dimension( * )	ipiv,
		integer	info )

SGBTRF

Download SGBTRF + dependencies [TGZ] [ZIP] [TXT]

Purpose:

!>
!> SGBTRF computes an LU factorization of a real m-by-n band matrix A
!> using partial pivoting with row interchanges.
!>
!> This is the blocked version of the algorithm, calling Level 3 BLAS.
!>

Parameters

[in]	M	!> M is INTEGER !> The number of rows of the matrix A. M >= 0. !>
[in]	N	!> N is INTEGER !> The number of columns of the matrix A. N >= 0. !>
[in]	KL	!> KL is INTEGER !> The number of subdiagonals within the band of A. KL >= 0. !>
[in]	KU	!> KU is INTEGER !> The number of superdiagonals within the band of A. KU >= 0. !>
[in,out]	AB	!> AB is REAL array, dimension (LDAB,N) !> On entry, the matrix A in band storage, in rows KL+1 to !> 2*KL+KU+1; rows 1 to KL of the array need not be set. !> The j-th column of A is stored in the j-th column of the !> array AB as follows: !> AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) !> !> On exit, details of the factorization: U is stored as an !> upper triangular band matrix with KL+KU superdiagonals in !> rows 1 to KL+KU+1, and the multipliers used during the !> factorization are stored in rows KL+KU+2 to 2*KL+KU+1. !> See below for further details. !>
[in]	LDAB	!> LDAB is INTEGER !> The leading dimension of the array AB. LDAB >= 2*KL+KU+1. !>
[out]	IPIV	!> IPIV is INTEGER array, dimension (min(M,N)) !> The pivot indices; for 1 <= i <= min(M,N), row i of the !> matrix was interchanged with row IPIV(i). !>
[out]	INFO	!> INFO is INTEGER !> = 0: successful exit !> < 0: if INFO = -i, the i-th argument had an illegal value !> > 0: if INFO = +i, U(i,i) is exactly zero. The factorization !> has been completed, but the factor U is exactly !> singular, and division by zero will occur if it is used !> to solve a system of equations. !>

Author: Univ. of Tennessee; Univ. of California Berkeley; Univ. of Colorado Denver; NAG Ltd.

Further Details:

!>
!>  The band storage scheme is illustrated by the following example, when
!>  M = N = 6, KL = 2, KU = 1:
!>
!>  On entry:                       On exit:
!>
!>      *    *    *    +    +    +       *    *    *   u14  u25  u36
!>      *    *    +    +    +    +       *    *   u13  u24  u35  u46
!>      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
!>     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
!>     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
!>     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
!>
!>  Array elements marked * are not used by the routine; elements marked
!>  + need not be set on entry, but are required by the routine to store
!>  elements of U because of fill-in resulting from the row interchanges.
!>

Definition at line 141 of file sgbtrf.f.

*
*  -- LAPACK computational routine --
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. --
*
*  Body of SGBTRF: LU factorization with partial pivoting of the
*  M-by-N band matrix held in AB.  The arguments are checked, a block
*  size NB is requested from ILAENV and capped at NBMAX, and then
*  either the unblocked routine SGBTF2 is called (NB <= 1 or NB > KL)
*  or the blocked elimination below is run, up to NB columns at a
*  time.
*
*  On return INFO is 0, or -i when the i-th argument failed its check
*  (XERBLA is called first), or, in the blocked path, the index of the
*  first column whose pivot was found to be exactly zero.
*
*     .. Scalar Arguments ..
      INTEGER            INFO, KL, KU, LDAB, M, N
*     ..
*     .. Array Arguments ..
      INTEGER            IPIV( * )
      REAL               AB( LDAB, * )
*     ..
*
*  =====================================================================
*
*     .. Parameters ..
*     NBMAX is the largest block size the local work arrays can hold;
*     LDWORK is the leading dimension of those work arrays.
      REAL               ONE, ZERO
      parameter( one = 1.0e+0, zero = 0.0e+0 )
      INTEGER            NBMAX, LDWORK
      parameter( nbmax = 64, ldwork = nbmax+1 )
*     ..
*     .. Local Scalars ..
      INTEGER            I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
     $                   JU, K2, KM, KV, NB, NW
      REAL               TEMP
*     ..
*     .. Local Arrays ..
*     WORK13 receives the lower triangle of block A13 and WORK31 the
*     upper triangle of block A31 (see the partitioning further down).
      REAL               WORK13( LDWORK, NBMAX ),
     $                   WORK31( LDWORK, NBMAX )
*     ..
*     .. External Functions ..
      INTEGER            ILAENV, ISAMAX
      EXTERNAL           ilaenv, isamax
*     ..
*     .. External Subroutines ..
      EXTERNAL           scopy, sgbtf2, sgemm, sger, slaswp,
     $                   sscal,
     $                   sswap, strsm, xerbla
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          max, min
*     ..
*     .. Executable Statements ..
*
*     KV is the number of superdiagonals in the factor U, allowing for
*     fill-in
*
      kv = ku + kl
*
*     Test the input parameters.
*
*     The INFO values are the negated positions of M, N, KL, KU and
*     LDAB in the argument list.  KL+KV+1 is the same quantity as
*     2*KL+KU+1.
*
      info = 0
      IF( m.LT.0 ) THEN
         info = -1
      ELSE IF( n.LT.0 ) THEN
         info = -2
      ELSE IF( kl.LT.0 ) THEN
         info = -3
      ELSE IF( ku.LT.0 ) THEN
         info = -4
      ELSE IF( ldab.LT.kl+kv+1 ) THEN
         info = -6
      END IF
      IF( info.NE.0 ) THEN
         CALL xerbla( 'SGBTRF', -info )
         RETURN
      END IF
*
*     Quick return if possible
*
      IF( m.EQ.0 .OR. n.EQ.0 )
     $   RETURN
*
*     Determine the block size for this environment
*
      nb = ilaenv( 1, 'SGBTRF', ' ', m, n, kl, ku )
*
*     The block size must not exceed the limit set by the size of the
*     local arrays WORK13 and WORK31.
*
      nb = min( nb, nbmax )
*
*     The blocked path is taken only when 2 <= NB <= KL.
*
      IF( nb.LE.1 .OR. nb.GT.kl ) THEN
*
*        Use unblocked code
*
         CALL sgbtf2( m, n, kl, ku, ab, ldab, ipiv, info )
      ELSE
*
*        Use blocked code
*
*        Note on strides: A(i,j) is held in AB(KV+1+i-j,j), so moving
*        one column to the right along a row of A moves LDAB-1
*        elements forward in AB.  This is why the row swaps and the
*        BLAS calls below pass LDAB-1 as increment or leading
*        dimension when they address AB.
*
*        Zero the superdiagonal elements of the work array WORK13
*
         DO 20 j = 1, nb
            DO 10 i = 1, j - 1
               work13( i, j ) = zero
   10       CONTINUE
   20    CONTINUE
*
*        Zero the subdiagonal elements of the work array WORK31
*
         DO 40 j = 1, nb
            DO 30 i = j + 1, nb
               work31( i, j ) = zero
   30       CONTINUE
   40    CONTINUE
*
*        Gaussian elimination with partial pivoting
*
*        Set fill-in elements in columns KU+2 to KV to zero
*
         DO 60 j = ku + 2, min( kv, n )
            DO 50 i = kv - j + 2, kl
               ab( i, j ) = zero
   50       CONTINUE
   60    CONTINUE
*
*        JU is the index of the last column affected by the current
*        stage of the factorization
*
         ju = 1
*
*        Main loop over column blocks; JB is the width of the current
*        block (smaller than NB only for the last block).
*
         DO 180 j = 1, min( m, n ), nb
            jb = min( nb, min( m, n )-j+1 )
*
*           The active part of the matrix is partitioned
*
*              A11   A12   A13
*              A21   A22   A23
*              A31   A32   A33
*
*           Here A11, A21 and A31 denote the current block of JB columns
*           which is about to be factorized. The number of rows in the
*           partitioning are JB, I2, I3 respectively, and the numbers
*           of columns are JB, J2, J3. The superdiagonal elements of A13
*           and the subdiagonal elements of A31 lie outside the band.
*
*           I2 and I3 may come out zero or negative near the bottom of
*           the matrix; every use below is guarded by a .GT.0 test or
*           by a MIN that feeds such a test.
*
            i2 = min( kl-jb, m-j-jb+1 )
            i3 = min( jb, m-j-kl+1 )
*
*           J2 and J3 are computed after JU has been updated.
*
*           Factorize the current block of JB columns
*
            DO 80 jj = j, j + jb - 1
*
*              Set fill-in elements in column JJ+KV to zero
*
               IF( jj+kv.LE.n ) THEN
                  DO 70 i = 1, kl
                     ab( i, jj+kv ) = zero
   70             CONTINUE
               END IF
*
*              Find pivot and test for singularity. KM is the number of
*              subdiagonal elements in the current column.
*
*              The search covers the diagonal entry AB(KV+1,JJ) and the
*              KM entries below it, so JP = 1 means the diagonal was
*              chosen.  IPIV(JJ) is stored relative to the first row J
*              of the block for now; J-1 is added once the block has
*              been processed.
*
               km = min( kl, m-jj )
               jp = isamax( km+1, ab( kv+1, jj ), 1 )
               ipiv( jj ) = jp + jj - j
               IF( ab( kv+jp, jj ).NE.zero ) THEN
*
*                 Row JJ+JP-1 becomes the pivot row; its band extends
*                 KU columns to the right, capped at N.
*
                  ju = max( ju, min( jj+ku+jp-1, n ) )
                  IF( jp.NE.1 ) THEN
*
*                    Apply interchange to columns J to J+JB-1
*
                     IF( jp+jj-1.LT.j+kl ) THEN
*
*                       Pivot row lies above row J+KL: both rows are
*                       addressed in AB and swapped across the block.
*
                        CALL sswap( jb, ab( kv+1+jj-j, j ), ldab-1,
     $                              ab( kv+jp+jj-j, j ), ldab-1 )
                     ELSE
*
*                       The interchange affects columns J to JJ-1 of A31
*                       which are stored in the work array WORK31
*
                        CALL sswap( jj-j, ab( kv+1+jj-j, j ), ldab-1,
     $                              work31( jp+jj-j-kl, 1 ), ldwork )
*
*                       Columns JJ to J+JB-1 of the two rows are
*                       swapped directly in AB.
*
                        CALL sswap( j+jb-jj, ab( kv+1, jj ), ldab-1,
     $                              ab( kv+jp, jj ), ldab-1 )
                     END IF
                  END IF
*
*                 Compute multipliers
*
*                 The KM entries below the diagonal are scaled by the
*                 reciprocal of the (nonzero) pivot AB(KV+1,JJ).
*
                  CALL sscal( km, one / ab( kv+1, jj ), ab( kv+2,
     $                        jj ),
     $                        1 )
*
*                 Update trailing submatrix within the band and within
*                 the current block. JM is the index of the last column
*                 which needs to be updated.
*
                  jm = min( ju, j+jb-1 )
                  IF( jm.GT.jj )
     $               CALL sger( km, jm-jj, -one, ab( kv+2, jj ), 1,
     $                          ab( kv, jj+1 ), ldab-1,
     $                          ab( kv+1, jj+1 ), ldab-1 )
               ELSE
*
*                 If pivot is zero, set INFO to the index of the pivot
*                 unless a zero pivot has already been found.
*
*                 Processing continues with the next column; only the
*                 first such index is kept.
*
                  IF( info.EQ.0 )
     $               info = jj
               END IF
*
*              Copy current column of A31 into the work array WORK31
*
               nw = min( jj-j+1, i3 )
               IF( nw.GT.0 )
     $            CALL scopy( nw, ab( kv+kl+1-jj+j, jj ), 1,
     $                        work31( 1, jj-j+1 ), 1 )
   80       CONTINUE
*
*           Columns to the right of the current block exist only when
*           J+JB <= N; otherwise just the pivot indices are adjusted.
*
            IF( j+jb.LE.n ) THEN
*
*              Apply the row interchanges to the other blocks.
*
               j2 = min( ju-j+1, kv ) - jb
               j3 = max( 0, ju-j-kv+1 )
*
*              Use SLASWP to apply the row interchanges to A12, A22, and
*              A32.
*
*              IPIV(J:J+JB-1) still holds block-relative indices at
*              this point, matching the row range 1..JB passed here.
*
               CALL slaswp( j2, ab( kv+1-jb, j+jb ), ldab-1, 1, jb,
     $                      ipiv( j ), 1 )
*
*              Adjust the pivot indices.
*
*              Adding J-1 turns block-relative indices into row indices
*              of the whole matrix.
*
               DO 90 i = j, j + jb - 1
                  ipiv( i ) = ipiv( i ) + j - 1
   90          CONTINUE
*
*              Apply the row interchanges to A13, A23, and A33
*              columnwise.
*
*              K2 is the last column of A12; JJ is reused from here on
*              as a scratch column index (loop 80 has finished).
*              NOTE(review): the II loop starts at J+I-1 rather than J,
*              presumably because earlier rows fall outside the band in
*              column JJ - confirm.
*
               k2 = j - 1 + jb + j2
               DO 110 i = 1, j3
                  jj = k2 + i
                  DO 100 ii = j + i - 1, j + jb - 1
                     ip = ipiv( ii )
                     IF( ip.NE.ii ) THEN
                        temp = ab( kv+1+ii-jj, jj )
                        ab( kv+1+ii-jj, jj ) = ab( kv+1+ip-jj, jj )
                        ab( kv+1+ip-jj, jj ) = temp
                     END IF
  100             CONTINUE
  110          CONTINUE
*
*              Update the relevant part of the trailing submatrix
*
               IF( j2.GT.0 ) THEN
*
*                 Update A12
*
                  CALL strsm( 'Left', 'Lower', 'No transpose',
     $                        'Unit',
     $                        jb, j2, one, ab( kv+1, j ), ldab-1,
     $                        ab( kv+1-jb, j+jb ), ldab-1 )
*
                  IF( i2.GT.0 ) THEN
*
*                    Update A22
*
                     CALL sgemm( 'No transpose', 'No transpose', i2,
     $                           j2,
     $                           jb, -one, ab( kv+1+jb, j ), ldab-1,
     $                           ab( kv+1-jb, j+jb ), ldab-1, one,
     $                           ab( kv+1, j+jb ), ldab-1 )
                  END IF
*
                  IF( i3.GT.0 ) THEN
*
*                    Update A32
*
*                    A31 is read from its copy in WORK31.
*
                     CALL sgemm( 'No transpose', 'No transpose', i3,
     $                           j2,
     $                           jb, -one, work31, ldwork,
     $                           ab( kv+1-jb, j+jb ), ldab-1, one,
     $                           ab( kv+kl+1-jb, j+jb ), ldab-1 )
                  END IF
               END IF
*
               IF( j3.GT.0 ) THEN
*
*                 Copy the lower triangle of A13 into the work array
*                 WORK13
*
*                 The strictly upper part of WORK13 was zeroed before
*                 the main loop and is not written by this copy.
*
                  DO 130 jj = 1, j3
                     DO 120 ii = jj, jb
                        work13( ii, jj ) = ab( ii-jj+1, jj+j+kv-1 )
  120                CONTINUE
  130             CONTINUE
*
*                 Update A13 in the work array
*
                  CALL strsm( 'Left', 'Lower', 'No transpose',
     $                        'Unit',
     $                        jb, j3, one, ab( kv+1, j ), ldab-1,
     $                        work13, ldwork )
*
                  IF( i2.GT.0 ) THEN
*
*                    Update A23
*
                     CALL sgemm( 'No transpose', 'No transpose', i2,
     $                           j3,
     $                           jb, -one, ab( kv+1+jb, j ), ldab-1,
     $                           work13, ldwork, one, ab( 1+jb, j+kv ),
     $                           ldab-1 )
                  END IF
*
                  IF( i3.GT.0 ) THEN
*
*                    Update A33
*
                     CALL sgemm( 'No transpose', 'No transpose', i3,
     $                           j3,
     $                           jb, -one, work31, ldwork, work13,
     $                           ldwork, one, ab( 1+kl, j+kv ), ldab-1 )
                  END IF
*
*                 Copy the lower triangle of A13 back into place
*
                  DO 150 jj = 1, j3
                     DO 140 ii = jj, jb
                        ab( ii-jj+1, jj+j+kv-1 ) = work13( ii, jj )
  140                CONTINUE
  150             CONTINUE
               END IF
            ELSE
*
*              Adjust the pivot indices.
*
*              Same block-relative to whole-matrix shift as in the
*              other branch.
*
               DO 160 i = j, j + jb - 1
                  ipiv( i ) = ipiv( i ) + j - 1
  160          CONTINUE
            END IF
*
*           Partially undo the interchanges in the current block to
*           restore the upper triangular form of A31 and copy the upper
*           triangle of A31 back into place
*
*           The loop runs from the last column of the block back to
*           the first.  JP is recomputed relative to the diagonal from
*           the adjusted IPIV entry.
*
            DO 170 jj = j + jb - 1, j, -1
               jp = ipiv( jj ) - jj + 1
               IF( jp.NE.1 ) THEN
*
*                 Apply interchange to columns J to JJ-1
*
                  IF( jp+jj-1.LT.j+kl ) THEN
*
*                    The interchange does not affect A31
*
                     CALL sswap( jj-j, ab( kv+1+jj-j, j ), ldab-1,
     $                           ab( kv+jp+jj-j, j ), ldab-1 )
                  ELSE
*
*                    The interchange does affect A31
*
                     CALL sswap( jj-j, ab( kv+1+jj-j, j ), ldab-1,
     $                           work31( jp+jj-j-kl, 1 ), ldwork )
                  END IF
               END IF
*
*              Copy the current column of A31 back into place
*
               nw = min( i3, jj-j+1 )
               IF( nw.GT.0 )
     $            CALL scopy( nw, work31( 1, jj-j+1 ), 1,
     $                        ab( kv+kl+1-jj+j, jj ), 1 )
  170       CONTINUE
  180    CONTINUE
      END IF
*
      RETURN
*
*     End of SGBTRF
*

Here is the call graph for this function:

Here is the caller graph for this function: