d0/d3a/cgbtrf_8f_source.html

00001       SUBROUTINE CGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
00002 *
00003 *  -- LAPACK routine (version 3.2) --
00004 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
00005 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
00006 *     November 2006
00007 *
00008 *     .. Scalar Arguments ..
00009       INTEGER            INFO, KL, KU, LDAB, M, N
00010 *     ..
00011 *     .. Array Arguments ..
00012       INTEGER            IPIV( * )
00013       COMPLEX            AB( LDAB, * )
00014 *     ..
00015 *
00016 *  Purpose
00017 *  =======
00018 *
00019 *  CGBTRF computes an LU factorization of a complex m-by-n band matrix A
00020 *  using partial pivoting with row interchanges.
00021 *
00022 *  This is the blocked version of the algorithm, calling Level 3 BLAS.
00023 *
00024 *  Arguments
00025 *  =========
00026 *
00027 *  M       (input) INTEGER
00028 *          The number of rows of the matrix A.  M >= 0.
00029 *
00030 *  N       (input) INTEGER
00031 *          The number of columns of the matrix A.  N >= 0.
00032 *
00033 *  KL      (input) INTEGER
00034 *          The number of subdiagonals within the band of A.  KL >= 0.
00035 *
00036 *  KU      (input) INTEGER
00037 *          The number of superdiagonals within the band of A.  KU >= 0.
00038 *
00039 *  AB      (input/output) COMPLEX array, dimension (LDAB,N)
00040 *          On entry, the matrix A in band storage, in rows KL+1 to
00041 *          2*KL+KU+1; rows 1 to KL of the array need not be set.
00042 *          The j-th column of A is stored in the j-th column of the
00043 *          array AB as follows:
00044 *          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
00045 *
00046 *          On exit, details of the factorization: U is stored as an
00047 *          upper triangular band matrix with KL+KU superdiagonals in
00048 *          rows 1 to KL+KU+1, and the multipliers used during the
00049 *          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
00050 *          See below for further details.
00051 *
00052 *  LDAB    (input) INTEGER
00053 *          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
00054 *
00055 *  IPIV    (output) INTEGER array, dimension (min(M,N))
00056 *          The pivot indices; for 1 <= i <= min(M,N), row i of the
00057 *          matrix was interchanged with row IPIV(i).
00058 *
00059 *  INFO    (output) INTEGER
00060 *          = 0: successful exit
00061 *          < 0: if INFO = -i, the i-th argument had an illegal value
00062 *          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
00063 *               has been completed, but the factor U is exactly
00064 *               singular, and division by zero will occur if it is used
00065 *               to solve a system of equations.
00066 *
00067 *  Further Details
00068 *  ===============
00069 *
00070 *  The band storage scheme is illustrated by the following example, when
00071 *  M = N = 6, KL = 2, KU = 1:
00072 *
00073 *  On entry:                       On exit:
00074 *
00075 *      *    *    *    +    +    +       *    *    *   u14  u25  u36
00076 *      *    *    +    +    +    +       *    *   u13  u24  u35  u46
00077 *      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
00078 *     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
00079 *     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
00080 *     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
00081 *
00082 *  Array elements marked * are not used by the routine; elements marked
00083 *  + need not be set on entry, but are required by the routine to store
00084 *  elements of U because of fill-in resulting from the row interchanges.
00085 *
00086 *  =====================================================================
00087 *
00088 *     .. Parameters ..
00089       COMPLEX            ONE, ZERO
00090       PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
00091      $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
00092       INTEGER            NBMAX, LDWORK
00093       PARAMETER          ( NBMAX = 64, LDWORK = NBMAX+1 )
00094 *     ..
00095 *     .. Local Scalars ..
00096       INTEGER            I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
00097      $                   JU, K2, KM, KV, NB, NW
00098       COMPLEX            TEMP
00099 *     ..
00100 *     .. Local Arrays ..
00101       COMPLEX            WORK13( LDWORK, NBMAX ),
00102      $                   WORK31( LDWORK, NBMAX )
00103 *     ..
00104 *     .. External Functions ..
00105       INTEGER            ICAMAX, ILAENV
00106       EXTERNAL           ICAMAX, ILAENV
00107 *     ..
00108 *     .. External Subroutines ..
00109       EXTERNAL           CCOPY, CGBTF2, CGEMM, CGERU, CLASWP, CSCAL,
00110      $                   CSWAP, CTRSM, XERBLA
00111 *     ..
00112 *     .. Intrinsic Functions ..
00113       INTRINSIC          MAX, MIN
00114 *     ..
00115 *     .. Executable Statements ..
00116 *
00117 *     KV is the number of superdiagonals in the factor U, allowing for
00118 *     fill-in
00119 *
00120       KV = KU + KL
00121 *
00122 *     Test the input parameters.
00123 *
00124       INFO = 0
00125       IF( M.LT.0 ) THEN
00126          INFO = -1
00127       ELSE IF( N.LT.0 ) THEN
00128          INFO = -2
00129       ELSE IF( KL.LT.0 ) THEN
00130          INFO = -3
00131       ELSE IF( KU.LT.0 ) THEN
00132          INFO = -4
00133       ELSE IF( LDAB.LT.KL+KV+1 ) THEN
00134          INFO = -6
00135       END IF
00136       IF( INFO.NE.0 ) THEN
00137          CALL XERBLA( 'CGBTRF', -INFO )
00138          RETURN
00139       END IF
00140 *
00141 *     Quick return if possible
00142 *
00143       IF( M.EQ.0 .OR. N.EQ.0 )
00144      $   RETURN
00145 *
00146 *     Determine the block size for this environment
00147 *
00148       NB = ILAENV( 1, 'CGBTRF', ' ', M, N, KL, KU )
00149 *
00150 *     The block size must not exceed the limit set by the size of the
00151 *     local arrays WORK13 and WORK31.
00152 *
00153       NB = MIN( NB, NBMAX )
00154 *
00155       IF( NB.LE.1 .OR. NB.GT.KL ) THEN
00156 *
00157 *        Use unblocked code
00158 *
00159          CALL CGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
00160       ELSE
00161 *
00162 *        Use blocked code
00163 *
00164 *        Zero the superdiagonal elements of the work array WORK13
00165 *
00166          DO 20 J = 1, NB
00167             DO 10 I = 1, J - 1
00168                WORK13( I, J ) = ZERO
00169    10       CONTINUE
00170    20    CONTINUE
00171 *
00172 *        Zero the subdiagonal elements of the work array WORK31
00173 *
00174          DO 40 J = 1, NB
00175             DO 30 I = J + 1, NB
00176                WORK31( I, J ) = ZERO
00177    30       CONTINUE
00178    40    CONTINUE
00179 *
00180 *        Gaussian elimination with partial pivoting
00181 *
00182 *        Set fill-in elements in columns KU+2 to KV to zero
00183 *
00184          DO 60 J = KU + 2, MIN( KV, N )
00185             DO 50 I = KV - J + 2, KL
00186                AB( I, J ) = ZERO
00187    50       CONTINUE
00188    60    CONTINUE
00189 *
00190 *        JU is the index of the last column affected by the current
00191 *        stage of the factorization
00192 *
00193          JU = 1
00194 *
00195          DO 180 J = 1, MIN( M, N ), NB
00196             JB = MIN( NB, MIN( M, N )-J+1 )
00197 *
00198 *           The active part of the matrix is partitioned
00199 *
00200 *              A11   A12   A13
00201 *              A21   A22   A23
00202 *              A31   A32   A33
00203 *
00204 *           Here A11, A21 and A31 denote the current block of JB columns
00205 *           which is about to be factorized. The number of rows in the
00206 *           partitioning are JB, I2, I3 respectively, and the numbers
00207 *           of columns are JB, J2, J3. The superdiagonal elements of A13
00208 *           and the subdiagonal elements of A31 lie outside the band.
00209 *
00210             I2 = MIN( KL-JB, M-J-JB+1 )
00211             I3 = MIN( JB, M-J-KL+1 )
00212 *
00213 *           J2 and J3 are computed after JU has been updated.
00214 *
00215 *           Factorize the current block of JB columns
00216 *
00217             DO 80 JJ = J, J + JB - 1
00218 *
00219 *              Set fill-in elements in column JJ+KV to zero
00220 *
00221                IF( JJ+KV.LE.N ) THEN
00222                   DO 70 I = 1, KL
00223                      AB( I, JJ+KV ) = ZERO
00224    70             CONTINUE
00225                END IF
00226 *
00227 *              Find pivot and test for singularity. KM is the number of
00228 *              subdiagonal elements in the current column.
00229 *
00230                KM = MIN( KL, M-JJ )
00231                JP = ICAMAX( KM+1, AB( KV+1, JJ ), 1 )
00232                IPIV( JJ ) = JP + JJ - J
00233                IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
00234                   JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
00235                   IF( JP.NE.1 ) THEN
00236 *
00237 *                    Apply interchange to columns J to J+JB-1
00238 *
00239                      IF( JP+JJ-1.LT.J+KL ) THEN
00240 *
00241                         CALL CSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
00242      $                              AB( KV+JP+JJ-J, J ), LDAB-1 )
00243                      ELSE
00244 *
00245 *                       The interchange affects columns J to JJ-1 of A31
00246 *                       which are stored in the work array WORK31
00247 *
00248                         CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
00249      $                              WORK31( JP+JJ-J-KL, 1 ), LDWORK )
00250                         CALL CSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
00251      $                              AB( KV+JP, JJ ), LDAB-1 )
00252                      END IF
00253                   END IF
00254 *
00255 *                 Compute multipliers
00256 *
00257                   CALL CSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
00258      $                        1 )
00259 *
00260 *                 Update trailing submatrix within the band and within
00261 *                 the current block. JM is the index of the last column
00262 *                 which needs to be updated.
00263 *
00264                   JM = MIN( JU, J+JB-1 )
00265                   IF( JM.GT.JJ )
00266      $               CALL CGERU( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
00267      $                           AB( KV, JJ+1 ), LDAB-1,
00268      $                           AB( KV+1, JJ+1 ), LDAB-1 )
00269                ELSE
00270 *
00271 *                 If pivot is zero, set INFO to the index of the pivot
00272 *                 unless a zero pivot has already been found.
00273 *
00274                   IF( INFO.EQ.0 )
00275      $               INFO = JJ
00276                END IF
00277 *
00278 *              Copy current column of A31 into the work array WORK31
00279 *
00280                NW = MIN( JJ-J+1, I3 )
00281                IF( NW.GT.0 )
00282      $            CALL CCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
00283      $                        WORK31( 1, JJ-J+1 ), 1 )
00284    80       CONTINUE
00285             IF( J+JB.LE.N ) THEN
00286 *
00287 *              Apply the row interchanges to the other blocks.
00288 *
00289                J2 = MIN( JU-J+1, KV ) - JB
00290                J3 = MAX( 0, JU-J-KV+1 )
00291 *
00292 *              Use CLASWP to apply the row interchanges to A12, A22, and
00293 *              A32.
00294 *
00295                CALL CLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
00296      $                      IPIV( J ), 1 )
00297 *
00298 *              Adjust the pivot indices.
00299 *
00300                DO 90 I = J, J + JB - 1
00301                   IPIV( I ) = IPIV( I ) + J - 1
00302    90          CONTINUE
00303 *
00304 *              Apply the row interchanges to A13, A23, and A33
00305 *              columnwise.
00306 *
00307                K2 = J - 1 + JB + J2
00308                DO 110 I = 1, J3
00309                   JJ = K2 + I
00310                   DO 100 II = J + I - 1, J + JB - 1
00311                      IP = IPIV( II )
00312                      IF( IP.NE.II ) THEN
00313                         TEMP = AB( KV+1+II-JJ, JJ )
00314                         AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
00315                         AB( KV+1+IP-JJ, JJ ) = TEMP
00316                      END IF
00317   100             CONTINUE
00318   110          CONTINUE
00319 *
00320 *              Update the relevant part of the trailing submatrix
00321 *
00322                IF( J2.GT.0 ) THEN
00323 *
00324 *                 Update A12
00325 *
00326                   CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
00327      $                        JB, J2, ONE, AB( KV+1, J ), LDAB-1,
00328      $                        AB( KV+1-JB, J+JB ), LDAB-1 )
00329 *
00330                   IF( I2.GT.0 ) THEN
00331 *
00332 *                    Update A22
00333 *
00334                      CALL CGEMM( 'No transpose', 'No transpose', I2, J2,
00335      $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
00336      $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
00337      $                           AB( KV+1, J+JB ), LDAB-1 )
00338                   END IF
00339 *
00340                   IF( I3.GT.0 ) THEN
00341 *
00342 *                    Update A32
00343 *
00344                      CALL CGEMM( 'No transpose', 'No transpose', I3, J2,
00345      $                           JB, -ONE, WORK31, LDWORK,
00346      $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
00347      $                           AB( KV+KL+1-JB, J+JB ), LDAB-1 )
00348                   END IF
00349                END IF
00350 *
00351                IF( J3.GT.0 ) THEN
00352 *
00353 *                 Copy the lower triangle of A13 into the work array
00354 *                 WORK13
00355 *
00356                   DO 130 JJ = 1, J3
00357                      DO 120 II = JJ, JB
00358                         WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
00359   120                CONTINUE
00360   130             CONTINUE
00361 *
00362 *                 Update A13 in the work array
00363 *
00364                   CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
00365      $                        JB, J3, ONE, AB( KV+1, J ), LDAB-1,
00366      $                        WORK13, LDWORK )
00367 *
00368                   IF( I2.GT.0 ) THEN
00369 *
00370 *                    Update A23
00371 *
00372                      CALL CGEMM( 'No transpose', 'No transpose', I2, J3,
00373      $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
00374      $                           WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
00375      $                           LDAB-1 )
00376                   END IF
00377 *
00378                   IF( I3.GT.0 ) THEN
00379 *
00380 *                    Update A33
00381 *
00382                      CALL CGEMM( 'No transpose', 'No transpose', I3, J3,
00383      $                           JB, -ONE, WORK31, LDWORK, WORK13,
00384      $                           LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
00385                   END IF
00386 *
00387 *                 Copy the lower triangle of A13 back into place
00388 *
00389                   DO 150 JJ = 1, J3
00390                      DO 140 II = JJ, JB
00391                         AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
00392   140                CONTINUE
00393   150             CONTINUE
00394                END IF
00395             ELSE
00396 *
00397 *              Adjust the pivot indices.
00398 *
00399                DO 160 I = J, J + JB - 1
00400                   IPIV( I ) = IPIV( I ) + J - 1
00401   160          CONTINUE
00402             END IF
00403 *
00404 *           Partially undo the interchanges in the current block to
00405 *           restore the upper triangular form of A31 and copy the upper
00406 *           triangle of A31 back into place
00407 *
00408             DO 170 JJ = J + JB - 1, J, -1
00409                JP = IPIV( JJ ) - JJ + 1
00410                IF( JP.NE.1 ) THEN
00411 *
00412 *                 Apply interchange to columns J to JJ-1
00413 *
00414                   IF( JP+JJ-1.LT.J+KL ) THEN
00415 *
00416 *                    The interchange does not affect A31
00417 *
00418                      CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
00419      $                           AB( KV+JP+JJ-J, J ), LDAB-1 )
00420                   ELSE
00421 *
00422 *                    The interchange does affect A31
00423 *
00424                      CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
00425      $                           WORK31( JP+JJ-J-KL, 1 ), LDWORK )
00426                   END IF
00427                END IF
00428 *
00429 *              Copy the current column of A31 back into place
00430 *
00431                NW = MIN( I3, JJ-J+1 )
00432                IF( NW.GT.0 )
00433      $            CALL CCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
00434      $                        AB( KV+KL+1-JJ+J, JJ ), 1 )
00435   170       CONTINUE
00436   180    CONTINUE
00437       END IF
00438 *
00439       RETURN
00440 *
00441 *     End of CGBTRF
00442 *
00443       END