LAPACK 3.3.0
|
00001 SUBROUTINE CGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO ) 00002 * 00003 * -- LAPACK routine (version 3.2) -- 00004 * -- LAPACK is a software package provided by Univ. of Tennessee, -- 00005 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- 00006 * November 2006 00007 * 00008 * .. Scalar Arguments .. 00009 INTEGER INFO, KL, KU, LDAB, M, N 00010 * .. 00011 * .. Array Arguments .. 00012 INTEGER IPIV( * ) 00013 COMPLEX AB( LDAB, * ) 00014 * .. 00015 * 00016 * Purpose 00017 * ======= 00018 * 00019 * CGBTRF computes an LU factorization of a complex m-by-n band matrix A 00020 * using partial pivoting with row interchanges. 00021 * 00022 * This is the blocked version of the algorithm, calling Level 3 BLAS. 00023 * 00024 * Arguments 00025 * ========= 00026 * 00027 * M (input) INTEGER 00028 * The number of rows of the matrix A. M >= 0. 00029 * 00030 * N (input) INTEGER 00031 * The number of columns of the matrix A. N >= 0. 00032 * 00033 * KL (input) INTEGER 00034 * The number of subdiagonals within the band of A. KL >= 0. 00035 * 00036 * KU (input) INTEGER 00037 * The number of superdiagonals within the band of A. KU >= 0. 00038 * 00039 * AB (input/output) COMPLEX array, dimension (LDAB,N) 00040 * On entry, the matrix A in band storage, in rows KL+1 to 00041 * 2*KL+KU+1; rows 1 to KL of the array need not be set. 00042 * The j-th column of A is stored in the j-th column of the 00043 * array AB as follows: 00044 * AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) 00045 * 00046 * On exit, details of the factorization: U is stored as an 00047 * upper triangular band matrix with KL+KU superdiagonals in 00048 * rows 1 to KL+KU+1, and the multipliers used during the 00049 * factorization are stored in rows KL+KU+2 to 2*KL+KU+1. 00050 * See below for further details. 00051 * 00052 * LDAB (input) INTEGER 00053 * The leading dimension of the array AB. LDAB >= 2*KL+KU+1. 00054 * 00055 * IPIV (output) INTEGER array, dimension (min(M,N)) 00056 * The pivot indices; for 1 <= i <= min(M,N), row i of the 00057 * matrix was interchanged with row IPIV(i). 00058 * 00059 * INFO (output) INTEGER 00060 * = 0: successful exit 00061 * < 0: if INFO = -i, the i-th argument had an illegal value 00062 * > 0: if INFO = +i, U(i,i) is exactly zero. The factorization 00063 * has been completed, but the factor U is exactly 00064 * singular, and division by zero will occur if it is used 00065 * to solve a system of equations. 00066 * 00067 * Further Details 00068 * =============== 00069 * 00070 * The band storage scheme is illustrated by the following example, when 00071 * M = N = 6, KL = 2, KU = 1: 00072 * 00073 * On entry: On exit: 00074 * 00075 * * * * + + + * * * u14 u25 u36 00076 * * * + + + + * * u13 u24 u35 u46 00077 * * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 00078 * a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 00079 * a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * 00080 * a31 a42 a53 a64 * * m31 m42 m53 m64 * * 00081 * 00082 * Array elements marked * are not used by the routine; elements marked 00083 * + need not be set on entry, but are required by the routine to store 00084 * elements of U because of fill-in resulting from the row interchanges. 00085 * 00086 * ===================================================================== 00087 * 00088 * .. Parameters .. 00089 COMPLEX ONE, ZERO 00090 PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), 00091 $ ZERO = ( 0.0E+0, 0.0E+0 ) ) 00092 INTEGER NBMAX, LDWORK 00093 PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 ) 00094 * .. 00095 * .. Local Scalars .. 00096 INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP, 00097 $ JU, K2, KM, KV, NB, NW 00098 COMPLEX TEMP 00099 * .. 00100 * .. Local Arrays .. 00101 COMPLEX WORK13( LDWORK, NBMAX ), 00102 $ WORK31( LDWORK, NBMAX ) 00103 * .. 00104 * .. External Functions .. 00105 INTEGER ICAMAX, ILAENV 00106 EXTERNAL ICAMAX, ILAENV 00107 * .. 00108 * .. External Subroutines .. 00109 EXTERNAL CCOPY, CGBTF2, CGEMM, CGERU, CLASWP, CSCAL, 00110 $ CSWAP, CTRSM, XERBLA 00111 * .. 00112 * .. Intrinsic Functions .. 00113 INTRINSIC MAX, MIN 00114 * .. 00115 * .. Executable Statements .. 00116 * 00117 * KV is the number of superdiagonals in the factor U, allowing for 00118 * fill-in 00119 * 00120 KV = KU + KL 00121 * 00122 * Test the input parameters. 00123 * 00124 INFO = 0 00125 IF( M.LT.0 ) THEN 00126 INFO = -1 00127 ELSE IF( N.LT.0 ) THEN 00128 INFO = -2 00129 ELSE IF( KL.LT.0 ) THEN 00130 INFO = -3 00131 ELSE IF( KU.LT.0 ) THEN 00132 INFO = -4 00133 ELSE IF( LDAB.LT.KL+KV+1 ) THEN 00134 INFO = -6 00135 END IF 00136 IF( INFO.NE.0 ) THEN 00137 CALL XERBLA( 'CGBTRF', -INFO ) 00138 RETURN 00139 END IF 00140 * 00141 * Quick return if possible 00142 * 00143 IF( M.EQ.0 .OR. N.EQ.0 ) 00144 $ RETURN 00145 * 00146 * Determine the block size for this environment 00147 * 00148 NB = ILAENV( 1, 'CGBTRF', ' ', M, N, KL, KU ) 00149 * 00150 * The block size must not exceed the limit set by the size of the 00151 * local arrays WORK13 and WORK31. 00152 * 00153 NB = MIN( NB, NBMAX ) 00154 * 00155 IF( NB.LE.1 .OR. NB.GT.KL ) THEN 00156 * 00157 * Use unblocked code 00158 * 00159 CALL CGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) 00160 ELSE 00161 * 00162 * Use blocked code 00163 * 00164 * Zero the superdiagonal elements of the work array WORK13 00165 * 00166 DO 20 J = 1, NB 00167 DO 10 I = 1, J - 1 00168 WORK13( I, J ) = ZERO 00169 10 CONTINUE 00170 20 CONTINUE 00171 * 00172 * Zero the subdiagonal elements of the work array WORK31 00173 * 00174 DO 40 J = 1, NB 00175 DO 30 I = J + 1, NB 00176 WORK31( I, J ) = ZERO 00177 30 CONTINUE 00178 40 CONTINUE 00179 * 00180 * Gaussian elimination with partial pivoting 00181 * 00182 * Set fill-in elements in columns KU+2 to KV to zero 00183 * 00184 DO 60 J = KU + 2, MIN( KV, N ) 00185 DO 50 I = KV - J + 2, KL 00186 AB( I, J ) = ZERO 00187 50 CONTINUE 00188 60 CONTINUE 00189 * 00190 * JU is the index of the last column affected by the current 00191 * stage of the factorization 00192 * 00193 JU = 1 00194 * 00195 DO 180 J = 1, MIN( M, N ), NB 00196 JB = MIN( NB, MIN( M, N )-J+1 ) 00197 * 00198 * The active part of the matrix is partitioned 00199 * 00200 * A11 A12 A13 00201 * A21 A22 A23 00202 * A31 A32 A33 00203 * 00204 * Here A11, A21 and A31 denote the current block of JB columns 00205 * which is about to be factorized. The number of rows in the 00206 * partitioning are JB, I2, I3 respectively, and the numbers 00207 * of columns are JB, J2, J3. The superdiagonal elements of A13 00208 * and the subdiagonal elements of A31 lie outside the band. 00209 * 00210 I2 = MIN( KL-JB, M-J-JB+1 ) 00211 I3 = MIN( JB, M-J-KL+1 ) 00212 * 00213 * J2 and J3 are computed after JU has been updated. 00214 * 00215 * Factorize the current block of JB columns 00216 * 00217 DO 80 JJ = J, J + JB - 1 00218 * 00219 * Set fill-in elements in column JJ+KV to zero 00220 * 00221 IF( JJ+KV.LE.N ) THEN 00222 DO 70 I = 1, KL 00223 AB( I, JJ+KV ) = ZERO 00224 70 CONTINUE 00225 END IF 00226 * 00227 * Find pivot and test for singularity. KM is the number of 00228 * subdiagonal elements in the current column. 00229 * 00230 KM = MIN( KL, M-JJ ) 00231 JP = ICAMAX( KM+1, AB( KV+1, JJ ), 1 ) 00232 IPIV( JJ ) = JP + JJ - J 00233 IF( AB( KV+JP, JJ ).NE.ZERO ) THEN 00234 JU = MAX( JU, MIN( JJ+KU+JP-1, N ) ) 00235 IF( JP.NE.1 ) THEN 00236 * 00237 * Apply interchange to columns J to J+JB-1 00238 * 00239 IF( JP+JJ-1.LT.J+KL ) THEN 00240 * 00241 CALL CSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1, 00242 $ AB( KV+JP+JJ-J, J ), LDAB-1 ) 00243 ELSE 00244 * 00245 * The interchange affects columns J to JJ-1 of A31 00246 * which are stored in the work array WORK31 00247 * 00248 CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, 00249 $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) 00250 CALL CSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1, 00251 $ AB( KV+JP, JJ ), LDAB-1 ) 00252 END IF 00253 END IF 00254 * 00255 * Compute multipliers 00256 * 00257 CALL CSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ), 00258 $ 1 ) 00259 * 00260 * Update trailing submatrix within the band and within 00261 * the current block. JM is the index of the last column 00262 * which needs to be updated. 00263 * 00264 JM = MIN( JU, J+JB-1 ) 00265 IF( JM.GT.JJ ) 00266 $ CALL CGERU( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1, 00267 $ AB( KV, JJ+1 ), LDAB-1, 00268 $ AB( KV+1, JJ+1 ), LDAB-1 ) 00269 ELSE 00270 * 00271 * If pivot is zero, set INFO to the index of the pivot 00272 * unless a zero pivot has already been found. 00273 * 00274 IF( INFO.EQ.0 ) 00275 $ INFO = JJ 00276 END IF 00277 * 00278 * Copy current column of A31 into the work array WORK31 00279 * 00280 NW = MIN( JJ-J+1, I3 ) 00281 IF( NW.GT.0 ) 00282 $ CALL CCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1, 00283 $ WORK31( 1, JJ-J+1 ), 1 ) 00284 80 CONTINUE 00285 IF( J+JB.LE.N ) THEN 00286 * 00287 * Apply the row interchanges to the other blocks. 00288 * 00289 J2 = MIN( JU-J+1, KV ) - JB 00290 J3 = MAX( 0, JU-J-KV+1 ) 00291 * 00292 * Use CLASWP to apply the row interchanges to A12, A22, and 00293 * A32. 00294 * 00295 CALL CLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB, 00296 $ IPIV( J ), 1 ) 00297 * 00298 * Adjust the pivot indices. 00299 * 00300 DO 90 I = J, J + JB - 1 00301 IPIV( I ) = IPIV( I ) + J - 1 00302 90 CONTINUE 00303 * 00304 * Apply the row interchanges to A13, A23, and A33 00305 * columnwise. 00306 * 00307 K2 = J - 1 + JB + J2 00308 DO 110 I = 1, J3 00309 JJ = K2 + I 00310 DO 100 II = J + I - 1, J + JB - 1 00311 IP = IPIV( II ) 00312 IF( IP.NE.II ) THEN 00313 TEMP = AB( KV+1+II-JJ, JJ ) 00314 AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ ) 00315 AB( KV+1+IP-JJ, JJ ) = TEMP 00316 END IF 00317 100 CONTINUE 00318 110 CONTINUE 00319 * 00320 * Update the relevant part of the trailing submatrix 00321 * 00322 IF( J2.GT.0 ) THEN 00323 * 00324 * Update A12 00325 * 00326 CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', 00327 $ JB, J2, ONE, AB( KV+1, J ), LDAB-1, 00328 $ AB( KV+1-JB, J+JB ), LDAB-1 ) 00329 * 00330 IF( I2.GT.0 ) THEN 00331 * 00332 * Update A22 00333 * 00334 CALL CGEMM( 'No transpose', 'No transpose', I2, J2, 00335 $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, 00336 $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, 00337 $ AB( KV+1, J+JB ), LDAB-1 ) 00338 END IF 00339 * 00340 IF( I3.GT.0 ) THEN 00341 * 00342 * Update A32 00343 * 00344 CALL CGEMM( 'No transpose', 'No transpose', I3, J2, 00345 $ JB, -ONE, WORK31, LDWORK, 00346 $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, 00347 $ AB( KV+KL+1-JB, J+JB ), LDAB-1 ) 00348 END IF 00349 END IF 00350 * 00351 IF( J3.GT.0 ) THEN 00352 * 00353 * Copy the lower triangle of A13 into the work array 00354 * WORK13 00355 * 00356 DO 130 JJ = 1, J3 00357 DO 120 II = JJ, JB 00358 WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 ) 00359 120 CONTINUE 00360 130 CONTINUE 00361 * 00362 * Update A13 in the work array 00363 * 00364 CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', 00365 $ JB, J3, ONE, AB( KV+1, J ), LDAB-1, 00366 $ WORK13, LDWORK ) 00367 * 00368 IF( I2.GT.0 ) THEN 00369 * 00370 * Update A23 00371 * 00372 CALL CGEMM( 'No transpose', 'No transpose', I2, J3, 00373 $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, 00374 $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ), 00375 $ LDAB-1 ) 00376 END IF 00377 * 00378 IF( I3.GT.0 ) THEN 00379 * 00380 * Update A33 00381 * 00382 CALL CGEMM( 'No transpose', 'No transpose', I3, J3, 00383 $ JB, -ONE, WORK31, LDWORK, WORK13, 00384 $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 ) 00385 END IF 00386 * 00387 * Copy the lower triangle of A13 back into place 00388 * 00389 DO 150 JJ = 1, J3 00390 DO 140 II = JJ, JB 00391 AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ ) 00392 140 CONTINUE 00393 150 CONTINUE 00394 END IF 00395 ELSE 00396 * 00397 * Adjust the pivot indices. 00398 * 00399 DO 160 I = J, J + JB - 1 00400 IPIV( I ) = IPIV( I ) + J - 1 00401 160 CONTINUE 00402 END IF 00403 * 00404 * Partially undo the interchanges in the current block to 00405 * restore the upper triangular form of A31 and copy the upper 00406 * triangle of A31 back into place 00407 * 00408 DO 170 JJ = J + JB - 1, J, -1 00409 JP = IPIV( JJ ) - JJ + 1 00410 IF( JP.NE.1 ) THEN 00411 * 00412 * Apply interchange to columns J to JJ-1 00413 * 00414 IF( JP+JJ-1.LT.J+KL ) THEN 00415 * 00416 * The interchange does not affect A31 00417 * 00418 CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, 00419 $ AB( KV+JP+JJ-J, J ), LDAB-1 ) 00420 ELSE 00421 * 00422 * The interchange does affect A31 00423 * 00424 CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, 00425 $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) 00426 END IF 00427 END IF 00428 * 00429 * Copy the current column of A31 back into place 00430 * 00431 NW = MIN( I3, JJ-J+1 ) 00432 IF( NW.GT.0 ) 00433 $ CALL CCOPY( NW, WORK31( 1, JJ-J+1 ), 1, 00434 $ AB( KV+KL+1-JJ+J, JJ ), 1 ) 00435 170 CONTINUE 00436 180 CONTINUE 00437 END IF 00438 * 00439 RETURN 00440 * 00441 * End of CGBTRF 00442 * 00443 END