001:       SUBROUTINE ZGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
     !  Blocked LU factorization with partial pivoting (row interchanges)
     !  of a complex M-by-N band matrix held in band storage in AB.
     !  Lines starting with "!" are annotations added to the listing;
     !  every numbered line is unchanged.
002: *
003: *  -- LAPACK routine (version 3.2) --
004: *     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
005: *     November 2006
006: *
007: *     .. Scalar Arguments ..
008:       INTEGER            INFO, KL, KU, LDAB, M, N
009: *     ..
010: *     .. Array Arguments ..
011:       INTEGER            IPIV( * )
012:       COMPLEX*16         AB( LDAB, * )
013: *     ..
014: *
015: *  Purpose
016: *  =======
017: *
018: *  ZGBTRF computes an LU factorization of a complex m-by-n band matrix A
019: *  using partial pivoting with row interchanges.
020: *
021: *  This is the blocked version of the algorithm, calling Level 3 BLAS.
022: *
023: *  Arguments
024: *  =========
025: *
026: *  M       (input) INTEGER
027: *          The number of rows of the matrix A.  M >= 0.
028: *
029: *  N       (input) INTEGER
030: *          The number of columns of the matrix A.  N >= 0.
031: *
032: *  KL      (input) INTEGER
033: *          The number of subdiagonals within the band of A.  KL >= 0.
034: *
035: *  KU      (input) INTEGER
036: *          The number of superdiagonals within the band of A.  KU >= 0.
037: *
038: *  AB      (input/output) COMPLEX*16 array, dimension (LDAB,N)
039: *          On entry, the matrix A in band storage, in rows KL+1 to
040: *          2*KL+KU+1; rows 1 to KL of the array need not be set.
041: *          The j-th column of A is stored in the j-th column of the
042: *          array AB as follows:
043: *          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
044: *
045: *          On exit, details of the factorization: U is stored as an
046: *          upper triangular band matrix with KL+KU superdiagonals in
047: *          rows 1 to KL+KU+1, and the multipliers used during the
048: *          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
049: *          See below for further details.
050: *
051: *  LDAB    (input) INTEGER
052: *          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
053: *
054: *  IPIV    (output) INTEGER array, dimension (min(M,N))
055: *          The pivot indices; for 1 <= i <= min(M,N), row i of the
056: *          matrix was interchanged with row IPIV(i).
057: *
058: *  INFO    (output) INTEGER
059: *          = 0: successful exit
060: *          < 0: if INFO = -i, the i-th argument had an illegal value
061: *          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
062: *               has been completed, but the factor U is exactly
063: *               singular, and division by zero will occur if it is used
064: *               to solve a system of equations.
065: *
066: *  Further Details
067: *  ===============
068: *
069: *  The band storage scheme is illustrated by the following example, when
070: *  M = N = 6, KL = 2, KU = 1:
071: *
072: *  On entry:                       On exit:
073: *
074: *      *    *    *    +    +    +       *    *    *   u14  u25  u36
075: *      *    *    +    +    +    +       *    *   u13  u24  u35  u46
076: *      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
077: *     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
078: *     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
079: *     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
080: *
081: *  Array elements marked * are not used by the routine; elements marked
082: *  + need not be set on entry, but are required by the routine to store
083: *  elements of U because of fill-in resulting from the row interchanges.
084: *
085: *  =====================================================================
086: *
087: *     .. Parameters ..
088:       COMPLEX*16         ONE, ZERO
089:       PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ),
090:      $                   ZERO = ( 0.0D+0, 0.0D+0 ) )
091:       INTEGER            NBMAX, LDWORK
092:       PARAMETER          ( NBMAX = 64, LDWORK = NBMAX+1 )
     !  NBMAX caps the block size because WORK13 and WORK31 below are
     !  fixed-size local arrays of shape LDWORK-by-NBMAX (65-by-64); the
     !  routine takes no workspace argument.
093: *     ..
094: *     .. Local Scalars ..
095:       INTEGER            I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
096:      $                   JU, K2, KM, KV, NB, NW
097:       COMPLEX*16         TEMP
098: *     ..
099: *     .. Local Arrays ..
100:       COMPLEX*16         WORK13( LDWORK, NBMAX ),
101:      $                   WORK31( LDWORK, NBMAX )
102: *     ..
103: *     .. External Functions ..
104:       INTEGER            ILAENV, IZAMAX
105:       EXTERNAL           ILAENV, IZAMAX
106: *     ..
107: *     .. External Subroutines ..
108:       EXTERNAL           XERBLA, ZCOPY, ZGBTF2, ZGEMM, ZGERU, ZLASWP,
109:      $                   ZSCAL, ZSWAP, ZTRSM
110: *     ..
111: *     .. Intrinsic Functions ..
112:       INTRINSIC          MAX, MIN
113: *     ..
114: *     .. Executable Statements ..
115: *
116: *     KV is the number of superdiagonals in the factor U, allowing for
117: *     fill-in
118: *
119:       KV = KU + KL
     !  With the storage rule AB(KL+KU+1+i-j,j) = A(i,j), the diagonal
     !  entry A(j,j) therefore sits at AB(KV+1,j); all AB row indices
     !  below are written as offsets from KV+1.
120: *
121: *     Test the input parameters.
122: *
     !  KL+KV+1 equals 2*KL+KU+1, the documented minimum for LDAB (the
     !  6th argument, hence INFO = -6).  AB (5th) and IPIV (7th) are not
     !  validated.  Only the first failing argument is reported.
123:       INFO = 0
124:       IF( M.LT.0 ) THEN
125:          INFO = -1
126:       ELSE IF( N.LT.0 ) THEN
127:          INFO = -2
128:       ELSE IF( KL.LT.0 ) THEN
129:          INFO = -3
130:       ELSE IF( KU.LT.0 ) THEN
131:          INFO = -4
132:       ELSE IF( LDAB.LT.KL+KV+1 ) THEN
133:          INFO = -6
134:       END IF
135:       IF( INFO.NE.0 ) THEN
136:          CALL XERBLA( 'ZGBTRF', -INFO )
137:          RETURN
138:       END IF
139: *
140: *     Quick return if possible
141: *
     !  INFO is still 0 here, so an empty matrix returns success without
     !  touching AB or IPIV.
142:       IF( M.EQ.0 .OR. N.EQ.0 )
143:      $   RETURN
144: *
145: *     Determine the block size for this environment
146: *
147:       NB = ILAENV( 1, 'ZGBTRF', ' ', M, N, KL, KU )
148: *
149: *     The block size must not exceed the limit set by the size of the
150: *     local arrays WORK13 and WORK31.
151: *
152:       NB = MIN( NB, NBMAX )
153: *
     !  The blocked path is taken only for 2 <= NB <= KL.  Its row
     !  partition uses KL-JB rows for the middle block (see I2 below),
     !  which could go negative for a block wider than KL.  In every
     !  other case the whole job is delegated to the unblocked ZGBTF2.
154:       IF( NB.LE.1 .OR. NB.GT.KL ) THEN
155: *
156: *        Use unblocked code
157: *
158:          CALL ZGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
159:       ELSE
160: *
161: *        Use blocked code
162: *
     !  Only the lower triangle of A13 and the upper triangle of A31 are
     !  copied from AB into WORK13/WORK31 further down, yet both arrays
     !  are handed to ZTRSM/ZGEMM as full rectangular operands.  The
     !  complementary triangles (outside the band) are zeroed here once.
163: *        Zero the superdiagonal elements of the work array WORK13
164: *
165:          DO 20 J = 1, NB
166:             DO 10 I = 1, J - 1
167:                WORK13( I, J ) = ZERO
168:    10       CONTINUE
169:    20    CONTINUE
170: *
171: *        Zero the subdiagonal elements of the work array WORK31
172: *
173:          DO 40 J = 1, NB
174:             DO 30 I = J + 1, NB
175:                WORK31( I, J ) = ZERO
176:    30       CONTINUE
177:    40    CONTINUE
178: *
179: *        Gaussian elimination with partial pivoting
180: *
181: *        Set fill-in elements in columns KU+2 to KV to zero
182: *
183:          DO 60 J = KU + 2, MIN( KV, N )
184:             DO 50 I = KV - J + 2, KL
185:                AB( I, J ) = ZERO
186:    50       CONTINUE
187:    60    CONTINUE
188: *
189: *        JU is the index of the last column affected by the current
190: *        stage of the factorization
191: *
192:          JU = 1
193: *
     !  J is the first column of the current block and JB its width; the
     !  last block may be narrower than NB.
194:          DO 180 J = 1, MIN( M, N ), NB
195:             JB = MIN( NB, MIN( M, N )-J+1 )
196: *
197: *           The active part of the matrix is partitioned
198: *
199: *              A11   A12   A13
200: *              A21   A22   A23
201: *              A31   A32   A33
202: *
203: *           Here A11, A21 and A31 denote the current block of JB columns
204: *           which is about to be factorized. The number of rows in the
205: *           partitioning are JB, I2, I3 respectively, and the numbers
206: *           of columns are JB, J2, J3. The superdiagonal elements of A13
207: *           and the subdiagonal elements of A31 lie outside the band.
208: *
     !  I2 is clipped by the number of rows J+JB..M (M-J-JB+1) and I3 by
     !  the number of rows J+KL..M (M-J-KL+1).  Near the bottom of the
     !  matrix either can be <= 0; the I2.GT.0, I3.GT.0 and NW.GT.0 tests
     !  below skip the corresponding work in that case.
209:             I2 = MIN( KL-JB, M-J-JB+1 )
210:             I3 = MIN( JB, M-J-KL+1 )
211: *
212: *           J2 and J3 are computed after JU has been updated.
213: *
214: *           Factorize the current block of JB columns
215: *
216:             DO 80 JJ = J, J + JB - 1
217: *
218: *              Set fill-in elements in column JJ+KV to zero
219: *
220:                IF( JJ+KV.LE.N ) THEN
221:                   DO 70 I = 1, KL
222:                      AB( I, JJ+KV ) = ZERO
223:    70             CONTINUE
224:                END IF
225: *
226: *              Find pivot and test for singularity. KM is the number of
227: *              subdiagonal elements in the current column.
228: *
229:                KM = MIN( KL, M-JJ )
230:                JP = IZAMAX( KM+1, AB( KV+1, JJ ), 1 )
231:                IPIV( JJ ) = JP + JJ - J
     !  JP is the 1-based position of the pivot counted from the diagonal
     !  of column JJ, so the pivot row is JJ+JP-1.  IPIV(JJ) is stored
     !  relative to the first row of the block for now (row J maps to 1),
     !  which is the form passed to ZLASWP below; the later "+ J - 1"
     !  adjustment turns it into a global row index.
232:                IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
233:                   JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
234:                   IF( JP.NE.1 ) THEN
235: *
236: *                    Apply interchange to columns J to J+JB-1
237: *
     !  Stride LDAB-1 walks along one matrix row in band storage: moving
     !  one column to the right puts the same row one position higher in
     !  AB, i.e. LDAB-1 elements ahead in memory.  The same device lets
     !  the ZLASWP/ZTRSM/ZGEMM calls below treat sub-blocks of AB as
     !  ordinary dense matrices with leading dimension LDAB-1.
     !  JP+JJ-1 is the pivot row; rows from J+KL onwards belong to A31.
238:                      IF( JP+JJ-1.LT.J+KL ) THEN
239: *
240:                         CALL ZSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
241:      $                              AB( KV+JP+JJ-J, J ), LDAB-1 )
242:                      ELSE
243: *
244: *                       The interchange affects columns J to JJ-1 of A31
245: *                       which are stored in the work array WORK31
246: *
247:                         CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
248:      $                              WORK31( JP+JJ-J-KL, 1 ), LDWORK )
     !  The remaining J+JB-JJ columns (JJ to J+JB-1) of the two rows
     !  are still held in AB and are swapped there.
249:                         CALL ZSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
250:      $                              AB( KV+JP, JJ ), LDAB-1 )
251:                      END IF
252:                   END IF
253: *
254: *                 Compute multipliers
255: *
     !  After the interchange the pivot is on the diagonal at
     !  AB(KV+1,JJ); the KM entries below it are scaled by its
     !  reciprocal.
256:                   CALL ZSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
257:      $                        1 )
258: *
259: *                 Update trailing submatrix within the band and within
260: *                 the current block. JM is the index of the last column
261: *                 which needs to be updated.
262: *
263:                   JM = MIN( JU, J+JB-1 )
264:                   IF( JM.GT.JJ )
265:      $               CALL ZGERU( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
266:      $                           AB( KV, JJ+1 ), LDAB-1,
267:      $                           AB( KV+1, JJ+1 ), LDAB-1 )
268:                ELSE
269: *
270: *                 If pivot is zero, set INFO to the index of the pivot
271: *                 unless a zero pivot has already been found.
272: *
     !  No early exit: the loop carries on with the remaining columns
     !  and INFO keeps the index of the first zero pivot.
273:                   IF( INFO.EQ.0 )
274:      $               INFO = JJ
275:                END IF
276: *
277: *              Copy current column of A31 into the work array WORK31
278: *
279:                NW = MIN( JJ-J+1, I3 )
280:                IF( NW.GT.0 )
281:      $            CALL ZCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
282:      $                        WORK31( 1, JJ-J+1 ), 1 )
283:    80       CONTINUE
     !  J+JB.LE.N: there are columns to the right of the block that
     !  still need the interchanges and the trailing update.
284:             IF( J+JB.LE.N ) THEN
285: *
286: *              Apply the row interchanges to the other blocks.
287: *
     !  J2 counts the columns J+JB .. MIN(JU,J+KV-1) (A12/A22/A32) and
     !  J3 the columns J+KV .. JU (A13/A23/A33).
     !  NOTE(review): ZLASWP is called before the J2.GT.0 test, so it
     !  can receive J2 <= 0 -- presumably a no-op there; confirm against
     !  the ZLASWP implementation.
288:                J2 = MIN( JU-J+1, KV ) - JB
289:                J3 = MAX( 0, JU-J-KV+1 )
290: *
291: *              Use ZLASWP to apply the row interchanges to A12, A22, and
292: *              A32.
293: *
294:                CALL ZLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
295:      $                      IPIV( J ), 1 )
296: *
297: *              Adjust the pivot indices.
298: *
     !  From here on IPIV(J..J+JB-1) holds global row indices.
299:                DO 90 I = J, J + JB - 1
300:                   IPIV( I ) = IPIV( I ) + J - 1
301:    90          CONTINUE
302: *
303: *              Apply the row interchanges to A13, A23, and A33
304: *              columnwise.
305: *
     !  K2 is the last column handled by ZLASWP above, so columns
     !  K2+1 .. K2+J3 are processed here, with JJ reused as the column
     !  index.  For the I-th of them the interchanges start at row
     !  II = J+I-1, which keeps the AB row index KV+1+II-JJ >= 1 when
     !  J3 > 0 (rows above that lie outside the band).
306:                K2 = J - 1 + JB + J2
307:                DO 110 I = 1, J3
308:                   JJ = K2 + I
309:                   DO 100 II = J + I - 1, J + JB - 1
310:                      IP = IPIV( II )
311:                      IF( IP.NE.II ) THEN
312:                         TEMP = AB( KV+1+II-JJ, JJ )
313:                         AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
314:                         AB( KV+1+IP-JJ, JJ ) = TEMP
315:                      END IF
316:   100             CONTINUE
317:   110          CONTINUE
318: *
319: *              Update the relevant part of the trailing submatrix
320: *
321:                IF( J2.GT.0 ) THEN
322: *
323: *                 Update A12
324: *
325:                   CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
326:      $                        JB, J2, ONE, AB( KV+1, J ), LDAB-1,
327:      $                        AB( KV+1-JB, J+JB ), LDAB-1 )
328: *
329:                   IF( I2.GT.0 ) THEN
330: *
331: *                    Update A22
332: *
333:                      CALL ZGEMM( 'No transpose', 'No transpose', I2, J2,
334:      $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
335:      $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
336:      $                           AB( KV+1, J+JB ), LDAB-1 )
337:                   END IF
338: *
339:                   IF( I3.GT.0 ) THEN
340: *
341: *                    Update A32
342: *
343:                      CALL ZGEMM( 'No transpose', 'No transpose', I3, J2,
344:      $                           JB, -ONE, WORK31, LDWORK,
345:      $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
346:      $                           AB( KV+KL+1-JB, J+JB ), LDAB-1 )
347:                   END IF
348:                END IF
349: *
350:                IF( J3.GT.0 ) THEN
351: *
352: *                 Copy the lower triangle of A13 into the work array
353: *                 WORK13
354: *
     !  WORK13(II,JJ) mirrors AB(II-JJ+1,JJ+J+KV-1): row II of the
     !  block in the JJ-th column to the right of column J+KV-1.
355:                   DO 130 JJ = 1, J3
356:                      DO 120 II = JJ, JB
357:                         WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
358:   120                CONTINUE
359:   130             CONTINUE
360: *
361: *                 Update A13 in the work array
362: *
363:                   CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
364:      $                        JB, J3, ONE, AB( KV+1, J ), LDAB-1,
365:      $                        WORK13, LDWORK )
366: *
367:                   IF( I2.GT.0 ) THEN
368: *
369: *                    Update A23
370: *
371:                      CALL ZGEMM( 'No transpose', 'No transpose', I2, J3,
372:      $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
373:      $                           WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
374:      $                           LDAB-1 )
375:                   END IF
376: *
377:                   IF( I3.GT.0 ) THEN
378: *
379: *                    Update A33
380: *
381:                      CALL ZGEMM( 'No transpose', 'No transpose', I3, J3,
382:      $                           JB, -ONE, WORK31, LDWORK, WORK13,
383:      $                           LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
384:                   END IF
385: *
386: *                 Copy the lower triangle of A13 back into place
387: *
388:                   DO 150 JJ = 1, J3
389:                      DO 140 II = JJ, JB
390:                         AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
391:   140                CONTINUE
392:   150             CONTINUE
393:                END IF
394:             ELSE
395: *
396: *              Adjust the pivot indices.
397: *
     !  No columns remain to the right of the block; only the
     !  block-relative to global IPIV conversion is needed.
398:                DO 160 I = J, J + JB - 1
399:                   IPIV( I ) = IPIV( I ) + J - 1
400:   160          CONTINUE
401:             END IF
402: *
403: *           Partially undo the interchanges in the current block to
404: *           restore the upper triangular form of A31 and copy the upper
405: *           triangle of A31 back into place
406: *
     !  The block's columns are visited in reverse order.  JP is
     !  recomputed from the (now global) IPIV as the pivot's position
     !  counted from the diagonal, and only columns J to JJ-1 (JJ-J
     !  elements) of the two rows are swapped back.
407:             DO 170 JJ = J + JB - 1, J, -1
408:                JP = IPIV( JJ ) - JJ + 1
409:                IF( JP.NE.1 ) THEN
410: *
411: *                 Apply interchange to columns J to JJ-1
412: *
413:                   IF( JP+JJ-1.LT.J+KL ) THEN
414: *
415: *                    The interchange does not affect A31
416: *
417:                      CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
418:      $                           AB( KV+JP+JJ-J, J ), LDAB-1 )
419:                   ELSE
420: *
421: *                    The interchange does affect A31
422: *
423:                      CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
424:      $                           WORK31( JP+JJ-J-KL, 1 ), LDWORK )
425:                   END IF
426:                END IF
427: *
428: *              Copy the current column of A31 back into place
429: *
430:                NW = MIN( I3, JJ-J+1 )
431:                IF( NW.GT.0 )
432:      $            CALL ZCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
433:      $                        AB( KV+KL+1-JJ+J, JJ ), 1 )
434:   170       CONTINUE
435:   180    CONTINUE
436:       END IF
437: *
438:       RETURN
439: *
440: *     End of ZGBTRF
441: *
442:       END
443: