d3/d2b/VARIANTS_2lu_2REC_2cgetrf_8f_source.html

00001       SUBROUTINE CGETRF( M, N, A, LDA, IPIV, INFO )
00002       IMPLICIT NONE
00003 *
00004 *  -- LAPACK routine (version 3.X) --
00005 *     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
00006 *     May 2008
00007 *
00008 *     .. Scalar Arguments ..
00009       INTEGER            INFO, LDA, M, N
00010 *     ..
00011 *     .. Array Arguments ..
00012       INTEGER            IPIV( * )
00013       COMPLEX            A( LDA, * )
00014 *     ..
00015 *
00016 *  Purpose
00017 *  =======
00018 *
00019 *  CGETRF computes an LU factorization of a general M-by-N matrix A
00020 *  using partial pivoting with row interchanges.
00021 *
00022 *  The factorization has the form
00023 *     A = P * L * U
00024 *  where P is a permutation matrix, L is lower triangular with unit
00025 *  diagonal elements (lower trapezoidal if m > n), and U is upper
00026 *  triangular (upper trapezoidal if m < n).
00027 *
00028 *  This code implements an iterative version of Sivan Toledo's recursive
00029 *  LU algorithm[1].  For square matrices, this iterative versions should
00030 *  be within a factor of two of the optimum number of memory transfers.
00031 *
00032 *  The pattern is as follows, with the large blocks of U being updated
00033 *  in one call to DTRSM, and the dotted lines denoting sections that
00034 *  have had all pending permutations applied:
00035 *
00036 *   1 2 3 4 5 6 7 8
00037 *  +-+-+---+-------+------
00038 *  | |1|   |       |
00039 *  |.+-+ 2 |       |
00040 *  | | |   |       |
00041 *  |.|.+-+-+   4   |
00042 *  | | | |1|       |
00043 *  | | |.+-+       |
00044 *  | | | | |       |
00045 *  |.|.|.|.+-+-+---+  8
00046 *  | | | | | |1|   |
00047 *  | | | | |.+-+ 2 |
00048 *  | | | | | | |   |
00049 *  | | | | |.|.+-+-+
00050 *  | | | | | | | |1|
00051 *  | | | | | | |.+-+
00052 *  | | | | | | | | |
00053 *  |.|.|.|.|.|.|.|.+-----
00054 *  | | | | | | | | |
00055 *
00056 *  The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in
00057 *  the binary expansion of the current column.  Each Schur update is
00058 *  applied as soon as the necessary portion of U is available.
00059 *
00060 *  [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with
00061 *  Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997),
00062 *  1065-1081. http://dx.doi.org/10.1137/S0895479896297744
00063 *
00064 *  Arguments
00065 *  =========
00066 *
00067 *  M       (input) INTEGER
00068 *          The number of rows of the matrix A.  M >= 0.
00069 *
00070 *  N       (input) INTEGER
00071 *          The number of columns of the matrix A.  N >= 0.
00072 *
00073 *  A       (input/output) COMPLEX array, dimension (LDA,N)
00074 *          On entry, the M-by-N matrix to be factored.
00075 *          On exit, the factors L and U from the factorization
00076 *          A = P*L*U; the unit diagonal elements of L are not stored.
00077 *
00078 *  LDA     (input) INTEGER
00079 *          The leading dimension of the array A.  LDA >= max(1,M).
00080 *
00081 *  IPIV    (output) INTEGER array, dimension (min(M,N))
00082 *          The pivot indices; for 1 <= i <= min(M,N), row i of the
00083 *          matrix was interchanged with row IPIV(i).
00084 *
00085 *  INFO    (output) INTEGER
00086 *          = 0:  successful exit
00087 *          < 0:  if INFO = -i, the i-th argument had an illegal value
00088 *          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
00089 *                has been completed, but the factor U is exactly
00090 *                singular, and division by zero will occur if it is used
00091 *                to solve a system of equations.
00092 *
00093 *  =====================================================================
00094 *
00095 *     .. Parameters ..
00096       COMPLEX            ONE, NEGONE
00097       REAL               ZERO
00098       PARAMETER          ( ONE = (1.0E+0, 0.0E+0) )
00099       PARAMETER          ( NEGONE = (-1.0E+0, 0.0E+0) )
00100       PARAMETER          ( ZERO = 0.0E+0 )
00101 *     ..
00102 *     .. Local Scalars ..
00103       REAL               SFMIN, PIVMAG
00104       COMPLEX            TMP
00105       INTEGER            I, J, JP, NSTEP, NTOPIV, NPIVED, KAHEAD
00106       INTEGER            KSTART, IPIVSTART, JPIVSTART, KCOLS
00107 *     ..
00108 *     .. External Functions ..
00109       REAL               SLAMCH
00110       INTEGER            ICAMAX
00111       LOGICAL            SISNAN
00112       EXTERNAL           SLAMCH, ICAMAX, SISNAN
00113 *     ..
00114 *     .. External Subroutines ..
00115       EXTERNAL           CTRSM, CSCAL, XERBLA, CLASWP
00116 *     ..
00117 *     .. Intrinsic Functions ..
00118       INTRINSIC          MAX, MIN, IAND, ABS
00119 *     ..
00120 *     .. Executable Statements ..
00121 *
00122 *     Test the input parameters.
00123 *
00124       INFO = 0
00125       IF( M.LT.0 ) THEN
00126          INFO = -1
00127       ELSE IF( N.LT.0 ) THEN
00128          INFO = -2
00129       ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
00130          INFO = -4
00131       END IF
00132       IF( INFO.NE.0 ) THEN
00133          CALL XERBLA( 'CGETRF', -INFO )
00134          RETURN
00135       END IF
00136 *
00137 *     Quick return if possible
00138 *
00139       IF( M.EQ.0 .OR. N.EQ.0 )
00140      $   RETURN
00141 *
00142 *     Compute machine safe minimum
00143 *
00144       SFMIN = SLAMCH( 'S' )
00145 *
00146       NSTEP = MIN( M, N )
00147       DO J = 1, NSTEP
00148          KAHEAD = IAND( J, -J )
00149          KSTART = J + 1 - KAHEAD
00150          KCOLS = MIN( KAHEAD, M-J )
00151 *
00152 *        Find pivot.
00153 *
00154          JP = J - 1 + ICAMAX( M-J+1, A( J, J ), 1 )
00155          IPIV( J ) = JP
00156
00157 !        Permute just this column.
00158          IF (JP .NE. J) THEN
00159             TMP = A( J, J )
00160             A( J, J ) = A( JP, J )
00161             A( JP, J ) = TMP
00162          END IF
00163
00164 !        Apply pending permutations to L
00165          NTOPIV = 1
00166          IPIVSTART = J
00167          JPIVSTART = J - NTOPIV
00168          DO WHILE ( NTOPIV .LT. KAHEAD )
00169             CALL CLASWP( NTOPIV, A( 1, JPIVSTART ), LDA, IPIVSTART, J,
00170      $           IPIV, 1 )
00171             IPIVSTART = IPIVSTART - NTOPIV;
00172             NTOPIV = NTOPIV * 2;
00173             JPIVSTART = JPIVSTART - NTOPIV;
00174          END DO
00175
00176 !        Permute U block to match L
00177          CALL CLASWP( KCOLS, A( 1,J+1 ), LDA, KSTART, J, IPIV, 1 )
00178
00179 !        Factor the current column
00180          PIVMAG = ABS( A( J, J ) )
00181          IF( PIVMAG.NE.ZERO .AND. .NOT.SISNAN( PIVMAG ) ) THEN
00182                IF( PIVMAG .GE. SFMIN ) THEN
00183                   CALL CSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 )
00184                ELSE
00185                  DO I = 1, M-J
00186                     A( J+I, J ) = A( J+I, J ) / A( J, J )
00187                  END DO
00188                END IF
00189          ELSE IF( PIVMAG .EQ. ZERO .AND. INFO .EQ. 0 ) THEN
00190             INFO = J
00191          END IF
00192
00193 !        Solve for U block.
00194          CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', KAHEAD,
00195      $        KCOLS, ONE, A( KSTART, KSTART ), LDA,
00196      $        A( KSTART, J+1 ), LDA )
00197 !        Schur complement.
00198          CALL CGEMM( 'No transpose', 'No transpose', M-J,
00199      $        KCOLS, KAHEAD, NEGONE, A( J+1, KSTART ), LDA,
00200      $        A( KSTART, J+1 ), LDA, ONE, A( J+1, J+1 ), LDA )
00201       END DO
00202
00203 !     Handle pivot permutations on the way out of the recursion
00204       NPIVED = IAND( NSTEP, -NSTEP )
00205       J = NSTEP - NPIVED
00206       DO WHILE ( J .GT. 0 )
00207          NTOPIV = IAND( J, -J )
00208          CALL CLASWP( NTOPIV, A( 1, J-NTOPIV+1 ), LDA, J+1, NSTEP,
00209      $        IPIV, 1 )
00210          J = J - NTOPIV
00211       END DO
00212
00213 !     If short and wide, handle the rest of the columns.
00214       IF ( M .LT. N ) THEN
00215          CALL CLASWP( N-M, A( 1, M+KCOLS+1 ), LDA, 1, M, IPIV, 1 )
00216          CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', M,
00217      $        N-M, ONE, A, LDA, A( 1,M+KCOLS+1 ), LDA )
00218       END IF
00219
00220       RETURN
00221 *
00222 *     End of CGETRF
00223 *
00224       END