dc/d59/_v_a_r_i_a_n_t_s_2lu_2_r_e_c_2sgetrf_8f_source.html

 C> \brief \b SGETRF VARIANT: iterative version of Sivan Toledo's recursive LU algorithm

 *

 *  =========== DOCUMENTATION ===========

 *

 * Online html documentation available at

 *            http://www.netlib.org/lapack/explore-html/

 *

 *  Definition:

 *  ===========

 *

 *       SUBROUTINE SGETRF( M, N, A, LDA, IPIV, INFO )

 *

 *       .. Scalar Arguments ..

 *       INTEGER            INFO, LDA, M, N

 *       ..

 *       .. Array Arguments ..

 *       INTEGER            IPIV( * )

 *       REAL               A( LDA, * )

 *       ..

 *

 *  Purpose

 *  =======

 *

 C>\details \b Purpose:

 C>\verbatim

 C>

 C> SGETRF computes an LU factorization of a general M-by-N matrix A

 C> using partial pivoting with row interchanges.

 C>

 C> The factorization has the form

 C>    A = P * L * U

 C> where P is a permutation matrix, L is lower triangular with unit

 C> diagonal elements (lower trapezoidal if m > n), and U is upper

 C> triangular (upper trapezoidal if m < n).

 C>

 C> This code implements an iterative version of Sivan Toledo's recursive

 C> LU algorithm[1].  For square matrices, this iterative version should

 C> be within a factor of two of the optimum number of memory transfers.

 C>

 C> The pattern is as follows, with the large blocks of U being updated

 C> in one call to STRSM, and the dotted lines denoting sections that

 C> have had all pending permutations applied:

 C>

 C>  1 2 3 4 5 6 7 8

 C> +-+-+---+-------+------

 C> | |1|   |       |

 C> |.+-+ 2 |       |

 C> | | |   |       |

 C> |.|.+-+-+   4   |

 C> | | | |1|       |

 C> | | |.+-+       |

 C> | | | | |       |

 C> |.|.|.|.+-+-+---+  8

 C> | | | | | |1|   |

 C> | | | | |.+-+ 2 |

 C> | | | | | | |   |

 C> | | | | |.|.+-+-+

 C> | | | | | | | |1|

 C> | | | | | | |.+-+

 C> | | | | | | | | |

 C> |.|.|.|.|.|.|.|.+-----

 C> | | | | | | | | |

 C>

 C> The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in

 C> the binary expansion of the current column.  Each Schur update is

 C> applied as soon as the necessary portion of U is available.

 C>

 C> [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with

 C> Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997),

 C> 1065-1081. http://dx.doi.org/10.1137/S0895479896297744

 C>

 C>\endverbatim

 *

 *  Arguments:

 *  ==========

 *

 C> \param[in] M

 C> \verbatim

 C>          M is INTEGER

 C>          The number of rows of the matrix A.  M >= 0.

 C> \endverbatim

 C>

 C> \param[in] N

 C> \verbatim

 C>          N is INTEGER

 C>          The number of columns of the matrix A.  N >= 0.

 C> \endverbatim

 C>

 C> \param[in,out] A

 C> \verbatim

 C>          A is REAL array, dimension (LDA,N)

 C>          On entry, the M-by-N matrix to be factored.

 C>          On exit, the factors L and U from the factorization

 C>          A = P*L*U; the unit diagonal elements of L are not stored.

 C> \endverbatim

 C>

 C> \param[in] LDA

 C> \verbatim

 C>          LDA is INTEGER

 C>          The leading dimension of the array A.  LDA >= max(1,M).

 C> \endverbatim

 C>

 C> \param[out] IPIV

 C> \verbatim

 C>          IPIV is INTEGER array, dimension (min(M,N))

 C>          The pivot indices; for 1 <= i <= min(M,N), row i of the

 C>          matrix was interchanged with row IPIV(i).

 C> \endverbatim

 C>

 C> \param[out] INFO

 C> \verbatim

 C>          INFO is INTEGER

 C>          = 0:  successful exit

 C>          < 0:  if INFO = -i, the i-th argument had an illegal value

 C>          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization

 C>                has been completed, but the factor U is exactly

 C>                singular, and division by zero will occur if it is used

 C>                to solve a system of equations.

 C> \endverbatim

 C>

 *

 *  Authors:

 *  ========

 *

 C> \author Univ. of Tennessee

 C> \author Univ. of California Berkeley

 C> \author Univ. of Colorado Denver

 C> \author NAG Ltd.

 *

 C> \date November 2011

 *

 C> \ingroup variantsGEcomputational

 *

 *  =====================================================================

       SUBROUTINE sgetrf( M, N, A, LDA, IPIV, INFO )
 *
 *  -- LAPACK computational routine (version 3.X) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.--
 *     November 2011
 *
 *     Computes the LU factorization A = P*L*U of the M-by-N matrix A
 *     with partial pivoting, overwriting A with L (unit diagonal not
 *     stored) and U and recording the row interchanges in IPIV.
 *
 *     M, N   (in)     row and column counts, both >= 0.
 *     A      (in/out) REAL array (LDA,*); matrix on entry, factors on
 *                     exit.
 *     LDA    (in)     leading dimension of A, >= max(1,M).
 *     IPIV   (out)    pivot rows for steps 1..min(M,N).
 *     INFO   (out)    0 on success, -i if argument i is illegal (after
 *                     XERBLA has been called), j > 0 if the first
 *                     exactly zero pivot was met at step j.
 *
       IMPLICIT NONE
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, LDA, M, N
 *     ..
 *     .. Array Arguments ..
       INTEGER            IPIV( * )
       REAL               A( lda, * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
       REAL               ONE, ZERO, NEGONE
       parameter                ( one = 1.0e+0, zero = 0.0e+0 )
       parameter                ( negone = -1.0e+0 )
 *     ..
 *     .. Local Scalars ..
       REAL               SFMIN, TMP
       INTEGER            I, J, JP, NSTEP, NTOPIV, NPIVED, KAHEAD
       INTEGER            KSTART, IPIVSTART, JPIVSTART, KCOLS
 *     ..
 *     .. External Functions ..
       REAL               SLAMCH
       INTEGER            ISAMAX
       LOGICAL            SISNAN
       EXTERNAL           slamch, isamax, sisnan
 *     ..
 *     .. External Subroutines ..
       EXTERNAL           strsm, sgemm, sscal, xerbla, slaswp
 *     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          abs, max, min, iand
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input parameters.
 *
       info = 0
       IF( m.LT.0 ) THEN
          info = -1
       ELSE IF( n.LT.0 ) THEN
          info = -2
       ELSE IF( lda.LT.max( 1, m ) ) THEN
          info = -4
       END IF
       IF( info.NE.0 ) THEN
          CALL xerbla( 'SGETRF', -info )
          RETURN
       END IF
 *
 *     Quick return if possible
 *
       IF( m.EQ.0 .OR. n.EQ.0 )
      $   RETURN
 *
 *     Compute machine safe minimum
 *
       sfmin = slamch( 'S' )
 *
       nstep = min( m, n )
       DO j = 1, nstep
 *
 *        KAHEAD is the lowest set bit of J; columns KSTART..J are the
 *        KAHEAD columns of L used to update the columns after J.
 *
          kahead = iand( j, -j )
          kstart = j + 1 - kahead
 *
 *        Number of columns to update after column J.  The bound is
 *        NSTEP-J, not M-J: when M > N the latter would let the update
 *        run past column N of A.  For M <= N the two are identical.
 *
          kcols = min( kahead, nstep-j )
 *
 *        Find pivot.
 *
          jp = j - 1 + isamax( m-j+1, a( j, j ), 1 )
          ipiv( j ) = jp
 *
 *        Permute just this column.
 *
          IF( jp.NE.j ) THEN
             tmp = a( j, j )
             a( j, j ) = a( jp, j )
             a( jp, j ) = tmp
          END IF
 *
 *        Apply pending permutations to L.  Each pass swaps rows in a
 *        block of NTOPIV columns ending at column JPIVSTART+NTOPIV-1,
 *        then doubles the block width and moves left.
 *
          ntopiv = 1
          ipivstart = j
          jpivstart = j - ntopiv
          DO WHILE ( ntopiv.LT.kahead )
             CALL slaswp( ntopiv, a( 1, jpivstart ), lda, ipivstart,
      $                   j, ipiv, 1 )
             ipivstart = ipivstart - ntopiv
             ntopiv = ntopiv*2
             jpivstart = jpivstart - ntopiv
          END DO
 *
 *        Permute U block to match L (nothing to do when KCOLS = 0).
 *
          IF( kcols.GT.0 ) THEN
             CALL slaswp( kcols, a( 1, j+1 ), lda, kstart, j, ipiv,
      $                   1 )
          END IF
 *
 *        Factor the current column.  A NaN pivot leaves the column
 *        unscaled and does not set INFO; an exactly zero pivot is
 *        recorded in INFO the first time it occurs.
 *
          IF( a( j, j ).NE.zero .AND. .NOT.sisnan( a( j, j ) ) ) THEN
 *
 *           Only rows below the diagonal are scaled; skip when J = M
 *           so that A( J+1, J ) is never formed out of range.
 *
             IF( j.LT.m ) THEN
                IF( abs( a( j, j ) ).GE.sfmin ) THEN
                   CALL sscal( m-j, one / a( j, j ), a( j+1, j ), 1 )
                ELSE
 *
 *                 Pivot below SFMIN: divide element by element rather
 *                 than multiply by the reciprocal.
 *
                   DO i = 1, m - j
                      a( j+i, j ) = a( j+i, j ) / a( j, j )
                   END DO
                END IF
             END IF
          ELSE IF( a( j, j ).EQ.zero .AND. info.EQ.0 ) THEN
             info = j
          END IF
 *
          IF( kcols.GT.0 ) THEN
 *
 *           Solve for U block.
 *
             CALL strsm( 'Left', 'Lower', 'No transpose', 'Unit',
      $                  kahead, kcols, one, a( kstart, kstart ), lda,
      $                  a( kstart, j+1 ), lda )
 *
 *           Schur complement.
 *
             CALL sgemm( 'No transpose', 'No transpose', m-j,
      $                  kcols, kahead, negone, a( j+1, kstart ), lda,
      $                  a( kstart, j+1 ), lda, one, a( j+1, j+1 ),
      $                  lda )
          END IF
       END DO
 *
 *     Handle pivot permutations on the way out of the recursion
 *
       npived = iand( nstep, -nstep )
       j = nstep - npived
       DO WHILE ( j.GT.0 )
          ntopiv = iand( j, -j )
          CALL slaswp( ntopiv, a( 1, j-ntopiv+1 ), lda, j+1, nstep,
      $                ipiv, 1 )
          j = j - ntopiv
       END DO
 *
 *     If short and wide, handle the rest of the columns.  When M < N
 *     the loop above ends at J = M with KCOLS = 0, so the remaining
 *     columns start at column M+1.
 *
       IF( m.LT.n ) THEN
          CALL slaswp( n-m, a( 1, m+1 ), lda, 1, m, ipiv, 1 )
          CALL strsm( 'Left', 'Lower', 'No transpose', 'Unit', m,
      $               n-m, one, a, lda, a( 1, m+1 ), lda )
       END IF
 *
       RETURN
 *
 *     End of SGETRF
 *
       END

strsm
subroutine strsm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
STRSM
Definition: strsm.f:183

sgemm
subroutine sgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
SGEMM
Definition: sgemm.f:189

xerbla
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62

slaswp
subroutine slaswp(N, A, LDA, K1, K2, IPIV, INCX)
SLASWP performs a series of row interchanges on a general rectangular matrix.
Definition: slaswp.f:116

sgetrf
subroutine sgetrf(M, N, A, LDA, IPIV, INFO)
SGETRF
Definition: sgetrf.f:110

sscal
subroutine sscal(N, SA, SX, INCX)
SSCAL
Definition: sscal.f:55