df/deb/clatrs3_8f_source.html

*> \brief \b CLATRS3 solves a triangular system of equations with the scale factors set to prevent overflow.

*

*  Definition:

*  ===========

*

*      SUBROUTINE CLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA,

*                          X, LDX, SCALE, CNORM, WORK, LWORK, INFO )

*

*       .. Scalar Arguments ..

*       CHARACTER          DIAG, NORMIN, TRANS, UPLO

*       INTEGER            INFO, LDA, LWORK, LDX, N, NRHS

*       ..

*       .. Array Arguments ..

*       REAL               CNORM( * ), SCALE( * ), WORK( * )

*       COMPLEX            A( LDA, * ), X( LDX, * )

*       ..

*

*

*> \par Purpose:

*  =============

*>

*> \verbatim

*>

*> CLATRS3 solves one of the triangular systems

*>

*>    A * X = B * diag(scale),  A**T * X = B * diag(scale), or

*>    A**H * X = B * diag(scale)

*>

*> with scaling to prevent overflow.  Here A is an upper or lower

*> triangular matrix, A**T denotes the transpose of A, A**H denotes the

*> conjugate transpose of A. X and B are n-by-nrhs matrices and scale

*> is an nrhs-element vector of scaling factors. A scaling factor scale(j)

*> is usually less than or equal to 1, chosen such that X(:,j) is less

*> than the overflow threshold. If the matrix A is singular (A(j,j) = 0

*> for some j), then a non-trivial solution to A*X = 0 is returned. If

*> the system is so badly scaled that the solution cannot be represented

*> as (1/scale(k))*X(:,k), then X(:,k) = 0 and scale(k) = 0 are returned.

*>

*> This is a BLAS-3 version of LATRS for solving several right

*> hand sides simultaneously.

*>

*> \endverbatim

*

*  Arguments:

*  ==========

*

*> \param[in] UPLO

*> \verbatim

*>          UPLO is CHARACTER*1

*>          Specifies whether the matrix A is upper or lower triangular.

*>          = 'U':  Upper triangular

*>          = 'L':  Lower triangular

*> \endverbatim

*>

*> \param[in] TRANS

*> \verbatim

*>          TRANS is CHARACTER*1

*>          Specifies the operation applied to A.

*>          = 'N':  Solve A * x = s*b  (No transpose)

*>          = 'T':  Solve A**T* x = s*b  (Transpose)

*>          = 'C':  Solve A**H* x = s*b  (Conjugate transpose)

*> \endverbatim

*>

*> \param[in] DIAG

*> \verbatim

*>          DIAG is CHARACTER*1

*>          Specifies whether or not the matrix A is unit triangular.

*>          = 'N':  Non-unit triangular

*>          = 'U':  Unit triangular

*> \endverbatim

*>

*> \param[in] NORMIN

*> \verbatim

*>          NORMIN is CHARACTER*1

*>          Specifies whether CNORM has been set or not.

*>          = 'Y':  CNORM contains the column norms on entry

*>          = 'N':  CNORM is not set on entry.  On exit, the norms will

*>                  be computed and stored in CNORM.

*> \endverbatim

*>

*> \param[in] N

*> \verbatim

*>          N is INTEGER

*>          The order of the matrix A.  N >= 0.

*> \endverbatim

*>

*> \param[in] NRHS

*> \verbatim

*>          NRHS is INTEGER

*>          The number of columns of X.  NRHS >= 0.

*> \endverbatim

*>

*> \param[in] A

*> \verbatim

*>          A is COMPLEX array, dimension (LDA,N)

*>          The triangular matrix A.  If UPLO = 'U', the leading n by n

*>          upper triangular part of the array A contains the upper

*>          triangular matrix, and the strictly lower triangular part of

*>          A is not referenced.  If UPLO = 'L', the leading n by n lower

*>          triangular part of the array A contains the lower triangular

*>          matrix, and the strictly upper triangular part of A is not

*>          referenced.  If DIAG = 'U', the diagonal elements of A are

*>          also not referenced and are assumed to be 1.

*> \endverbatim

*>

*> \param[in] LDA

*> \verbatim

*>          LDA is INTEGER

*>          The leading dimension of the array A.  LDA >= max (1,N).

*> \endverbatim

*>

*> \param[in,out] X

*> \verbatim

*>          X is COMPLEX array, dimension (LDX,NRHS)

*>          On entry, the right hand side B of the triangular system.

*>          On exit, X is overwritten by the solution matrix X.

*> \endverbatim

*>

*> \param[in] LDX

*> \verbatim

*>          LDX is INTEGER

*>          The leading dimension of the array X.  LDX >= max (1,N).

*> \endverbatim

*>

*> \param[out] SCALE

*> \verbatim

*>          SCALE is REAL array, dimension (NRHS)

*>          The scaling factor s(k) is for the triangular system

*>          A * x(:,k) = s(k)*b(:,k),  A**T* x(:,k) = s(k)*b(:,k)  or
*>          A**H* x(:,k) = s(k)*b(:,k).

*>          If SCALE = 0, the matrix A is singular or badly scaled.

*>          If A(j,j) = 0 is encountered, a non-trivial vector x(:,k)

*>          that is an exact or approximate solution to A*x(:,k) = 0

*>          is returned. If the system is so badly scaled that the

*>          solution cannot be represented as x(:,k) * 1/s(k), then

*>          x(:,k) = 0 is returned.

*> \endverbatim

*>

*> \param[in,out] CNORM

*> \verbatim

*>          CNORM is REAL array, dimension (N)

*>

*>          If NORMIN = 'Y', CNORM is an input argument and CNORM(j)

*>          contains the norm of the off-diagonal part of the j-th column

*>          of A.  If TRANS = 'N', CNORM(j) must be greater than or equal

*>          to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)

*>          must be greater than or equal to the 1-norm.

*>

*>          If NORMIN = 'N', CNORM is an output argument and CNORM(j)

*>          returns the 1-norm of the offdiagonal part of the j-th column

*>          of A.

*> \endverbatim

*>

*> \param[out] WORK

*> \verbatim

*>          WORK is REAL array, dimension (MAX(1,LWORK)).

*>          On exit, if INFO = 0, WORK(1) returns the optimal size of

*>          WORK.

*> \endverbatim

*>

*> \param[in] LWORK

*> \verbatim

*>          LWORK is INTEGER

*>          The dimension of the array WORK.

*>

*>          If MIN(N,NRHS) = 0, LWORK >= 1, else

*>          LWORK >= MAX(1, 2*NBA * MAX(NBA, MIN(NRHS, 32))), where

*>          NBA = (N + NB - 1)/NB and NB is the optimal block size.

*>

*>          If LWORK = -1, then a workspace query is assumed; the routine

*>          only calculates the optimal dimensions of the WORK array, returns

*>          this value as the first entry of the WORK array, and no error

*>          message related to LWORK is issued by XERBLA.

*> \endverbatim

*>

*> \param[out] INFO

*> \verbatim

*>          INFO is INTEGER

*>          = 0:  successful exit

*>          < 0:  if INFO = -k, the k-th argument had an illegal value

*> \endverbatim

*

*  Authors:

*  ========

*

*> \author Univ. of Tennessee

*> \author Univ. of California Berkeley

*> \author Univ. of Colorado Denver

*> \author NAG Ltd.

*

*> \ingroup latrs3

*> \par Further Details:

*  =====================

*  \verbatim

*  The algorithm follows the structure of a block triangular solve.

*  The diagonal block is solved with a call to the robust triangular

*  solver LATRS for every right-hand side RHS = 1, ..., NRHS

*     op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ),

*  where op( A ) = A or op( A ) = A**T or op( A ) = A**H.

*  The linear block updates operate on block columns of X,

*     B( I, K ) - op(A( I, J )) * X( J, K )

*  and use GEMM. To avoid overflow in the linear block update, the worst case

*  growth is estimated. For every RHS, a scale factor s <= 1.0 is computed

*  such that

*     || s * B( I, RHS )||_oo

*   + || op(A( I, J )) ||_oo * || s *  X( J, RHS ) ||_oo <= Overflow threshold

*

*  Once all columns of a block column have been rescaled (BLAS-1), the linear

*  update is executed with GEMM without overflow.

*

*  To limit rescaling, local scale factors track the scaling of column segments.

*  There is one local scale factor s( I, RHS ) per block row I = 1, ..., NBA

*  per right-hand side column RHS = 1, ..., NRHS. The global scale factor

*  SCALE( RHS ) is chosen as the smallest local scale factor s( I, RHS )

*  I = 1, ..., NBA.

*  A triangular solve op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS )

*  updates the local scale factor s( J, RHS ) := s( J, RHS ) * SCALOC. The

*  linear update of potentially inconsistently scaled vector segments

*     s( I, RHS ) * b( I, RHS ) - op(A( I, J )) * ( s( J, RHS )* x( J, RHS ) )

*  computes a consistent scaling SCAMIN = MIN( s(I, RHS ), s(J, RHS) ) and,

*  if necessary, rescales the blocks prior to calling GEMM.

*

*  \endverbatim

*  =====================================================================

*  References:

*  C. C. Kjelgaard Mikkelsen, A. B. Schwarz and L. Karlsson (2019).

*  Parallel robust solution of triangular linear systems. Concurrency

*  and Computation: Practice and Experience, 31(19), e5064.

*

*  Contributor:

*   Angelika Schwarz, Umea University, Sweden.

*

*  =====================================================================


      SUBROUTINE clatrs3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA,
     $                    X, LDX, SCALE, CNORM, WORK, LWORK, INFO )
*
*     Blocked robust triangular solve for several right-hand sides.
*
*     Solves op(A) * X = B * diag(SCALE) with op(A) = A, A**T or A**H,
*     where A is N-by-N upper or lower triangular and X (B on entry)
*     is N-by-NRHS.  Diagonal blocks are solved column by column with
*     CLATRS; off-diagonal updates are done with CGEMM after the
*     affected column segments have been rescaled so that the update
*     cannot overflow (scale factor obtained from SLARMM).
*
*     Arguments
*       UPLO    (in)     'U' upper, 'L' lower triangular A.
*       TRANS   (in)     'N', 'T' or 'C': op(A) = A, A**T, A**H.
*       DIAG    (in)     'N' non-unit, 'U' unit diagonal.
*       NORMIN  (in)     'Y' or 'N'; forwarded to CLATRS only on the
*                        unblocked path (NRHS < NRHSMIN).
*       N, NRHS (in)     Order of A and number of columns of X.
*       A, LDA  (in)     Triangular matrix and its leading dimension.
*       X, LDX  (in,out) Right-hand sides on entry, solution on exit.
*       SCALE   (out)    One scale factor per right-hand side.
*       CNORM   (in,out) Column-norm scratch/argument passed to CLATRS.
*       WORK    (out)    Real workspace; WORK(1) returns LWMIN.
*       LWORK   (in)     Size of WORK, or -1 for a workspace query.
*       INFO    (out)    0 on success, -k if argument k is illegal.
*
      IMPLICIT NONE
*
*     .. Scalar Arguments ..
      CHARACTER          DIAG, TRANS, NORMIN, UPLO
      INTEGER            INFO, LDA, LWORK, LDX, N, NRHS
*     ..
*     .. Array Arguments ..
      COMPLEX            A( LDA, * ), X( LDX, * )
      REAL               CNORM( * ), SCALE( * ), WORK( * )
*     ..
*
*  =====================================================================
*
*     .. Parameters ..
      REAL               ZERO, ONE
      parameter( zero = 0.0e+0, one = 1.0e+0 )
      COMPLEX            CZERO, CONE
      parameter( czero = ( 0.0e+0, 0.0e+0 ) )
      parameter( cone = ( 1.0e+0, 0.0e+0 ) )
      INTEGER            NBMAX, NBMIN, NBRHS, NRHSMIN
      parameter( nrhsmin = 2, nbrhs = 32 )
      parameter( nbmin = 8, nbmax = 64 )
*     ..
*     .. Local Arrays ..
      REAL               W( NBMAX ), XNRM( NBRHS )
*     ..
*     .. Local Scalars ..
      LOGICAL            LQUERY, NOTRAN, NOUNIT, UPPER
      INTEGER            AWRK, I, IFIRST, IINC, ILAST, II, I1, I2, J,
     $                   jfirst, jinc, jlast, j1, j2, k, kk, k1, k2,
     $                   lanrm, lds, lscale, nb, nba, nbx, rhs, lwmin,
     $                   iscl, jscl
      REAL               ANRM, BIGNUM, BNRM, RSCAL, SCAL, SCALOC,
     $                   scamin, smlnum, tmax
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ILAENV
      REAL               SLAMCH, CLANGE, SLARMM,
     $                   sroundup_lwork
      EXTERNAL           ilaenv, lsame, slamch,
     $                   clange, slarmm, sroundup_lwork
*     ..
*     .. External Subroutines ..
      EXTERNAL           cgemm, clatrs, csscal, xerbla
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          max, min
*     ..
*     .. Executable Statements ..
*
      info = 0
      upper = lsame( uplo, 'U' )
      notran = lsame( trans, 'N' )
      nounit = lsame( diag, 'N' )
      lquery = ( lwork.EQ.-1 )
*
*     Partition A and X into blocks.
*
      nb = max( nbmin, ilaenv( 1, 'CLATRS', '', n, n, -1, -1 ) )
      nb = min( nbmax, nb )
      nba = max( 1, (n + nb - 1) / nb )
      nbx = max( 1, (nrhs + nbrhs - 1) / nbrhs )
*
*     Compute the workspace
*
*     The workspace comprises two parts.
*     The first part stores the local scale factors. Each simultaneously
*     computed right-hand side requires one local scale factor per block
*     row. WORK( I + (KK-1) * LDS ) is the scale factor of the vector
*     segment associated with the I-th block row and the KK-th vector
*     in the block column. The zero-based column offset keeps all scale
*     factors inside WORK( 1:LSCALE ), so they cannot alias the block
*     norms stored behind them.
*
      lscale = nba * max( nba, min( nrhs, nbrhs ) )
      lds = nba
*
*     The second part stores upper bounds of the triangular A. There are
*     a total of NBA x NBA blocks, of which only the upper triangular
*     part or the lower triangular part is referenced. The upper bound
*     used when block row I is updated with the solution of block row J
*     is stored as WORK( AWRK + I + (J-1) * NBA ).
*
      lanrm = nba * nba
      awrk = lscale
*
      IF( min( n, nrhs ).EQ.0 ) THEN
         lwmin = 1
      ELSE
         lwmin = lscale + lanrm
      END IF
      work( 1 ) = sroundup_lwork( lwmin )
*
*     Test the input parameters.
*
      IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
         info = -1
      ELSE IF( .NOT.notran .AND. .NOT.lsame( trans, 'T' ) .AND. .NOT.
     $         lsame( trans, 'C' ) ) THEN
         info = -2
      ELSE IF( .NOT.nounit .AND. .NOT.lsame( diag, 'U' ) ) THEN
         info = -3
      ELSE IF( .NOT.lsame( normin, 'Y' ) .AND. .NOT.
     $         lsame( normin, 'N' ) ) THEN
         info = -4
      ELSE IF( n.LT.0 ) THEN
         info = -5
      ELSE IF( nrhs.LT.0 ) THEN
         info = -6
      ELSE IF( lda.LT.max( 1, n ) ) THEN
         info = -8
      ELSE IF( ldx.LT.max( 1, n ) ) THEN
         info = -10
      ELSE IF( .NOT.lquery .AND. lwork.LT.lwmin ) THEN
         info = -14
      END IF
      IF( info.NE.0 ) THEN
         CALL xerbla( 'CLATRS3', -info )
         RETURN
      ELSE IF( lquery ) THEN
         RETURN
      END IF
*
*     Initialize scaling factors
*
      DO kk = 1, nrhs
         scale( kk ) = one
      END DO
*
*     Quick return if possible
*
      IF( min( n, nrhs ).EQ.0 )
     $   RETURN
*
*     Determine machine dependent constant to control overflow.
*
      bignum = slamch( 'Overflow' )
      smlnum = slamch( 'Safe Minimum' )
*
*     Use unblocked code for small problems
*
      IF( nrhs.LT.nrhsmin ) THEN
         CALL clatrs( uplo, trans, diag, normin, n, a, lda,
     $                x( 1, 1 ), scale( 1 ), cnorm, info )
*        CNORM is valid after the first call, so reuse it ('Y').
         DO k = 2, nrhs
            CALL clatrs( uplo, trans, diag, 'Y', n, a, lda,
     $                   x( 1, k ), scale( k ), cnorm, info )
         END DO
         RETURN
      END IF
*
*     Compute norms of blocks of A excluding diagonal blocks and find
*     the block with the largest norm TMAX.
*
      tmax = zero
      DO j = 1, nba
         j1 = (j-1)*nb + 1
         j2 = min( j*nb, n ) + 1
         IF ( upper ) THEN
            ifirst = 1
            ilast = j - 1
         ELSE
            ifirst = j + 1
            ilast = nba
         END IF
         DO i = ifirst, ilast
            i1 = (i-1)*nb + 1
            i2 = min( i*nb, n ) + 1
*
*           Compute upper bound of A( I1:I2-1, J1:J2-1 ).
*           For the (conjugate) transposed solve the bound is stored
*           at the transposed position so that the update loop can
*           always read WORK( AWRK + I + (J-1)*NBA ).
*
            IF( notran ) THEN
               anrm = clange( 'I', i2-i1, j2-j1, a( i1, j1 ), lda,
     $                        w )
               work( awrk + i+(j-1)*nba ) = anrm
            ELSE
               anrm = clange( '1', i2-i1, j2-j1, a( i1, j1 ), lda,
     $                        w )
               work( awrk + j+(i-1)*nba ) = anrm
            END IF
            tmax = max( tmax, anrm )
         END DO
      END DO
*
*     The negated comparison is also true when TMAX is NaN.
*
      IF( .NOT. tmax.LE.bignum ) THEN
*
*        Some matrix entries have huge absolute value. At least one upper
*        bound norm( A(I1:I2-1, J1:J2-1), 'I') is not a valid floating-point
*        number, either due to overflow in LANGE or due to Inf in A.
*        Fall back to LATRS. Set normin = 'N' for every right-hand side to
*        force computation of TSCAL in LATRS to avoid the likely overflow
*        in the computation of the column norms CNORM.
*
         DO k = 1, nrhs
            CALL clatrs( uplo, trans, diag, 'N', n, a, lda,
     $                   x( 1, k ), scale( k ), cnorm, info )
         END DO
         RETURN
      END IF
*
*     Every right-hand side requires workspace to store NBA local scale
*     factors. To save workspace, X is computed successively in block columns
*     of width NBRHS, requiring a total of NBA x NBRHS space. If sufficient
*     workspace is available, larger values of NBRHS or NBRHS = NRHS are viable.
      DO k = 1, nbx
*        Loop over block columns (index = K) of X and, for column-wise scalings,
*        over individual columns (index = KK).
*        K1: column index of the first column in X( J, K )
*        K2: column index of the first column in X( J, K+1 )
*        so the K2 - K1 is the column count of the block X( J, K )
         k1 = (k-1)*nbrhs + 1
         k2 = min( k*nbrhs, nrhs ) + 1
*
*        Initialize local scaling factors of current block column X( J, K )
*
         DO kk = 1, k2-k1
            DO i = 1, nba
               work( i+(kk-1)*lds ) = one
            END DO
         END DO
*
         IF( notran ) THEN
*
*           Solve A * X(:, K1:K2-1) = B * diag(scale(K1:K2-1))
*
            IF( upper ) THEN
               jfirst = nba
               jlast = 1
               jinc = -1
            ELSE
               jfirst = 1
               jlast = nba
               jinc = 1
            END IF
         ELSE
*
*           Solve op(A) * X(:, K1:K2-1) = B * diag(scale(K1:K2-1))
*           where op(A) = A**T or op(A) = A**H
*
            IF( upper ) THEN
               jfirst = 1
               jlast = nba
               jinc = 1
            ELSE
               jfirst = nba
               jlast = 1
               jinc = -1
            END IF
         END IF
*
         DO j = jfirst, jlast, jinc
*           J1: row index of the first row in A( J, J )
*           J2: row index of the first row in A( J+1, J+1 )
*           so that J2 - J1 is the row count of the block A( J, J )
            j1 = (j-1)*nb + 1
            j2 = min( j*nb, n ) + 1
*
*           Solve op(A( J, J )) * X( J, RHS ) = SCALOC * B( J, RHS )
*
            DO kk = 1, k2-k1
*              RHS is the global column of X, KK the column within the
*              current block column; JSCL addresses s( J, KK ).
               rhs = k1 + kk - 1
               jscl = j + (kk-1)*lds
               IF( kk.EQ.1 ) THEN
*                 First column: let LATRS compute the column norms of
*                 the diagonal block into CNORM.
                  CALL clatrs( uplo, trans, diag, 'N', j2-j1,
     $                         a( j1, j1 ), lda, x( j1, rhs ),
     $                         scaloc, cnorm, info )
               ELSE
*                 Remaining columns reuse those norms.
                  CALL clatrs( uplo, trans, diag, 'Y', j2-j1,
     $                         a( j1, j1 ), lda, x( j1, rhs ),
     $                         scaloc, cnorm, info )
               END IF
*              Find largest absolute value entry in the vector segment
*              X( J1:J2-1, RHS ) as an upper bound for the worst case
*              growth in the linear updates.
               xnrm( kk ) = clange( 'I', j2-j1, 1, x( j1, rhs ),
     $                              ldx, w )
*
               IF( scaloc .EQ. zero ) THEN
*                 LATRS found that A is singular through A(j,j) = 0.
*                 Reset the computation x(1:n) = 0, x(j) = 1, SCALE = 0
*                 and compute op(A)*x = 0. Note that X(J1:J2-1, RHS) is
*                 set by LATRS. The column to clear is the global
*                 column RHS, not the block-local index KK.
                  scale( rhs ) = zero
                  DO ii = 1, j1-1
                     x( ii, rhs ) = czero
                  END DO
                  DO ii = j2, n
                     x( ii, rhs ) = czero
                  END DO
*                 Discard the local scale factors.
                  DO ii = 1, nba
                     work( ii+(kk-1)*lds ) = one
                  END DO
                  scaloc = one
               ELSE IF( scaloc*work( jscl ) .EQ. zero ) THEN
*                 LATRS computed a valid scale factor, but combined with
*                 the current scaling the solution does not have a
*                 scale factor > 0.
*
*                 Set WORK( JSCL ) to smallest valid scale
*                 factor and increase SCALOC accordingly.
                  scal = work( jscl ) / smlnum
                  scaloc = scaloc * scal
                  work( jscl ) = smlnum
*                 If LATRS overestimated the growth, x may be
*                 rescaled to preserve a valid combined scale
*                 factor WORK( JSCL ) > 0.
                  rscal = one / scaloc
                  IF( xnrm( kk )*rscal .LE. bignum ) THEN
                     xnrm( kk ) = xnrm( kk ) * rscal
                     CALL csscal( j2-j1, rscal, x( j1, rhs ), 1 )
                     scaloc = one
                  ELSE
*                    The system op(A) * x = b is badly scaled and its
*                    solution cannot be represented as (1/scale) * x.
*                    Set x to zero. This approach deviates from LATRS
*                    where a completely meaningless non-zero vector
*                    is returned that is not a solution to op(A) * x = b.
                     scale( rhs ) = zero
                     DO ii = 1, n
                        x( ii, rhs ) = czero
                     END DO
*                    Discard the local scale factors.
                     DO ii = 1, nba
                        work( ii+(kk-1)*lds ) = one
                     END DO
                     scaloc = one
                  END IF
               END IF
*              Fold the factor of this solve into s( J, KK ).
               scaloc = scaloc * work( jscl )
               work( jscl ) = scaloc
            END DO
*
*           Linear block updates
*
            IF( notran ) THEN
               IF( upper ) THEN
                  ifirst = j - 1
                  ilast = 1
                  iinc = -1
               ELSE
                  ifirst = j + 1
                  ilast = nba
                  iinc = 1
               END IF
            ELSE
               IF( upper ) THEN
                  ifirst = j + 1
                  ilast = nba
                  iinc = 1
               ELSE
                  ifirst = j - 1
                  ilast = 1
                  iinc = -1
               END IF
            END IF
*
            DO i = ifirst, ilast, iinc
*              I1: row index of the first row in X( I, K )
*              I2: row index of the first row in X( I+1, K )
*              so the I2 - I1 is the row count of the block X( I, K )
               i1 = (i-1)*nb + 1
               i2 = min( i*nb, n ) + 1
*
*              Prepare the linear update to be executed with GEMM.
*              For each column, compute a consistent scaling, a
*              scaling factor to survive the linear update, and
*              rescale the column segments, if necessary. Then
*              the linear update is safely executed.
*
               DO kk = 1, k2-k1
                  rhs = k1 + kk - 1
                  iscl = i + (kk-1)*lds
                  jscl = j + (kk-1)*lds
*                 Compute consistent scaling
                  scamin = min( work( iscl ), work( jscl ) )
*
*                 Compute scaling factor to survive the linear update
*                 simulating consistent scaling.
*
                  bnrm = clange( 'I', i2-i1, 1, x( i1, rhs ), ldx,
     $                           w )
                  bnrm = bnrm*( scamin / work( iscl ) )
                  xnrm( kk ) = xnrm( kk )*( scamin / work( jscl ) )
                  anrm = work( awrk + i+(j-1)*nba )
                  scaloc = slarmm( anrm, xnrm( kk ), bnrm )
*
*                 Simultaneously apply the robust update factor and the
*                 consistency scaling factor to X( I, KK ) and X( J, KK ).
*
                  scal = ( scamin / work( iscl ) )*scaloc
                  IF( scal.NE.one ) THEN
                     CALL csscal( i2-i1, scal, x( i1, rhs ), 1 )
                     work( iscl ) = scamin*scaloc
                  END IF
*
                  scal = ( scamin / work( jscl ) )*scaloc
                  IF( scal.NE.one ) THEN
                     CALL csscal( j2-j1, scal, x( j1, rhs ), 1 )
                     work( jscl ) = scamin*scaloc
                  END IF
               END DO
*
               IF( notran ) THEN
*
*                 B( I, K ) := B( I, K ) - A( I, J ) * X( J, K )
*
                  CALL cgemm( 'N', 'N', i2-i1, k2-k1, j2-j1, -cone,
     $                        a( i1, j1 ), lda, x( j1, k1 ), ldx,
     $                        cone, x( i1, k1 ), ldx )
               ELSE IF( lsame( trans, 'T' ) ) THEN
*
*                 B( I, K ) := B( I, K ) - A( J, I )**T * X( J, K )
*
                  CALL cgemm( 'T', 'N', i2-i1, k2-k1, j2-j1, -cone,
     $                        a( j1, i1 ), lda, x( j1, k1 ), ldx,
     $                        cone, x( i1, k1 ), ldx )
               ELSE
*
*                 B( I, K ) := B( I, K ) - A( J, I )**H * X( J, K )
*
                  CALL cgemm( 'C', 'N', i2-i1, k2-k1, j2-j1, -cone,
     $                        a( j1, i1 ), lda, x( j1, k1 ), ldx,
     $                        cone, x( i1, k1 ), ldx )
               END IF
            END DO
         END DO
*
*        Reduce local scaling factors
*
         DO kk = 1, k2-k1
            rhs = k1 + kk - 1
            DO i = 1, nba
               iscl = i + (kk-1)*lds
               scale( rhs ) = min( scale( rhs ), work( iscl ) )
            END DO
         END DO
*
*        Realize consistent scaling
*
         DO kk = 1, k2-k1
            rhs = k1 + kk - 1
            IF( scale( rhs ).NE.one .AND. scale( rhs ).NE. zero ) THEN
               DO i = 1, nba
                  i1 = (i-1)*nb + 1
                  i2 = min( i*nb, n ) + 1
                  scal = scale( rhs ) / work( i+(kk-1)*lds )
                  IF( scal.NE.one )
     $               CALL csscal( i2-i1, scal, x( i1, rhs ), 1 )
               END DO
            END IF
         END DO
      END DO
*
*     WORK( 1 ) was used as scratch above; restore the workspace size.
*
      work( 1 ) = sroundup_lwork( lwmin )
*
      RETURN
*
*     End of CLATRS3
*
      END

xerbla
subroutine xerbla(srname, info)
Definition cblat2.f:3285

cgemm
subroutine cgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
CGEMM
Definition cgemm.f:188

clatrs3
subroutine clatrs3(uplo, trans, diag, normin, n, nrhs, a, lda, x, ldx, scale, cnorm, work, lwork, info)
CLATRS3 solves a triangular system of equations with the scale factors set to prevent overflow.
Definition clatrs3.f:235

clatrs
subroutine clatrs(uplo, trans, diag, normin, n, a, lda, x, scale, cnorm, info)
CLATRS solves a triangular system of equations with the scale factor set to prevent overflow.
Definition clatrs.f:238

sroundup_lwork
real function sroundup_lwork(lwork)
SROUNDUP_LWORK
Definition sroundup_lwork.f:59

csscal
subroutine csscal(n, sa, cx, incx)
CSSCAL
Definition csscal.f:78