LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
clatrs3.f
Go to the documentation of this file.
1*> \brief \b CLATRS3 solves a triangular system of equations with the scale factors set to prevent overflow.
2*
3* Definition:
4* ===========
5*
6* SUBROUTINE CLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA,
7* X, LDX, SCALE, CNORM, WORK, LWORK, INFO )
8*
9* .. Scalar Arguments ..
10* CHARACTER DIAG, NORMIN, TRANS, UPLO
11* INTEGER INFO, LDA, LWORK, LDX, N, NRHS
12* ..
13* .. Array Arguments ..
14* REAL CNORM( * ), SCALE( * ), WORK( * )
15* COMPLEX A( LDA, * ), X( LDX, * )
16* ..
17*
18*
19*> \par Purpose:
20* =============
21*>
22*> \verbatim
23*>
24*> CLATRS3 solves one of the triangular systems
25*>
26*> A * X = B * diag(scale), A**T * X = B * diag(scale), or
27*> A**H * X = B * diag(scale)
28*>
29*> with scaling to prevent overflow. Here A is an upper or lower
30*> triangular matrix, A**T denotes the transpose of A, A**H denotes the
31*> conjugate transpose of A. X and B are n-by-nrhs matrices and scale
32*> is an nrhs-element vector of scaling factors. A scaling factor scale(j)
33*> is usually less than or equal to 1, chosen such that X(:,j) is less
34*> than the overflow threshold. If the matrix A is singular (A(j,j) = 0
35*> for some j), then a non-trivial solution to A*X = 0 is returned. If
36*> the system is so badly scaled that the solution cannot be represented
37*> as (1/scale(k))*X(:,k), then x(:,k) = 0 and scale(k) is returned.
38*>
39*> This is a BLAS-3 version of LATRS for solving several right
40*> hand sides simultaneously.
41*>
42*> \endverbatim
43*
44* Arguments:
45* ==========
46*
47*> \param[in] UPLO
48*> \verbatim
49*> UPLO is CHARACTER*1
50*> Specifies whether the matrix A is upper or lower triangular.
51*> = 'U': Upper triangular
52*> = 'L': Lower triangular
53*> \endverbatim
54*>
55*> \param[in] TRANS
56*> \verbatim
57*> TRANS is CHARACTER*1
58*> Specifies the operation applied to A.
59*> = 'N': Solve A * x = s*b (No transpose)
60*> = 'T': Solve A**T* x = s*b (Transpose)
61*> = 'C': Solve A**T* x = s*b (Conjugate transpose)
62*> \endverbatim
63*>
64*> \param[in] DIAG
65*> \verbatim
66*> DIAG is CHARACTER*1
67*> Specifies whether or not the matrix A is unit triangular.
68*> = 'N': Non-unit triangular
69*> = 'U': Unit triangular
70*> \endverbatim
71*>
72*> \param[in] NORMIN
73*> \verbatim
74*> NORMIN is CHARACTER*1
75*> Specifies whether CNORM has been set or not.
76*> = 'Y': CNORM contains the column norms on entry
77*> = 'N': CNORM is not set on entry. On exit, the norms will
78*> be computed and stored in CNORM.
79*> \endverbatim
80*>
81*> \param[in] N
82*> \verbatim
83*> N is INTEGER
84*> The order of the matrix A. N >= 0.
85*> \endverbatim
86*>
87*> \param[in] NRHS
88*> \verbatim
89*> NRHS is INTEGER
90*> The number of columns of X. NRHS >= 0.
91*> \endverbatim
92*>
93*> \param[in] A
94*> \verbatim
95*> A is COMPLEX array, dimension (LDA,N)
96*> The triangular matrix A. If UPLO = 'U', the leading n by n
97*> upper triangular part of the array A contains the upper
98*> triangular matrix, and the strictly lower triangular part of
99*> A is not referenced. If UPLO = 'L', the leading n by n lower
100*> triangular part of the array A contains the lower triangular
101*> matrix, and the strictly upper triangular part of A is not
102*> referenced. If DIAG = 'U', the diagonal elements of A are
103*> also not referenced and are assumed to be 1.
104*> \endverbatim
105*>
106*> \param[in] LDA
107*> \verbatim
108*> LDA is INTEGER
109*> The leading dimension of the array A. LDA >= max (1,N).
110*> \endverbatim
111*>
112*> \param[in,out] X
113*> \verbatim
114*> X is COMPLEX array, dimension (LDX,NRHS)
115*> On entry, the right hand side B of the triangular system.
116*> On exit, X is overwritten by the solution matrix X.
117*> \endverbatim
118*>
119*> \param[in] LDX
120*> \verbatim
121*> LDX is INTEGER
122*> The leading dimension of the array X. LDX >= max (1,N).
123*> \endverbatim
124*>
125*> \param[out] SCALE
126*> \verbatim
127*> SCALE is REAL array, dimension (NRHS)
128*> The scaling factor s(k) is for the triangular system
129*> A * x(:,k) = s(k)*b(:,k) or A**T* x(:,k) = s(k)*b(:,k).
130*> If SCALE = 0, the matrix A is singular or badly scaled.
131*> If A(j,j) = 0 is encountered, a non-trivial vector x(:,k)
132*> that is an exact or approximate solution to A*x(:,k) = 0
133*> is returned. If the system so badly scaled that solution
134*> cannot be presented as x(:,k) * 1/s(k), then x(:,k) = 0
135*> is returned.
136*> \endverbatim
137*>
138*> \param[in,out] CNORM
139*> \verbatim
140*> CNORM is REAL array, dimension (N)
141*>
142*> If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
143*> contains the norm of the off-diagonal part of the j-th column
144*> of A. If TRANS = 'N', CNORM(j) must be greater than or equal
145*> to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
146*> must be greater than or equal to the 1-norm.
147*>
148*> If NORMIN = 'N', CNORM is an output argument and CNORM(j)
149*> returns the 1-norm of the offdiagonal part of the j-th column
150*> of A.
151*> \endverbatim
152*>
153*> \param[out] WORK
154*> \verbatim
155*> WORK is REAL array, dimension (LWORK).
156*> On exit, if INFO = 0, WORK(1) returns the optimal size of
157*> WORK.
158*> \endverbatim
159*>
160*> \param[in] LWORK
161*> LWORK is INTEGER
162*> LWORK >= MAX(1, 2*NBA * MAX(NBA, MIN(NRHS, 32)), where
163*> NBA = (N + NB - 1)/NB and NB is the optimal block size.
164*>
165*> If LWORK = -1, then a workspace query is assumed; the routine
166*> only calculates the optimal dimensions of the WORK array, returns
167*> this value as the first entry of the WORK array, and no error
168*> message related to LWORK is issued by XERBLA.
169*>
170*> \param[out] INFO
171*> \verbatim
172*> INFO is INTEGER
173*> = 0: successful exit
174*> < 0: if INFO = -k, the k-th argument had an illegal value
175*> \endverbatim
176*
177* Authors:
178* ========
179*
180*> \author Univ. of Tennessee
181*> \author Univ. of California Berkeley
182*> \author Univ. of Colorado Denver
183*> \author NAG Ltd.
184*
185*> \ingroup latrs3
186*> \par Further Details:
187* =====================
188* \verbatim
189* The algorithm follows the structure of a block triangular solve.
190* The diagonal block is solved with a call to the robust the triangular
191* solver LATRS for every right-hand side RHS = 1, ..., NRHS
192* op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ),
193* where op( A ) = A or op( A ) = A**T or op( A ) = A**H.
194* The linear block updates operate on block columns of X,
195* B( I, K ) - op(A( I, J )) * X( J, K )
196* and use GEMM. To avoid overflow in the linear block update, the worst case
197* growth is estimated. For every RHS, a scale factor s <= 1.0 is computed
198* such that
199* || s * B( I, RHS )||_oo
200* + || op(A( I, J )) ||_oo * || s * X( J, RHS ) ||_oo <= Overflow threshold
201*
202* Once all columns of a block column have been rescaled (BLAS-1), the linear
203* update is executed with GEMM without overflow.
204*
205* To limit rescaling, local scale factors track the scaling of column segments.
206* There is one local scale factor s( I, RHS ) per block row I = 1, ..., NBA
207* per right-hand side column RHS = 1, ..., NRHS. The global scale factor
208* SCALE( RHS ) is chosen as the smallest local scale factor s( I, RHS )
209* I = 1, ..., NBA.
210* A triangular solve op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS )
211* updates the local scale factor s( J, RHS ) := s( J, RHS ) * SCALOC. The
212* linear update of potentially inconsistently scaled vector segments
213* s( I, RHS ) * b( I, RHS ) - op(A( I, J )) * ( s( J, RHS )* x( J, RHS ) )
214* computes a consistent scaling SCAMIN = MIN( s(I, RHS ), s(J, RHS) ) and,
215* if necessary, rescales the blocks prior to calling GEMM.
216*
217* \endverbatim
218* =====================================================================
219* References:
220* C. C. Kjelgaard Mikkelsen, A. B. Schwarz and L. Karlsson (2019).
221* Parallel robust solution of triangular linear systems. Concurrency
222* and Computation: Practice and Experience, 31(19), e5064.
223*
224* Contributor:
225* Angelika Schwarz, Umea University, Sweden.
226*
227* =====================================================================
228 SUBROUTINE clatrs3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA,
229 $ X, LDX, SCALE, CNORM, WORK, LWORK, INFO )
230 IMPLICIT NONE
231*
232* .. Scalar Arguments ..
233 CHARACTER DIAG, TRANS, NORMIN, UPLO
234 INTEGER INFO, LDA, LWORK, LDX, N, NRHS
235* ..
236* .. Array Arguments ..
237 COMPLEX A( LDA, * ), X( LDX, * )
238 REAL CNORM( * ), SCALE( * ), WORK( * )
239* ..
240*
241* =====================================================================
242*
243* .. Parameters ..
244 REAL ZERO, ONE
245 parameter( zero = 0.0e+0, one = 1.0e+0 )
246 COMPLEX CZERO, CONE
247 parameter( czero = ( 0.0e+0, 0.0e+0 ) )
248 parameter( cone = ( 1.0e+0, 0.0e+0 ) )
249 INTEGER NBMAX, NBMIN, NBRHS, NRHSMIN
250 parameter( nrhsmin = 2, nbrhs = 32 )
251 parameter( nbmin = 8, nbmax = 64 )
252* ..
253* .. Local Arrays ..
254 REAL W( NBMAX ), XNRM( NBRHS )
255* ..
256* .. Local Scalars ..
257 LOGICAL LQUERY, NOTRAN, NOUNIT, UPPER
258 INTEGER AWRK, I, IFIRST, IINC, ILAST, II, I1, I2, J,
259 $ jfirst, jinc, jlast, j1, j2, k, kk, k1, k2,
260 $ lanrm, lds, lscale, nb, nba, nbx, rhs
261 REAL ANRM, BIGNUM, BNRM, RSCAL, SCAL, SCALOC,
262 $ scamin, smlnum, tmax
263* ..
264* .. External Functions ..
265 LOGICAL LSAME
266 INTEGER ILAENV
267 REAL SLAMCH, CLANGE, SLARMM
268 EXTERNAL ilaenv, lsame, slamch, clange, slarmm
269* ..
270* .. External Subroutines ..
271 EXTERNAL clatrs, csscal, xerbla
272* ..
273* .. Intrinsic Functions ..
274 INTRINSIC abs, max, min
275* ..
276* .. Executable Statements ..
277*
278 info = 0
279 upper = lsame( uplo, 'U' )
280 notran = lsame( trans, 'N' )
281 nounit = lsame( diag, 'N' )
282 lquery = ( lwork.EQ.-1 )
283*
284* Partition A and X into blocks.
285*
286 nb = max( nbmin, ilaenv( 1, 'CLATRS', '', n, n, -1, -1 ) )
287 nb = min( nbmax, nb )
288 nba = max( 1, (n + nb - 1) / nb )
289 nbx = max( 1, (nrhs + nbrhs - 1) / nbrhs )
290*
291* Compute the workspace
292*
293* The workspace comprises two parts.
294* The first part stores the local scale factors. Each simultaneously
295* computed right-hand side requires one local scale factor per block
296* row. WORK( I + KK * LDS ) is the scale factor of the vector
297* segment associated with the I-th block row and the KK-th vector
298* in the block column.
299 lscale = nba * max( nba, min( nrhs, nbrhs ) )
300 lds = nba
301* The second part stores upper bounds of the triangular A. There are
302* a total of NBA x NBA blocks, of which only the upper triangular
303* part or the lower triangular part is referenced. The upper bound of
304* the block A( I, J ) is stored as WORK( AWRK + I + J * NBA ).
305 lanrm = nba * nba
306 awrk = lscale
307 work( 1 ) = lscale + lanrm
308*
309* Test the input parameters.
310*
311 IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
312 info = -1
313 ELSE IF( .NOT.notran .AND. .NOT.lsame( trans, 'T' ) .AND. .NOT.
314 $ lsame( trans, 'C' ) ) THEN
315 info = -2
316 ELSE IF( .NOT.nounit .AND. .NOT.lsame( diag, 'U' ) ) THEN
317 info = -3
318 ELSE IF( .NOT.lsame( normin, 'Y' ) .AND. .NOT.
319 $ lsame( normin, 'N' ) ) THEN
320 info = -4
321 ELSE IF( n.LT.0 ) THEN
322 info = -5
323 ELSE IF( nrhs.LT.0 ) THEN
324 info = -6
325 ELSE IF( lda.LT.max( 1, n ) ) THEN
326 info = -8
327 ELSE IF( ldx.LT.max( 1, n ) ) THEN
328 info = -10
329 ELSE IF( .NOT.lquery .AND. lwork.LT.work( 1 ) ) THEN
330 info = -14
331 END IF
332 IF( info.NE.0 ) THEN
333 CALL xerbla( 'CLATRS3', -info )
334 RETURN
335 ELSE IF( lquery ) THEN
336 RETURN
337 END IF
338*
339* Initialize scaling factors
340*
341 DO kk = 1, nrhs
342 scale( kk ) = one
343 END DO
344*
345* Quick return if possible
346*
347 IF( min( n, nrhs ).EQ.0 )
348 $ RETURN
349*
350* Determine machine dependent constant to control overflow.
351*
352 bignum = slamch( 'Overflow' )
353 smlnum = slamch( 'Safe Minimum' )
354*
355* Use unblocked code for small problems
356*
357 IF( nrhs.LT.nrhsmin ) THEN
358 CALL clatrs( uplo, trans, diag, normin, n, a, lda, x( 1, 1 ),
359 $ scale( 1 ), cnorm, info )
360 DO k = 2, nrhs
361 CALL clatrs( uplo, trans, diag, 'Y', n, a, lda, x( 1, k ),
362 $ scale( k ), cnorm, info )
363 END DO
364 RETURN
365 END IF
366*
367* Compute norms of blocks of A excluding diagonal blocks and find
368* the block with the largest norm TMAX.
369*
370 tmax = zero
371 DO j = 1, nba
372 j1 = (j-1)*nb + 1
373 j2 = min( j*nb, n ) + 1
374 IF ( upper ) THEN
375 ifirst = 1
376 ilast = j - 1
377 ELSE
378 ifirst = j + 1
379 ilast = nba
380 END IF
381 DO i = ifirst, ilast
382 i1 = (i-1)*nb + 1
383 i2 = min( i*nb, n ) + 1
384*
385* Compute upper bound of A( I1:I2-1, J1:J2-1 ).
386*
387 IF( notran ) THEN
388 anrm = clange( 'I', i2-i1, j2-j1, a( i1, j1 ), lda, w )
389 work( awrk + i+(j-1)*nba ) = anrm
390 ELSE
391 anrm = clange( '1', i2-i1, j2-j1, a( i1, j1 ), lda, w )
392 work( awrk + j+(i-1)*nba ) = anrm
393 END IF
394 tmax = max( tmax, anrm )
395 END DO
396 END DO
397*
398 IF( .NOT. tmax.LE.slamch('Overflow') ) THEN
399*
400* Some matrix entries have huge absolute value. At least one upper
401* bound norm( A(I1:I2-1, J1:J2-1), 'I') is not a valid floating-point
402* number, either due to overflow in LANGE or due to Inf in A.
403* Fall back to LATRS. Set normin = 'N' for every right-hand side to
404* force computation of TSCAL in LATRS to avoid the likely overflow
405* in the computation of the column norms CNORM.
406*
407 DO k = 1, nrhs
408 CALL clatrs( uplo, trans, diag, 'N', n, a, lda, x( 1, k ),
409 $ scale( k ), cnorm, info )
410 END DO
411 RETURN
412 END IF
413*
414* Every right-hand side requires workspace to store NBA local scale
415* factors. To save workspace, X is computed successively in block columns
416* of width NBRHS, requiring a total of NBA x NBRHS space. If sufficient
417* workspace is available, larger values of NBRHS or NBRHS = NRHS are viable.
418 DO k = 1, nbx
419* Loop over block columns (index = K) of X and, for column-wise scalings,
420* over individual columns (index = KK).
421* K1: column index of the first column in X( J, K )
422* K2: column index of the first column in X( J, K+1 )
423* so the K2 - K1 is the column count of the block X( J, K )
424 k1 = (k-1)*nbrhs + 1
425 k2 = min( k*nbrhs, nrhs ) + 1
426*
427* Initialize local scaling factors of current block column X( J, K )
428*
429 DO kk = 1, k2-k1
430 DO i = 1, nba
431 work( i+kk*lds ) = one
432 END DO
433 END DO
434*
435 IF( notran ) THEN
436*
437* Solve A * X(:, K1:K2-1) = B * diag(scale(K1:K2-1))
438*
439 IF( upper ) THEN
440 jfirst = nba
441 jlast = 1
442 jinc = -1
443 ELSE
444 jfirst = 1
445 jlast = nba
446 jinc = 1
447 END IF
448 ELSE
449*
450* Solve op(A) * X(:, K1:K2-1) = B * diag(scale(K1:K2-1))
451* where op(A) = A**T or op(A) = A**H
452*
453 IF( upper ) THEN
454 jfirst = 1
455 jlast = nba
456 jinc = 1
457 ELSE
458 jfirst = nba
459 jlast = 1
460 jinc = -1
461 END IF
462 END IF
463
464 DO j = jfirst, jlast, jinc
465* J1: row index of the first row in A( J, J )
466* J2: row index of the first row in A( J+1, J+1 )
467* so that J2 - J1 is the row count of the block A( J, J )
468 j1 = (j-1)*nb + 1
469 j2 = min( j*nb, n ) + 1
470*
471* Solve op(A( J, J )) * X( J, RHS ) = SCALOC * B( J, RHS )
472*
473 DO kk = 1, k2-k1
474 rhs = k1 + kk - 1
475 IF( kk.EQ.1 ) THEN
476 CALL clatrs( uplo, trans, diag, 'N', j2-j1,
477 $ a( j1, j1 ), lda, x( j1, rhs ),
478 $ scaloc, cnorm, info )
479 ELSE
480 CALL clatrs( uplo, trans, diag, 'Y', j2-j1,
481 $ a( j1, j1 ), lda, x( j1, rhs ),
482 $ scaloc, cnorm, info )
483 END IF
484* Find largest absolute value entry in the vector segment
485* X( J1:J2-1, RHS ) as an upper bound for the worst case
486* growth in the linear updates.
487 xnrm( kk ) = clange( 'I', j2-j1, 1, x( j1, rhs ),
488 $ ldx, w )
489*
490 IF( scaloc .EQ. zero ) THEN
491* LATRS found that A is singular through A(j,j) = 0.
492* Reset the computation x(1:n) = 0, x(j) = 1, SCALE = 0
493* and compute op(A)*x = 0. Note that X(J1:J2-1, KK) is
494* set by LATRS.
495 scale( rhs ) = zero
496 DO ii = 1, j1-1
497 x( ii, kk ) = czero
498 END DO
499 DO ii = j2, n
500 x( ii, kk ) = czero
501 END DO
502* Discard the local scale factors.
503 DO ii = 1, nba
504 work( ii+kk*lds ) = one
505 END DO
506 scaloc = one
507 ELSE IF( scaloc*work( j+kk*lds ) .EQ. zero ) THEN
508* LATRS computed a valid scale factor, but combined with
509* the current scaling the solution does not have a
510* scale factor > 0.
511*
512* Set WORK( J+KK*LDS ) to smallest valid scale
513* factor and increase SCALOC accordingly.
514 scal = work( j+kk*lds ) / smlnum
515 scaloc = scaloc * scal
516 work( j+kk*lds ) = smlnum
517* If LATRS overestimated the growth, x may be
518* rescaled to preserve a valid combined scale
519* factor WORK( J, KK ) > 0.
520 rscal = one / scaloc
521 IF( xnrm( kk )*rscal .LE. bignum ) THEN
522 xnrm( kk ) = xnrm( kk ) * rscal
523 CALL csscal( j2-j1, rscal, x( j1, rhs ), 1 )
524 scaloc = one
525 ELSE
526* The system op(A) * x = b is badly scaled and its
527* solution cannot be represented as (1/scale) * x.
528* Set x to zero. This approach deviates from LATRS
529* where a completely meaningless non-zero vector
530* is returned that is not a solution to op(A) * x = b.
531 scale( rhs ) = zero
532 DO ii = 1, n
533 x( ii, kk ) = czero
534 END DO
535* Discard the local scale factors.
536 DO ii = 1, nba
537 work( ii+kk*lds ) = one
538 END DO
539 scaloc = one
540 END IF
541 END IF
542 scaloc = scaloc * work( j+kk*lds )
543 work( j+kk*lds ) = scaloc
544 END DO
545*
546* Linear block updates
547*
548 IF( notran ) THEN
549 IF( upper ) THEN
550 ifirst = j - 1
551 ilast = 1
552 iinc = -1
553 ELSE
554 ifirst = j + 1
555 ilast = nba
556 iinc = 1
557 END IF
558 ELSE
559 IF( upper ) THEN
560 ifirst = j + 1
561 ilast = nba
562 iinc = 1
563 ELSE
564 ifirst = j - 1
565 ilast = 1
566 iinc = -1
567 END IF
568 END IF
569*
570 DO i = ifirst, ilast, iinc
571* I1: row index of the first column in X( I, K )
572* I2: row index of the first column in X( I+1, K )
573* so the I2 - I1 is the row count of the block X( I, K )
574 i1 = (i-1)*nb + 1
575 i2 = min( i*nb, n ) + 1
576*
577* Prepare the linear update to be executed with GEMM.
578* For each column, compute a consistent scaling, a
579* scaling factor to survive the linear update, and
580* rescale the column segments, if necessary. Then
581* the linear update is safely executed.
582*
583 DO kk = 1, k2-k1
584 rhs = k1 + kk - 1
585* Compute consistent scaling
586 scamin = min( work( i+kk*lds), work( j+kk*lds ) )
587*
588* Compute scaling factor to survive the linear update
589* simulating consistent scaling.
590*
591 bnrm = clange( 'I', i2-i1, 1, x( i1, rhs ), ldx, w )
592 bnrm = bnrm*( scamin / work( i+kk*lds ) )
593 xnrm( kk ) = xnrm( kk )*( scamin / work( j+kk*lds) )
594 anrm = work( awrk + i+(j-1)*nba )
595 scaloc = slarmm( anrm, xnrm( kk ), bnrm )
596*
597* Simultaneously apply the robust update factor and the
598* consistency scaling factor to X( I, KK ) and X( J, KK ).
599*
600 scal = ( scamin / work( i+kk*lds) )*scaloc
601 IF( scal.NE.one ) THEN
602 CALL csscal( i2-i1, scal, x( i1, rhs ), 1 )
603 work( i+kk*lds ) = scamin*scaloc
604 END IF
605*
606 scal = ( scamin / work( j+kk*lds ) )*scaloc
607 IF( scal.NE.one ) THEN
608 CALL csscal( j2-j1, scal, x( j1, rhs ), 1 )
609 work( j+kk*lds ) = scamin*scaloc
610 END IF
611 END DO
612*
613 IF( notran ) THEN
614*
615* B( I, K ) := B( I, K ) - A( I, J ) * X( J, K )
616*
617 CALL cgemm( 'N', 'N', i2-i1, k2-k1, j2-j1, -cone,
618 $ a( i1, j1 ), lda, x( j1, k1 ), ldx,
619 $ cone, x( i1, k1 ), ldx )
620 ELSE IF( lsame( trans, 'T' ) ) THEN
621*
622* B( I, K ) := B( I, K ) - A( I, J )**T * X( J, K )
623*
624 CALL cgemm( 'T', 'N', i2-i1, k2-k1, j2-j1, -cone,
625 $ a( j1, i1 ), lda, x( j1, k1 ), ldx,
626 $ cone, x( i1, k1 ), ldx )
627 ELSE
628*
629* B( I, K ) := B( I, K ) - A( I, J )**H * X( J, K )
630*
631 CALL cgemm( 'C', 'N', i2-i1, k2-k1, j2-j1, -cone,
632 $ a( j1, i1 ), lda, x( j1, k1 ), ldx,
633 $ cone, x( i1, k1 ), ldx )
634 END IF
635 END DO
636 END DO
637*
638* Reduce local scaling factors
639*
640 DO kk = 1, k2-k1
641 rhs = k1 + kk - 1
642 DO i = 1, nba
643 scale( rhs ) = min( scale( rhs ), work( i+kk*lds ) )
644 END DO
645 END DO
646*
647* Realize consistent scaling
648*
649 DO kk = 1, k2-k1
650 rhs = k1 + kk - 1
651 IF( scale( rhs ).NE.one .AND. scale( rhs ).NE. zero ) THEN
652 DO i = 1, nba
653 i1 = (i-1)*nb + 1
654 i2 = min( i*nb, n ) + 1
655 scal = scale( rhs ) / work( i+kk*lds )
656 IF( scal.NE.one )
657 $ CALL csscal( i2-i1, scal, x( i1, rhs ), 1 )
658 END DO
659 END IF
660 END DO
661 END DO
662 RETURN
663*
664* End of CLATRS3
665*
666 END
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine cgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
CGEMM
Definition cgemm.f:188
subroutine clatrs3(uplo, trans, diag, normin, n, nrhs, a, lda, x, ldx, scale, cnorm, work, lwork, info)
CLATRS3 solves a triangular system of equations with the scale factors set to prevent overflow.
Definition clatrs3.f:230
subroutine clatrs(uplo, trans, diag, normin, n, a, lda, x, scale, cnorm, info)
CLATRS solves a triangular system of equations with the scale factor set to prevent overflow.
Definition clatrs.f:239
subroutine csscal(n, sa, cx, incx)
CSSCAL
Definition csscal.f:78