LAPACK 3.12.1
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
ztrsyl3.f
Go to the documentation of this file.
1*> \brief \b ZTRSYL3
2*
3* Definition:
4* ===========
5*
6* SUBROUTINE ZTRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB,
7* C, LDC, SCALE, SWORK, LDSWORK, INFO )
8*
9* .. Scalar Arguments ..
10* CHARACTER TRANA, TRANB
11* INTEGER INFO, ISGN, LDA, LDB, LDC, LDSWORK, M, N
12* DOUBLE PRECISION SCALE
13* ..
14* .. Array Arguments ..
15* COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * )
16* DOUBLE PRECISION SWORK( LDSWORK, * )
17* ..
18*
19*
20*> \par Purpose:
21* =============
22*>
23*> \verbatim
24*>
25*> ZTRSYL3 solves the complex Sylvester matrix equation:
26*>
27*> op(A)*X + X*op(B) = scale*C or
28*> op(A)*X - X*op(B) = scale*C,
29*>
30*> where op(A) = A or A**H, and A and B are both upper triangular. A is
31*> M-by-M and B is N-by-N; the right hand side C and the solution X are
32*> M-by-N; and scale is an output scale factor, set <= 1 to avoid
33*> overflow in X.
34*>
35*> This is the block version of the algorithm.
36*> \endverbatim
37*
38* Arguments:
39* ==========
40*
41*> \param[in] TRANA
42*> \verbatim
43*> TRANA is CHARACTER*1
44*> Specifies the option op(A):
45*> = 'N': op(A) = A (No transpose)
46*> = 'C': op(A) = A**H (Conjugate transpose)
47*> \endverbatim
48*>
49*> \param[in] TRANB
50*> \verbatim
51*> TRANB is CHARACTER*1
52*> Specifies the option op(B):
53*> = 'N': op(B) = B (No transpose)
54*> = 'C': op(B) = B**H (Conjugate transpose)
55*> \endverbatim
56*>
57*> \param[in] ISGN
58*> \verbatim
59*> ISGN is INTEGER
60*> Specifies the sign in the equation:
61*> = +1: solve op(A)*X + X*op(B) = scale*C
62*> = -1: solve op(A)*X - X*op(B) = scale*C
63*> \endverbatim
64*>
65*> \param[in] M
66*> \verbatim
67*> M is INTEGER
68*> The order of the matrix A, and the number of rows in the
69*> matrices X and C. M >= 0.
70*> \endverbatim
71*>
72*> \param[in] N
73*> \verbatim
74*> N is INTEGER
75*> The order of the matrix B, and the number of columns in the
76*> matrices X and C. N >= 0.
77*> \endverbatim
78*>
79*> \param[in] A
80*> \verbatim
81*> A is COMPLEX*16 array, dimension (LDA,M)
82*> The upper triangular matrix A.
83*> \endverbatim
84*>
85*> \param[in] LDA
86*> \verbatim
87*> LDA is INTEGER
88*> The leading dimension of the array A. LDA >= max(1,M).
89*> \endverbatim
90*>
91*> \param[in] B
92*> \verbatim
93*> B is COMPLEX*16 array, dimension (LDB,N)
94*> The upper triangular matrix B.
95*> \endverbatim
96*>
97*> \param[in] LDB
98*> \verbatim
99*> LDB is INTEGER
100*> The leading dimension of the array B. LDB >= max(1,N).
101*> \endverbatim
102*>
103*> \param[in,out] C
104*> \verbatim
105*> C is COMPLEX*16 array, dimension (LDC,N)
106*> On entry, the M-by-N right hand side matrix C.
107*> On exit, C is overwritten by the solution matrix X.
108*> \endverbatim
109*>
110*> \param[in] LDC
111*> \verbatim
112*> LDC is INTEGER
113*> The leading dimension of the array C. LDC >= max(1,M)
114*> \endverbatim
115*>
116*> \param[out] SCALE
117*> \verbatim
118*> SCALE is DOUBLE PRECISION
119*> The scale factor, scale, set <= 1 to avoid overflow in X.
120*> \endverbatim
121*>
122*> \param[out] SWORK
123*> \verbatim
124*> SWORK is DOUBLE PRECISION array, dimension (MAX(2, ROWS),
125*> MAX(1,COLS)).
126*> On exit, if INFO = 0, SWORK(1) returns the optimal value ROWS
127*> and SWORK(2) returns the optimal COLS.
128*> \endverbatim
129*>
130*> \param[in] LDSWORK
131*> \verbatim
132*> LDSWORK is INTEGER
133*> LDSWORK >= MAX(2,ROWS), where ROWS = ((M + NB - 1) / NB + 1)
134*> and NB is the optimal block size.
135*>
136*> If LDSWORK = -1, then a workspace query is assumed; the routine
137*> only calculates the optimal dimensions of the SWORK matrix,
138*> returns these values as the first and second entry of the SWORK
139*> matrix, and no error message related LWORK is issued by XERBLA.
140*> \endverbatim
141*>
142*> \param[out] INFO
143*> \verbatim
144*> INFO is INTEGER
145*> = 0: successful exit
146*> < 0: if INFO = -i, the i-th argument had an illegal value
147*> = 1: A and B have common or very close eigenvalues; perturbed
148*> values were used to solve the equation (but the matrices
149*> A and B are unchanged).
150*> \endverbatim
151*
152*> \ingroup trsyl3
153*
154* =====================================================================
155* References:
156* E. S. Quintana-Orti and R. A. Van De Geijn (2003). Formal derivation of
157* algorithms: The triangular Sylvester equation, ACM Transactions
158* on Mathematical Software (TOMS), volume 29, pages 218--243.
159*
160* A. Schwarz and C. C. Kjelgaard Mikkelsen (2020). Robust Task-Parallel
161* Solution of the Triangular Sylvester Equation. Lecture Notes in
162* Computer Science, vol 12043, pages 82--92, Springer.
163*
164* Contributor:
165* Angelika Schwarz, Umea University, Sweden.
166*
167* =====================================================================
168 SUBROUTINE ztrsyl3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB,
169 $ C, LDC, SCALE, SWORK, LDSWORK, INFO )
170 IMPLICIT NONE
171*
172* .. Scalar Arguments ..
173 CHARACTER TRANA, TRANB
174 INTEGER INFO, ISGN, LDA, LDB, LDC, LDSWORK, M, N
175 DOUBLE PRECISION SCALE
176* ..
177* .. Array Arguments ..
178 COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * )
179 DOUBLE PRECISION SWORK( LDSWORK, * )
180* ..
181* .. Parameters ..
182 DOUBLE PRECISION ZERO, ONE
183 parameter( zero = 0.0d0, one = 1.0d0 )
184 COMPLEX*16 CONE
185 parameter( cone = ( 1.0d0, 0.0d0 ) )
186* ..
187* .. Local Scalars ..
188 LOGICAL NOTRNA, NOTRNB, LQUERY
189 INTEGER AWRK, BWRK, I, I1, I2, IINFO, J, J1, J2, JJ,
190 $ k, k1, k2, l, l1, l2, ll, nba, nb, nbb
191 DOUBLE PRECISION ANRM, BIGNUM, BNRM, CNRM, SCAL, SCALOC,
192 $ scamin, sgn, xnrm, buf, smlnum
193 COMPLEX*16 CSGN
194* ..
195* .. Local Arrays ..
196 DOUBLE PRECISION WNRM( MAX( M, N ) )
197* ..
198* .. External Functions ..
199 LOGICAL LSAME
200 INTEGER ILAENV
201 DOUBLE PRECISION DLAMCH, DLARMM, ZLANGE
202 EXTERNAL dlamch, dlarmm, ilaenv, lsame,
203 $ zlange
204* ..
205* .. External Subroutines ..
206 EXTERNAL xerbla, zdscal, zgemm, zlascl,
207 $ ztrsyl
208* ..
209* .. Intrinsic Functions ..
210 INTRINSIC abs, dble, dimag, exponent, max, min
211* ..
212* .. Executable Statements ..
213*
214* Decode and Test input parameters
215*
216 notrna = lsame( trana, 'N' )
217 notrnb = lsame( tranb, 'N' )
218*
219* Use the same block size for all matrices.
220*
221 nb = max( 8, ilaenv( 1, 'ZTRSYL', '', m, n, -1, -1) )
222*
223* Compute number of blocks in A and B
224*
225 nba = max( 1, (m + nb - 1) / nb )
226 nbb = max( 1, (n + nb - 1) / nb )
227*
228* Compute workspace
229*
230 info = 0
231 lquery = ( ldswork.EQ.-1 )
232 IF( lquery ) THEN
233 ldswork = 2
234 swork(1,1) = max( nba, nbb )
235 swork(2,1) = 2 * nbb + nba
236 END IF
237*
238* Test the input arguments
239*
240 IF( .NOT.notrna .AND. .NOT. lsame( trana, 'C' ) ) THEN
241 info = -1
242 ELSE IF( .NOT.notrnb .AND. .NOT. lsame( tranb, 'C' ) ) THEN
243 info = -2
244 ELSE IF( isgn.NE.1 .AND. isgn.NE.-1 ) THEN
245 info = -3
246 ELSE IF( m.LT.0 ) THEN
247 info = -4
248 ELSE IF( n.LT.0 ) THEN
249 info = -5
250 ELSE IF( lda.LT.max( 1, m ) ) THEN
251 info = -7
252 ELSE IF( ldb.LT.max( 1, n ) ) THEN
253 info = -9
254 ELSE IF( ldc.LT.max( 1, m ) ) THEN
255 info = -11
256 END IF
257 IF( info.NE.0 ) THEN
258 CALL xerbla( 'ZTRSYL3', -info )
259 RETURN
260 ELSE IF( lquery ) THEN
261 RETURN
262 END IF
263*
264* Quick return if possible
265*
266 scale = one
267 IF( m.EQ.0 .OR. n.EQ.0 )
268 $ RETURN
269*
270* Use unblocked code for small problems or if insufficient
271* workspace is provided
272*
273 IF( min( nba, nbb ).EQ.1 .OR. ldswork.LT.max( nba, nbb ) ) THEN
274 CALL ztrsyl( trana, tranb, isgn, m, n, a, lda, b, ldb,
275 $ c, ldc, scale, info )
276 RETURN
277 END IF
278*
279* Set constants to control overflow
280*
281 smlnum = dlamch( 'S' )
282 bignum = one / smlnum
283*
284* Set local scaling factors.
285*
286 DO l = 1, nbb
287 DO k = 1, nba
288 swork( k, l ) = one
289 END DO
290 END DO
291*
292* Fallback scaling factor to prevent flushing of SWORK( K, L ) to zero.
293* This scaling is to ensure compatibility with TRSYL and may get flushed.
294*
295 buf = one
296*
297* Compute upper bounds of blocks of A and B
298*
299 awrk = nbb
300 DO k = 1, nba
301 k1 = (k - 1) * nb + 1
302 k2 = min( k * nb, m ) + 1
303 DO l = k, nba
304 l1 = (l - 1) * nb + 1
305 l2 = min( l * nb, m ) + 1
306 IF( notrna ) THEN
307 swork( k, awrk + l ) = zlange( 'I', k2-k1, l2-l1,
308 $ a( k1, l1 ), lda, wnrm )
309 ELSE
310 swork( l, awrk + k ) = zlange( '1', k2-k1, l2-l1,
311 $ a( k1, l1 ), lda, wnrm )
312 END IF
313 END DO
314 END DO
315 bwrk = nbb + nba
316 DO k = 1, nbb
317 k1 = (k - 1) * nb + 1
318 k2 = min( k * nb, n ) + 1
319 DO l = k, nbb
320 l1 = (l - 1) * nb + 1
321 l2 = min( l * nb, n ) + 1
322 IF( notrnb ) THEN
323 swork( k, bwrk + l ) = zlange( 'I', k2-k1, l2-l1,
324 $ b( k1, l1 ), ldb, wnrm )
325 ELSE
326 swork( l, bwrk + k ) = zlange( '1', k2-k1, l2-l1,
327 $ b( k1, l1 ), ldb, wnrm )
328 END IF
329 END DO
330 END DO
331*
332 sgn = dble( isgn )
333 csgn = dcmplx( sgn, zero )
334*
335 IF( notrna .AND. notrnb ) THEN
336*
337* Solve A*X + ISGN*X*B = scale*C.
338*
339* The (K,L)th block of X is determined starting from
340* bottom-left corner column by column by
341*
342* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
343*
344* Where
345* M L-1
346* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)].
347* I=K+1 J=1
348*
349* Start loop over block rows (index = K) and block columns (index = L)
350*
351 DO k = nba, 1, -1
352*
353* K1: row index of the first row in X( K, L )
354* K2: row index of the first row in X( K+1, L )
355* so the K2 - K1 is the column count of the block X( K, L )
356*
357 k1 = (k - 1) * nb + 1
358 k2 = min( k * nb, m ) + 1
359 DO l = 1, nbb
360*
361* L1: column index of the first column in X( K, L )
362* L2: column index of the first column in X( K, L + 1)
363* so that L2 - L1 is the row count of the block X( K, L )
364*
365 l1 = (l - 1) * nb + 1
366 l2 = min( l * nb, n ) + 1
367*
368 CALL ztrsyl( trana, tranb, isgn, k2-k1, l2-l1,
369 $ a( k1, k1 ), lda,
370 $ b( l1, l1 ), ldb,
371 $ c( k1, l1 ), ldc, scaloc, iinfo )
372 info = max( info, iinfo )
373*
374 IF( scaloc * swork( k, l ) .EQ. zero ) THEN
375 IF( scaloc .EQ. zero ) THEN
376* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)
377* is larger than the product of BIGNUM**2 and cannot be
378* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).
379* Mark the computation as pointless.
380 buf = zero
381 ELSE
382 buf = buf*2.d0**exponent( scaloc )
383 END IF
384 DO jj = 1, nbb
385 DO ll = 1, nba
386* Bound by BIGNUM to not introduce Inf. The value
387* is irrelevant; corresponding entries of the
388* solution will be flushed in consistency scaling.
389 swork( ll, jj ) = min( bignum,
390 $ swork( ll, jj ) / 2.d0**exponent( scaloc ) )
391 END DO
392 END DO
393 END IF
394 swork( k, l ) = scaloc * swork( k, l )
395 xnrm = zlange( 'I', k2-k1, l2-l1, c( k1, l1 ), ldc,
396 $ wnrm )
397*
398 DO i = k - 1, 1, -1
399*
400* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L )
401*
402 i1 = (i - 1) * nb + 1
403 i2 = min( i * nb, m ) + 1
404*
405* Compute scaling factor to survive the linear update
406* simulating consistent scaling.
407*
408 cnrm = zlange( 'I', i2-i1, l2-l1, c( i1, l1 ),
409 $ ldc, wnrm )
410 scamin = min( swork( i, l ), swork( k, l ) )
411 cnrm = cnrm * ( scamin / swork( i, l ) )
412 xnrm = xnrm * ( scamin / swork( k, l ) )
413 anrm = swork( i, awrk + k )
414 scaloc = dlarmm( anrm, xnrm, cnrm )
415 IF( scaloc * scamin .EQ. zero ) THEN
416* Use second scaling factor to prevent flushing to zero.
417 buf = buf*2.d0**exponent( scaloc )
418 DO jj = 1, nbb
419 DO ll = 1, nba
420 swork( ll, jj ) = min( bignum,
421 $ swork( ll, jj ) / 2.d0**exponent( scaloc ) )
422 END DO
423 END DO
424 scamin = scamin / 2.d0**exponent( scaloc )
425 scaloc = scaloc / 2.d0**exponent( scaloc )
426 END IF
427 cnrm = cnrm * scaloc
428 xnrm = xnrm * scaloc
429*
430* Simultaneously apply the robust update factor and the
431* consistency scaling factor to C( I, L ) and C( K, L ).
432*
433 scal = ( scamin / swork( k, l ) ) * scaloc
434 IF( scal .NE. one ) THEN
435 DO jj = l1, l2-1
436 CALL zdscal( k2-k1, scal, c( k1, jj ), 1)
437 END DO
438 ENDIF
439*
440 scal = ( scamin / swork( i, l ) ) * scaloc
441 IF( scal .NE. one ) THEN
442 DO ll = l1, l2-1
443 CALL zdscal( i2-i1, scal, c( i1, ll ), 1)
444 END DO
445 ENDIF
446*
447* Record current scaling factor
448*
449 swork( k, l ) = scamin * scaloc
450 swork( i, l ) = scamin * scaloc
451*
452 CALL zgemm( 'N', 'N', i2-i1, l2-l1, k2-k1, -cone,
453 $ a( i1, k1 ), lda, c( k1, l1 ), ldc,
454 $ cone, c( i1, l1 ), ldc )
455*
456 END DO
457*
458 DO j = l + 1, nbb
459*
460* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J )
461*
462 j1 = (j - 1) * nb + 1
463 j2 = min( j * nb, n ) + 1
464*
465* Compute scaling factor to survive the linear update
466* simulating consistent scaling.
467*
468 cnrm = zlange( 'I', k2-k1, j2-j1, c( k1, j1 ),
469 $ ldc, wnrm )
470 scamin = min( swork( k, j ), swork( k, l ) )
471 cnrm = cnrm * ( scamin / swork( k, j ) )
472 xnrm = xnrm * ( scamin / swork( k, l ) )
473 bnrm = swork(l, bwrk + j)
474 scaloc = dlarmm( bnrm, xnrm, cnrm )
475 IF( scaloc * scamin .EQ. zero ) THEN
476* Use second scaling factor to prevent flushing to zero.
477 buf = buf*2.d0**exponent( scaloc )
478 DO jj = 1, nbb
479 DO ll = 1, nba
480 swork( ll, jj ) = min( bignum,
481 $ swork( ll, jj ) / 2.d0**exponent( scaloc ) )
482 END DO
483 END DO
484 scamin = scamin / 2.d0**exponent( scaloc )
485 scaloc = scaloc / 2.d0**exponent( scaloc )
486 END IF
487 cnrm = cnrm * scaloc
488 xnrm = xnrm * scaloc
489*
490* Simultaneously apply the robust update factor and the
491* consistency scaling factor to C( K, J ) and C( K, L).
492*
493 scal = ( scamin / swork( k, l ) ) * scaloc
494 IF( scal .NE. one ) THEN
495 DO ll = l1, l2-1
496 CALL zdscal( k2-k1, scal, c( k1, ll ), 1 )
497 END DO
498 ENDIF
499*
500 scal = ( scamin / swork( k, j ) ) * scaloc
501 IF( scal .NE. one ) THEN
502 DO jj = j1, j2-1
503 CALL zdscal( k2-k1, scal, c( k1, jj ), 1 )
504 END DO
505 ENDIF
506*
507* Record current scaling factor
508*
509 swork( k, l ) = scamin * scaloc
510 swork( k, j ) = scamin * scaloc
511*
512 CALL zgemm( 'N', 'N', k2-k1, j2-j1, l2-l1, -csgn,
513 $ c( k1, l1 ), ldc, b( l1, j1 ), ldb,
514 $ cone, c( k1, j1 ), ldc )
515 END DO
516 END DO
517 END DO
518 ELSE IF( .NOT.notrna .AND. notrnb ) THEN
519*
520* Solve A**H *X + ISGN*X*B = scale*C.
521*
522* The (K,L)th block of X is determined starting from
523* upper-left corner column by column by
524*
525* A(K,K)**H*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
526*
527* Where
528* K-1 L-1
529* R(K,L) = SUM [A(I,K)**H*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)]
530* I=1 J=1
531*
532* Start loop over block rows (index = K) and block columns (index = L)
533*
534 DO k = 1, nba
535*
536* K1: row index of the first row in X( K, L )
537* K2: row index of the first row in X( K+1, L )
538* so the K2 - K1 is the column count of the block X( K, L )
539*
540 k1 = (k - 1) * nb + 1
541 k2 = min( k * nb, m ) + 1
542 DO l = 1, nbb
543*
544* L1: column index of the first column in X( K, L )
545* L2: column index of the first column in X( K, L + 1)
546* so that L2 - L1 is the row count of the block X( K, L )
547*
548 l1 = (l - 1) * nb + 1
549 l2 = min( l * nb, n ) + 1
550*
551 CALL ztrsyl( trana, tranb, isgn, k2-k1, l2-l1,
552 $ a( k1, k1 ), lda,
553 $ b( l1, l1 ), ldb,
554 $ c( k1, l1 ), ldc, scaloc, iinfo )
555 info = max( info, iinfo )
556*
557 IF( scaloc * swork( k, l ) .EQ. zero ) THEN
558 IF( scaloc .EQ. zero ) THEN
559* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)
560* is larger than the product of BIGNUM**2 and cannot be
561* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).
562* Mark the computation as pointless.
563 buf = zero
564 ELSE
565* Use second scaling factor to prevent flushing to zero.
566 buf = buf*2.d0**exponent( scaloc )
567 END IF
568 DO jj = 1, nbb
569 DO ll = 1, nba
570* Bound by BIGNUM to not introduce Inf. The value
571* is irrelevant; corresponding entries of the
572* solution will be flushed in consistency scaling.
573 swork( ll, jj ) = min( bignum,
574 $ swork( ll, jj ) / 2.d0**exponent( scaloc ) )
575 END DO
576 END DO
577 END IF
578 swork( k, l ) = scaloc * swork( k, l )
579 xnrm = zlange( 'I', k2-k1, l2-l1, c( k1, l1 ), ldc,
580 $ wnrm )
581*
582 DO i = k + 1, nba
583*
584* C( I, L ) := C( I, L ) - A( K, I )**H * C( K, L )
585*
586 i1 = (i - 1) * nb + 1
587 i2 = min( i * nb, m ) + 1
588*
589* Compute scaling factor to survive the linear update
590* simulating consistent scaling.
591*
592 cnrm = zlange( 'I', i2-i1, l2-l1, c( i1, l1 ),
593 $ ldc, wnrm )
594 scamin = min( swork( i, l ), swork( k, l ) )
595 cnrm = cnrm * ( scamin / swork( i, l ) )
596 xnrm = xnrm * ( scamin / swork( k, l ) )
597 anrm = swork( i, awrk + k )
598 scaloc = dlarmm( anrm, xnrm, cnrm )
599 IF( scaloc * scamin .EQ. zero ) THEN
600* Use second scaling factor to prevent flushing to zero.
601 buf = buf*2.d0**exponent( scaloc )
602 DO jj = 1, nbb
603 DO ll = 1, nba
604 swork( ll, jj ) = min( bignum,
605 $ swork( ll, jj ) / 2.d0**exponent( scaloc ) )
606 END DO
607 END DO
608 scamin = scamin / 2.d0**exponent( scaloc )
609 scaloc = scaloc / 2.d0**exponent( scaloc )
610 END IF
611 cnrm = cnrm * scaloc
612 xnrm = xnrm * scaloc
613*
614* Simultaneously apply the robust update factor and the
615* consistency scaling factor to to C( I, L ) and C( K, L).
616*
617 scal = ( scamin / swork( k, l ) ) * scaloc
618 IF( scal .NE. one ) THEN
619 DO ll = l1, l2-1
620 CALL zdscal( k2-k1, scal, c( k1, ll ), 1 )
621 END DO
622 ENDIF
623*
624 scal = ( scamin / swork( i, l ) ) * scaloc
625 IF( scal .NE. one ) THEN
626 DO ll = l1, l2-1
627 CALL zdscal( i2-i1, scal, c( i1, ll ), 1 )
628 END DO
629 ENDIF
630*
631* Record current scaling factor
632*
633 swork( k, l ) = scamin * scaloc
634 swork( i, l ) = scamin * scaloc
635*
636 CALL zgemm( 'C', 'N', i2-i1, l2-l1, k2-k1, -cone,
637 $ a( k1, i1 ), lda, c( k1, l1 ), ldc,
638 $ cone, c( i1, l1 ), ldc )
639 END DO
640*
641 DO j = l + 1, nbb
642*
643* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J )
644*
645 j1 = (j - 1) * nb + 1
646 j2 = min( j * nb, n ) + 1
647*
648* Compute scaling factor to survive the linear update
649* simulating consistent scaling.
650*
651 cnrm = zlange( 'I', k2-k1, j2-j1, c( k1, j1 ),
652 $ ldc, wnrm )
653 scamin = min( swork( k, j ), swork( k, l ) )
654 cnrm = cnrm * ( scamin / swork( k, j ) )
655 xnrm = xnrm * ( scamin / swork( k, l ) )
656 bnrm = swork( l, bwrk + j )
657 scaloc = dlarmm( bnrm, xnrm, cnrm )
658 IF( scaloc * scamin .EQ. zero ) THEN
659* Use second scaling factor to prevent flushing to zero.
660 buf = buf*2.d0**exponent( scaloc )
661 DO jj = 1, nbb
662 DO ll = 1, nba
663 swork( ll, jj ) = min( bignum,
664 $ swork( ll, jj ) / 2.d0**exponent( scaloc ) )
665 END DO
666 END DO
667 scamin = scamin / 2.d0**exponent( scaloc )
668 scaloc = scaloc / 2.d0**exponent( scaloc )
669 END IF
670 cnrm = cnrm * scaloc
671 xnrm = xnrm * scaloc
672*
673* Simultaneously apply the robust update factor and the
674* consistency scaling factor to to C( K, J ) and C( K, L).
675*
676 scal = ( scamin / swork( k, l ) ) * scaloc
677 IF( scal .NE. one ) THEN
678 DO ll = l1, l2-1
679 CALL zdscal( k2-k1, scal, c( k1, ll ), 1 )
680 END DO
681 ENDIF
682*
683 scal = ( scamin / swork( k, j ) ) * scaloc
684 IF( scal .NE. one ) THEN
685 DO jj = j1, j2-1
686 CALL zdscal( k2-k1, scal, c( k1, jj ), 1 )
687 END DO
688 ENDIF
689*
690* Record current scaling factor
691*
692 swork( k, l ) = scamin * scaloc
693 swork( k, j ) = scamin * scaloc
694*
695 CALL zgemm( 'N', 'N', k2-k1, j2-j1, l2-l1, -csgn,
696 $ c( k1, l1 ), ldc, b( l1, j1 ), ldb,
697 $ cone, c( k1, j1 ), ldc )
698 END DO
699 END DO
700 END DO
701 ELSE IF( .NOT.notrna .AND. .NOT.notrnb ) THEN
702*
703* Solve A**H *X + ISGN*X*B**H = scale*C.
704*
705* The (K,L)th block of X is determined starting from
706* top-right corner column by column by
707*
708* A(K,K)**H*X(K,L) + ISGN*X(K,L)*B(L,L)**H = C(K,L) - R(K,L)
709*
710* Where
711* K-1 N
712* R(K,L) = SUM [A(I,K)**H*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**H].
713* I=1 J=L+1
714*
715* Start loop over block rows (index = K) and block columns (index = L)
716*
717 DO k = 1, nba
718*
719* K1: row index of the first row in X( K, L )
720* K2: row index of the first row in X( K+1, L )
721* so the K2 - K1 is the column count of the block X( K, L )
722*
723 k1 = (k - 1) * nb + 1
724 k2 = min( k * nb, m ) + 1
725 DO l = nbb, 1, -1
726*
727* L1: column index of the first column in X( K, L )
728* L2: column index of the first column in X( K, L + 1)
729* so that L2 - L1 is the row count of the block X( K, L )
730*
731 l1 = (l - 1) * nb + 1
732 l2 = min( l * nb, n ) + 1
733*
734 CALL ztrsyl( trana, tranb, isgn, k2-k1, l2-l1,
735 $ a( k1, k1 ), lda,
736 $ b( l1, l1 ), ldb,
737 $ c( k1, l1 ), ldc, scaloc, iinfo )
738 info = max( info, iinfo )
739*
740 IF( scaloc * swork( k, l ) .EQ. zero ) THEN
741 IF( scaloc .EQ. zero ) THEN
742* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)
743* is larger than the product of BIGNUM**2 and cannot be
744* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).
745* Mark the computation as pointless.
746 buf = zero
747 ELSE
748* Use second scaling factor to prevent flushing to zero.
749 buf = buf*2.d0**exponent( scaloc )
750 END IF
751 DO jj = 1, nbb
752 DO ll = 1, nba
753* Bound by BIGNUM to not introduce Inf. The value
754* is irrelevant; corresponding entries of the
755* solution will be flushed in consistency scaling.
756 swork( ll, jj ) = min( bignum,
757 $ swork( ll, jj ) / 2.d0**exponent( scaloc ) )
758 END DO
759 END DO
760 END IF
761 swork( k, l ) = scaloc * swork( k, l )
762 xnrm = zlange( 'I', k2-k1, l2-l1, c( k1, l1 ), ldc,
763 $ wnrm )
764*
765 DO i = k + 1, nba
766*
767* C( I, L ) := C( I, L ) - A( K, I )**H * C( K, L )
768*
769 i1 = (i - 1) * nb + 1
770 i2 = min( i * nb, m ) + 1
771*
772* Compute scaling factor to survive the linear update
773* simulating consistent scaling.
774*
775 cnrm = zlange( 'I', i2-i1, l2-l1, c( i1, l1 ),
776 $ ldc, wnrm )
777 scamin = min( swork( i, l ), swork( k, l ) )
778 cnrm = cnrm * ( scamin / swork( i, l ) )
779 xnrm = xnrm * ( scamin / swork( k, l ) )
780 anrm = swork( i, awrk + k )
781 scaloc = dlarmm( anrm, xnrm, cnrm )
782 IF( scaloc * scamin .EQ. zero ) THEN
783* Use second scaling factor to prevent flushing to zero.
784 buf = buf*2.d0**exponent( scaloc )
785 DO jj = 1, nbb
786 DO ll = 1, nba
787 swork( ll, jj ) = min( bignum,
788 $ swork( ll, jj ) / 2.d0**exponent( scaloc ) )
789 END DO
790 END DO
791 scamin = scamin / 2.d0**exponent( scaloc )
792 scaloc = scaloc / 2.d0**exponent( scaloc )
793 END IF
794 cnrm = cnrm * scaloc
795 xnrm = xnrm * scaloc
796*
797* Simultaneously apply the robust update factor and the
798* consistency scaling factor to C( I, L ) and C( K, L).
799*
800 scal = ( scamin / swork( k, l ) ) * scaloc
801 IF( scal .NE. one ) THEN
802 DO ll = l1, l2-1
803 CALL zdscal( k2-k1, scal, c( k1, ll ), 1 )
804 END DO
805 ENDIF
806*
807 scal = ( scamin / swork( i, l ) ) * scaloc
808 IF( scal .NE. one ) THEN
809 DO ll = l1, l2-1
810 CALL zdscal( i2-i1, scal, c( i1, ll ), 1 )
811 END DO
812 ENDIF
813*
814* Record current scaling factor
815*
816 swork( k, l ) = scamin * scaloc
817 swork( i, l ) = scamin * scaloc
818*
819 CALL zgemm( 'C', 'N', i2-i1, l2-l1, k2-k1, -cone,
820 $ a( k1, i1 ), lda, c( k1, l1 ), ldc,
821 $ cone, c( i1, l1 ), ldc )
822 END DO
823*
824 DO j = 1, l - 1
825*
826* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**H
827*
828 j1 = (j - 1) * nb + 1
829 j2 = min( j * nb, n ) + 1
830*
831* Compute scaling factor to survive the linear update
832* simulating consistent scaling.
833*
834 cnrm = zlange( 'I', k2-k1, j2-j1, c( k1, j1 ),
835 $ ldc, wnrm )
836 scamin = min( swork( k, j ), swork( k, l ) )
837 cnrm = cnrm * ( scamin / swork( k, j ) )
838 xnrm = xnrm * ( scamin / swork( k, l ) )
839 bnrm = swork( l, bwrk + j )
840 scaloc = dlarmm( bnrm, xnrm, cnrm )
841 IF( scaloc * scamin .EQ. zero ) THEN
842* Use second scaling factor to prevent flushing to zero.
843 buf = buf*2.d0**exponent( scaloc )
844 DO jj = 1, nbb
845 DO ll = 1, nba
846 swork( ll, jj ) = min( bignum,
847 $ swork( ll, jj ) / 2.d0**exponent( scaloc ) )
848 END DO
849 END DO
850 scamin = scamin / 2.d0**exponent( scaloc )
851 scaloc = scaloc / 2.d0**exponent( scaloc )
852 END IF
853 cnrm = cnrm * scaloc
854 xnrm = xnrm * scaloc
855*
856* Simultaneously apply the robust update factor and the
857* consistency scaling factor to C( K, J ) and C( K, L).
858*
859 scal = ( scamin / swork( k, l ) ) * scaloc
860 IF( scal .NE. one ) THEN
861 DO ll = l1, l2-1
862 CALL zdscal( k2-k1, scal, c( k1, ll ), 1)
863 END DO
864 ENDIF
865*
866 scal = ( scamin / swork( k, j ) ) * scaloc
867 IF( scal .NE. one ) THEN
868 DO jj = j1, j2-1
869 CALL zdscal( k2-k1, scal, c( k1, jj ), 1 )
870 END DO
871 ENDIF
872*
873* Record current scaling factor
874*
875 swork( k, l ) = scamin * scaloc
876 swork( k, j ) = scamin * scaloc
877*
878 CALL zgemm( 'N', 'C', k2-k1, j2-j1, l2-l1, -csgn,
879 $ c( k1, l1 ), ldc, b( j1, l1 ), ldb,
880 $ cone, c( k1, j1 ), ldc )
881 END DO
882 END DO
883 END DO
884 ELSE IF( notrna .AND. .NOT.notrnb ) THEN
885*
886* Solve A*X + ISGN*X*B**H = scale*C.
887*
888* The (K,L)th block of X is determined starting from
889* bottom-right corner column by column by
890*
891* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)**H = C(K,L) - R(K,L)
892*
893* Where
894* M N
895* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**H].
896* I=K+1 J=L+1
897*
898* Start loop over block rows (index = K) and block columns (index = L)
899*
900 DO k = nba, 1, -1
901*
902* K1: row index of the first row in X( K, L )
903* K2: row index of the first row in X( K+1, L )
904* so the K2 - K1 is the column count of the block X( K, L )
905*
906 k1 = (k - 1) * nb + 1
907 k2 = min( k * nb, m ) + 1
908 DO l = nbb, 1, -1
909*
910* L1: column index of the first column in X( K, L )
911* L2: column index of the first column in X( K, L + 1)
912* so that L2 - L1 is the row count of the block X( K, L )
913*
914 l1 = (l - 1) * nb + 1
915 l2 = min( l * nb, n ) + 1
916*
917 CALL ztrsyl( trana, tranb, isgn, k2-k1, l2-l1,
918 $ a( k1, k1 ), lda,
919 $ b( l1, l1 ), ldb,
920 $ c( k1, l1 ), ldc, scaloc, iinfo )
921 info = max( info, iinfo )
922*
923 IF( scaloc * swork( k, l ) .EQ. zero ) THEN
924 IF( scaloc .EQ. zero ) THEN
925* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)
926* is larger than the product of BIGNUM**2 and cannot be
927* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).
928* Mark the computation as pointless.
929 buf = zero
930 ELSE
931* Use second scaling factor to prevent flushing to zero.
932 buf = buf*2.d0**exponent( scaloc )
933 END IF
934 DO jj = 1, nbb
935 DO ll = 1, nba
936* Bound by BIGNUM to not introduce Inf. The value
937* is irrelevant; corresponding entries of the
938* solution will be flushed in consistency scaling.
939 swork( ll, jj ) = min( bignum,
940 $ swork( ll, jj ) / 2.d0**exponent( scaloc ) )
941 END DO
942 END DO
943 END IF
944 swork( k, l ) = scaloc * swork( k, l )
945 xnrm = zlange( 'I', k2-k1, l2-l1, c( k1, l1 ), ldc,
946 $ wnrm )
947*
948 DO i = 1, k - 1
949*
950* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L )
951*
952 i1 = (i - 1) * nb + 1
953 i2 = min( i * nb, m ) + 1
954*
955* Compute scaling factor to survive the linear update
956* simulating consistent scaling.
957*
958 cnrm = zlange( 'I', i2-i1, l2-l1, c( i1, l1 ),
959 $ ldc, wnrm )
960 scamin = min( swork( i, l ), swork( k, l ) )
961 cnrm = cnrm * ( scamin / swork( i, l ) )
962 xnrm = xnrm * ( scamin / swork( k, l ) )
963 anrm = swork( i, awrk + k )
964 scaloc = dlarmm( anrm, xnrm, cnrm )
965 IF( scaloc * scamin .EQ. zero ) THEN
966* Use second scaling factor to prevent flushing to zero.
967 buf = buf*2.d0**exponent( scaloc )
968 DO jj = 1, nbb
969 DO ll = 1, nba
970 swork( ll, jj ) = min( bignum,
971 $ swork( ll, jj ) / 2.d0**exponent( scaloc ) )
972 END DO
973 END DO
974 scamin = scamin / 2.d0**exponent( scaloc )
975 scaloc = scaloc / 2.d0**exponent( scaloc )
976 END IF
977 cnrm = cnrm * scaloc
978 xnrm = xnrm * scaloc
979*
980* Simultaneously apply the robust update factor and the
981* consistency scaling factor to C( I, L ) and C( K, L).
982*
983 scal = ( scamin / swork( k, l ) ) * scaloc
984 IF( scal .NE. one ) THEN
985 DO ll = l1, l2-1
986 CALL zdscal( k2-k1, scal, c( k1, ll ), 1 )
987 END DO
988 ENDIF
989*
990 scal = ( scamin / swork( i, l ) ) * scaloc
991 IF( scal .NE. one ) THEN
992 DO ll = l1, l2-1
993 CALL zdscal( i2-i1, scal, c( i1, ll ), 1 )
994 END DO
995 ENDIF
996*
997* Record current scaling factor
998*
999 swork( k, l ) = scamin * scaloc
1000 swork( i, l ) = scamin * scaloc
1001*
1002 CALL zgemm( 'N', 'N', i2-i1, l2-l1, k2-k1, -cone,
1003 $ a( i1, k1 ), lda, c( k1, l1 ), ldc,
1004 $ cone, c( i1, l1 ), ldc )
1005*
1006 END DO
1007*
1008 DO j = 1, l - 1
1009*
1010* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**H
1011*
1012 j1 = (j - 1) * nb + 1
1013 j2 = min( j * nb, n ) + 1
1014*
1015* Compute scaling factor to survive the linear update
1016* simulating consistent scaling.
1017*
1018 cnrm = zlange( 'I', k2-k1, j2-j1, c( k1, j1 ),
1019 $ ldc, wnrm )
1020 scamin = min( swork( k, j ), swork( k, l ) )
1021 cnrm = cnrm * ( scamin / swork( k, j ) )
1022 xnrm = xnrm * ( scamin / swork( k, l ) )
1023 bnrm = swork( l, bwrk + j )
1024 scaloc = dlarmm( bnrm, xnrm, cnrm )
1025 IF( scaloc * scamin .EQ. zero ) THEN
1026* Use second scaling factor to prevent flushing to zero.
1027 buf = buf*2.d0**exponent( scaloc )
1028 DO jj = 1, nbb
1029 DO ll = 1, nba
1030 swork( ll, jj ) = min( bignum,
1031 $ swork( ll, jj ) / 2.d0**exponent( scaloc ) )
1032 END DO
1033 END DO
1034 scamin = scamin / 2.d0**exponent( scaloc )
1035 scaloc = scaloc / 2.d0**exponent( scaloc )
1036 END IF
1037 cnrm = cnrm * scaloc
1038 xnrm = xnrm * scaloc
1039*
1040* Simultaneously apply the robust update factor and the
1041* consistency scaling factor to C( K, J ) and C( K, L).
1042*
1043 scal = ( scamin / swork( k, l ) ) * scaloc
1044 IF( scal .NE. one ) THEN
1045 DO jj = l1, l2-1
1046 CALL zdscal( k2-k1, scal, c( k1, jj ), 1 )
1047 END DO
1048 ENDIF
1049*
1050 scal = ( scamin / swork( k, j ) ) * scaloc
1051 IF( scal .NE. one ) THEN
1052 DO jj = j1, j2-1
1053 CALL zdscal( k2-k1, scal, c( k1, jj ), 1 )
1054 END DO
1055 ENDIF
1056*
1057* Record current scaling factor
1058*
1059 swork( k, l ) = scamin * scaloc
1060 swork( k, j ) = scamin * scaloc
1061*
1062 CALL zgemm( 'N', 'C', k2-k1, j2-j1, l2-l1, -csgn,
1063 $ c( k1, l1 ), ldc, b( j1, l1 ), ldb,
1064 $ cone, c( k1, j1 ), ldc )
1065 END DO
1066 END DO
1067 END DO
1068*
1069 END IF
1070*
1071* Reduce local scaling factors
1072*
1073 scale = swork( 1, 1 )
1074 DO k = 1, nba
1075 DO l = 1, nbb
1076 scale = min( scale, swork( k, l ) )
1077 END DO
1078 END DO
1079 IF( scale .EQ. zero ) THEN
1080*
1081* The magnitude of the largest entry of the solution is larger
1082* than the product of BIGNUM**2 and cannot be represented in the
1083* form (1/SCALE)*X if SCALE is DOUBLE PRECISION. Set SCALE to
1084* zero and give up.
1085*
1086 swork(1,1) = max( nba, nbb )
1087 swork(2,1) = 2 * nbb + nba
1088 RETURN
1089 END IF
1090*
1091* Realize consistent scaling
1092*
1093 DO k = 1, nba
1094 k1 = (k - 1) * nb + 1
1095 k2 = min( k * nb, m ) + 1
1096 DO l = 1, nbb
1097 l1 = (l - 1) * nb + 1
1098 l2 = min( l * nb, n ) + 1
1099 scal = scale / swork( k, l )
1100 IF( scal .NE. one ) THEN
1101 DO ll = l1, l2-1
1102 CALL zdscal( k2-k1, scal, c( k1, ll ), 1 )
1103 END DO
1104 ENDIF
1105 END DO
1106 END DO
1107*
1108 IF( buf .NE. one .AND. buf.GT.zero ) THEN
1109*
1110* Decrease SCALE as much as possible.
1111*
1112 scaloc = min( scale / smlnum, one / buf )
1113 buf = buf * scaloc
1114 scale = scale / scaloc
1115 END IF
1116*
1117 IF( buf.NE.one .AND. buf.GT.zero ) THEN
1118*
1119* In case of overly aggressive scaling during the computation,
1120* flushing of the global scale factor may be prevented by
1121* undoing some of the scaling. This step is to ensure that
1122* this routine flushes only scale factors that TRSYL also
1123* flushes and be usable as a drop-in replacement.
1124*
1125* How much can the normwise largest entry be upscaled?
1126*
1127 scal = max( abs( dble( c( 1, 1 ) ) ),
1128 $ abs( dimag( c( 1, 1 ) ) ) )
1129 DO k = 1, m
1130 DO l = 1, n
1131 scal = max( scal, abs( dble( c( k, l ) ) ),
1132 $ abs( dimag( c( k, l ) ) ) )
1133 END DO
1134 END DO
1135*
1136* Increase BUF as close to 1 as possible and apply scaling.
1137*
1138 scaloc = min( bignum / scal, one / buf )
1139 buf = buf * scaloc
1140 CALL zlascl( 'G', -1, -1, one, scaloc, m, n, c, ldc, iinfo )
1141 END IF
1142*
1143* Combine with buffer scaling factor. SCALE will be flushed if
1144* BUF is less than one here.
1145*
1146 scale = scale * buf
1147*
1148* Restore workspace dimensions
1149*
1150 swork(1,1) = max( nba, nbb )
1151 swork(2,1) = 2 * nbb + nba
1152*
1153 RETURN
1154*
1155* End of ZTRSYL3
1156*
1157 END
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine zgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
ZGEMM
Definition zgemm.f:188
subroutine zlascl(type, kl, ku, cfrom, cto, m, n, a, lda, info)
ZLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition zlascl.f:142
subroutine zdscal(n, da, zx, incx)
ZDSCAL
Definition zdscal.f:78
subroutine ztrsyl3(trana, tranb, isgn, m, n, a, lda, b, ldb, c, ldc, scale, swork, ldswork, info)
ZTRSYL3
Definition ztrsyl3.f:170
subroutine ztrsyl(trana, tranb, isgn, m, n, a, lda, b, ldb, c, ldc, scale, info)
ZTRSYL
Definition ztrsyl.f:155