LAPACK 3.12.0 LAPACK: Linear Algebra PACKage
Searching...
No Matches
ztrsyl3.f
Go to the documentation of this file.
1*> \brief \b ZTRSYL3
2*
3* Definition:
4* ===========
5*
6*
7*> \par Purpose
8* =============
9*>
10*> \verbatim
11*>
12*> ZTRSYL3 solves the complex Sylvester matrix equation:
13*>
14*> op(A)*X + X*op(B) = scale*C or
15*> op(A)*X - X*op(B) = scale*C,
16*>
17*> where op(A) = A or A**H, and A and B are both upper triangular. A is
18*> M-by-M and B is N-by-N; the right hand side C and the solution X are
19*> M-by-N; and scale is an output scale factor, set <= 1 to avoid
20*> overflow in X.
21*>
22*> This is the block version of the algorithm.
23*> \endverbatim
24*
25* Arguments
26* =========
27*
28*> \param[in] TRANA
29*> \verbatim
30*> TRANA is CHARACTER*1
31*> Specifies the option op(A):
32*> = 'N': op(A) = A (No transpose)
33*> = 'C': op(A) = A**H (Conjugate transpose)
34*> \endverbatim
35*>
36*> \param[in] TRANB
37*> \verbatim
38*> TRANB is CHARACTER*1
39*> Specifies the option op(B):
40*> = 'N': op(B) = B (No transpose)
41*> = 'C': op(B) = B**H (Conjugate transpose)
42*> \endverbatim
43*>
44*> \param[in] ISGN
45*> \verbatim
46*> ISGN is INTEGER
48*> = +1: solve op(A)*X + X*op(B) = scale*C
49*> = -1: solve op(A)*X - X*op(B) = scale*C
50*> \endverbatim
51*>
52*> \param[in] M
53*> \verbatim
54*> M is INTEGER
55*> The order of the matrix A, and the number of rows in the
56*> matrices X and C. M >= 0.
57*> \endverbatim
58*>
59*> \param[in] N
60*> \verbatim
61*> N is INTEGER
62*> The order of the matrix B, and the number of columns in the
63*> matrices X and C. N >= 0.
64*> \endverbatim
65*>
66*> \param[in] A
67*> \verbatim
68*> A is COMPLEX*16 array, dimension (LDA,M)
69*> The upper triangular matrix A.
70*> \endverbatim
71*>
72*> \param[in] LDA
73*> \verbatim
74*> LDA is INTEGER
75*> The leading dimension of the array A. LDA >= max(1,M).
76*> \endverbatim
77*>
78*> \param[in] B
79*> \verbatim
80*> B is COMPLEX*16 array, dimension (LDB,N)
81*> The upper triangular matrix B.
82*> \endverbatim
83*>
84*> \param[in] LDB
85*> \verbatim
86*> LDB is INTEGER
87*> The leading dimension of the array B. LDB >= max(1,N).
88*> \endverbatim
89*>
90*> \param[in,out] C
91*> \verbatim
92*> C is COMPLEX*16 array, dimension (LDC,N)
93*> On entry, the M-by-N right hand side matrix C.
94*> On exit, C is overwritten by the solution matrix X.
95*> \endverbatim
96*>
97*> \param[in] LDC
98*> \verbatim
99*> LDC is INTEGER
100*> The leading dimension of the array C. LDC >= max(1,M)
101*> \endverbatim
102*>
103*> \param[out] SCALE
104*> \verbatim
105*> SCALE is DOUBLE PRECISION
106*> The scale factor, scale, set <= 1 to avoid overflow in X.
107*> \endverbatim
108*>
109*> \param[out] SWORK
110*> \verbatim
111*> SWORK is DOUBLE PRECISION array, dimension (MAX(2, ROWS),
112*> MAX(1,COLS)).
113*> On exit, if INFO = 0, SWORK(1) returns the optimal value ROWS
114*> and SWORK(2) returns the optimal COLS.
115*> \endverbatim
116*>
117*> \param[in] LDSWORK
118*> \verbatim
119*> LDSWORK is INTEGER
120*> LDSWORK >= MAX(2,ROWS), where ROWS = ((M + NB - 1) / NB + 1)
121*> and NB is the optimal block size.
122*>
123*> If LDSWORK = -1, then a workspace query is assumed; the routine
124*> only calculates the optimal dimensions of the SWORK matrix,
125*> returns these values as the first and second entry of the SWORK
126*> matrix, and no error message related LWORK is issued by XERBLA.
127*> \endverbatim
128*>
129*> \param[out] INFO
130*> \verbatim
131*> INFO is INTEGER
132*> = 0: successful exit
133*> < 0: if INFO = -i, the i-th argument had an illegal value
134*> = 1: A and B have common or very close eigenvalues; perturbed
135*> values were used to solve the equation (but the matrices
136*> A and B are unchanged).
137*> \endverbatim
138*
139*> \ingroup trsyl3
140*
141* =====================================================================
142* References:
143* E. S. Quintana-Orti and R. A. Van De Geijn (2003). Formal derivation of
144* algorithms: The triangular Sylvester equation, ACM Transactions
145* on Mathematical Software (TOMS), volume 29, pages 218--243.
146*
147* A. Schwarz and C. C. Kjelgaard Mikkelsen (2020). Robust Task-Parallel
148* Solution of the Triangular Sylvester Equation. Lecture Notes in
149* Computer Science, vol 12043, pages 82--92, Springer.
150*
151* Contributor:
152* Angelika Schwarz, Umea University, Sweden.
153*
154* =====================================================================
155 SUBROUTINE ztrsyl3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C,
156 \$ LDC, SCALE, SWORK, LDSWORK, INFO )
157 IMPLICIT NONE
158*
159* .. Scalar Arguments ..
160 CHARACTER TRANA, TRANB
161 INTEGER INFO, ISGN, LDA, LDB, LDC, LDSWORK, M, N
162 DOUBLE PRECISION SCALE
163* ..
164* .. Array Arguments ..
165 COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * )
166 DOUBLE PRECISION SWORK( LDSWORK, * )
167* ..
168* .. Parameters ..
169 DOUBLE PRECISION ZERO, ONE
170 parameter( zero = 0.0d0, one = 1.0d0 )
171 COMPLEX*16 CONE
172 parameter( cone = ( 1.0d0, 0.0d0 ) )
173* ..
174* .. Local Scalars ..
175 LOGICAL NOTRNA, NOTRNB, LQUERY
176 INTEGER AWRK, BWRK, I, I1, I2, IINFO, J, J1, J2, JJ,
177 \$ k, k1, k2, l, l1, l2, ll, nba, nb, nbb
178 DOUBLE PRECISION ANRM, BIGNUM, BNRM, CNRM, SCAL, SCALOC,
179 \$ scamin, sgn, xnrm, buf, smlnum
180 COMPLEX*16 CSGN
181* ..
182* .. Local Arrays ..
183 DOUBLE PRECISION WNRM( MAX( M, N ) )
184* ..
185* .. External Functions ..
186 LOGICAL LSAME
187 INTEGER ILAENV
188 DOUBLE PRECISION DLAMCH, DLARMM, ZLANGE
189 EXTERNAL dlamch, dlarmm, ilaenv, lsame, zlange
190* ..
191* .. External Subroutines ..
192 EXTERNAL xerbla, zdscal, zgemm, zlascl, ztrsyl
193* ..
194* .. Intrinsic Functions ..
195 INTRINSIC abs, dble, dimag, exponent, max, min
196* ..
197* .. Executable Statements ..
198*
199* Decode and Test input parameters
200*
201 notrna = lsame( trana, 'N' )
202 notrnb = lsame( tranb, 'N' )
203*
204* Use the same block size for all matrices.
205*
206 nb = max( 8, ilaenv( 1, 'ZTRSYL', '', m, n, -1, -1) )
207*
208* Compute number of blocks in A and B
209*
210 nba = max( 1, (m + nb - 1) / nb )
211 nbb = max( 1, (n + nb - 1) / nb )
212*
213* Compute workspace
214*
215 info = 0
216 lquery = ( ldswork.EQ.-1 )
217 IF( lquery ) THEN
218 ldswork = 2
219 swork(1,1) = max( nba, nbb )
220 swork(2,1) = 2 * nbb + nba
221 END IF
222*
223* Test the input arguments
224*
225 IF( .NOT.notrna .AND. .NOT. lsame( trana, 'C' ) ) THEN
226 info = -1
227 ELSE IF( .NOT.notrnb .AND. .NOT. lsame( tranb, 'C' ) ) THEN
228 info = -2
229 ELSE IF( isgn.NE.1 .AND. isgn.NE.-1 ) THEN
230 info = -3
231 ELSE IF( m.LT.0 ) THEN
232 info = -4
233 ELSE IF( n.LT.0 ) THEN
234 info = -5
235 ELSE IF( lda.LT.max( 1, m ) ) THEN
236 info = -7
237 ELSE IF( ldb.LT.max( 1, n ) ) THEN
238 info = -9
239 ELSE IF( ldc.LT.max( 1, m ) ) THEN
240 info = -11
241 END IF
242 IF( info.NE.0 ) THEN
243 CALL xerbla( 'ZTRSYL3', -info )
244 RETURN
245 ELSE IF( lquery ) THEN
246 RETURN
247 END IF
248*
249* Quick return if possible
250*
251 scale = one
252 IF( m.EQ.0 .OR. n.EQ.0 )
253 \$ RETURN
254*
255* Use unblocked code for small problems or if insufficient
256* workspace is provided
257*
258 IF( min( nba, nbb ).EQ.1 .OR. ldswork.LT.max( nba, nbb ) ) THEN
259 CALL ztrsyl( trana, tranb, isgn, m, n, a, lda, b, ldb,
260 \$ c, ldc, scale, info )
261 RETURN
262 END IF
263*
264* Set constants to control overflow
265*
266 smlnum = dlamch( 'S' )
267 bignum = one / smlnum
268*
269* Set local scaling factors.
270*
271 DO l = 1, nbb
272 DO k = 1, nba
273 swork( k, l ) = one
274 END DO
275 END DO
276*
277* Fallback scaling factor to prevent flushing of SWORK( K, L ) to zero.
278* This scaling is to ensure compatibility with TRSYL and may get flushed.
279*
280 buf = one
281*
282* Compute upper bounds of blocks of A and B
283*
284 awrk = nbb
285 DO k = 1, nba
286 k1 = (k - 1) * nb + 1
287 k2 = min( k * nb, m ) + 1
288 DO l = k, nba
289 l1 = (l - 1) * nb + 1
290 l2 = min( l * nb, m ) + 1
291 IF( notrna ) THEN
292 swork( k, awrk + l ) = zlange( 'I', k2-k1, l2-l1,
293 \$ a( k1, l1 ), lda, wnrm )
294 ELSE
295 swork( l, awrk + k ) = zlange( '1', k2-k1, l2-l1,
296 \$ a( k1, l1 ), lda, wnrm )
297 END IF
298 END DO
299 END DO
300 bwrk = nbb + nba
301 DO k = 1, nbb
302 k1 = (k - 1) * nb + 1
303 k2 = min( k * nb, n ) + 1
304 DO l = k, nbb
305 l1 = (l - 1) * nb + 1
306 l2 = min( l * nb, n ) + 1
307 IF( notrnb ) THEN
308 swork( k, bwrk + l ) = zlange( 'I', k2-k1, l2-l1,
309 \$ b( k1, l1 ), ldb, wnrm )
310 ELSE
311 swork( l, bwrk + k ) = zlange( '1', k2-k1, l2-l1,
312 \$ b( k1, l1 ), ldb, wnrm )
313 END IF
314 END DO
315 END DO
316*
317 sgn = dble( isgn )
318 csgn = dcmplx( sgn, zero )
319*
320 IF( notrna .AND. notrnb ) THEN
321*
322* Solve A*X + ISGN*X*B = scale*C.
323*
324* The (K,L)th block of X is determined starting from
325* bottom-left corner column by column by
326*
327* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
328*
329* Where
330* M L-1
331* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)].
332* I=K+1 J=1
333*
334* Start loop over block rows (index = K) and block columns (index = L)
335*
336 DO k = nba, 1, -1
337*
338* K1: row index of the first row in X( K, L )
339* K2: row index of the first row in X( K+1, L )
340* so the K2 - K1 is the column count of the block X( K, L )
341*
342 k1 = (k - 1) * nb + 1
343 k2 = min( k * nb, m ) + 1
344 DO l = 1, nbb
345*
346* L1: column index of the first column in X( K, L )
347* L2: column index of the first column in X( K, L + 1)
348* so that L2 - L1 is the row count of the block X( K, L )
349*
350 l1 = (l - 1) * nb + 1
351 l2 = min( l * nb, n ) + 1
352*
353 CALL ztrsyl( trana, tranb, isgn, k2-k1, l2-l1,
354 \$ a( k1, k1 ), lda,
355 \$ b( l1, l1 ), ldb,
356 \$ c( k1, l1 ), ldc, scaloc, iinfo )
357 info = max( info, iinfo )
358*
359 IF( scaloc * swork( k, l ) .EQ. zero ) THEN
360 IF( scaloc .EQ. zero ) THEN
361* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)
362* is larger than the product of BIGNUM**2 and cannot be
363* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).
364* Mark the computation as pointless.
365 buf = zero
366 ELSE
367 buf = buf*2.d0**exponent( scaloc )
368 END IF
369 DO jj = 1, nbb
370 DO ll = 1, nba
371* Bound by BIGNUM to not introduce Inf. The value
372* is irrelevant; corresponding entries of the
373* solution will be flushed in consistency scaling.
374 swork( ll, jj ) = min( bignum,
375 \$ swork( ll, jj ) / 2.d0**exponent( scaloc ) )
376 END DO
377 END DO
378 END IF
379 swork( k, l ) = scaloc * swork( k, l )
380 xnrm = zlange( 'I', k2-k1, l2-l1, c( k1, l1 ), ldc,
381 \$ wnrm )
382*
383 DO i = k - 1, 1, -1
384*
385* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L )
386*
387 i1 = (i - 1) * nb + 1
388 i2 = min( i * nb, m ) + 1
389*
390* Compute scaling factor to survive the linear update
391* simulating consistent scaling.
392*
393 cnrm = zlange( 'I', i2-i1, l2-l1, c( i1, l1 ),
394 \$ ldc, wnrm )
395 scamin = min( swork( i, l ), swork( k, l ) )
396 cnrm = cnrm * ( scamin / swork( i, l ) )
397 xnrm = xnrm * ( scamin / swork( k, l ) )
398 anrm = swork( i, awrk + k )
399 scaloc = dlarmm( anrm, xnrm, cnrm )
400 IF( scaloc * scamin .EQ. zero ) THEN
401* Use second scaling factor to prevent flushing to zero.
402 buf = buf*2.d0**exponent( scaloc )
403 DO jj = 1, nbb
404 DO ll = 1, nba
405 swork( ll, jj ) = min( bignum,
406 \$ swork( ll, jj ) / 2.d0**exponent( scaloc ) )
407 END DO
408 END DO
409 scamin = scamin / 2.d0**exponent( scaloc )
410 scaloc = scaloc / 2.d0**exponent( scaloc )
411 END IF
412 cnrm = cnrm * scaloc
413 xnrm = xnrm * scaloc
414*
415* Simultaneously apply the robust update factor and the
416* consistency scaling factor to C( I, L ) and C( K, L ).
417*
418 scal = ( scamin / swork( k, l ) ) * scaloc
419 IF( scal .NE. one ) THEN
420 DO jj = l1, l2-1
421 CALL zdscal( k2-k1, scal, c( k1, jj ), 1)
422 END DO
423 ENDIF
424*
425 scal = ( scamin / swork( i, l ) ) * scaloc
426 IF( scal .NE. one ) THEN
427 DO ll = l1, l2-1
428 CALL zdscal( i2-i1, scal, c( i1, ll ), 1)
429 END DO
430 ENDIF
431*
432* Record current scaling factor
433*
434 swork( k, l ) = scamin * scaloc
435 swork( i, l ) = scamin * scaloc
436*
437 CALL zgemm( 'N', 'N', i2-i1, l2-l1, k2-k1, -cone,
438 \$ a( i1, k1 ), lda, c( k1, l1 ), ldc,
439 \$ cone, c( i1, l1 ), ldc )
440*
441 END DO
442*
443 DO j = l + 1, nbb
444*
445* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J )
446*
447 j1 = (j - 1) * nb + 1
448 j2 = min( j * nb, n ) + 1
449*
450* Compute scaling factor to survive the linear update
451* simulating consistent scaling.
452*
453 cnrm = zlange( 'I', k2-k1, j2-j1, c( k1, j1 ),
454 \$ ldc, wnrm )
455 scamin = min( swork( k, j ), swork( k, l ) )
456 cnrm = cnrm * ( scamin / swork( k, j ) )
457 xnrm = xnrm * ( scamin / swork( k, l ) )
458 bnrm = swork(l, bwrk + j)
459 scaloc = dlarmm( bnrm, xnrm, cnrm )
460 IF( scaloc * scamin .EQ. zero ) THEN
461* Use second scaling factor to prevent flushing to zero.
462 buf = buf*2.d0**exponent( scaloc )
463 DO jj = 1, nbb
464 DO ll = 1, nba
465 swork( ll, jj ) = min( bignum,
466 \$ swork( ll, jj ) / 2.d0**exponent( scaloc ) )
467 END DO
468 END DO
469 scamin = scamin / 2.d0**exponent( scaloc )
470 scaloc = scaloc / 2.d0**exponent( scaloc )
471 END IF
472 cnrm = cnrm * scaloc
473 xnrm = xnrm * scaloc
474*
475* Simultaneously apply the robust update factor and the
476* consistency scaling factor to C( K, J ) and C( K, L).
477*
478 scal = ( scamin / swork( k, l ) ) * scaloc
479 IF( scal .NE. one ) THEN
480 DO ll = l1, l2-1
481 CALL zdscal( k2-k1, scal, c( k1, ll ), 1 )
482 END DO
483 ENDIF
484*
485 scal = ( scamin / swork( k, j ) ) * scaloc
486 IF( scal .NE. one ) THEN
487 DO jj = j1, j2-1
488 CALL zdscal( k2-k1, scal, c( k1, jj ), 1 )
489 END DO
490 ENDIF
491*
492* Record current scaling factor
493*
494 swork( k, l ) = scamin * scaloc
495 swork( k, j ) = scamin * scaloc
496*
497 CALL zgemm( 'N', 'N', k2-k1, j2-j1, l2-l1, -csgn,
498 \$ c( k1, l1 ), ldc, b( l1, j1 ), ldb,
499 \$ cone, c( k1, j1 ), ldc )
500 END DO
501 END DO
502 END DO
503 ELSE IF( .NOT.notrna .AND. notrnb ) THEN
504*
505* Solve A**H *X + ISGN*X*B = scale*C.
506*
507* The (K,L)th block of X is determined starting from
508* upper-left corner column by column by
509*
510* A(K,K)**H*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
511*
512* Where
513* K-1 L-1
514* R(K,L) = SUM [A(I,K)**H*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)]
515* I=1 J=1
516*
517* Start loop over block rows (index = K) and block columns (index = L)
518*
519 DO k = 1, nba
520*
521* K1: row index of the first row in X( K, L )
522* K2: row index of the first row in X( K+1, L )
523* so the K2 - K1 is the column count of the block X( K, L )
524*
525 k1 = (k - 1) * nb + 1
526 k2 = min( k * nb, m ) + 1
527 DO l = 1, nbb
528*
529* L1: column index of the first column in X( K, L )
530* L2: column index of the first column in X( K, L + 1)
531* so that L2 - L1 is the row count of the block X( K, L )
532*
533 l1 = (l - 1) * nb + 1
534 l2 = min( l * nb, n ) + 1
535*
536 CALL ztrsyl( trana, tranb, isgn, k2-k1, l2-l1,
537 \$ a( k1, k1 ), lda,
538 \$ b( l1, l1 ), ldb,
539 \$ c( k1, l1 ), ldc, scaloc, iinfo )
540 info = max( info, iinfo )
541*
542 IF( scaloc * swork( k, l ) .EQ. zero ) THEN
543 IF( scaloc .EQ. zero ) THEN
544* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)
545* is larger than the product of BIGNUM**2 and cannot be
546* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).
547* Mark the computation as pointless.
548 buf = zero
549 ELSE
550* Use second scaling factor to prevent flushing to zero.
551 buf = buf*2.d0**exponent( scaloc )
552 END IF
553 DO jj = 1, nbb
554 DO ll = 1, nba
555* Bound by BIGNUM to not introduce Inf. The value
556* is irrelevant; corresponding entries of the
557* solution will be flushed in consistency scaling.
558 swork( ll, jj ) = min( bignum,
559 \$ swork( ll, jj ) / 2.d0**exponent( scaloc ) )
560 END DO
561 END DO
562 END IF
563 swork( k, l ) = scaloc * swork( k, l )
564 xnrm = zlange( 'I', k2-k1, l2-l1, c( k1, l1 ), ldc,
565 \$ wnrm )
566*
567 DO i = k + 1, nba
568*
569* C( I, L ) := C( I, L ) - A( K, I )**H * C( K, L )
570*
571 i1 = (i - 1) * nb + 1
572 i2 = min( i * nb, m ) + 1
573*
574* Compute scaling factor to survive the linear update
575* simulating consistent scaling.
576*
577 cnrm = zlange( 'I', i2-i1, l2-l1, c( i1, l1 ),
578 \$ ldc, wnrm )
579 scamin = min( swork( i, l ), swork( k, l ) )
580 cnrm = cnrm * ( scamin / swork( i, l ) )
581 xnrm = xnrm * ( scamin / swork( k, l ) )
582 anrm = swork( i, awrk + k )
583 scaloc = dlarmm( anrm, xnrm, cnrm )
584 IF( scaloc * scamin .EQ. zero ) THEN
585* Use second scaling factor to prevent flushing to zero.
586 buf = buf*2.d0**exponent( scaloc )
587 DO jj = 1, nbb
588 DO ll = 1, nba
589 swork( ll, jj ) = min( bignum,
590 \$ swork( ll, jj ) / 2.d0**exponent( scaloc ) )
591 END DO
592 END DO
593 scamin = scamin / 2.d0**exponent( scaloc )
594 scaloc = scaloc / 2.d0**exponent( scaloc )
595 END IF
596 cnrm = cnrm * scaloc
597 xnrm = xnrm * scaloc
598*
599* Simultaneously apply the robust update factor and the
600* consistency scaling factor to to C( I, L ) and C( K, L).
601*
602 scal = ( scamin / swork( k, l ) ) * scaloc
603 IF( scal .NE. one ) THEN
604 DO ll = l1, l2-1
605 CALL zdscal( k2-k1, scal, c( k1, ll ), 1 )
606 END DO
607 ENDIF
608*
609 scal = ( scamin / swork( i, l ) ) * scaloc
610 IF( scal .NE. one ) THEN
611 DO ll = l1, l2-1
612 CALL zdscal( i2-i1, scal, c( i1, ll ), 1 )
613 END DO
614 ENDIF
615*
616* Record current scaling factor
617*
618 swork( k, l ) = scamin * scaloc
619 swork( i, l ) = scamin * scaloc
620*
621 CALL zgemm( 'C', 'N', i2-i1, l2-l1, k2-k1, -cone,
622 \$ a( k1, i1 ), lda, c( k1, l1 ), ldc,
623 \$ cone, c( i1, l1 ), ldc )
624 END DO
625*
626 DO j = l + 1, nbb
627*
628* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J )
629*
630 j1 = (j - 1) * nb + 1
631 j2 = min( j * nb, n ) + 1
632*
633* Compute scaling factor to survive the linear update
634* simulating consistent scaling.
635*
636 cnrm = zlange( 'I', k2-k1, j2-j1, c( k1, j1 ),
637 \$ ldc, wnrm )
638 scamin = min( swork( k, j ), swork( k, l ) )
639 cnrm = cnrm * ( scamin / swork( k, j ) )
640 xnrm = xnrm * ( scamin / swork( k, l ) )
641 bnrm = swork( l, bwrk + j )
642 scaloc = dlarmm( bnrm, xnrm, cnrm )
643 IF( scaloc * scamin .EQ. zero ) THEN
644* Use second scaling factor to prevent flushing to zero.
645 buf = buf*2.d0**exponent( scaloc )
646 DO jj = 1, nbb
647 DO ll = 1, nba
648 swork( ll, jj ) = min( bignum,
649 \$ swork( ll, jj ) / 2.d0**exponent( scaloc ) )
650 END DO
651 END DO
652 scamin = scamin / 2.d0**exponent( scaloc )
653 scaloc = scaloc / 2.d0**exponent( scaloc )
654 END IF
655 cnrm = cnrm * scaloc
656 xnrm = xnrm * scaloc
657*
658* Simultaneously apply the robust update factor and the
659* consistency scaling factor to to C( K, J ) and C( K, L).
660*
661 scal = ( scamin / swork( k, l ) ) * scaloc
662 IF( scal .NE. one ) THEN
663 DO ll = l1, l2-1
664 CALL zdscal( k2-k1, scal, c( k1, ll ), 1 )
665 END DO
666 ENDIF
667*
668 scal = ( scamin / swork( k, j ) ) * scaloc
669 IF( scal .NE. one ) THEN
670 DO jj = j1, j2-1
671 CALL zdscal( k2-k1, scal, c( k1, jj ), 1 )
672 END DO
673 ENDIF
674*
675* Record current scaling factor
676*
677 swork( k, l ) = scamin * scaloc
678 swork( k, j ) = scamin * scaloc
679*
680 CALL zgemm( 'N', 'N', k2-k1, j2-j1, l2-l1, -csgn,
681 \$ c( k1, l1 ), ldc, b( l1, j1 ), ldb,
682 \$ cone, c( k1, j1 ), ldc )
683 END DO
684 END DO
685 END DO
686 ELSE IF( .NOT.notrna .AND. .NOT.notrnb ) THEN
687*
688* Solve A**H *X + ISGN*X*B**H = scale*C.
689*
690* The (K,L)th block of X is determined starting from
691* top-right corner column by column by
692*
693* A(K,K)**H*X(K,L) + ISGN*X(K,L)*B(L,L)**H = C(K,L) - R(K,L)
694*
695* Where
696* K-1 N
697* R(K,L) = SUM [A(I,K)**H*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**H].
698* I=1 J=L+1
699*
700* Start loop over block rows (index = K) and block columns (index = L)
701*
702 DO k = 1, nba
703*
704* K1: row index of the first row in X( K, L )
705* K2: row index of the first row in X( K+1, L )
706* so the K2 - K1 is the column count of the block X( K, L )
707*
708 k1 = (k - 1) * nb + 1
709 k2 = min( k * nb, m ) + 1
710 DO l = nbb, 1, -1
711*
712* L1: column index of the first column in X( K, L )
713* L2: column index of the first column in X( K, L + 1)
714* so that L2 - L1 is the row count of the block X( K, L )
715*
716 l1 = (l - 1) * nb + 1
717 l2 = min( l * nb, n ) + 1
718*
719 CALL ztrsyl( trana, tranb, isgn, k2-k1, l2-l1,
720 \$ a( k1, k1 ), lda,
721 \$ b( l1, l1 ), ldb,
722 \$ c( k1, l1 ), ldc, scaloc, iinfo )
723 info = max( info, iinfo )
724*
725 IF( scaloc * swork( k, l ) .EQ. zero ) THEN
726 IF( scaloc .EQ. zero ) THEN
727* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)
728* is larger than the product of BIGNUM**2 and cannot be
729* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).
730* Mark the computation as pointless.
731 buf = zero
732 ELSE
733* Use second scaling factor to prevent flushing to zero.
734 buf = buf*2.d0**exponent( scaloc )
735 END IF
736 DO jj = 1, nbb
737 DO ll = 1, nba
738* Bound by BIGNUM to not introduce Inf. The value
739* is irrelevant; corresponding entries of the
740* solution will be flushed in consistency scaling.
741 swork( ll, jj ) = min( bignum,
742 \$ swork( ll, jj ) / 2.d0**exponent( scaloc ) )
743 END DO
744 END DO
745 END IF
746 swork( k, l ) = scaloc * swork( k, l )
747 xnrm = zlange( 'I', k2-k1, l2-l1, c( k1, l1 ), ldc,
748 \$ wnrm )
749*
750 DO i = k + 1, nba
751*
752* C( I, L ) := C( I, L ) - A( K, I )**H * C( K, L )
753*
754 i1 = (i - 1) * nb + 1
755 i2 = min( i * nb, m ) + 1
756*
757* Compute scaling factor to survive the linear update
758* simulating consistent scaling.
759*
760 cnrm = zlange( 'I', i2-i1, l2-l1, c( i1, l1 ),
761 \$ ldc, wnrm )
762 scamin = min( swork( i, l ), swork( k, l ) )
763 cnrm = cnrm * ( scamin / swork( i, l ) )
764 xnrm = xnrm * ( scamin / swork( k, l ) )
765 anrm = swork( i, awrk + k )
766 scaloc = dlarmm( anrm, xnrm, cnrm )
767 IF( scaloc * scamin .EQ. zero ) THEN
768* Use second scaling factor to prevent flushing to zero.
769 buf = buf*2.d0**exponent( scaloc )
770 DO jj = 1, nbb
771 DO ll = 1, nba
772 swork( ll, jj ) = min( bignum,
773 \$ swork( ll, jj ) / 2.d0**exponent( scaloc ) )
774 END DO
775 END DO
776 scamin = scamin / 2.d0**exponent( scaloc )
777 scaloc = scaloc / 2.d0**exponent( scaloc )
778 END IF
779 cnrm = cnrm * scaloc
780 xnrm = xnrm * scaloc
781*
782* Simultaneously apply the robust update factor and the
783* consistency scaling factor to C( I, L ) and C( K, L).
784*
785 scal = ( scamin / swork( k, l ) ) * scaloc
786 IF( scal .NE. one ) THEN
787 DO ll = l1, l2-1
788 CALL zdscal( k2-k1, scal, c( k1, ll ), 1 )
789 END DO
790 ENDIF
791*
792 scal = ( scamin / swork( i, l ) ) * scaloc
793 IF( scal .NE. one ) THEN
794 DO ll = l1, l2-1
795 CALL zdscal( i2-i1, scal, c( i1, ll ), 1 )
796 END DO
797 ENDIF
798*
799* Record current scaling factor
800*
801 swork( k, l ) = scamin * scaloc
802 swork( i, l ) = scamin * scaloc
803*
804 CALL zgemm( 'C', 'N', i2-i1, l2-l1, k2-k1, -cone,
805 \$ a( k1, i1 ), lda, c( k1, l1 ), ldc,
806 \$ cone, c( i1, l1 ), ldc )
807 END DO
808*
809 DO j = 1, l - 1
810*
811* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**H
812*
813 j1 = (j - 1) * nb + 1
814 j2 = min( j * nb, n ) + 1
815*
816* Compute scaling factor to survive the linear update
817* simulating consistent scaling.
818*
819 cnrm = zlange( 'I', k2-k1, j2-j1, c( k1, j1 ),
820 \$ ldc, wnrm )
821 scamin = min( swork( k, j ), swork( k, l ) )
822 cnrm = cnrm * ( scamin / swork( k, j ) )
823 xnrm = xnrm * ( scamin / swork( k, l ) )
824 bnrm = swork( l, bwrk + j )
825 scaloc = dlarmm( bnrm, xnrm, cnrm )
826 IF( scaloc * scamin .EQ. zero ) THEN
827* Use second scaling factor to prevent flushing to zero.
828 buf = buf*2.d0**exponent( scaloc )
829 DO jj = 1, nbb
830 DO ll = 1, nba
831 swork( ll, jj ) = min( bignum,
832 \$ swork( ll, jj ) / 2.d0**exponent( scaloc ) )
833 END DO
834 END DO
835 scamin = scamin / 2.d0**exponent( scaloc )
836 scaloc = scaloc / 2.d0**exponent( scaloc )
837 END IF
838 cnrm = cnrm * scaloc
839 xnrm = xnrm * scaloc
840*
841* Simultaneously apply the robust update factor and the
842* consistency scaling factor to C( K, J ) and C( K, L).
843*
844 scal = ( scamin / swork( k, l ) ) * scaloc
845 IF( scal .NE. one ) THEN
846 DO ll = l1, l2-1
847 CALL zdscal( k2-k1, scal, c( k1, ll ), 1)
848 END DO
849 ENDIF
850*
851 scal = ( scamin / swork( k, j ) ) * scaloc
852 IF( scal .NE. one ) THEN
853 DO jj = j1, j2-1
854 CALL zdscal( k2-k1, scal, c( k1, jj ), 1 )
855 END DO
856 ENDIF
857*
858* Record current scaling factor
859*
860 swork( k, l ) = scamin * scaloc
861 swork( k, j ) = scamin * scaloc
862*
863 CALL zgemm( 'N', 'C', k2-k1, j2-j1, l2-l1, -csgn,
864 \$ c( k1, l1 ), ldc, b( j1, l1 ), ldb,
865 \$ cone, c( k1, j1 ), ldc )
866 END DO
867 END DO
868 END DO
869 ELSE IF( notrna .AND. .NOT.notrnb ) THEN
870*
871* Solve A*X + ISGN*X*B**H = scale*C.
872*
873* The (K,L)th block of X is determined starting from
874* bottom-right corner column by column by
875*
876* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)**H = C(K,L) - R(K,L)
877*
878* Where
879* M N
880* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**H].
881* I=K+1 J=L+1
882*
883* Start loop over block rows (index = K) and block columns (index = L)
884*
885 DO k = nba, 1, -1
886*
887* K1: row index of the first row in X( K, L )
888* K2: row index of the first row in X( K+1, L )
889* so the K2 - K1 is the column count of the block X( K, L )
890*
891 k1 = (k - 1) * nb + 1
892 k2 = min( k * nb, m ) + 1
893 DO l = nbb, 1, -1
894*
895* L1: column index of the first column in X( K, L )
896* L2: column index of the first column in X( K, L + 1)
897* so that L2 - L1 is the row count of the block X( K, L )
898*
899 l1 = (l - 1) * nb + 1
900 l2 = min( l * nb, n ) + 1
901*
902 CALL ztrsyl( trana, tranb, isgn, k2-k1, l2-l1,
903 \$ a( k1, k1 ), lda,
904 \$ b( l1, l1 ), ldb,
905 \$ c( k1, l1 ), ldc, scaloc, iinfo )
906 info = max( info, iinfo )
907*
908 IF( scaloc * swork( k, l ) .EQ. zero ) THEN
909 IF( scaloc .EQ. zero ) THEN
910* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)
911* is larger than the product of BIGNUM**2 and cannot be
912* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).
913* Mark the computation as pointless.
914 buf = zero
915 ELSE
916* Use second scaling factor to prevent flushing to zero.
917 buf = buf*2.d0**exponent( scaloc )
918 END IF
919 DO jj = 1, nbb
920 DO ll = 1, nba
921* Bound by BIGNUM to not introduce Inf. The value
922* is irrelevant; corresponding entries of the
923* solution will be flushed in consistency scaling.
924 swork( ll, jj ) = min( bignum,
925 \$ swork( ll, jj ) / 2.d0**exponent( scaloc ) )
926 END DO
927 END DO
928 END IF
929 swork( k, l ) = scaloc * swork( k, l )
930 xnrm = zlange( 'I', k2-k1, l2-l1, c( k1, l1 ), ldc,
931 \$ wnrm )
932*
933 DO i = 1, k - 1
934*
935* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L )
936*
937 i1 = (i - 1) * nb + 1
938 i2 = min( i * nb, m ) + 1
939*
940* Compute scaling factor to survive the linear update
941* simulating consistent scaling.
942*
943 cnrm = zlange( 'I', i2-i1, l2-l1, c( i1, l1 ),
944 \$ ldc, wnrm )
945 scamin = min( swork( i, l ), swork( k, l ) )
946 cnrm = cnrm * ( scamin / swork( i, l ) )
947 xnrm = xnrm * ( scamin / swork( k, l ) )
948 anrm = swork( i, awrk + k )
949 scaloc = dlarmm( anrm, xnrm, cnrm )
950 IF( scaloc * scamin .EQ. zero ) THEN
951* Use second scaling factor to prevent flushing to zero.
952 buf = buf*2.d0**exponent( scaloc )
953 DO jj = 1, nbb
954 DO ll = 1, nba
955 swork( ll, jj ) = min( bignum,
956 \$ swork( ll, jj ) / 2.d0**exponent( scaloc ) )
957 END DO
958 END DO
959 scamin = scamin / 2.d0**exponent( scaloc )
960 scaloc = scaloc / 2.d0**exponent( scaloc )
961 END IF
962 cnrm = cnrm * scaloc
963 xnrm = xnrm * scaloc
964*
965* Simultaneously apply the robust update factor and the
966* consistency scaling factor to C( I, L ) and C( K, L).
967*
968 scal = ( scamin / swork( k, l ) ) * scaloc
969 IF( scal .NE. one ) THEN
970 DO ll = l1, l2-1
971 CALL zdscal( k2-k1, scal, c( k1, ll ), 1 )
972 END DO
973 ENDIF
974*
975 scal = ( scamin / swork( i, l ) ) * scaloc
976 IF( scal .NE. one ) THEN
977 DO ll = l1, l2-1
978 CALL zdscal( i2-i1, scal, c( i1, ll ), 1 )
979 END DO
980 ENDIF
981*
982* Record current scaling factor
983*
984 swork( k, l ) = scamin * scaloc
985 swork( i, l ) = scamin * scaloc
986*
987 CALL zgemm( 'N', 'N', i2-i1, l2-l1, k2-k1, -cone,
988 \$ a( i1, k1 ), lda, c( k1, l1 ), ldc,
989 \$ cone, c( i1, l1 ), ldc )
990*
991 END DO
992*
993 DO j = 1, l - 1
994*
995* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**H
996*
997 j1 = (j - 1) * nb + 1
998 j2 = min( j * nb, n ) + 1
999*
1000* Compute scaling factor to survive the linear update
1001* simulating consistent scaling.
1002*
1003 cnrm = zlange( 'I', k2-k1, j2-j1, c( k1, j1 ),
1004 \$ ldc, wnrm )
1005 scamin = min( swork( k, j ), swork( k, l ) )
1006 cnrm = cnrm * ( scamin / swork( k, j ) )
1007 xnrm = xnrm * ( scamin / swork( k, l ) )
1008 bnrm = swork( l, bwrk + j )
1009 scaloc = dlarmm( bnrm, xnrm, cnrm )
1010 IF( scaloc * scamin .EQ. zero ) THEN
1011* Use second scaling factor to prevent flushing to zero.
1012 buf = buf*2.d0**exponent( scaloc )
1013 DO jj = 1, nbb
1014 DO ll = 1, nba
1015 swork( ll, jj ) = min( bignum,
1016 \$ swork( ll, jj ) / 2.d0**exponent( scaloc ) )
1017 END DO
1018 END DO
1019 scamin = scamin / 2.d0**exponent( scaloc )
1020 scaloc = scaloc / 2.d0**exponent( scaloc )
1021 END IF
1022 cnrm = cnrm * scaloc
1023 xnrm = xnrm * scaloc
1024*
1025* Simultaneously apply the robust update factor and the
1026* consistency scaling factor to C( K, J ) and C( K, L).
1027*
1028 scal = ( scamin / swork( k, l ) ) * scaloc
1029 IF( scal .NE. one ) THEN
1030 DO jj = l1, l2-1
1031 CALL zdscal( k2-k1, scal, c( k1, jj ), 1 )
1032 END DO
1033 ENDIF
1034*
1035 scal = ( scamin / swork( k, j ) ) * scaloc
1036 IF( scal .NE. one ) THEN
1037 DO jj = j1, j2-1
1038 CALL zdscal( k2-k1, scal, c( k1, jj ), 1 )
1039 END DO
1040 ENDIF
1041*
1042* Record current scaling factor
1043*
1044 swork( k, l ) = scamin * scaloc
1045 swork( k, j ) = scamin * scaloc
1046*
1047 CALL zgemm( 'N', 'C', k2-k1, j2-j1, l2-l1, -csgn,
1048 \$ c( k1, l1 ), ldc, b( j1, l1 ), ldb,
1049 \$ cone, c( k1, j1 ), ldc )
1050 END DO
1051 END DO
1052 END DO
1053*
1054 END IF
1055*
1056* Reduce local scaling factors
1057*
1058 scale = swork( 1, 1 )
1059 DO k = 1, nba
1060 DO l = 1, nbb
1061 scale = min( scale, swork( k, l ) )
1062 END DO
1063 END DO
1064 IF( scale .EQ. zero ) THEN
1065*
1066* The magnitude of the largest entry of the solution is larger
1067* than the product of BIGNUM**2 and cannot be represented in the
1068* form (1/SCALE)*X if SCALE is DOUBLE PRECISION. Set SCALE to
1069* zero and give up.
1070*
1071 swork(1,1) = max( nba, nbb )
1072 swork(2,1) = 2 * nbb + nba
1073 RETURN
1074 END IF
1075*
1076* Realize consistent scaling
1077*
1078 DO k = 1, nba
1079 k1 = (k - 1) * nb + 1
1080 k2 = min( k * nb, m ) + 1
1081 DO l = 1, nbb
1082 l1 = (l - 1) * nb + 1
1083 l2 = min( l * nb, n ) + 1
1084 scal = scale / swork( k, l )
1085 IF( scal .NE. one ) THEN
1086 DO ll = l1, l2-1
1087 CALL zdscal( k2-k1, scal, c( k1, ll ), 1 )
1088 END DO
1089 ENDIF
1090 END DO
1091 END DO
1092*
1093 IF( buf .NE. one .AND. buf.GT.zero ) THEN
1094*
1095* Decrease SCALE as much as possible.
1096*
1097 scaloc = min( scale / smlnum, one / buf )
1098 buf = buf * scaloc
1099 scale = scale / scaloc
1100 END IF
1101*
1102 IF( buf.NE.one .AND. buf.GT.zero ) THEN
1103*
1104* In case of overly aggressive scaling during the computation,
1105* flushing of the global scale factor may be prevented by
1106* undoing some of the scaling. This step is to ensure that
1107* this routine flushes only scale factors that TRSYL also
1108* flushes and be usable as a drop-in replacement.
1109*
1110* How much can the normwise largest entry be upscaled?
1111*
1112 scal = max( abs( dble( c( 1, 1 ) ) ),
1113 \$ abs( dimag( c( 1, 1 ) ) ) )
1114 DO k = 1, m
1115 DO l = 1, n
1116 scal = max( scal, abs( dble( c( k, l ) ) ),
1117 \$ abs( dimag( c( k, l ) ) ) )
1118 END DO
1119 END DO
1120*
1121* Increase BUF as close to 1 as possible and apply scaling.
1122*
1123 scaloc = min( bignum / scal, one / buf )
1124 buf = buf * scaloc
1125 CALL zlascl( 'G', -1, -1, one, scaloc, m, n, c, ldc, iinfo )
1126 END IF
1127*
1128* Combine with buffer scaling factor. SCALE will be flushed if
1129* BUF is less than one here.
1130*
1131 scale = scale * buf
1132*
1133* Restore workspace dimensions
1134*
1135 swork(1,1) = max( nba, nbb )
1136 swork(2,1) = 2 * nbb + nba
1137*
1138 RETURN
1139*
1140* End of ZTRSYL3
1141*
1142 END
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine zgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
ZGEMM
Definition zgemm.f:188
subroutine zlascl(type, kl, ku, cfrom, cto, m, n, a, lda, info)
ZLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition zlascl.f:143
subroutine zdscal(n, da, zx, incx)
ZDSCAL
Definition zdscal.f:78
subroutine ztrsyl3(trana, tranb, isgn, m, n, a, lda, b, ldb, c, ldc, scale, swork, ldswork, info)
ZTRSYL3
Definition ztrsyl3.f:157
subroutine ztrsyl(trana, tranb, isgn, m, n, a, lda, b, ldb, c, ldc, scale, info)
ZTRSYL
Definition ztrsyl.f:157