LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
csytri_3x.f
Go to the documentation of this file.
1*> \brief \b CSYTRI_3X
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download CSYTRI_3X + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/csytri_3x.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/csytri_3x.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/csytri_3x.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18* Definition:
19* ===========
20*
21* SUBROUTINE CSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO )
22*
23* .. Scalar Arguments ..
24* CHARACTER UPLO
25* INTEGER INFO, LDA, N, NB
26* ..
27* .. Array Arguments ..
28* INTEGER IPIV( * )
29* COMPLEX*16 A( LDA, * ), E( * ), WORK( N+NB+1, * )
30* ..
31*
32*
33*> \par Purpose:
34* =============
35*>
36*> \verbatim
37*> CSYTRI_3X computes the inverse of a complex symmetric indefinite
38*> matrix A using the factorization computed by CSYTRF_RK or CSYTRF_BK:
39*>
40*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T),
41*>
42*> where U (or L) is unit upper (or lower) triangular matrix,
43*> U**T (or L**T) is the transpose of U (or L), P is a permutation
44*> matrix, P**T is the transpose of P, and D is symmetric and block
45*> diagonal with 1-by-1 and 2-by-2 diagonal blocks.
46*>
47*> This is the blocked version of the algorithm, calling Level 3 BLAS.
48*> \endverbatim
49*
50* Arguments:
51* ==========
52*
53*> \param[in] UPLO
54*> \verbatim
55*> UPLO is CHARACTER*1
56*> Specifies whether the details of the factorization are
57*> stored as an upper or lower triangular matrix.
58*> = 'U': Upper triangle of A is stored;
59*> = 'L': Lower triangle of A is stored.
60*> \endverbatim
61*>
62*> \param[in] N
63*> \verbatim
64*> N is INTEGER
65*> The order of the matrix A. N >= 0.
66*> \endverbatim
67*>
68*> \param[in,out] A
69*> \verbatim
70*> A is COMPLEX array, dimension (LDA,N)
71*> On entry, diagonal of the block diagonal matrix D and
72*> factors U or L as computed by CSYTRF_RK and CSYTRF_BK:
73*> a) ONLY diagonal elements of the symmetric block diagonal
74*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k);
75*> (superdiagonal (or subdiagonal) elements of D
76*> should be provided on entry in array E), and
77*> b) If UPLO = 'U': factor U in the superdiagonal part of A.
78*> If UPLO = 'L': factor L in the subdiagonal part of A.
79*>
80*> On exit, if INFO = 0, the symmetric inverse of the original
81*> matrix.
82*> If UPLO = 'U': the upper triangular part of the inverse
83*> is formed and the part of A below the diagonal is not
84*> referenced;
85*> If UPLO = 'L': the lower triangular part of the inverse
86*> is formed and the part of A above the diagonal is not
87*> referenced.
88*> \endverbatim
89*>
90*> \param[in] LDA
91*> \verbatim
92*> LDA is INTEGER
93*> The leading dimension of the array A. LDA >= max(1,N).
94*> \endverbatim
95*>
96*> \param[in] E
97*> \verbatim
98*> E is COMPLEX array, dimension (N)
99*> On entry, contains the superdiagonal (or subdiagonal)
100*> elements of the symmetric block diagonal matrix D
101*> with 1-by-1 or 2-by-2 diagonal blocks, where
102*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) not referenced;
103*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) not referenced.
104*>
105*> NOTE: For 1-by-1 diagonal block D(k), where
106*> 1 <= k <= N, the element E(k) is not referenced in both
107*> UPLO = 'U' or UPLO = 'L' cases.
108*> \endverbatim
109*>
110*> \param[in] IPIV
111*> \verbatim
112*> IPIV is INTEGER array, dimension (N)
113*> Details of the interchanges and the block structure of D
114*> as determined by CSYTRF_RK or CSYTRF_BK.
115*> \endverbatim
116*>
117*> \param[out] WORK
118*> \verbatim
119*> WORK is COMPLEX array, dimension (N+NB+1,NB+3).
120*> \endverbatim
121*>
122*> \param[in] NB
123*> \verbatim
124*> NB is INTEGER
125*> Block size.
126*> \endverbatim
127*>
128*> \param[out] INFO
129*> \verbatim
130*> INFO is INTEGER
131*> = 0: successful exit
132*> < 0: if INFO = -i, the i-th argument had an illegal value
133*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its
134*> inverse could not be computed.
135*> \endverbatim
136*
137* Authors:
138* ========
139*
140*> \author Univ. of Tennessee
141*> \author Univ. of California Berkeley
142*> \author Univ. of Colorado Denver
143*> \author NAG Ltd.
144*
145*> \ingroup hetri_3x
146*
147*> \par Contributors:
148* ==================
149*> \verbatim
150*>
151*> June 2017, Igor Kozachenko,
152*> Computer Science Division,
153*> University of California, Berkeley
154*>
155*> \endverbatim
156*
157* =====================================================================
158 SUBROUTINE csytri_3x( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO )
159*
160* -- LAPACK computational routine --
161* -- LAPACK is a software package provided by Univ. of Tennessee, --
162* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
163*
164* .. Scalar Arguments ..
165 CHARACTER UPLO
166 INTEGER INFO, LDA, N, NB
167* ..
168* .. Array Arguments ..
169 INTEGER IPIV( * )
170 COMPLEX A( LDA, * ), E( * ), WORK( N+NB+1, * )
171* ..
172*
173* =====================================================================
174*
175* .. Parameters ..
176 COMPLEX CONE, CZERO
177 parameter( cone = ( 1.0e+0, 0.0e+0 ),
178 $ czero = ( 0.0e+0, 0.0e+0 ) )
179* ..
180* .. Local Scalars ..
181 LOGICAL UPPER
182 INTEGER CUT, I, ICOUNT, INVD, IP, K, NNB, J, U11
183 COMPLEX AK, AKKP1, AKP1, D, T, U01_I_J, U01_IP1_J,
184 $ U11_I_J, U11_IP1_J
185* ..
186* .. External Functions ..
187 LOGICAL LSAME
188 EXTERNAL lsame
189* ..
190* .. External Subroutines ..
191 EXTERNAL cgemm, csyswapr, ctrtri, ctrmm, xerbla
192* ..
193* .. Intrinsic Functions ..
194 INTRINSIC abs, max, mod
195* ..
196* .. Executable Statements ..
197*
198* Test the input parameters.
199*
200 info = 0
201 upper = lsame( uplo, 'U' )
202 IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
203 info = -1
204 ELSE IF( n.LT.0 ) THEN
205 info = -2
206 ELSE IF( lda.LT.max( 1, n ) ) THEN
207 info = -4
208 END IF
209*
210* Quick return if possible
211*
212 IF( info.NE.0 ) THEN
213 CALL xerbla( 'CSYTRI_3X', -info )
214 RETURN
215 END IF
216 IF( n.EQ.0 )
217 $ RETURN
218*
219* Workspace got Non-diag elements of D
220*
221 DO k = 1, n
222 work( k, 1 ) = e( k )
223 END DO
224*
225* Check that the diagonal matrix D is nonsingular.
226*
227 IF( upper ) THEN
228*
229* Upper triangular storage: examine D from bottom to top
230*
231 DO info = n, 1, -1
232 IF( ipiv( info ).GT.0 .AND. a( info, info ).EQ.czero )
233 $ RETURN
234 END DO
235 ELSE
236*
237* Lower triangular storage: examine D from top to bottom.
238*
239 DO info = 1, n
240 IF( ipiv( info ).GT.0 .AND. a( info, info ).EQ.czero )
241 $ RETURN
242 END DO
243 END IF
244*
245 info = 0
246*
247* Splitting Workspace
248* U01 is a block ( N, NB+1 )
249* The first element of U01 is in WORK( 1, 1 )
250* U11 is a block ( NB+1, NB+1 )
251* The first element of U11 is in WORK( N+1, 1 )
252*
253 u11 = n
254*
255* INVD is a block ( N, 2 )
256* The first element of INVD is in WORK( 1, INVD )
257*
258 invd = nb + 2
259
260 IF( upper ) THEN
261*
262* Begin Upper
263*
264* invA = P * inv(U**T) * inv(D) * inv(U) * P**T.
265*
266 CALL ctrtri( uplo, 'U', n, a, lda, info )
267*
268* inv(D) and inv(D) * inv(U)
269*
270 k = 1
271 DO WHILE( k.LE.n )
272 IF( ipiv( k ).GT.0 ) THEN
273* 1 x 1 diagonal NNB
274 work( k, invd ) = cone / a( k, k )
275 work( k, invd+1 ) = czero
276 ELSE
277* 2 x 2 diagonal NNB
278 t = work( k+1, 1 )
279 ak = a( k, k ) / t
280 akp1 = a( k+1, k+1 ) / t
281 akkp1 = work( k+1, 1 ) / t
282 d = t*( ak*akp1-cone )
283 work( k, invd ) = akp1 / d
284 work( k+1, invd+1 ) = ak / d
285 work( k, invd+1 ) = -akkp1 / d
286 work( k+1, invd ) = work( k, invd+1 )
287 k = k + 1
288 END IF
289 k = k + 1
290 END DO
291*
292* inv(U**T) = (inv(U))**T
293*
294* inv(U**T) * inv(D) * inv(U)
295*
296 cut = n
297 DO WHILE( cut.GT.0 )
298 nnb = nb
299 IF( cut.LE.nnb ) THEN
300 nnb = cut
301 ELSE
302 icount = 0
303* count negative elements,
304 DO i = cut+1-nnb, cut
305 IF( ipiv( i ).LT.0 ) icount = icount + 1
306 END DO
307* need a even number for a clear cut
308 IF( mod( icount, 2 ).EQ.1 ) nnb = nnb + 1
309 END IF
310
311 cut = cut - nnb
312*
313* U01 Block
314*
315 DO i = 1, cut
316 DO j = 1, nnb
317 work( i, j ) = a( i, cut+j )
318 END DO
319 END DO
320*
321* U11 Block
322*
323 DO i = 1, nnb
324 work( u11+i, i ) = cone
325 DO j = 1, i-1
326 work( u11+i, j ) = czero
327 END DO
328 DO j = i+1, nnb
329 work( u11+i, j ) = a( cut+i, cut+j )
330 END DO
331 END DO
332*
333* invD * U01
334*
335 i = 1
336 DO WHILE( i.LE.cut )
337 IF( ipiv( i ).GT.0 ) THEN
338 DO j = 1, nnb
339 work( i, j ) = work( i, invd ) * work( i, j )
340 END DO
341 ELSE
342 DO j = 1, nnb
343 u01_i_j = work( i, j )
344 u01_ip1_j = work( i+1, j )
345 work( i, j ) = work( i, invd ) * u01_i_j
346 $ + work( i, invd+1 ) * u01_ip1_j
347 work( i+1, j ) = work( i+1, invd ) * u01_i_j
348 $ + work( i+1, invd+1 ) * u01_ip1_j
349 END DO
350 i = i + 1
351 END IF
352 i = i + 1
353 END DO
354*
355* invD1 * U11
356*
357 i = 1
358 DO WHILE ( i.LE.nnb )
359 IF( ipiv( cut+i ).GT.0 ) THEN
360 DO j = i, nnb
361 work( u11+i, j ) = work(cut+i,invd) * work(u11+i,j)
362 END DO
363 ELSE
364 DO j = i, nnb
365 u11_i_j = work(u11+i,j)
366 u11_ip1_j = work(u11+i+1,j)
367 work( u11+i, j ) = work(cut+i,invd) * work(u11+i,j)
368 $ + work(cut+i,invd+1) * work(u11+i+1,j)
369 work( u11+i+1, j ) = work(cut+i+1,invd) * u11_i_j
370 $ + work(cut+i+1,invd+1) * u11_ip1_j
371 END DO
372 i = i + 1
373 END IF
374 i = i + 1
375 END DO
376*
377* U11**T * invD1 * U11 -> U11
378*
379 CALL ctrmm( 'L', 'U', 'T', 'U', nnb, nnb,
380 $ cone, a( cut+1, cut+1 ), lda, work( u11+1, 1 ),
381 $ n+nb+1 )
382*
383 DO i = 1, nnb
384 DO j = i, nnb
385 a( cut+i, cut+j ) = work( u11+i, j )
386 END DO
387 END DO
388*
389* U01**T * invD * U01 -> A( CUT+I, CUT+J )
390*
391 CALL cgemm( 'T', 'N', nnb, nnb, cut, cone, a( 1, cut+1 ),
392 $ lda, work, n+nb+1, czero, work(u11+1,1),
393 $ n+nb+1 )
394
395*
396* U11 = U11**T * invD1 * U11 + U01**T * invD * U01
397*
398 DO i = 1, nnb
399 DO j = i, nnb
400 a( cut+i, cut+j ) = a( cut+i, cut+j ) + work(u11+i,j)
401 END DO
402 END DO
403*
404* U01 = U00**T * invD0 * U01
405*
406 CALL ctrmm( 'L', uplo, 'T', 'U', cut, nnb,
407 $ cone, a, lda, work, n+nb+1 )
408
409*
410* Update U01
411*
412 DO i = 1, cut
413 DO j = 1, nnb
414 a( i, cut+j ) = work( i, j )
415 END DO
416 END DO
417*
418* Next Block
419*
420 END DO
421*
422* Apply PERMUTATIONS P and P**T:
423* P * inv(U**T) * inv(D) * inv(U) * P**T.
424* Interchange rows and columns I and IPIV(I) in reverse order
425* from the formation order of IPIV vector for Upper case.
426*
427* ( We can use a loop over IPIV with increment 1,
428* since the ABS value of IPIV(I) represents the row (column)
429* index of the interchange with row (column) i in both 1x1
430* and 2x2 pivot cases, i.e. we don't need separate code branches
431* for 1x1 and 2x2 pivot cases )
432*
433 DO i = 1, n
434 ip = abs( ipiv( i ) )
435 IF( ip.NE.i ) THEN
436 IF (i .LT. ip) CALL csyswapr( uplo, n, a, lda, i ,ip )
437 IF (i .GT. ip) CALL csyswapr( uplo, n, a, lda, ip ,i )
438 END IF
439 END DO
440*
441 ELSE
442*
443* Begin Lower
444*
445* inv A = P * inv(L**T) * inv(D) * inv(L) * P**T.
446*
447 CALL ctrtri( uplo, 'U', n, a, lda, info )
448*
449* inv(D) and inv(D) * inv(L)
450*
451 k = n
452 DO WHILE ( k .GE. 1 )
453 IF( ipiv( k ).GT.0 ) THEN
454* 1 x 1 diagonal NNB
455 work( k, invd ) = cone / a( k, k )
456 work( k, invd+1 ) = czero
457 ELSE
458* 2 x 2 diagonal NNB
459 t = work( k-1, 1 )
460 ak = a( k-1, k-1 ) / t
461 akp1 = a( k, k ) / t
462 akkp1 = work( k-1, 1 ) / t
463 d = t*( ak*akp1-cone )
464 work( k-1, invd ) = akp1 / d
465 work( k, invd ) = ak / d
466 work( k, invd+1 ) = -akkp1 / d
467 work( k-1, invd+1 ) = work( k, invd+1 )
468 k = k - 1
469 END IF
470 k = k - 1
471 END DO
472*
473* inv(L**T) = (inv(L))**T
474*
475* inv(L**T) * inv(D) * inv(L)
476*
477 cut = 0
478 DO WHILE( cut.LT.n )
479 nnb = nb
480 IF( (cut + nnb).GT.n ) THEN
481 nnb = n - cut
482 ELSE
483 icount = 0
484* count negative elements,
485 DO i = cut + 1, cut+nnb
486 IF ( ipiv( i ).LT.0 ) icount = icount + 1
487 END DO
488* need a even number for a clear cut
489 IF( mod( icount, 2 ).EQ.1 ) nnb = nnb + 1
490 END IF
491*
492* L21 Block
493*
494 DO i = 1, n-cut-nnb
495 DO j = 1, nnb
496 work( i, j ) = a( cut+nnb+i, cut+j )
497 END DO
498 END DO
499*
500* L11 Block
501*
502 DO i = 1, nnb
503 work( u11+i, i) = cone
504 DO j = i+1, nnb
505 work( u11+i, j ) = czero
506 END DO
507 DO j = 1, i-1
508 work( u11+i, j ) = a( cut+i, cut+j )
509 END DO
510 END DO
511*
512* invD*L21
513*
514 i = n-cut-nnb
515 DO WHILE( i.GE.1 )
516 IF( ipiv( cut+nnb+i ).GT.0 ) THEN
517 DO j = 1, nnb
518 work( i, j ) = work( cut+nnb+i, invd) * work( i, j)
519 END DO
520 ELSE
521 DO j = 1, nnb
522 u01_i_j = work(i,j)
523 u01_ip1_j = work(i-1,j)
524 work(i,j)=work(cut+nnb+i,invd)*u01_i_j+
525 $ work(cut+nnb+i,invd+1)*u01_ip1_j
526 work(i-1,j)=work(cut+nnb+i-1,invd+1)*u01_i_j+
527 $ work(cut+nnb+i-1,invd)*u01_ip1_j
528 END DO
529 i = i - 1
530 END IF
531 i = i - 1
532 END DO
533*
534* invD1*L11
535*
536 i = nnb
537 DO WHILE( i.GE.1 )
538 IF( ipiv( cut+i ).GT.0 ) THEN
539 DO j = 1, nnb
540 work( u11+i, j ) = work( cut+i, invd)*work(u11+i,j)
541 END DO
542
543 ELSE
544 DO j = 1, nnb
545 u11_i_j = work( u11+i, j )
546 u11_ip1_j = work( u11+i-1, j )
547 work( u11+i, j ) = work(cut+i,invd) * work(u11+i,j)
548 $ + work(cut+i,invd+1) * u11_ip1_j
549 work( u11+i-1, j ) = work(cut+i-1,invd+1) * u11_i_j
550 $ + work(cut+i-1,invd) * u11_ip1_j
551 END DO
552 i = i - 1
553 END IF
554 i = i - 1
555 END DO
556*
557* L11**T * invD1 * L11 -> L11
558*
559 CALL ctrmm( 'L', uplo, 'T', 'U', nnb, nnb, cone,
560 $ a( cut+1, cut+1 ), lda, work( u11+1, 1 ),
561 $ n+nb+1 )
562
563*
564 DO i = 1, nnb
565 DO j = 1, i
566 a( cut+i, cut+j ) = work( u11+i, j )
567 END DO
568 END DO
569*
570 IF( (cut+nnb).LT.n ) THEN
571*
572* L21**T * invD2*L21 -> A( CUT+I, CUT+J )
573*
574 CALL cgemm( 'T', 'N', nnb, nnb, n-nnb-cut, cone,
575 $ a( cut+nnb+1, cut+1 ), lda, work, n+nb+1,
576 $ czero, work( u11+1, 1 ), n+nb+1 )
577
578*
579* L11 = L11**T * invD1 * L11 + U01**T * invD * U01
580*
581 DO i = 1, nnb
582 DO j = 1, i
583 a( cut+i, cut+j ) = a( cut+i, cut+j )+work(u11+i,j)
584 END DO
585 END DO
586*
587* L01 = L22**T * invD2 * L21
588*
589 CALL ctrmm( 'L', uplo, 'T', 'U', n-nnb-cut, nnb, cone,
590 $ a( cut+nnb+1, cut+nnb+1 ), lda, work,
591 $ n+nb+1 )
592*
593* Update L21
594*
595 DO i = 1, n-cut-nnb
596 DO j = 1, nnb
597 a( cut+nnb+i, cut+j ) = work( i, j )
598 END DO
599 END DO
600*
601 ELSE
602*
603* L11 = L11**T * invD1 * L11
604*
605 DO i = 1, nnb
606 DO j = 1, i
607 a( cut+i, cut+j ) = work( u11+i, j )
608 END DO
609 END DO
610 END IF
611*
612* Next Block
613*
614 cut = cut + nnb
615*
616 END DO
617*
618* Apply PERMUTATIONS P and P**T:
619* P * inv(L**T) * inv(D) * inv(L) * P**T.
620* Interchange rows and columns I and IPIV(I) in reverse order
621* from the formation order of IPIV vector for Lower case.
622*
623* ( We can use a loop over IPIV with increment -1,
624* since the ABS value of IPIV(I) represents the row (column)
625* index of the interchange with row (column) i in both 1x1
626* and 2x2 pivot cases, i.e. we don't need separate code branches
627* for 1x1 and 2x2 pivot cases )
628*
629 DO i = n, 1, -1
630 ip = abs( ipiv( i ) )
631 IF( ip.NE.i ) THEN
632 IF (i .LT. ip) CALL csyswapr( uplo, n, a, lda, i ,ip )
633 IF (i .GT. ip) CALL csyswapr( uplo, n, a, lda, ip ,i )
634 END IF
635 END DO
636*
637 END IF
638*
639 RETURN
640*
641* End of CSYTRI_3X
642*
643 END
644
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine cgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
CGEMM
Definition cgemm.f:188
subroutine csyswapr(uplo, n, a, lda, i1, i2)
CSYSWAPR
Definition csyswapr.f:100
subroutine csytri_3x(uplo, n, a, lda, e, ipiv, work, nb, info)
CSYTRI_3X
Definition csytri_3x.f:159
subroutine ctrmm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
CTRMM
Definition ctrmm.f:177
subroutine ctrtri(uplo, diag, n, a, lda, info)
CTRTRI
Definition ctrtri.f:109