LAPACK 3.12.1
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
clahef_rk.f
Go to the documentation of this file.
1*> \brief \b CLAHEF_RK computes a partial factorization of a complex Hermitian indefinite matrix using bounded Bunch-Kaufman (rook) diagonal pivoting method.
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> Download CLAHEF_RK + dependencies
9*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clahef_rk.f">
10*> [TGZ]</a>
11*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clahef_rk.f">
12*> [ZIP]</a>
13*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clahef_rk.f">
14*> [TXT]</a>
15*
16* Definition:
17* ===========
18*
19* SUBROUTINE CLAHEF_RK( UPLO, N, NB, KB, A, LDA, E, IPIV, W, LDW,
20* INFO )
21*
22* .. Scalar Arguments ..
23* CHARACTER UPLO
24* INTEGER INFO, KB, LDA, LDW, N, NB
25* ..
26* .. Array Arguments ..
27* INTEGER IPIV( * )
28* COMPLEX A( LDA, * ), E( * ), W( LDW, * )
29* ..
30*
31*
32*> \par Purpose:
33* =============
34*>
35*> \verbatim
36*> CLAHEF_RK computes a partial factorization of a complex Hermitian
37*> matrix A using the bounded Bunch-Kaufman (rook) diagonal
38*> pivoting method. The partial factorization has the form:
39*>
40*> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or:
41*> ( 0 U22 ) ( 0 D ) ( U12**H U22**H )
42*>
43*> A = ( L11 0 ) ( D 0 ) ( L11**H L21**H ) if UPLO = 'L',
44*> ( L21 I ) ( 0 A22 ) ( 0 I )
45*>
46*> where the order of D is at most NB. The actual order is returned in
47*> the argument KB, and is either NB or NB-1, or N if N <= NB.
48*>
49*> CLAHEF_RK is an auxiliary routine called by CHETRF_RK. It uses
50*> blocked code (calling Level 3 BLAS) to update the submatrix
51*> A11 (if UPLO = 'U') or A22 (if UPLO = 'L').
52*> \endverbatim
53*
54* Arguments:
55* ==========
56*
57*> \param[in] UPLO
58*> \verbatim
59*> UPLO is CHARACTER*1
60*> Specifies whether the upper or lower triangular part of the
61*> Hermitian matrix A is stored:
62*> = 'U': Upper triangular
63*> = 'L': Lower triangular
64*> \endverbatim
65*>
66*> \param[in] N
67*> \verbatim
68*> N is INTEGER
69*> The order of the matrix A. N >= 0.
70*> \endverbatim
71*>
72*> \param[in] NB
73*> \verbatim
74*> NB is INTEGER
75*> The maximum number of columns of the matrix A that should be
76*> factored. NB should be at least 2 to allow for 2-by-2 pivot
77*> blocks.
78*> \endverbatim
79*>
80*> \param[out] KB
81*> \verbatim
82*> KB is INTEGER
83*> The number of columns of A that were actually factored.
84*> KB is either NB-1 or NB, or N if N <= NB.
85*> \endverbatim
86*>
87*> \param[in,out] A
88*> \verbatim
89*> A is COMPLEX array, dimension (LDA,N)
90*> On entry, the Hermitian matrix A.
91*> If UPLO = 'U': the leading N-by-N upper triangular part
92*> of A contains the upper triangular part of the matrix A,
93*> and the strictly lower triangular part of A is not
94*> referenced.
95*>
96*> If UPLO = 'L': the leading N-by-N lower triangular part
97*> of A contains the lower triangular part of the matrix A,
98*> and the strictly upper triangular part of A is not
99*> referenced.
100*>
101*> On exit, contains:
102*> a) ONLY diagonal elements of the Hermitian block diagonal
103*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k);
104*> (superdiagonal (or subdiagonal) elements of D
105*> are stored on exit in array E), and
106*> b) If UPLO = 'U': factor U in the superdiagonal part of A.
107*> If UPLO = 'L': factor L in the subdiagonal part of A.
108*> \endverbatim
109*>
110*> \param[in] LDA
111*> \verbatim
112*> LDA is INTEGER
113*> The leading dimension of the array A. LDA >= max(1,N).
114*> \endverbatim
115*>
116*> \param[out] E
117*> \verbatim
118*> E is COMPLEX array, dimension (N)
119*> On exit, contains the superdiagonal (or subdiagonal)
120*> elements of the Hermitian block diagonal matrix D
121*> with 1-by-1 or 2-by-2 diagonal blocks, where
122*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0;
123*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0.
124*>
125*> NOTE: For 1-by-1 diagonal block D(k), where
126*> 1 <= k <= N, the element E(k) is set to 0 in both
127*> UPLO = 'U' or UPLO = 'L' cases.
128*> \endverbatim
129*>
130*> \param[out] IPIV
131*> \verbatim
132*> IPIV is INTEGER array, dimension (N)
133*> IPIV describes the permutation matrix P in the factorization
134*> of matrix A as follows. The absolute value of IPIV(k)
135*> represents the index of row and column that were
136*> interchanged with the k-th row and column. The value of UPLO
137*> describes the order in which the interchanges were applied.
138*> Also, the sign of IPIV represents the block structure of
139*> the Hermitian block diagonal matrix D with 1-by-1 or 2-by-2
140*> diagonal blocks which correspond to 1 or 2 interchanges
141*> at each factorization step.
142*>
143*> If UPLO = 'U',
144*> ( in factorization order, k decreases from N to 1 ):
145*> a) A single positive entry IPIV(k) > 0 means:
146*> D(k,k) is a 1-by-1 diagonal block.
147*> If IPIV(k) != k, rows and columns k and IPIV(k) were
148*> interchanged in the submatrix A(1:N,N-KB+1:N);
149*> If IPIV(k) = k, no interchange occurred.
150*>
151*>
152*> b) A pair of consecutive negative entries
153*> IPIV(k) < 0 and IPIV(k-1) < 0 means:
154*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block.
155*> (NOTE: negative entries in IPIV appear ONLY in pairs).
156*> 1) If -IPIV(k) != k, rows and columns
157*> k and -IPIV(k) were interchanged
158*> in the matrix A(1:N,N-KB+1:N).
159*> If -IPIV(k) = k, no interchange occurred.
160*> 2) If -IPIV(k-1) != k-1, rows and columns
161*> k-1 and -IPIV(k-1) were interchanged
162*> in the submatrix A(1:N,N-KB+1:N).
163*> If -IPIV(k-1) = k-1, no interchange occurred.
164*>
165*> c) In both cases a) and b) is always ABS( IPIV(k) ) <= k.
166*>
167*> d) NOTE: Any entry IPIV(k) is always NONZERO on output.
168*>
169*> If UPLO = 'L',
170*> ( in factorization order, k increases from 1 to N ):
171*> a) A single positive entry IPIV(k) > 0 means:
172*> D(k,k) is a 1-by-1 diagonal block.
173*> If IPIV(k) != k, rows and columns k and IPIV(k) were
174*> interchanged in the submatrix A(1:N,1:KB).
175*> If IPIV(k) = k, no interchange occurred.
176*>
177*> b) A pair of consecutive negative entries
178*> IPIV(k) < 0 and IPIV(k+1) < 0 means:
179*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
180*> (NOTE: negative entries in IPIV appear ONLY in pairs).
181*> 1) If -IPIV(k) != k, rows and columns
182*> k and -IPIV(k) were interchanged
183*> in the submatrix A(1:N,1:KB).
184*> If -IPIV(k) = k, no interchange occurred.
185*> 2) If -IPIV(k+1) != k+1, rows and columns
186*> k-1 and -IPIV(k-1) were interchanged
187*> in the submatrix A(1:N,1:KB).
188*> If -IPIV(k+1) = k+1, no interchange occurred.
189*>
190*> c) In both cases a) and b) is always ABS( IPIV(k) ) >= k.
191*>
192*> d) NOTE: Any entry IPIV(k) is always NONZERO on output.
193*> \endverbatim
194*>
195*> \param[out] W
196*> \verbatim
197*> W is COMPLEX array, dimension (LDW,NB)
198*> \endverbatim
199*>
200*> \param[in] LDW
201*> \verbatim
202*> LDW is INTEGER
203*> The leading dimension of the array W. LDW >= max(1,N).
204*> \endverbatim
205*>
206*> \param[out] INFO
207*> \verbatim
208*> INFO is INTEGER
209*> = 0: successful exit
210*>
211*> < 0: If INFO = -k, the k-th argument had an illegal value
212*>
213*> > 0: If INFO = k, the matrix A is singular, because:
214*> If UPLO = 'U': column k in the upper
215*> triangular part of A contains all zeros.
216*> If UPLO = 'L': column k in the lower
217*> triangular part of A contains all zeros.
218*>
219*> Therefore D(k,k) is exactly zero, and superdiagonal
220*> elements of column k of U (or subdiagonal elements of
221*> column k of L ) are all zeros. The factorization has
222*> been completed, but the block diagonal matrix D is
223*> exactly singular, and division by zero will occur if
224*> it is used to solve a system of equations.
225*>
226*> NOTE: INFO only stores the first occurrence of
227*> a singularity, any subsequent occurrence of singularity
228*> is not stored in INFO even though the factorization
229*> always completes.
230*> \endverbatim
231*
232* Authors:
233* ========
234*
235*> \author Univ. of Tennessee
236*> \author Univ. of California Berkeley
237*> \author Univ. of Colorado Denver
238*> \author NAG Ltd.
239*
240*> \ingroup lahef_rk
241*
242*> \par Contributors:
243* ==================
244*>
245*> \verbatim
246*>
247*> December 2016, Igor Kozachenko,
248*> Computer Science Division,
249*> University of California, Berkeley
250*>
251*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas,
252*> School of Mathematics,
253*> University of Manchester
254*>
255*> \endverbatim
256*
257* =====================================================================
258 SUBROUTINE clahef_rk( UPLO, N, NB, KB, A, LDA, E, IPIV, W, LDW,
259 $ INFO )
260*
261* -- LAPACK computational routine --
262* -- LAPACK is a software package provided by Univ. of Tennessee, --
263* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
264*
265* .. Scalar Arguments ..
266 CHARACTER UPLO
267 INTEGER INFO, KB, LDA, LDW, N, NB
268* ..
269* .. Array Arguments ..
270 INTEGER IPIV( * )
271 COMPLEX A( LDA, * ), W( LDW, * ), E( * )
272* ..
273*
274* =====================================================================
275*
276* .. Parameters ..
277 REAL ZERO, ONE
278 parameter( zero = 0.0e+0, one = 1.0e+0 )
279 REAL EIGHT, SEVTEN
280 parameter( eight = 8.0e+0, sevten = 17.0e+0 )
281 COMPLEX CONE, CZERO
282 parameter( cone = ( 1.0e+0, 0.0e+0 ),
283 $ czero = ( 0.0e+0, 0.0e+0 ) )
284* ..
285* .. Local Scalars ..
286 LOGICAL DONE
287 INTEGER IMAX, ITEMP, II, J, JMAX, K, KK, KKW,
288 $ kp, kstep, kw, p
289 REAL ABSAKK, ALPHA, COLMAX, STEMP, R1, ROWMAX, T,
290 $ sfmin
291 COMPLEX D11, D21, D22, Z
292* ..
293* .. External Functions ..
294 LOGICAL LSAME
295 INTEGER ICAMAX
296 REAL SLAMCH
297 EXTERNAL lsame, icamax, slamch
298* ..
299* .. External Subroutines ..
300 EXTERNAL ccopy, csscal, cgemm, cgemv, clacgv,
301 $ cswap
302* ..
303* .. Intrinsic Functions ..
304 INTRINSIC abs, conjg, aimag, max, min, real, sqrt
305* ..
306* .. Statement Functions ..
307 REAL CABS1
308* ..
309* .. Statement Function definitions ..
310 cabs1( z ) = abs( real( z ) ) + abs( aimag( z ) )
311* ..
312* .. Executable Statements ..
313*
314 info = 0
315*
316* Initialize ALPHA for use in choosing pivot block size.
317*
318 alpha = ( one+sqrt( sevten ) ) / eight
319*
320* Compute machine safe minimum
321*
322 sfmin = slamch( 'S' )
323*
324 IF( lsame( uplo, 'U' ) ) THEN
325*
326* Factorize the trailing columns of A using the upper triangle
327* of A and working backwards, and compute the matrix W = U12*D
328* for use in updating A11 (note that conjg(W) is actually stored)
329*
330* Initialize the first entry of array E, where superdiagonal
331* elements of D are stored
332*
333 e( 1 ) = czero
334*
335* K is the main loop index, decreasing from N in steps of 1 or 2
336*
337 k = n
338 10 CONTINUE
339*
340* KW is the column of W which corresponds to column K of A
341*
342 kw = nb + k - n
343*
344* Exit from loop
345*
346 IF( ( k.LE.n-nb+1 .AND. nb.LT.n ) .OR. k.LT.1 )
347 $ GO TO 30
348*
349 kstep = 1
350 p = k
351*
352* Copy column K of A to column KW of W and update it
353*
354 IF( k.GT.1 )
355 $ CALL ccopy( k-1, a( 1, k ), 1, w( 1, kw ), 1 )
356 w( k, kw ) = real( a( k, k ) )
357 IF( k.LT.n ) THEN
358 CALL cgemv( 'No transpose', k, n-k, -cone, a( 1, k+1 ),
359 $ lda,
360 $ w( k, kw+1 ), ldw, cone, w( 1, kw ), 1 )
361 w( k, kw ) = real( w( k, kw ) )
362 END IF
363*
364* Determine rows and columns to be interchanged and whether
365* a 1-by-1 or 2-by-2 pivot block will be used
366*
367 absakk = abs( real( w( k, kw ) ) )
368*
369* IMAX is the row-index of the largest off-diagonal element in
370* column K, and COLMAX is its absolute value.
371* Determine both COLMAX and IMAX.
372*
373 IF( k.GT.1 ) THEN
374 imax = icamax( k-1, w( 1, kw ), 1 )
375 colmax = cabs1( w( imax, kw ) )
376 ELSE
377 colmax = zero
378 END IF
379*
380 IF( max( absakk, colmax ).EQ.zero ) THEN
381*
382* Column K is zero or underflow: set INFO and continue
383*
384 IF( info.EQ.0 )
385 $ info = k
386 kp = k
387 a( k, k ) = real( w( k, kw ) )
388 IF( k.GT.1 )
389 $ CALL ccopy( k-1, w( 1, kw ), 1, a( 1, k ), 1 )
390*
391* Set E( K ) to zero
392*
393 IF( k.GT.1 )
394 $ e( k ) = czero
395*
396 ELSE
397*
398* ============================================================
399*
400* BEGIN pivot search
401*
402* Case(1)
403* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX
404* (used to handle NaN and Inf)
405 IF( .NOT.( absakk.LT.alpha*colmax ) ) THEN
406*
407* no interchange, use 1-by-1 pivot block
408*
409 kp = k
410*
411 ELSE
412*
413* Lop until pivot found
414*
415 done = .false.
416*
417 12 CONTINUE
418*
419* BEGIN pivot search loop body
420*
421*
422* Copy column IMAX to column KW-1 of W and update it
423*
424 IF( imax.GT.1 )
425 $ CALL ccopy( imax-1, a( 1, imax ), 1, w( 1,
426 $ kw-1 ),
427 $ 1 )
428 w( imax, kw-1 ) = real( a( imax, imax ) )
429*
430 CALL ccopy( k-imax, a( imax, imax+1 ), lda,
431 $ w( imax+1, kw-1 ), 1 )
432 CALL clacgv( k-imax, w( imax+1, kw-1 ), 1 )
433*
434 IF( k.LT.n ) THEN
435 CALL cgemv( 'No transpose', k, n-k, -cone,
436 $ a( 1, k+1 ), lda, w( imax, kw+1 ), ldw,
437 $ cone, w( 1, kw-1 ), 1 )
438 w( imax, kw-1 ) = real( w( imax, kw-1 ) )
439 END IF
440*
441* JMAX is the column-index of the largest off-diagonal
442* element in row IMAX, and ROWMAX is its absolute value.
443* Determine both ROWMAX and JMAX.
444*
445 IF( imax.NE.k ) THEN
446 jmax = imax + icamax( k-imax, w( imax+1, kw-1 ),
447 $ 1 )
448 rowmax = cabs1( w( jmax, kw-1 ) )
449 ELSE
450 rowmax = zero
451 END IF
452*
453 IF( imax.GT.1 ) THEN
454 itemp = icamax( imax-1, w( 1, kw-1 ), 1 )
455 stemp = cabs1( w( itemp, kw-1 ) )
456 IF( stemp.GT.rowmax ) THEN
457 rowmax = stemp
458 jmax = itemp
459 END IF
460 END IF
461*
462* Case(2)
463* Equivalent to testing for
464* ABS( REAL( W( IMAX,KW-1 ) ) ).GE.ALPHA*ROWMAX
465* (used to handle NaN and Inf)
466*
467 IF( .NOT.( abs( real( w( imax,kw-1 ) ) )
468 $ .LT.alpha*rowmax ) ) THEN
469*
470* interchange rows and columns K and IMAX,
471* use 1-by-1 pivot block
472*
473 kp = imax
474*
475* copy column KW-1 of W to column KW of W
476*
477 CALL ccopy( k, w( 1, kw-1 ), 1, w( 1, kw ), 1 )
478*
479 done = .true.
480*
481* Case(3)
482* Equivalent to testing for ROWMAX.EQ.COLMAX,
483* (used to handle NaN and Inf)
484*
485 ELSE IF( ( p.EQ.jmax ) .OR. ( rowmax.LE.colmax ) )
486 $ THEN
487*
488* interchange rows and columns K-1 and IMAX,
489* use 2-by-2 pivot block
490*
491 kp = imax
492 kstep = 2
493 done = .true.
494*
495* Case(4)
496 ELSE
497*
498* Pivot not found: set params and repeat
499*
500 p = imax
501 colmax = rowmax
502 imax = jmax
503*
504* Copy updated JMAXth (next IMAXth) column to Kth of W
505*
506 CALL ccopy( k, w( 1, kw-1 ), 1, w( 1, kw ), 1 )
507*
508 END IF
509*
510*
511* END pivot search loop body
512*
513 IF( .NOT.done ) GOTO 12
514*
515 END IF
516*
517* END pivot search
518*
519* ============================================================
520*
521* KK is the column of A where pivoting step stopped
522*
523 kk = k - kstep + 1
524*
525* KKW is the column of W which corresponds to column KK of A
526*
527 kkw = nb + kk - n
528*
529* Interchange rows and columns P and K.
530* Updated column P is already stored in column KW of W.
531*
532 IF( ( kstep.EQ.2 ) .AND. ( p.NE.k ) ) THEN
533*
534* Copy non-updated column K to column P of submatrix A
535* at step K. No need to copy element into columns
536* K and K-1 of A for 2-by-2 pivot, since these columns
537* will be later overwritten.
538*
539 a( p, p ) = real( a( k, k ) )
540 CALL ccopy( k-1-p, a( p+1, k ), 1, a( p, p+1 ),
541 $ lda )
542 CALL clacgv( k-1-p, a( p, p+1 ), lda )
543 IF( p.GT.1 )
544 $ CALL ccopy( p-1, a( 1, k ), 1, a( 1, p ), 1 )
545*
546* Interchange rows K and P in the last K+1 to N columns of A
547* (columns K and K-1 of A for 2-by-2 pivot will be
548* later overwritten). Interchange rows K and P
549* in last KKW to NB columns of W.
550*
551 IF( k.LT.n )
552 $ CALL cswap( n-k, a( k, k+1 ), lda, a( p, k+1 ),
553 $ lda )
554 CALL cswap( n-kk+1, w( k, kkw ), ldw, w( p, kkw ),
555 $ ldw )
556 END IF
557*
558* Interchange rows and columns KP and KK.
559* Updated column KP is already stored in column KKW of W.
560*
561 IF( kp.NE.kk ) THEN
562*
563* Copy non-updated column KK to column KP of submatrix A
564* at step K. No need to copy element into column K
565* (or K and K-1 for 2-by-2 pivot) of A, since these columns
566* will be later overwritten.
567*
568 a( kp, kp ) = real( a( kk, kk ) )
569 CALL ccopy( kk-1-kp, a( kp+1, kk ), 1, a( kp, kp+1 ),
570 $ lda )
571 CALL clacgv( kk-1-kp, a( kp, kp+1 ), lda )
572 IF( kp.GT.1 )
573 $ CALL ccopy( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 )
574*
575* Interchange rows KK and KP in last K+1 to N columns of A
576* (columns K (or K and K-1 for 2-by-2 pivot) of A will be
577* later overwritten). Interchange rows KK and KP
578* in last KKW to NB columns of W.
579*
580 IF( k.LT.n )
581 $ CALL cswap( n-k, a( kk, k+1 ), lda, a( kp, k+1 ),
582 $ lda )
583 CALL cswap( n-kk+1, w( kk, kkw ), ldw, w( kp, kkw ),
584 $ ldw )
585 END IF
586*
587 IF( kstep.EQ.1 ) THEN
588*
589* 1-by-1 pivot block D(k): column kw of W now holds
590*
591* W(kw) = U(k)*D(k),
592*
593* where U(k) is the k-th column of U
594*
595* (1) Store subdiag. elements of column U(k)
596* and 1-by-1 block D(k) in column k of A.
597* (NOTE: Diagonal element U(k,k) is a UNIT element
598* and not stored)
599* A(k,k) := D(k,k) = W(k,kw)
600* A(1:k-1,k) := U(1:k-1,k) = W(1:k-1,kw)/D(k,k)
601*
602* (NOTE: No need to use for Hermitian matrix
603* A( K, K ) = REAL( W( K, K) ) to separately copy diagonal
604* element D(k,k) from W (potentially saves only one load))
605 CALL ccopy( k, w( 1, kw ), 1, a( 1, k ), 1 )
606 IF( k.GT.1 ) THEN
607*
608* (NOTE: No need to check if A(k,k) is NOT ZERO,
609* since that was ensured earlier in pivot search:
610* case A(k,k) = 0 falls into 2x2 pivot case(3))
611*
612* Handle division by a small number
613*
614 t = real( a( k, k ) )
615 IF( abs( t ).GE.sfmin ) THEN
616 r1 = one / t
617 CALL csscal( k-1, r1, a( 1, k ), 1 )
618 ELSE
619 DO 14 ii = 1, k-1
620 a( ii, k ) = a( ii, k ) / t
621 14 CONTINUE
622 END IF
623*
624* (2) Conjugate column W(kw)
625*
626 CALL clacgv( k-1, w( 1, kw ), 1 )
627*
628* Store the superdiagonal element of D in array E
629*
630 e( k ) = czero
631*
632 END IF
633*
634 ELSE
635*
636* 2-by-2 pivot block D(k): columns kw and kw-1 of W now hold
637*
638* ( W(kw-1) W(kw) ) = ( U(k-1) U(k) )*D(k)
639*
640* where U(k) and U(k-1) are the k-th and (k-1)-th columns
641* of U
642*
643* (1) Store U(1:k-2,k-1) and U(1:k-2,k) and 2-by-2
644* block D(k-1:k,k-1:k) in columns k-1 and k of A.
645* (NOTE: 2-by-2 diagonal block U(k-1:k,k-1:k) is a UNIT
646* block and not stored)
647* A(k-1:k,k-1:k) := D(k-1:k,k-1:k) = W(k-1:k,kw-1:kw)
648* A(1:k-2,k-1:k) := U(1:k-2,k:k-1:k) =
649* = W(1:k-2,kw-1:kw) * ( D(k-1:k,k-1:k)**(-1) )
650*
651 IF( k.GT.2 ) THEN
652*
653* Factor out the columns of the inverse of 2-by-2 pivot
654* block D, so that each column contains 1, to reduce the
655* number of FLOPS when we multiply panel
656* ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1).
657*
658* D**(-1) = ( d11 cj(d21) )**(-1) =
659* ( d21 d22 )
660*
661* = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) =
662* ( (-d21) ( d11 ) )
663*
664* = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) *
665*
666* * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) =
667* ( ( -1 ) ( d11/conj(d21) ) )
668*
669* = 1/(|d21|**2) * 1/(D22*D11-1) *
670*
671* * ( d21*( D11 ) conj(d21)*( -1 ) ) =
672* ( ( -1 ) ( D22 ) )
673*
674* = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) =
675* ( ( -1 ) ( D22 ) )
676*
677* = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) =
678* ( ( -1 ) ( D22 ) )
679*
680* Handle division by a small number. (NOTE: order of
681* operations is important)
682*
683* = ( T*(( D11 )/conj(D21)) T*(( -1 )/D21 ) )
684* ( (( -1 ) ) (( D22 ) ) ),
685*
686* where D11 = d22/d21,
687* D22 = d11/conj(d21),
688* D21 = d21,
689* T = 1/(D22*D11-1).
690*
691* (NOTE: No need to check for division by ZERO,
692* since that was ensured earlier in pivot search:
693* (a) d21 != 0 in 2x2 pivot case(4),
694* since |d21| should be larger than |d11| and |d22|;
695* (b) (D22*D11 - 1) != 0, since from (a),
696* both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.)
697*
698 d21 = w( k-1, kw )
699 d11 = w( k, kw ) / conjg( d21 )
700 d22 = w( k-1, kw-1 ) / d21
701 t = one / ( real( d11*d22 )-one )
702*
703* Update elements in columns A(k-1) and A(k) as
704* dot products of rows of ( W(kw-1) W(kw) ) and columns
705* of D**(-1)
706*
707 DO 20 j = 1, k - 2
708 a( j, k-1 ) = t*( ( d11*w( j, kw-1 )-w( j, kw ) ) /
709 $ d21 )
710 a( j, k ) = t*( ( d22*w( j, kw )-w( j, kw-1 ) ) /
711 $ conjg( d21 ) )
712 20 CONTINUE
713 END IF
714*
715* Copy diagonal elements of D(K) to A,
716* copy superdiagonal element of D(K) to E(K) and
717* ZERO out superdiagonal entry of A
718*
719 a( k-1, k-1 ) = w( k-1, kw-1 )
720 a( k-1, k ) = czero
721 a( k, k ) = w( k, kw )
722 e( k ) = w( k-1, kw )
723 e( k-1 ) = czero
724*
725* (2) Conjugate columns W(kw) and W(kw-1)
726*
727 CALL clacgv( k-1, w( 1, kw ), 1 )
728 CALL clacgv( k-2, w( 1, kw-1 ), 1 )
729*
730 END IF
731*
732* End column K is nonsingular
733*
734 END IF
735*
736* Store details of the interchanges in IPIV
737*
738 IF( kstep.EQ.1 ) THEN
739 ipiv( k ) = kp
740 ELSE
741 ipiv( k ) = -p
742 ipiv( k-1 ) = -kp
743 END IF
744*
745* Decrease K and return to the start of the main loop
746*
747 k = k - kstep
748 GO TO 10
749*
750 30 CONTINUE
751*
752* Update the upper triangle of A11 (= A(1:k,1:k)) as
753*
754* A11 := A11 - U12*D*U12**H = A11 - U12*W**H
755*
756* (note that conjg(W) is actually stored)
757*
758 CALL cgemmtr( 'Upper', 'No transpose', 'Transpose', k, n-k,
759 $ -cone, a( 1, k+1 ), lda, w( 1, kw+1 ), ldw,
760 $ cone, a( 1, 1 ), lda )
761*
762* Set KB to the number of columns factorized
763*
764 kb = n - k
765*
766 ELSE
767*
768* Factorize the leading columns of A using the lower triangle
769* of A and working forwards, and compute the matrix W = L21*D
770* for use in updating A22 (note that conjg(W) is actually stored)
771*
772* Initialize the unused last entry of the subdiagonal array E.
773*
774 e( n ) = czero
775*
776* K is the main loop index, increasing from 1 in steps of 1 or 2
777*
778 k = 1
779 70 CONTINUE
780*
781* Exit from loop
782*
783 IF( ( k.GE.nb .AND. nb.LT.n ) .OR. k.GT.n )
784 $ GO TO 90
785*
786 kstep = 1
787 p = k
788*
789* Copy column K of A to column K of W and update column K of W
790*
791 w( k, k ) = real( a( k, k ) )
792 IF( k.LT.n )
793 $ CALL ccopy( n-k, a( k+1, k ), 1, w( k+1, k ), 1 )
794 IF( k.GT.1 ) THEN
795 CALL cgemv( 'No transpose', n-k+1, k-1, -cone, a( k, 1 ),
796 $ lda, w( k, 1 ), ldw, cone, w( k, k ), 1 )
797 w( k, k ) = real( w( k, k ) )
798 END IF
799*
800* Determine rows and columns to be interchanged and whether
801* a 1-by-1 or 2-by-2 pivot block will be used
802*
803 absakk = abs( real( w( k, k ) ) )
804*
805* IMAX is the row-index of the largest off-diagonal element in
806* column K, and COLMAX is its absolute value.
807* Determine both COLMAX and IMAX.
808*
809 IF( k.LT.n ) THEN
810 imax = k + icamax( n-k, w( k+1, k ), 1 )
811 colmax = cabs1( w( imax, k ) )
812 ELSE
813 colmax = zero
814 END IF
815*
816 IF( max( absakk, colmax ).EQ.zero ) THEN
817*
818* Column K is zero or underflow: set INFO and continue
819*
820 IF( info.EQ.0 )
821 $ info = k
822 kp = k
823 a( k, k ) = real( w( k, k ) )
824 IF( k.LT.n )
825 $ CALL ccopy( n-k, w( k+1, k ), 1, a( k+1, k ), 1 )
826*
827* Set E( K ) to zero
828*
829 IF( k.LT.n )
830 $ e( k ) = czero
831*
832 ELSE
833*
834* ============================================================
835*
836* BEGIN pivot search
837*
838* Case(1)
839* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX
840* (used to handle NaN and Inf)
841*
842 IF( .NOT.( absakk.LT.alpha*colmax ) ) THEN
843*
844* no interchange, use 1-by-1 pivot block
845*
846 kp = k
847*
848 ELSE
849*
850 done = .false.
851*
852* Loop until pivot found
853*
854 72 CONTINUE
855*
856* BEGIN pivot search loop body
857*
858*
859* Copy column IMAX to column k+1 of W and update it
860*
861 CALL ccopy( imax-k, a( imax, k ), lda, w( k, k+1 ),
862 $ 1)
863 CALL clacgv( imax-k, w( k, k+1 ), 1 )
864 w( imax, k+1 ) = real( a( imax, imax ) )
865*
866 IF( imax.LT.n )
867 $ CALL ccopy( n-imax, a( imax+1, imax ), 1,
868 $ w( imax+1, k+1 ), 1 )
869*
870 IF( k.GT.1 ) THEN
871 CALL cgemv( 'No transpose', n-k+1, k-1, -cone,
872 $ a( k, 1 ), lda, w( imax, 1 ), ldw,
873 $ cone, w( k, k+1 ), 1 )
874 w( imax, k+1 ) = real( w( imax, k+1 ) )
875 END IF
876*
877* JMAX is the column-index of the largest off-diagonal
878* element in row IMAX, and ROWMAX is its absolute value.
879* Determine both ROWMAX and JMAX.
880*
881 IF( imax.NE.k ) THEN
882 jmax = k - 1 + icamax( imax-k, w( k, k+1 ), 1 )
883 rowmax = cabs1( w( jmax, k+1 ) )
884 ELSE
885 rowmax = zero
886 END IF
887*
888 IF( imax.LT.n ) THEN
889 itemp = imax + icamax( n-imax, w( imax+1, k+1 ),
890 $ 1)
891 stemp = cabs1( w( itemp, k+1 ) )
892 IF( stemp.GT.rowmax ) THEN
893 rowmax = stemp
894 jmax = itemp
895 END IF
896 END IF
897*
898* Case(2)
899* Equivalent to testing for
900* ABS( REAL( W( IMAX,K+1 ) ) ).GE.ALPHA*ROWMAX
901* (used to handle NaN and Inf)
902*
903 IF( .NOT.( abs( real( w( imax,k+1 ) ) )
904 $ .LT.alpha*rowmax ) ) THEN
905*
906* interchange rows and columns K and IMAX,
907* use 1-by-1 pivot block
908*
909 kp = imax
910*
911* copy column K+1 of W to column K of W
912*
913 CALL ccopy( n-k+1, w( k, k+1 ), 1, w( k, k ),
914 $ 1 )
915*
916 done = .true.
917*
918* Case(3)
919* Equivalent to testing for ROWMAX.EQ.COLMAX,
920* (used to handle NaN and Inf)
921*
922 ELSE IF( ( p.EQ.jmax ) .OR. ( rowmax.LE.colmax ) )
923 $ THEN
924*
925* interchange rows and columns K+1 and IMAX,
926* use 2-by-2 pivot block
927*
928 kp = imax
929 kstep = 2
930 done = .true.
931*
932* Case(4)
933 ELSE
934*
935* Pivot not found: set params and repeat
936*
937 p = imax
938 colmax = rowmax
939 imax = jmax
940*
941* Copy updated JMAXth (next IMAXth) column to Kth of W
942*
943 CALL ccopy( n-k+1, w( k, k+1 ), 1, w( k, k ),
944 $ 1 )
945*
946 END IF
947*
948*
949* End pivot search loop body
950*
951 IF( .NOT.done ) GOTO 72
952*
953 END IF
954*
955* END pivot search
956*
957* ============================================================
958*
959* KK is the column of A where pivoting step stopped
960*
961 kk = k + kstep - 1
962*
963* Interchange rows and columns P and K (only for 2-by-2 pivot).
964* Updated column P is already stored in column K of W.
965*
966 IF( ( kstep.EQ.2 ) .AND. ( p.NE.k ) ) THEN
967*
968* Copy non-updated column KK-1 to column P of submatrix A
969* at step K. No need to copy element into columns
970* K and K+1 of A for 2-by-2 pivot, since these columns
971* will be later overwritten.
972*
973 a( p, p ) = real( a( k, k ) )
974 CALL ccopy( p-k-1, a( k+1, k ), 1, a( p, k+1 ), lda )
975 CALL clacgv( p-k-1, a( p, k+1 ), lda )
976 IF( p.LT.n )
977 $ CALL ccopy( n-p, a( p+1, k ), 1, a( p+1, p ), 1 )
978*
979* Interchange rows K and P in first K-1 columns of A
980* (columns K and K+1 of A for 2-by-2 pivot will be
981* later overwritten). Interchange rows K and P
982* in first KK columns of W.
983*
984 IF( k.GT.1 )
985 $ CALL cswap( k-1, a( k, 1 ), lda, a( p, 1 ), lda )
986 CALL cswap( kk, w( k, 1 ), ldw, w( p, 1 ), ldw )
987 END IF
988*
989* Interchange rows and columns KP and KK.
990* Updated column KP is already stored in column KK of W.
991*
992 IF( kp.NE.kk ) THEN
993*
994* Copy non-updated column KK to column KP of submatrix A
995* at step K. No need to copy element into column K
996* (or K and K+1 for 2-by-2 pivot) of A, since these columns
997* will be later overwritten.
998*
999 a( kp, kp ) = real( a( kk, kk ) )
1000 CALL ccopy( kp-kk-1, a( kk+1, kk ), 1, a( kp, kk+1 ),
1001 $ lda )
1002 CALL clacgv( kp-kk-1, a( kp, kk+1 ), lda )
1003 IF( kp.LT.n )
1004 $ CALL ccopy( n-kp, a( kp+1, kk ), 1, a( kp+1, kp ),
1005 $ 1 )
1006*
1007* Interchange rows KK and KP in first K-1 columns of A
1008* (column K (or K and K+1 for 2-by-2 pivot) of A will be
1009* later overwritten). Interchange rows KK and KP
1010* in first KK columns of W.
1011*
1012 IF( k.GT.1 )
1013 $ CALL cswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda )
1014 CALL cswap( kk, w( kk, 1 ), ldw, w( kp, 1 ), ldw )
1015 END IF
1016*
1017 IF( kstep.EQ.1 ) THEN
1018*
1019* 1-by-1 pivot block D(k): column k of W now holds
1020*
1021* W(k) = L(k)*D(k),
1022*
1023* where L(k) is the k-th column of L
1024*
1025* (1) Store subdiag. elements of column L(k)
1026* and 1-by-1 block D(k) in column k of A.
1027* (NOTE: Diagonal element L(k,k) is a UNIT element
1028* and not stored)
1029* A(k,k) := D(k,k) = W(k,k)
1030* A(k+1:N,k) := L(k+1:N,k) = W(k+1:N,k)/D(k,k)
1031*
1032* (NOTE: No need to use for Hermitian matrix
1033* A( K, K ) = REAL( W( K, K) ) to separately copy diagonal
1034* element D(k,k) from W (potentially saves only one load))
1035 CALL ccopy( n-k+1, w( k, k ), 1, a( k, k ), 1 )
1036 IF( k.LT.n ) THEN
1037*
1038* (NOTE: No need to check if A(k,k) is NOT ZERO,
1039* since that was ensured earlier in pivot search:
1040* case A(k,k) = 0 falls into 2x2 pivot case(3))
1041*
1042* Handle division by a small number
1043*
1044 t = real( a( k, k ) )
1045 IF( abs( t ).GE.sfmin ) THEN
1046 r1 = one / t
1047 CALL csscal( n-k, r1, a( k+1, k ), 1 )
1048 ELSE
1049 DO 74 ii = k + 1, n
1050 a( ii, k ) = a( ii, k ) / t
1051 74 CONTINUE
1052 END IF
1053*
1054* (2) Conjugate column W(k)
1055*
1056 CALL clacgv( n-k, w( k+1, k ), 1 )
1057*
1058* Store the subdiagonal element of D in array E
1059*
1060 e( k ) = czero
1061*
1062 END IF
1063*
1064 ELSE
1065*
1066* 2-by-2 pivot block D(k): columns k and k+1 of W now hold
1067*
1068* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k)
1069*
1070* where L(k) and L(k+1) are the k-th and (k+1)-th columns
1071* of L
1072*
1073* (1) Store L(k+2:N,k) and L(k+2:N,k+1) and 2-by-2
1074* block D(k:k+1,k:k+1) in columns k and k+1 of A.
1075* NOTE: 2-by-2 diagonal block L(k:k+1,k:k+1) is a UNIT
1076* block and not stored.
1077* A(k:k+1,k:k+1) := D(k:k+1,k:k+1) = W(k:k+1,k:k+1)
1078* A(k+2:N,k:k+1) := L(k+2:N,k:k+1) =
1079* = W(k+2:N,k:k+1) * ( D(k:k+1,k:k+1)**(-1) )
1080*
1081 IF( k.LT.n-1 ) THEN
1082*
1083* Factor out the columns of the inverse of 2-by-2 pivot
1084* block D, so that each column contains 1, to reduce the
1085* number of FLOPS when we multiply panel
1086* ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1).
1087*
1088* D**(-1) = ( d11 cj(d21) )**(-1) =
1089* ( d21 d22 )
1090*
1091* = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) =
1092* ( (-d21) ( d11 ) )
1093*
1094* = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) *
1095*
1096* * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) =
1097* ( ( -1 ) ( d11/conj(d21) ) )
1098*
1099* = 1/(|d21|**2) * 1/(D22*D11-1) *
1100*
1101* * ( d21*( D11 ) conj(d21)*( -1 ) ) =
1102* ( ( -1 ) ( D22 ) )
1103*
1104* = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) =
1105* ( ( -1 ) ( D22 ) )
1106*
1107* = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) =
1108* ( ( -1 ) ( D22 ) )
1109*
1110* Handle division by a small number. (NOTE: order of
1111* operations is important)
1112*
1113* = ( T*(( D11 )/conj(D21)) T*(( -1 )/D21 ) )
1114* ( (( -1 ) ) (( D22 ) ) ),
1115*
1116* where D11 = d22/d21,
1117* D22 = d11/conj(d21),
1118* D21 = d21,
1119* T = 1/(D22*D11-1).
1120*
1121* (NOTE: No need to check for division by ZERO,
1122* since that was ensured earlier in pivot search:
1123* (a) d21 != 0 in 2x2 pivot case(4),
1124* since |d21| should be larger than |d11| and |d22|;
1125* (b) (D22*D11 - 1) != 0, since from (a),
1126* both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.)
1127*
1128 d21 = w( k+1, k )
1129 d11 = w( k+1, k+1 ) / d21
1130 d22 = w( k, k ) / conjg( d21 )
1131 t = one / ( real( d11*d22 )-one )
1132*
1133* Update elements in columns A(k) and A(k+1) as
1134* dot products of rows of ( W(k) W(k+1) ) and columns
1135* of D**(-1)
1136*
1137 DO 80 j = k + 2, n
1138 a( j, k ) = t*( ( d11*w( j, k )-w( j, k+1 ) ) /
1139 $ conjg( d21 ) )
1140 a( j, k+1 ) = t*( ( d22*w( j, k+1 )-w( j, k ) ) /
1141 $ d21 )
1142 80 CONTINUE
1143 END IF
1144*
1145* Copy diagonal elements of D(K) to A,
1146* copy subdiagonal element of D(K) to E(K) and
1147* ZERO out subdiagonal entry of A
1148*
1149 a( k, k ) = w( k, k )
1150 a( k+1, k ) = czero
1151 a( k+1, k+1 ) = w( k+1, k+1 )
1152 e( k ) = w( k+1, k )
1153 e( k+1 ) = czero
1154*
1155* (2) Conjugate columns W(k) and W(k+1)
1156*
1157 CALL clacgv( n-k, w( k+1, k ), 1 )
1158 CALL clacgv( n-k-1, w( k+2, k+1 ), 1 )
1159*
1160 END IF
1161*
1162* End column K is nonsingular
1163*
1164 END IF
1165*
1166* Store details of the interchanges in IPIV
1167*
1168 IF( kstep.EQ.1 ) THEN
1169 ipiv( k ) = kp
1170 ELSE
1171 ipiv( k ) = -p
1172 ipiv( k+1 ) = -kp
1173 END IF
1174*
1175* Increase K and return to the start of the main loop
1176*
1177 k = k + kstep
1178 GO TO 70
1179*
1180 90 CONTINUE
1181*
1182* Update the lower triangle of A22 (= A(k:n,k:n)) as
1183*
1184* A22 := A22 - L21*D*L21**H = A22 - L21*W**H
1185*
1186* (note that conjg(W) is actually stored)
1187*
1188 CALL cgemmtr( 'Lower', 'No transpose', 'Transpose', n-k+1,
1189 $ k-1, -cone, a( k, 1 ), lda, w( k, 1 ), ldw,
1190 $ cone, a( k, k ), lda )
1191*
1192* Set KB to the number of columns factorized
1193*
1194 kb = k - 1
1195*
1196 END IF
1197 RETURN
1198*
1199* End of CLAHEF_RK
1200*
1201 END
subroutine ccopy(n, cx, incx, cy, incy)
CCOPY
Definition ccopy.f:81
subroutine cgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
CGEMM
Definition cgemm.f:188
subroutine cgemmtr(uplo, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
CGEMMTR
Definition cgemmtr.f:191
subroutine cgemv(trans, m, n, alpha, a, lda, x, incx, beta, y, incy)
CGEMV
Definition cgemv.f:160
subroutine clacgv(n, x, incx)
CLACGV conjugates a complex vector.
Definition clacgv.f:72
subroutine clahef_rk(uplo, n, nb, kb, a, lda, e, ipiv, w, ldw, info)
CLAHEF_RK computes a partial factorization of a complex Hermitian indefinite matrix using bounded Bun...
Definition clahef_rk.f:260
subroutine csscal(n, sa, cx, incx)
CSSCAL
Definition csscal.f:78
subroutine cswap(n, cx, incx, cy, incy)
CSWAP
Definition cswap.f:81