LAPACK 3.12.1
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ csytrf_aa()

subroutine csytrf_aa ( character uplo,
integer n,
complex, dimension( lda, * ) a,
integer lda,
integer, dimension( * ) ipiv,
complex, dimension( * ) work,
integer lwork,
integer info )

CSYTRF_AA

Download CSYTRF_AA + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> CSYTRF_AA computes the factorization of a complex symmetric matrix A
!> using the Aasen's algorithm.  The form of the factorization is
!>
!>    A = U**T*T*U  or  A = L*T*L**T
!>
!> where U (or L) is a product of permutation and unit upper (lower)
!> triangular matrices, and T is a complex symmetric tridiagonal matrix.
!>
!> This is the blocked version of the algorithm, calling Level 3 BLAS.
!> 
Parameters
[in]UPLO
!>          UPLO is CHARACTER*1
!>          = 'U':  Upper triangle of A is stored;
!>          = 'L':  Lower triangle of A is stored.
!> 
[in]N
!>          N is INTEGER
!>          The order of the matrix A.  N >= 0.
!> 
[in,out]A
!>          A is COMPLEX array, dimension (LDA,N)
!>          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
!>          N-by-N upper triangular part of A contains the upper
!>          triangular part of the matrix A, and the strictly lower
!>          triangular part of A is not referenced.  If UPLO = 'L', the
!>          leading N-by-N lower triangular part of A contains the lower
!>          triangular part of the matrix A, and the strictly upper
!>          triangular part of A is not referenced.
!>
!>          On exit, the tridiagonal matrix is stored in the diagonals
!>          and the subdiagonals of A just below (or above) the diagonals,
!>          and L is stored below (or above) the subdiagonals, when UPLO
!>          is 'L' (or 'U').
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A.  LDA >= max(1,N).
!> 
[out]IPIV
!>          IPIV is INTEGER array, dimension (N)
!>          On exit, it contains the details of the interchanges, i.e.,
!>          the row and column k of A were interchanged with the
!>          row and column IPIV(k).
!> 
[out]WORK
!>          WORK is COMPLEX array, dimension (MAX(1,LWORK))
!>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          The length of WORK. LWORK >= MAX(1,2*N). For optimum performance
!>          LWORK >= N*(1+NB), where NB is the optimal blocksize.
!>
!>          If LWORK = -1, then a workspace query is assumed; the routine
!>          only calculates the optimal size of the WORK array, returns
!>          this value as the first entry of the WORK array, and no error
!>          message related to LWORK is issued by XERBLA.
!> 
[out]INFO
!>          INFO is INTEGER
!>          = 0:  successful exit
!>          < 0:  if INFO = -i, the i-th argument had an illegal value.
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 129 of file csytrf_aa.f.

130*
131* -- LAPACK computational routine --
132* -- LAPACK is a software package provided by Univ. of Tennessee, --
133* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
134*
135 IMPLICIT NONE
136*
137* .. Scalar Arguments ..
138 CHARACTER UPLO
139 INTEGER N, LDA, LWORK, INFO
140* ..
141* .. Array Arguments ..
142 INTEGER IPIV( * )
143 COMPLEX A( LDA, * ), WORK( * )
144* ..
145*
146* =====================================================================
147* .. Parameters ..
148 COMPLEX ZERO, ONE
149 parameter( zero = 0.0e+0, one = 1.0e+0 )
150*
151* .. Local Scalars ..
152 LOGICAL LQUERY, UPPER
153 INTEGER J, LWKOPT
154 INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB
155 COMPLEX ALPHA
156* ..
157* .. External Functions ..
158 LOGICAL LSAME
159 INTEGER ILAENV
160 REAL SROUNDUP_LWORK
161 EXTERNAL lsame, ilaenv, sroundup_lwork
162* ..
163* .. External Subroutines ..
164 EXTERNAL clasyf_aa, cgemm, cgemv, cscal, cswap,
165 $ ccopy,
166 $ xerbla
167* ..
168* .. Intrinsic Functions ..
169 INTRINSIC max
170* ..
171* .. Executable Statements ..
172*
173* Determine the block size
174*
175 nb = ilaenv( 1, 'CSYTRF_AA', uplo, n, -1, -1, -1 )
176*
177* Test the input parameters.
178*
179 info = 0
180 upper = lsame( uplo, 'U' )
181 lquery = ( lwork.EQ.-1 )
182 IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
183 info = -1
184 ELSE IF( n.LT.0 ) THEN
185 info = -2
186 ELSE IF( lda.LT.max( 1, n ) ) THEN
187 info = -4
188 ELSE IF( lwork.LT.max( 1, 2*n ) .AND. .NOT.lquery ) THEN
189 info = -7
190 END IF
191*
192 IF( info.EQ.0 ) THEN
193 lwkopt = (nb+1)*n
194 work( 1 ) = sroundup_lwork(lwkopt)
195 END IF
196*
197 IF( info.NE.0 ) THEN
198 CALL xerbla( 'CSYTRF_AA', -info )
199 RETURN
200 ELSE IF( lquery ) THEN
201 RETURN
202 END IF
203*
204* Quick return
205*
206 IF ( n.EQ.0 ) THEN
207 RETURN
208 ENDIF
209 ipiv( 1 ) = 1
210 IF ( n.EQ.1 ) THEN
211 RETURN
212 END IF
213*
214* Adjust block size based on the workspace size
215*
216 IF( lwork.LT.((1+nb)*n) ) THEN
217 nb = ( lwork-n ) / n
218 END IF
219*
220 IF( upper ) THEN
221*
222* .....................................................
223* Factorize A as U**T*D*U using the upper triangle of A
224* .....................................................
225*
226* Copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
227*
228 CALL ccopy( n, a( 1, 1 ), lda, work( 1 ), 1 )
229*
230* J is the main loop index, increasing from 1 to N in steps of
231* JB, where JB is the number of columns factorized by CLASYF;
232* JB is either NB, or N-J+1 for the last block
233*
234 j = 0
235 10 CONTINUE
236 IF( j.GE.n )
237 $ GO TO 20
238*
239* each step of the main loop
240* J is the last column of the previous panel
241* J1 is the first column of the current panel
242* K1 identifies if the previous column of the panel has been
243* explicitly stored, e.g., K1=1 for the first panel, and
244* K1=0 for the rest
245*
246 j1 = j + 1
247 jb = min( n-j1+1, nb )
248 k1 = max(1, j)-j
249*
250* Panel factorization
251*
252 CALL clasyf_aa( uplo, 2-k1, n-j, jb,
253 $ a( max(1, j), j+1 ), lda,
254 $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
255*
256* Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
257*
258 DO j2 = j+2, min(n, j+jb+1)
259 ipiv( j2 ) = ipiv( j2 ) + j
260 IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
261 CALL cswap( j1-k1-2, a( 1, j2 ), 1,
262 $ a( 1, ipiv(j2) ), 1 )
263 END IF
264 END DO
265 j = j + jb
266*
267* Trailing submatrix update, where
268* the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
269* WORK stores the current block of the auxiriarly matrix H
270*
271 IF( j.LT.n ) THEN
272*
273* If first panel and JB=1 (NB=1), then nothing to do
274*
275 IF( j1.GT.1 .OR. jb.GT.1 ) THEN
276*
277* Merge rank-1 update with BLAS-3 update
278*
279 alpha = a( j, j+1 )
280 a( j, j+1 ) = one
281 CALL ccopy( n-j, a( j-1, j+1 ), lda,
282 $ work( (j+1-j1+1)+jb*n ), 1 )
283 CALL cscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
284*
285* K1 identifies if the previous column of the panel has been
286* explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
287* while K1=0 and K2=1 for the rest
288*
289 IF( j1.GT.1 ) THEN
290*
291* Not first panel
292*
293 k2 = 1
294 ELSE
295*
296* First panel
297*
298 k2 = 0
299*
300* First update skips the first column
301*
302 jb = jb - 1
303 END IF
304*
305 DO j2 = j+1, n, nb
306 nj = min( nb, n-j2+1 )
307*
308* Update (J2, J2) diagonal block with CGEMV
309*
310 j3 = j2
311 DO mj = nj-1, 1, -1
312 CALL cgemv( 'No transpose', mj, jb+1,
313 $ -one, work( j3-j1+1+k1*n ), n,
314 $ a( j1-k2, j3 ), 1,
315 $ one, a( j3, j3 ), lda )
316 j3 = j3 + 1
317 END DO
318*
319* Update off-diagonal block of J2-th block row with CGEMM
320*
321 CALL cgemm( 'Transpose', 'Transpose',
322 $ nj, n-j3+1, jb+1,
323 $ -one, a( j1-k2, j2 ), lda,
324 $ work( j3-j1+1+k1*n ), n,
325 $ one, a( j2, j3 ), lda )
326 END DO
327*
328* Recover T( J, J+1 )
329*
330 a( j, j+1 ) = alpha
331 END IF
332*
333* WORK(J+1, 1) stores H(J+1, 1)
334*
335 CALL ccopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 )
336 END IF
337 GO TO 10
338 ELSE
339*
340* .....................................................
341* Factorize A as L*D*L**T using the lower triangle of A
342* .....................................................
343*
344* copy first column A(1:N, 1) into H(1:N, 1)
345* (stored in WORK(1:N))
346*
347 CALL ccopy( n, a( 1, 1 ), 1, work( 1 ), 1 )
348*
349* J is the main loop index, increasing from 1 to N in steps of
350* JB, where JB is the number of columns factorized by CLASYF;
351* JB is either NB, or N-J+1 for the last block
352*
353 j = 0
354 11 CONTINUE
355 IF( j.GE.n )
356 $ GO TO 20
357*
358* each step of the main loop
359* J is the last column of the previous panel
360* J1 is the first column of the current panel
361* K1 identifies if the previous column of the panel has been
362* explicitly stored, e.g., K1=1 for the first panel, and
363* K1=0 for the rest
364*
365 j1 = j+1
366 jb = min( n-j1+1, nb )
367 k1 = max(1, j)-j
368*
369* Panel factorization
370*
371 CALL clasyf_aa( uplo, 2-k1, n-j, jb,
372 $ a( j+1, max(1, j) ), lda,
373 $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
374*
375* Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
376*
377 DO j2 = j+2, min(n, j+jb+1)
378 ipiv( j2 ) = ipiv( j2 ) + j
379 IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
380 CALL cswap( j1-k1-2, a( j2, 1 ), lda,
381 $ a( ipiv(j2), 1 ), lda )
382 END IF
383 END DO
384 j = j + jb
385*
386* Trailing submatrix update, where
387* A(J2+1, J1-1) stores L(J2+1, J1) and
388* WORK(J2+1, 1) stores H(J2+1, 1)
389*
390 IF( j.LT.n ) THEN
391*
392* if first panel and JB=1 (NB=1), then nothing to do
393*
394 IF( j1.GT.1 .OR. jb.GT.1 ) THEN
395*
396* Merge rank-1 update with BLAS-3 update
397*
398 alpha = a( j+1, j )
399 a( j+1, j ) = one
400 CALL ccopy( n-j, a( j+1, j-1 ), 1,
401 $ work( (j+1-j1+1)+jb*n ), 1 )
402 CALL cscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
403*
404* K1 identifies if the previous column of the panel has been
405* explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
406* while K1=0 and K2=1 for the rest
407*
408 IF( j1.GT.1 ) THEN
409*
410* Not first panel
411*
412 k2 = 1
413 ELSE
414*
415* First panel
416*
417 k2 = 0
418*
419* First update skips the first column
420*
421 jb = jb - 1
422 END IF
423*
424 DO j2 = j+1, n, nb
425 nj = min( nb, n-j2+1 )
426*
427* Update (J2, J2) diagonal block with CGEMV
428*
429 j3 = j2
430 DO mj = nj-1, 1, -1
431 CALL cgemv( 'No transpose', mj, jb+1,
432 $ -one, work( j3-j1+1+k1*n ), n,
433 $ a( j3, j1-k2 ), lda,
434 $ one, a( j3, j3 ), 1 )
435 j3 = j3 + 1
436 END DO
437*
438* Update off-diagonal block in J2-th block column with CGEMM
439*
440 CALL cgemm( 'No transpose', 'Transpose',
441 $ n-j3+1, nj, jb+1,
442 $ -one, work( j3-j1+1+k1*n ), n,
443 $ a( j2, j1-k2 ), lda,
444 $ one, a( j3, j2 ), lda )
445 END DO
446*
447* Recover T( J+1, J )
448*
449 a( j+1, j ) = alpha
450 END IF
451*
452* WORK(J+1, 1) stores H(J+1, 1)
453*
454 CALL ccopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 )
455 END IF
456 GO TO 11
457 END IF
458*
459 20 CONTINUE
460 work( 1 ) = sroundup_lwork(lwkopt)
461 RETURN
462*
463* End of CSYTRF_AA
464*
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine ccopy(n, cx, incx, cy, incy)
CCOPY
Definition ccopy.f:81
subroutine cgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
CGEMM
Definition cgemm.f:188
subroutine cgemv(trans, m, n, alpha, a, lda, x, incx, beta, y, incy)
CGEMV
Definition cgemv.f:160
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
Definition ilaenv.f:160
subroutine clasyf_aa(uplo, j1, m, nb, a, lda, ipiv, h, ldh, work)
CLASYF_AA
Definition clasyf_aa.f:142
logical function lsame(ca, cb)
LSAME
Definition lsame.f:48
real function sroundup_lwork(lwork)
SROUNDUP_LWORK
subroutine cscal(n, ca, cx, incx)
CSCAL
Definition cscal.f:78
subroutine cswap(n, cx, incx, cy, incy)
CSWAP
Definition cswap.f:81
Here is the call graph for this function:
Here is the caller graph for this function: