LAPACK 3.11.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ csytrf_aa()

subroutine csytrf_aa ( character  UPLO,
integer  N,
complex, dimension( lda, * )  A,
integer  LDA,
integer, dimension( * )  IPIV,
complex, dimension( * )  WORK,
integer  LWORK,
integer  INFO 
)

CSYTRF_AA

Download CSYTRF_AA + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 CSYTRF_AA computes the factorization of a complex symmetric matrix A
 using the Aasen's algorithm.  The form of the factorization is

    A = U**T*T*U  or  A = L*T*L**T

 where U (or L) is a product of permutation and unit upper (lower)
 triangular matrices, and T is a complex symmetric tridiagonal matrix.

 This is the blocked version of the algorithm, calling Level 3 BLAS.
Parameters
[in]UPLO
          UPLO is CHARACTER*1
          = 'U':  Upper triangle of A is stored;
          = 'L':  Lower triangle of A is stored.
[in]N
          N is INTEGER
          The order of the matrix A.  N >= 0.
[in,out]A
          A is COMPLEX array, dimension (LDA,N)
          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
          N-by-N upper triangular part of A contains the upper
          triangular part of the matrix A, and the strictly lower
          triangular part of A is not referenced.  If UPLO = 'L', the
          leading N-by-N lower triangular part of A contains the lower
          triangular part of the matrix A, and the strictly upper
          triangular part of A is not referenced.

          On exit, the tridiagonal matrix is stored in the diagonals
          and the subdiagonals of A just below (or above) the diagonals,
          and L is stored below (or above) the subdiaonals, when UPLO
          is 'L' (or 'U').
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,N).
[out]IPIV
          IPIV is INTEGER array, dimension (N)
          On exit, it contains the details of the interchanges, i.e.,
          the row and column k of A were interchanged with the
          row and column IPIV(k).
[out]WORK
          WORK is COMPLEX array, dimension (MAX(1,LWORK))
          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
[in]LWORK
          LWORK is INTEGER
          The length of WORK. LWORK >= MAX(1,2*N). For optimum performance
          LWORK >= N*(1+NB), where NB is the optimal blocksize.

          If LWORK = -1, then a workspace query is assumed; the routine
          only calculates the optimal size of the WORK array, returns
          this value as the first entry of the WORK array, and no error
          message related to LWORK is issued by XERBLA.
[out]INFO
          INFO is INTEGER
          = 0:  successful exit
          < 0:  if INFO = -i, the i-th argument had an illegal value.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 131 of file csytrf_aa.f.

132*
133* -- LAPACK computational routine --
134* -- LAPACK is a software package provided by Univ. of Tennessee, --
135* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
136*
137 IMPLICIT NONE
138*
139* .. Scalar Arguments ..
140 CHARACTER UPLO
141 INTEGER N, LDA, LWORK, INFO
142* ..
143* .. Array Arguments ..
144 INTEGER IPIV( * )
145 COMPLEX A( LDA, * ), WORK( * )
146* ..
147*
148* =====================================================================
149* .. Parameters ..
150 COMPLEX ZERO, ONE
151 parameter( zero = 0.0e+0, one = 1.0e+0 )
152*
153* .. Local Scalars ..
154 LOGICAL LQUERY, UPPER
155 INTEGER J, LWKOPT
156 INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB
157 COMPLEX ALPHA
158* ..
159* .. External Functions ..
160 LOGICAL LSAME
161 INTEGER ILAENV
162 EXTERNAL lsame, ilaenv
163* ..
164* .. External Subroutines ..
165 EXTERNAL clasyf_aa, cgemm, cgemv, cscal, cswap, ccopy,
166 $ xerbla
167* ..
168* .. Intrinsic Functions ..
169 INTRINSIC max
170* ..
171* .. Executable Statements ..
172*
173* Determine the block size
174*
175 nb = ilaenv( 1, 'CSYTRF_AA', uplo, n, -1, -1, -1 )
176*
177* Test the input parameters.
178*
179 info = 0
180 upper = lsame( uplo, 'U' )
181 lquery = ( lwork.EQ.-1 )
182 IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
183 info = -1
184 ELSE IF( n.LT.0 ) THEN
185 info = -2
186 ELSE IF( lda.LT.max( 1, n ) ) THEN
187 info = -4
188 ELSE IF( lwork.LT.max( 1, 2*n ) .AND. .NOT.lquery ) THEN
189 info = -7
190 END IF
191*
192 IF( info.EQ.0 ) THEN
193 lwkopt = (nb+1)*n
194 work( 1 ) = lwkopt
195 END IF
196*
197 IF( info.NE.0 ) THEN
198 CALL xerbla( 'CSYTRF_AA', -info )
199 RETURN
200 ELSE IF( lquery ) THEN
201 RETURN
202 END IF
203*
204* Quick return
205*
206 IF ( n.EQ.0 ) THEN
207 RETURN
208 ENDIF
209 ipiv( 1 ) = 1
210 IF ( n.EQ.1 ) THEN
211 RETURN
212 END IF
213*
214* Adjust block size based on the workspace size
215*
216 IF( lwork.LT.((1+nb)*n) ) THEN
217 nb = ( lwork-n ) / n
218 END IF
219*
220 IF( upper ) THEN
221*
222* .....................................................
223* Factorize A as U**T*D*U using the upper triangle of A
224* .....................................................
225*
226* Copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
227*
228 CALL ccopy( n, a( 1, 1 ), lda, work( 1 ), 1 )
229*
230* J is the main loop index, increasing from 1 to N in steps of
231* JB, where JB is the number of columns factorized by CLASYF;
232* JB is either NB, or N-J+1 for the last block
233*
234 j = 0
235 10 CONTINUE
236 IF( j.GE.n )
237 $ GO TO 20
238*
239* each step of the main loop
240* J is the last column of the previous panel
241* J1 is the first column of the current panel
242* K1 identifies if the previous column of the panel has been
243* explicitly stored, e.g., K1=1 for the first panel, and
244* K1=0 for the rest
245*
246 j1 = j + 1
247 jb = min( n-j1+1, nb )
248 k1 = max(1, j)-j
249*
250* Panel factorization
251*
252 CALL clasyf_aa( uplo, 2-k1, n-j, jb,
253 $ a( max(1, j), j+1 ), lda,
254 $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
255*
256* Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
257*
258 DO j2 = j+2, min(n, j+jb+1)
259 ipiv( j2 ) = ipiv( j2 ) + j
260 IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
261 CALL cswap( j1-k1-2, a( 1, j2 ), 1,
262 $ a( 1, ipiv(j2) ), 1 )
263 END IF
264 END DO
265 j = j + jb
266*
267* Trailing submatrix update, where
268* the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
269* WORK stores the current block of the auxiriarly matrix H
270*
271 IF( j.LT.n ) THEN
272*
273* If first panel and JB=1 (NB=1), then nothing to do
274*
275 IF( j1.GT.1 .OR. jb.GT.1 ) THEN
276*
277* Merge rank-1 update with BLAS-3 update
278*
279 alpha = a( j, j+1 )
280 a( j, j+1 ) = one
281 CALL ccopy( n-j, a( j-1, j+1 ), lda,
282 $ work( (j+1-j1+1)+jb*n ), 1 )
283 CALL cscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
284*
285* K1 identifies if the previous column of the panel has been
286* explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
287* while K1=0 and K2=1 for the rest
288*
289 IF( j1.GT.1 ) THEN
290*
291* Not first panel
292*
293 k2 = 1
294 ELSE
295*
296* First panel
297*
298 k2 = 0
299*
300* First update skips the first column
301*
302 jb = jb - 1
303 END IF
304*
305 DO j2 = j+1, n, nb
306 nj = min( nb, n-j2+1 )
307*
308* Update (J2, J2) diagonal block with CGEMV
309*
310 j3 = j2
311 DO mj = nj-1, 1, -1
312 CALL cgemv( 'No transpose', mj, jb+1,
313 $ -one, work( j3-j1+1+k1*n ), n,
314 $ a( j1-k2, j3 ), 1,
315 $ one, a( j3, j3 ), lda )
316 j3 = j3 + 1
317 END DO
318*
319* Update off-diagonal block of J2-th block row with CGEMM
320*
321 CALL cgemm( 'Transpose', 'Transpose',
322 $ nj, n-j3+1, jb+1,
323 $ -one, a( j1-k2, j2 ), lda,
324 $ work( j3-j1+1+k1*n ), n,
325 $ one, a( j2, j3 ), lda )
326 END DO
327*
328* Recover T( J, J+1 )
329*
330 a( j, j+1 ) = alpha
331 END IF
332*
333* WORK(J+1, 1) stores H(J+1, 1)
334*
335 CALL ccopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 )
336 END IF
337 GO TO 10
338 ELSE
339*
340* .....................................................
341* Factorize A as L*D*L**T using the lower triangle of A
342* .....................................................
343*
344* copy first column A(1:N, 1) into H(1:N, 1)
345* (stored in WORK(1:N))
346*
347 CALL ccopy( n, a( 1, 1 ), 1, work( 1 ), 1 )
348*
349* J is the main loop index, increasing from 1 to N in steps of
350* JB, where JB is the number of columns factorized by CLASYF;
351* JB is either NB, or N-J+1 for the last block
352*
353 j = 0
354 11 CONTINUE
355 IF( j.GE.n )
356 $ GO TO 20
357*
358* each step of the main loop
359* J is the last column of the previous panel
360* J1 is the first column of the current panel
361* K1 identifies if the previous column of the panel has been
362* explicitly stored, e.g., K1=1 for the first panel, and
363* K1=0 for the rest
364*
365 j1 = j+1
366 jb = min( n-j1+1, nb )
367 k1 = max(1, j)-j
368*
369* Panel factorization
370*
371 CALL clasyf_aa( uplo, 2-k1, n-j, jb,
372 $ a( j+1, max(1, j) ), lda,
373 $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
374*
375* Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
376*
377 DO j2 = j+2, min(n, j+jb+1)
378 ipiv( j2 ) = ipiv( j2 ) + j
379 IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
380 CALL cswap( j1-k1-2, a( j2, 1 ), lda,
381 $ a( ipiv(j2), 1 ), lda )
382 END IF
383 END DO
384 j = j + jb
385*
386* Trailing submatrix update, where
387* A(J2+1, J1-1) stores L(J2+1, J1) and
388* WORK(J2+1, 1) stores H(J2+1, 1)
389*
390 IF( j.LT.n ) THEN
391*
392* if first panel and JB=1 (NB=1), then nothing to do
393*
394 IF( j1.GT.1 .OR. jb.GT.1 ) THEN
395*
396* Merge rank-1 update with BLAS-3 update
397*
398 alpha = a( j+1, j )
399 a( j+1, j ) = one
400 CALL ccopy( n-j, a( j+1, j-1 ), 1,
401 $ work( (j+1-j1+1)+jb*n ), 1 )
402 CALL cscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
403*
404* K1 identifies if the previous column of the panel has been
405* explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
406* while K1=0 and K2=1 for the rest
407*
408 IF( j1.GT.1 ) THEN
409*
410* Not first panel
411*
412 k2 = 1
413 ELSE
414*
415* First panel
416*
417 k2 = 0
418*
419* First update skips the first column
420*
421 jb = jb - 1
422 END IF
423*
424 DO j2 = j+1, n, nb
425 nj = min( nb, n-j2+1 )
426*
427* Update (J2, J2) diagonal block with CGEMV
428*
429 j3 = j2
430 DO mj = nj-1, 1, -1
431 CALL cgemv( 'No transpose', mj, jb+1,
432 $ -one, work( j3-j1+1+k1*n ), n,
433 $ a( j3, j1-k2 ), lda,
434 $ one, a( j3, j3 ), 1 )
435 j3 = j3 + 1
436 END DO
437*
438* Update off-diagonal block in J2-th block column with CGEMM
439*
440 CALL cgemm( 'No transpose', 'Transpose',
441 $ n-j3+1, nj, jb+1,
442 $ -one, work( j3-j1+1+k1*n ), n,
443 $ a( j2, j1-k2 ), lda,
444 $ one, a( j3, j2 ), lda )
445 END DO
446*
447* Recover T( J+1, J )
448*
449 a( j+1, j ) = alpha
450 END IF
451*
452* WORK(J+1, 1) stores H(J+1, 1)
453*
454 CALL ccopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 )
455 END IF
456 GO TO 11
457 END IF
458*
459 20 CONTINUE
460 work( 1 ) = lwkopt
461 RETURN
462*
463* End of CSYTRF_AA
464*
integer function ilaenv(ISPEC, NAME, OPTS, N1, N2, N3, N4)
ILAENV
Definition: ilaenv.f:162
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:53
subroutine ccopy(N, CX, INCX, CY, INCY)
CCOPY
Definition: ccopy.f:81
subroutine cswap(N, CX, INCX, CY, INCY)
CSWAP
Definition: cswap.f:81
subroutine cscal(N, CA, CX, INCX)
CSCAL
Definition: cscal.f:78
subroutine cgemv(TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY)
CGEMV
Definition: cgemv.f:158
subroutine cgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
CGEMM
Definition: cgemm.f:187
subroutine clasyf_aa(UPLO, J1, M, NB, A, LDA, IPIV, H, LDH, WORK)
CLASYF_AA
Definition: clasyf_aa.f:144
Here is the call graph for this function:
Here is the caller graph for this function: