LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ dsytrf_aa()

subroutine dsytrf_aa ( character  uplo,
integer  n,
double precision, dimension( lda, * )  a,
integer  lda,
integer, dimension( * )  ipiv,
double precision, dimension( * )  work,
integer  lwork,
integer  info 
)

DSYTRF_AA

Download DSYTRF_AA + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 DSYTRF_AA computes the factorization of a real symmetric matrix A
 using the Aasen's algorithm.  The form of the factorization is

    A = U**T*T*U  or  A = L*T*L**T

 where U (or L) is a product of permutation and unit upper (lower)
 triangular matrices, and T is a symmetric tridiagonal matrix.

 This is the blocked version of the algorithm, calling Level 3 BLAS.
Parameters
[in]UPLO
          UPLO is CHARACTER*1
          = 'U':  Upper triangle of A is stored;
          = 'L':  Lower triangle of A is stored.
[in]N
          N is INTEGER
          The order of the matrix A.  N >= 0.
[in,out]A
          A is DOUBLE PRECISION array, dimension (LDA,N)
          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
          N-by-N upper triangular part of A contains the upper
          triangular part of the matrix A, and the strictly lower
          triangular part of A is not referenced.  If UPLO = 'L', the
          leading N-by-N lower triangular part of A contains the lower
          triangular part of the matrix A, and the strictly upper
          triangular part of A is not referenced.

          On exit, the tridiagonal matrix is stored in the diagonals
          and the subdiagonals of A just below (or above) the diagonals,
          and L is stored below (or above) the subdiagonals, when UPLO
          is 'L' (or 'U').
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,N).
[out]IPIV
          IPIV is INTEGER array, dimension (N)
          On exit, it contains the details of the interchanges, i.e.,
          the row and column k of A were interchanged with the
          row and column IPIV(k).
[out]WORK
          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
[in]LWORK
          LWORK is INTEGER
          The length of WORK.  LWORK >= MAX(1,2*N). For optimum performance
          LWORK >= N*(1+NB), where NB is the optimal blocksize.

          If LWORK = -1, then a workspace query is assumed; the routine
          only calculates the optimal size of the WORK array, returns
          this value as the first entry of the WORK array, and no error
          message related to LWORK is issued by XERBLA.
[out]INFO
          INFO is INTEGER
          = 0:  successful exit
          < 0:  if INFO = -i, the i-th argument had an illegal value.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 131 of file dsytrf_aa.f.

132*
133* -- LAPACK computational routine --
134* -- LAPACK is a software package provided by Univ. of Tennessee, --
135* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
136*
137 IMPLICIT NONE
138*
139* .. Scalar Arguments ..
140 CHARACTER UPLO
141 INTEGER N, LDA, LWORK, INFO
142* ..
143* .. Array Arguments ..
144 INTEGER IPIV( * )
145 DOUBLE PRECISION A( LDA, * ), WORK( * )
146* ..
147*
148* =====================================================================
149* .. Parameters ..
150 DOUBLE PRECISION ZERO, ONE
151 parameter( zero = 0.0d+0, one = 1.0d+0 )
152*
153* .. Local Scalars ..
154 LOGICAL LQUERY, UPPER
155 INTEGER J, LWKOPT
156 INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB
157 DOUBLE PRECISION ALPHA
158* ..
159* .. External Functions ..
160 LOGICAL LSAME
161 INTEGER ILAENV
162 EXTERNAL lsame, ilaenv
163* ..
164* .. External Subroutines ..
165 EXTERNAL dlasyf_aa, dgemm, dgemv, dscal, dcopy, dswap,
166 $ xerbla
167* ..
168* .. Intrinsic Functions ..
169 INTRINSIC max
170* ..
171* .. Executable Statements ..
172*
173* Determine the block size
174*
175 nb = ilaenv( 1, 'DSYTRF_AA', uplo, n, -1, -1, -1 )
176*
177* Test the input parameters.
178*
179 info = 0
180 upper = lsame( uplo, 'U' )
181 lquery = ( lwork.EQ.-1 )
182 IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
183 info = -1
184 ELSE IF( n.LT.0 ) THEN
185 info = -2
186 ELSE IF( lda.LT.max( 1, n ) ) THEN
187 info = -4
188 ELSE IF( lwork.LT.max( 1, 2*n ) .AND. .NOT.lquery ) THEN
189 info = -7
190 END IF
191*
192 IF( info.EQ.0 ) THEN
193 lwkopt = (nb+1)*n
194 work( 1 ) = lwkopt
195 END IF
196*
197 IF( info.NE.0 ) THEN
198 CALL xerbla( 'DSYTRF_AA', -info )
199 RETURN
200 ELSE IF( lquery ) THEN
201 RETURN
202 END IF
203*
204* Quick return
205*
206 IF ( n.EQ.0 ) THEN
207 RETURN
208 ENDIF
209 ipiv( 1 ) = 1
210 IF ( n.EQ.1 ) THEN
211 RETURN
212 END IF
213*
214* Adjust block size based on the workspace size
215*
216 IF( lwork.LT.((1+nb)*n) ) THEN
217 nb = ( lwork-n ) / n
218 END IF
219*
220 IF( upper ) THEN
221*
222* .....................................................
223* Factorize A as U**T*D*U using the upper triangle of A
224* .....................................................
225*
226* Copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
227*
228 CALL dcopy( n, a( 1, 1 ), lda, work( 1 ), 1 )
229*
230* J is the main loop index, increasing from 1 to N in steps of
231* JB, where JB is the number of columns factorized by DLASYF;
232* JB is either NB, or N-J+1 for the last block
233*
234 j = 0
235 10 CONTINUE
236 IF( j.GE.n )
237 $ GO TO 20
238*
239* each step of the main loop
240* J is the last column of the previous panel
241* J1 is the first column of the current panel
242* K1 identifies if the previous column of the panel has been
243* explicitly stored, e.g., K1=1 for the first panel, and
244* K1=0 for the rest
245*
246 j1 = j + 1
247 jb = min( n-j1+1, nb )
248 k1 = max(1, j)-j
249*
250* Panel factorization
251*
252 CALL dlasyf_aa( uplo, 2-k1, n-j, jb,
253 $ a( max(1, j), j+1 ), lda,
254 $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
255*
256* Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
257*
258 DO j2 = j+2, min(n, j+jb+1)
259 ipiv( j2 ) = ipiv( j2 ) + j
260 IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
261 CALL dswap( j1-k1-2, a( 1, j2 ), 1,
262 $ a( 1, ipiv(j2) ), 1 )
263 END IF
264 END DO
265 j = j + jb
266*
267* Trailing submatrix update, where
268* the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
269* WORK stores the current block of the auxiriarly matrix H
270*
271 IF( j.LT.n ) THEN
272*
273* If first panel and JB=1 (NB=1), then nothing to do
274*
275 IF( j1.GT.1 .OR. jb.GT.1 ) THEN
276*
277* Merge rank-1 update with BLAS-3 update
278*
279 alpha = a( j, j+1 )
280 a( j, j+1 ) = one
281 CALL dcopy( n-j, a( j-1, j+1 ), lda,
282 $ work( (j+1-j1+1)+jb*n ), 1 )
283 CALL dscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
284*
285* K1 identifies if the previous column of the panel has been
286* explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
287* while K1=0 and K2=1 for the rest
288*
289 IF( j1.GT.1 ) THEN
290*
291* Not first panel
292*
293 k2 = 1
294 ELSE
295*
296* First panel
297*
298 k2 = 0
299*
300* First update skips the first column
301*
302 jb = jb - 1
303 END IF
304*
305 DO j2 = j+1, n, nb
306 nj = min( nb, n-j2+1 )
307*
308* Update (J2, J2) diagonal block with DGEMV
309*
310 j3 = j2
311 DO mj = nj-1, 1, -1
312 CALL dgemv( 'No transpose', mj, jb+1,
313 $ -one, work( j3-j1+1+k1*n ), n,
314 $ a( j1-k2, j3 ), 1,
315 $ one, a( j3, j3 ), lda )
316 j3 = j3 + 1
317 END DO
318*
319* Update off-diagonal block of J2-th block row with DGEMM
320*
321 CALL dgemm( 'Transpose', 'Transpose',
322 $ nj, n-j3+1, jb+1,
323 $ -one, a( j1-k2, j2 ), lda,
324 $ work( j3-j1+1+k1*n ), n,
325 $ one, a( j2, j3 ), lda )
326 END DO
327*
328* Recover T( J, J+1 )
329*
330 a( j, j+1 ) = alpha
331 END IF
332*
333* WORK(J+1, 1) stores H(J+1, 1)
334*
335 CALL dcopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 )
336 END IF
337 GO TO 10
338 ELSE
339*
340* .....................................................
341* Factorize A as L*D*L**T using the lower triangle of A
342* .....................................................
343*
344* copy first column A(1:N, 1) into H(1:N, 1)
345* (stored in WORK(1:N))
346*
347 CALL dcopy( n, a( 1, 1 ), 1, work( 1 ), 1 )
348*
349* J is the main loop index, increasing from 1 to N in steps of
350* JB, where JB is the number of columns factorized by DLASYF;
351* JB is either NB, or N-J+1 for the last block
352*
353 j = 0
354 11 CONTINUE
355 IF( j.GE.n )
356 $ GO TO 20
357*
358* each step of the main loop
359* J is the last column of the previous panel
360* J1 is the first column of the current panel
361* K1 identifies if the previous column of the panel has been
362* explicitly stored, e.g., K1=1 for the first panel, and
363* K1=0 for the rest
364*
365 j1 = j+1
366 jb = min( n-j1+1, nb )
367 k1 = max(1, j)-j
368*
369* Panel factorization
370*
371 CALL dlasyf_aa( uplo, 2-k1, n-j, jb,
372 $ a( j+1, max(1, j) ), lda,
373 $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
374*
375* Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
376*
377 DO j2 = j+2, min(n, j+jb+1)
378 ipiv( j2 ) = ipiv( j2 ) + j
379 IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
380 CALL dswap( j1-k1-2, a( j2, 1 ), lda,
381 $ a( ipiv(j2), 1 ), lda )
382 END IF
383 END DO
384 j = j + jb
385*
386* Trailing submatrix update, where
387* A(J2+1, J1-1) stores L(J2+1, J1) and
388* WORK(J2+1, 1) stores H(J2+1, 1)
389*
390 IF( j.LT.n ) THEN
391*
392* if first panel and JB=1 (NB=1), then nothing to do
393*
394 IF( j1.GT.1 .OR. jb.GT.1 ) THEN
395*
396* Merge rank-1 update with BLAS-3 update
397*
398 alpha = a( j+1, j )
399 a( j+1, j ) = one
400 CALL dcopy( n-j, a( j+1, j-1 ), 1,
401 $ work( (j+1-j1+1)+jb*n ), 1 )
402 CALL dscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
403*
404* K1 identifies if the previous column of the panel has been
405* explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
406* while K1=0 and K2=1 for the rest
407*
408 IF( j1.GT.1 ) THEN
409*
410* Not first panel
411*
412 k2 = 1
413 ELSE
414*
415* First panel
416*
417 k2 = 0
418*
419* First update skips the first column
420*
421 jb = jb - 1
422 END IF
423*
424 DO j2 = j+1, n, nb
425 nj = min( nb, n-j2+1 )
426*
427* Update (J2, J2) diagonal block with DGEMV
428*
429 j3 = j2
430 DO mj = nj-1, 1, -1
431 CALL dgemv( 'No transpose', mj, jb+1,
432 $ -one, work( j3-j1+1+k1*n ), n,
433 $ a( j3, j1-k2 ), lda,
434 $ one, a( j3, j3 ), 1 )
435 j3 = j3 + 1
436 END DO
437*
438* Update off-diagonal block in J2-th block column with DGEMM
439*
440 CALL dgemm( 'No transpose', 'Transpose',
441 $ n-j3+1, nj, jb+1,
442 $ -one, work( j3-j1+1+k1*n ), n,
443 $ a( j2, j1-k2 ), lda,
444 $ one, a( j3, j2 ), lda )
445 END DO
446*
447* Recover T( J+1, J )
448*
449 a( j+1, j ) = alpha
450 END IF
451*
452* WORK(J+1, 1) stores H(J+1, 1)
453*
454 CALL dcopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 )
455 END IF
456 GO TO 11
457 END IF
458*
459 20 CONTINUE
460 work( 1 ) = lwkopt
461 RETURN
462*
463* End of DSYTRF_AA
464*
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine dcopy(n, dx, incx, dy, incy)
DCOPY
Definition dcopy.f:82
subroutine dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
DGEMM
Definition dgemm.f:188
subroutine dgemv(trans, m, n, alpha, a, lda, x, incx, beta, y, incy)
DGEMV
Definition dgemv.f:158
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
Definition ilaenv.f:162
subroutine dlasyf_aa(uplo, j1, m, nb, a, lda, ipiv, h, ldh, work)
DLASYF_AA
Definition dlasyf_aa.f:144
logical function lsame(ca, cb)
LSAME
Definition lsame.f:48
subroutine dscal(n, da, dx, incx)
DSCAL
Definition dscal.f:79
subroutine dswap(n, dx, incx, dy, incy)
DSWAP
Definition dswap.f:82
Here is the call graph for this function:
Here is the caller graph for this function: