159 $ IPIV2, WORK, LWORK, INFO )
169 INTEGER N, LDA, LTB, LWORK, INFO
172 INTEGER IPIV( * ), IPIV2( * )
173 REAL A( LDA, * ), TB( * ), WORK( * )
179 parameter( zero = 0.0e+0, one = 1.0e+0 )
182 LOGICAL UPPER, TQUERY, WQUERY
183 INTEGER I, J, K, I1, I2, TD
184 INTEGER LDTB, NB, KB, JB, NT, IINFO
191 EXTERNAL lsame, ilaenv, sroundup_lwork
206 upper = lsame( uplo,
'U' )
207 wquery = ( lwork.EQ.-1 )
208 tquery = ( ltb.EQ.-1 )
209 IF( .NOT.upper .AND. .NOT.lsame( uplo,
'L' ) )
THEN
211 ELSE IF( n.LT.0 )
THEN
213 ELSE IF( lda.LT.max( 1, n ) )
THEN
215 ELSE IF ( ltb .LT. 4*n .AND. .NOT.tquery )
THEN
217 ELSE IF ( lwork .LT. n .AND. .NOT.wquery )
THEN
222 CALL xerbla(
'SSYTRF_AA_2STAGE', -info )
228 nb = ilaenv( 1,
'SSYTRF_AA_2STAGE', uplo, n, -1, -1, -1 )
234 work( 1 ) = sroundup_lwork(n*nb)
237 IF( tquery .OR. wquery )
THEN
250 IF( ldtb .LT. 3*nb+1 )
THEN
253 IF( lwork .LT. nb*n )
THEN
287 IF( i .EQ. (j-1) )
THEN
292 CALL sgemm(
'NoTranspose',
'NoTranspose',
294 $ one, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
295 $ a( (i-1)*nb+1, j*nb+1 ), lda,
296 $ zero, work( i*nb+1 ), n )
304 CALL sgemm(
'NoTranspose',
'NoTranspose',
306 $ one, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
308 $ a( (i-2)*nb+1, j*nb+1 ), lda,
309 $ zero, work( i*nb+1 ), n )
315 CALL slacpy(
'Upper', kb, kb, a( j*nb+1, j*nb+1 ), lda,
316 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
319 CALL sgemm(
'Transpose',
'NoTranspose',
321 $ -one, a( 1, j*nb+1 ), lda,
323 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
325 CALL sgemm(
'Transpose',
'NoTranspose',
327 $ one, a( (j-1)*nb+1, j*nb+1 ), lda,
328 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
329 $ zero, work( 1 ), n )
330 CALL sgemm(
'NoTranspose',
'NoTranspose',
332 $ -one, work( 1 ), n,
333 $ a( (j-2)*nb+1, j*nb+1 ), lda,
334 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
337 CALL ssygst( 1,
'Upper', kb,
338 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1,
339 $ a( (j-1)*nb+1, j*nb+1 ), lda, iinfo )
346 tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
347 $ = tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
357 CALL sgemm(
'NoTranspose',
'NoTranspose',
359 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
360 $ a( (j-1)*nb+1, j*nb+1 ), lda,
361 $ zero, work( j*nb+1 ), n )
363 CALL sgemm(
'NoTranspose',
'NoTranspose',
365 $ one, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
367 $ a( (j-2)*nb+1, j*nb+1 ), lda,
368 $ zero, work( j*nb+1 ), n )
373 CALL sgemm(
'Transpose',
'NoTranspose',
374 $ nb, n-(j+1)*nb, j*nb,
375 $ -one, work( nb+1 ), n,
376 $ a( 1, (j+1)*nb+1 ), lda,
377 $ one, a( j*nb+1, (j+1)*nb+1 ), lda )
383 CALL scopy( n-(j+1)*nb,
384 $ a( j*nb+k, (j+1)*nb+1 ), lda,
385 $ work( 1+(k-1)*n ), 1 )
390 CALL sgetrf( n-(j+1)*nb, nb,
392 $ ipiv( (j+1)*nb+1 ), iinfo )
400 CALL scopy( n-(j+1)*nb,
401 $ work( 1+(k-1)*n ), 1,
402 $ a( j*nb+k, (j+1)*nb+1 ), lda )
407 kb = min(nb, n-(j+1)*nb)
408 CALL slaset(
'Full', kb, nb, zero, zero,
409 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
410 CALL slacpy(
'Upper', kb, nb,
412 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
414 CALL strsm(
'R',
'U',
'N',
'U', kb, nb, one,
415 $ a( (j-1)*nb+1, j*nb+1 ), lda,
416 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
424 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb )
425 $ = tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
428 CALL slaset(
'Lower', kb, nb, zero, one,
429 $ a( j*nb+1, (j+1)*nb+1), lda )
435 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
438 i2 = ipiv( (j+1)*nb+k )
441 CALL sswap( k-1, a( (j+1)*nb+1, i1 ), 1,
442 $ a( (j+1)*nb+1, i2 ), 1 )
445 $
CALL sswap( i2-i1-1, a( i1, i1+1 ), lda,
449 $
CALL sswap( n-i2, a( i1, i2+1 ), lda,
450 $ a( i2, i2+1 ), lda )
453 a( i1, i1 ) = a( i2, i2 )
457 CALL sswap( j*nb, a( 1, i1 ), 1,
478 IF( i .EQ. (j-1) )
THEN
483 CALL sgemm(
'NoTranspose',
'Transpose',
485 $ one, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
486 $ a( j*nb+1, (i-1)*nb+1 ), lda,
487 $ zero, work( i*nb+1 ), n )
495 CALL sgemm(
'NoTranspose',
'Transpose',
497 $ one, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
499 $ a( j*nb+1, (i-2)*nb+1 ), lda,
500 $ zero, work( i*nb+1 ), n )
506 CALL slacpy(
'Lower', kb, kb, a( j*nb+1, j*nb+1 ), lda,
507 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
510 CALL sgemm(
'NoTranspose',
'NoTranspose',
512 $ -one, a( j*nb+1, 1 ), lda,
514 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
516 CALL sgemm(
'NoTranspose',
'NoTranspose',
518 $ one, a( j*nb+1, (j-1)*nb+1 ), lda,
519 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
520 $ zero, work( 1 ), n )
521 CALL sgemm(
'NoTranspose',
'Transpose',
523 $ -one, work( 1 ), n,
524 $ a( j*nb+1, (j-2)*nb+1 ), lda,
525 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
528 CALL ssygst( 1,
'Lower', kb,
529 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1,
530 $ a( j*nb+1, (j-1)*nb+1 ), lda, iinfo )
537 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
538 $ = tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
548 CALL sgemm(
'NoTranspose',
'Transpose',
550 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
551 $ a( j*nb+1, (j-1)*nb+1 ), lda,
552 $ zero, work( j*nb+1 ), n )
554 CALL sgemm(
'NoTranspose',
'Transpose',
556 $ one, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
558 $ a( j*nb+1, (j-2)*nb+1 ), lda,
559 $ zero, work( j*nb+1 ), n )
564 CALL sgemm(
'NoTranspose',
'NoTranspose',
565 $ n-(j+1)*nb, nb, j*nb,
566 $ -one, a( (j+1)*nb+1, 1 ), lda,
568 $ one, a( (j+1)*nb+1, j*nb+1 ), lda )
573 CALL sgetrf( n-(j+1)*nb, nb,
574 $ a( (j+1)*nb+1, j*nb+1 ), lda,
575 $ ipiv( (j+1)*nb+1 ), iinfo )
582 kb = min(nb, n-(j+1)*nb)
583 CALL slaset(
'Full', kb, nb, zero, zero,
584 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
585 CALL slacpy(
'Upper', kb, nb,
586 $ a( (j+1)*nb+1, j*nb+1 ), lda,
587 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
589 CALL strsm(
'R',
'L',
'T',
'U', kb, nb, one,
590 $ a( j*nb+1, (j-1)*nb+1 ), lda,
591 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
599 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb ) =
600 $ tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
603 CALL slaset(
'Upper', kb, nb, zero, one,
604 $ a( (j+1)*nb+1, j*nb+1), lda )
610 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
613 i2 = ipiv( (j+1)*nb+k )
616 CALL sswap( k-1, a( i1, (j+1)*nb+1 ), lda,
617 $ a( i2, (j+1)*nb+1 ), lda )
620 $
CALL sswap( i2-i1-1, a( i1+1, i1 ), 1,
621 $ a( i2, i1+1 ), lda )
624 $
CALL sswap( n-i2, a( i2+1, i1 ), 1,
628 a( i1, i1 ) = a( i2, i2 )
632 CALL sswap( j*nb, a( i1, 1 ), lda,
647 CALL sgbtrf( n, n, nb, nb, tb, ldtb, ipiv2, info )
subroutine xerbla(srname, info)
subroutine scopy(n, sx, incx, sy, incy)
SCOPY
subroutine sgbtrf(m, n, kl, ku, ab, ldab, ipiv, info)
SGBTRF
subroutine sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
SGEMM
subroutine sgetrf(m, n, a, lda, ipiv, info)
SGETRF
subroutine ssygst(itype, uplo, n, a, lda, b, ldb, info)
SSYGST
subroutine ssytrf_aa_2stage(uplo, n, a, lda, tb, ltb, ipiv, ipiv2, work, lwork, info)
SSYTRF_AA_2STAGE
subroutine slacpy(uplo, m, n, a, lda, b, ldb)
SLACPY copies all or part of one two-dimensional array to another.
subroutine slaset(uplo, m, n, alpha, beta, a, lda)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
subroutine sswap(n, sx, incx, sy, incy)
SSWAP
subroutine strsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
STRSM