160
161
162
163
164
165 IMPLICIT NONE
166
167
168 CHARACTER UPLO
169 INTEGER N, LDA, LTB, LWORK, INFO
170
171
172 INTEGER IPIV( * ), IPIV2( * )
173 DOUBLE PRECISION A( LDA, * ), TB( * ), WORK( * )
174
175
176
177
178 DOUBLE PRECISION ZERO, ONE
179 parameter( zero = 0.0d+0, one = 1.0d+0 )
180
181
182 LOGICAL UPPER, TQUERY, WQUERY
183 INTEGER I, J, K, I1, I2, TD
184 INTEGER LDTB, NB, KB, JB, NT, IINFO
185 DOUBLE PRECISION PIV
186
187
188 LOGICAL LSAME
189 INTEGER ILAENV
191
192
196
197
198 INTRINSIC min, max
199
200
201
202
203
204 info = 0
205 upper =
lsame( uplo,
'U' )
206 wquery = ( lwork.EQ.-1 )
207 tquery = ( ltb.EQ.-1 )
208 IF( .NOT.upper .AND. .NOT.
lsame( uplo,
'L' ) )
THEN
209 info = -1
210 ELSE IF( n.LT.0 ) THEN
211 info = -2
212 ELSE IF( lda.LT.max( 1, n ) ) THEN
213 info = -4
214 ELSE IF ( ltb .LT. 4*n .AND. .NOT.tquery ) THEN
215 info = -6
216 ELSE IF ( lwork .LT. n .AND. .NOT.wquery ) THEN
217 info = -10
218 END IF
219
220 IF( info.NE.0 ) THEN
221 CALL xerbla(
'DSYTRF_AA_2STAGE', -info )
222 RETURN
223 END IF
224
225
226
227 nb =
ilaenv( 1,
'DSYTRF_AA_2STAGE', uplo, n, -1, -1, -1 )
228 IF( info.EQ.0 ) THEN
229 IF( tquery ) THEN
230 tb( 1 ) = (3*nb+1)*n
231 END IF
232 IF( wquery ) THEN
233 work( 1 ) = n*nb
234 END IF
235 END IF
236 IF( tquery .OR. wquery ) THEN
237 RETURN
238 END IF
239
240
241
242 IF ( n.EQ.0 ) THEN
243 RETURN
244 ENDIF
245
246
247
248 ldtb = ltb/n
249 IF( ldtb .LT. 3*nb+1 ) THEN
250 nb = (ldtb-1)/3
251 END IF
252 IF( lwork .LT. nb*n ) THEN
253 nb = lwork/n
254 END IF
255
256
257
258 nt = (n+nb-1)/nb
259 td = 2*nb
260 kb = min(nb, n)
261
262
263
264 DO j = 1, kb
265 ipiv( j ) = j
266 END DO
267
268
269
270 tb( 1 ) = nb
271
272 IF( upper ) THEN
273
274
275
276
277
278 DO j = 0, nt-1
279
280
281
282 kb = min(nb, n-j*nb)
283 DO i = 1, j-1
284 IF( i .EQ. 1 ) THEN
285
286 IF( i .EQ. (j-1) ) THEN
287 jb = nb+kb
288 ELSE
289 jb = 2*nb
290 END IF
291 CALL dgemm(
'NoTranspose',
'NoTranspose',
292 $ nb, kb, jb,
293 $ one, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
294 $ a( (i-1)*nb+1, j*nb+1 ), lda,
295 $ zero, work( i*nb+1 ), n )
296 ELSE
297
298 IF( i .EQ. j-1) THEN
299 jb = 2*nb+kb
300 ELSE
301 jb = 3*nb
302 END IF
303 CALL dgemm(
'NoTranspose',
'NoTranspose',
304 $ nb, kb, jb,
305 $ one, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
306 $ ldtb-1,
307 $ a( (i-2)*nb+1, j*nb+1 ), lda,
308 $ zero, work( i*nb+1 ), n )
309 END IF
310 END DO
311
312
313
314 CALL dlacpy(
'Upper', kb, kb, a( j*nb+1, j*nb+1 ), lda,
315 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
316 IF( j.GT.1 ) THEN
317
318 CALL dgemm(
'Transpose',
'NoTranspose',
319 $ kb, kb, (j-1)*nb,
320 $ -one, a( 1, j*nb+1 ), lda,
321 $ work( nb+1 ), n,
322 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
323
324 CALL dgemm(
'Transpose',
'NoTranspose',
325 $ kb, nb, kb,
326 $ one, a( (j-1)*nb+1, j*nb+1 ), lda,
327 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
328 $ zero, work( 1 ), n )
329 CALL dgemm(
'NoTranspose',
'NoTranspose',
330 $ kb, kb, nb,
331 $ -one, work( 1 ), n,
332 $ a( (j-2)*nb+1, j*nb+1 ), lda,
333 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
334 END IF
335 IF( j.GT.0 ) THEN
336 CALL dsygst( 1,
'Upper', kb,
337 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1,
338 $ a( (j-1)*nb+1, j*nb+1 ), lda, iinfo )
339 END IF
340
341
342
343 DO i = 1, kb
344 DO k = i+1, kb
345 tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
346 $ = tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
347 END DO
348 END DO
349
350 IF( j.LT.nt-1 ) THEN
351 IF( j.GT.0 ) THEN
352
353
354
355 IF( j.EQ.1 ) THEN
356 CALL dgemm(
'NoTranspose',
'NoTranspose',
357 $ kb, kb, kb,
358 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
359 $ a( (j-1)*nb+1, j*nb+1 ), lda,
360 $ zero, work( j*nb+1 ), n )
361 ELSE
362 CALL dgemm(
'NoTranspose',
'NoTranspose',
363 $ kb, kb, nb+kb,
364 $ one, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
365 $ ldtb-1,
366 $ a( (j-2)*nb+1, j*nb+1 ), lda,
367 $ zero, work( j*nb+1 ), n )
368 END IF
369
370
371
372 CALL dgemm(
'Transpose',
'NoTranspose',
373 $ nb, n-(j+1)*nb, j*nb,
374 $ -one, work( nb+1 ), n,
375 $ a( 1, (j+1)*nb+1 ), lda,
376 $ one, a( j*nb+1, (j+1)*nb+1 ), lda )
377 END IF
378
379
380
381 DO k = 1, nb
382 CALL dcopy( n-(j+1)*nb,
383 $ a( j*nb+k, (j+1)*nb+1 ), lda,
384 $ work( 1+(k-1)*n ), 1 )
385 END DO
386
387
388
389 CALL dgetrf( n-(j+1)*nb, nb,
390 $ work, n,
391 $ ipiv( (j+1)*nb+1 ), iinfo )
392
393
394
395
396
397
398 DO k = 1, nb
399 CALL dcopy( n-(j+1)*nb,
400 $ work( 1+(k-1)*n ), 1,
401 $ a( j*nb+k, (j+1)*nb+1 ), lda )
402 END DO
403
404
405
406 kb = min(nb, n-(j+1)*nb)
407 CALL dlaset(
'Full', kb, nb, zero, zero,
408 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
409 CALL dlacpy(
'Upper', kb, nb,
410 $ work, n,
411 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
412 IF( j.GT.0 ) THEN
413 CALL dtrsm(
'R',
'U',
'N',
'U', kb, nb, one,
414 $ a( (j-1)*nb+1, j*nb+1 ), lda,
415 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
416 END IF
417
418
419
420
421 DO k = 1, nb
422 DO i = 1, kb
423 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb )
424 $ = tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
425 END DO
426 END DO
427 CALL dlaset(
'Lower', kb, nb, zero, one,
428 $ a( j*nb+1, (j+1)*nb+1), lda )
429
430
431
432 DO k = 1, kb
433
434 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
435
436 i1 = (j+1)*nb+k
437 i2 = ipiv( (j+1)*nb+k )
438 IF( i1.NE.i2 ) THEN
439
440 CALL dswap( k-1, a( (j+1)*nb+1, i1 ), 1,
441 $ a( (j+1)*nb+1, i2 ), 1 )
442
443 IF( i2.GT.(i1+1) )
444 $
CALL dswap( i2-i1-1, a( i1, i1+1 ), lda,
445 $ a( i1+1, i2 ), 1 )
446
447 IF( i2.LT.n )
448 $
CALL dswap( n-i2, a( i1, i2+1 ), lda,
449 $ a( i2, i2+1 ), lda )
450
451 piv = a( i1, i1 )
452 a( i1, i1 ) = a( i2, i2 )
453 a( i2, i2 ) = piv
454
455 IF( j.GT.0 ) THEN
456 CALL dswap( j*nb, a( 1, i1 ), 1,
457 $ a( 1, i2 ), 1 )
458 END IF
459 ENDIF
460 END DO
461 END IF
462 END DO
463 ELSE
464
465
466
467
468
469 DO j = 0, nt-1
470
471
472
473 kb = min(nb, n-j*nb)
474 DO i = 1, j-1
475 IF( i.EQ.1 ) THEN
476
477 IF( i .EQ. j-1) THEN
478 jb = nb+kb
479 ELSE
480 jb = 2*nb
481 END IF
482 CALL dgemm(
'NoTranspose',
'Transpose',
483 $ nb, kb, jb,
484 $ one, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
485 $ a( j*nb+1, (i-1)*nb+1 ), lda,
486 $ zero, work( i*nb+1 ), n )
487 ELSE
488
489 IF( i .EQ. j-1) THEN
490 jb = 2*nb+kb
491 ELSE
492 jb = 3*nb
493 END IF
494 CALL dgemm(
'NoTranspose',
'Transpose',
495 $ nb, kb, jb,
496 $ one, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
497 $ ldtb-1,
498 $ a( j*nb+1, (i-2)*nb+1 ), lda,
499 $ zero, work( i*nb+1 ), n )
500 END IF
501 END DO
502
503
504
505 CALL dlacpy(
'Lower', kb, kb, a( j*nb+1, j*nb+1 ), lda,
506 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
507 IF( j.GT.1 ) THEN
508
509 CALL dgemm(
'NoTranspose',
'NoTranspose',
510 $ kb, kb, (j-1)*nb,
511 $ -one, a( j*nb+1, 1 ), lda,
512 $ work( nb+1 ), n,
513 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
514
515 CALL dgemm(
'NoTranspose',
'NoTranspose',
516 $ kb, nb, kb,
517 $ one, a( j*nb+1, (j-1)*nb+1 ), lda,
518 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
519 $ zero, work( 1 ), n )
520 CALL dgemm(
'NoTranspose',
'Transpose',
521 $ kb, kb, nb,
522 $ -one, work( 1 ), n,
523 $ a( j*nb+1, (j-2)*nb+1 ), lda,
524 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
525 END IF
526 IF( j.GT.0 ) THEN
527 CALL dsygst( 1,
'Lower', kb,
528 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1,
529 $ a( j*nb+1, (j-1)*nb+1 ), lda, iinfo )
530 END IF
531
532
533
534 DO i = 1, kb
535 DO k = i+1, kb
536 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
537 $ = tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
538 END DO
539 END DO
540
541 IF( j.LT.nt-1 ) THEN
542 IF( j.GT.0 ) THEN
543
544
545
546 IF( j.EQ.1 ) THEN
547 CALL dgemm(
'NoTranspose',
'Transpose',
548 $ kb, kb, kb,
549 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
550 $ a( j*nb+1, (j-1)*nb+1 ), lda,
551 $ zero, work( j*nb+1 ), n )
552 ELSE
553 CALL dgemm(
'NoTranspose',
'Transpose',
554 $ kb, kb, nb+kb,
555 $ one, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
556 $ ldtb-1,
557 $ a( j*nb+1, (j-2)*nb+1 ), lda,
558 $ zero, work( j*nb+1 ), n )
559 END IF
560
561
562
563 CALL dgemm(
'NoTranspose',
'NoTranspose',
564 $ n-(j+1)*nb, nb, j*nb,
565 $ -one, a( (j+1)*nb+1, 1 ), lda,
566 $ work( nb+1 ), n,
567 $ one, a( (j+1)*nb+1, j*nb+1 ), lda )
568 END IF
569
570
571
572 CALL dgetrf( n-(j+1)*nb, nb,
573 $ a( (j+1)*nb+1, j*nb+1 ), lda,
574 $ ipiv( (j+1)*nb+1 ), iinfo )
575
576
577
578
579
580
581 kb = min(nb, n-(j+1)*nb)
582 CALL dlaset(
'Full', kb, nb, zero, zero,
583 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
584 CALL dlacpy(
'Upper', kb, nb,
585 $ a( (j+1)*nb+1, j*nb+1 ), lda,
586 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
587 IF( j.GT.0 ) THEN
588 CALL dtrsm(
'R',
'L',
'T',
'U', kb, nb, one,
589 $ a( j*nb+1, (j-1)*nb+1 ), lda,
590 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
591 END IF
592
593
594
595
596 DO k = 1, nb
597 DO i = 1, kb
598 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb )
599 $ = tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
600 END DO
601 END DO
602 CALL dlaset(
'Upper', kb, nb, zero, one,
603 $ a( (j+1)*nb+1, j*nb+1), lda )
604
605
606
607 DO k = 1, kb
608
609 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
610
611 i1 = (j+1)*nb+k
612 i2 = ipiv( (j+1)*nb+k )
613 IF( i1.NE.i2 ) THEN
614
615 CALL dswap( k-1, a( i1, (j+1)*nb+1 ), lda,
616 $ a( i2, (j+1)*nb+1 ), lda )
617
618 IF( i2.GT.(i1+1) )
619 $
CALL dswap( i2-i1-1, a( i1+1, i1 ), 1,
620 $ a( i2, i1+1 ), lda )
621
622 IF( i2.LT.n )
623 $
CALL dswap( n-i2, a( i2+1, i1 ), 1,
624 $ a( i2+1, i2 ), 1 )
625
626 piv = a( i1, i1 )
627 a( i1, i1 ) = a( i2, i2 )
628 a( i2, i2 ) = piv
629
630 IF( j.GT.0 ) THEN
631 CALL dswap( j*nb, a( i1, 1 ), lda,
632 $ a( i2, 1 ), lda )
633 END IF
634 ENDIF
635 END DO
636
637
638
639
640
641 END IF
642 END DO
643 END IF
644
645
646 CALL dgbtrf( n, n, nb, nb, tb, ldtb, ipiv2, info )
647
648 RETURN
649
650
651
subroutine dlacpy(UPLO, M, N, A, LDA, B, LDB)
DLACPY copies all or part of one two-dimensional array to another.
subroutine dlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
integer function ilaenv(ISPEC, NAME, OPTS, N1, N2, N3, N4)
ILAENV
subroutine xerbla(SRNAME, INFO)
XERBLA
logical function lsame(CA, CB)
LSAME
subroutine dcopy(N, DX, INCX, DY, INCY)
DCOPY
subroutine dswap(N, DX, INCX, DY, INCY)
DSWAP
subroutine dtrsm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
DTRSM
subroutine dgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
DGEMM
subroutine dgbtrf(M, N, KL, KU, AB, LDAB, IPIV, INFO)
DGBTRF
subroutine dgetrf(M, N, A, LDA, IPIV, INFO)
DGETRF
subroutine dsygst(ITYPE, UPLO, N, A, LDA, B, LDB, INFO)
DSYGST