160
161
162
163
164
165 IMPLICIT NONE
166
167
168 CHARACTER UPLO
169 INTEGER N, LDA, LTB, LWORK, INFO
170
171
172 INTEGER IPIV( * ), IPIV2( * )
173 REAL A( LDA, * ), TB( * ), WORK( * )
174
175
176
177
178 REAL ZERO, ONE
179 parameter( zero = 0.0e+0, one = 1.0e+0 )
180
181
182 LOGICAL UPPER, TQUERY, WQUERY
183 INTEGER I, J, K, I1, I2, TD
184 INTEGER LDTB, NB, KB, JB, NT, IINFO
185 REAL PIV
186
187
188 LOGICAL LSAME
189 INTEGER ILAENV
190 REAL SROUNDUP_LWORK
192
193
197
198
199 INTRINSIC min, max
200
201
202
203
204
205 info = 0
206 upper =
lsame( uplo,
'U' )
207 wquery = ( lwork.EQ.-1 )
208 tquery = ( ltb.EQ.-1 )
209 IF( .NOT.upper .AND. .NOT.
lsame( uplo,
'L' ) )
THEN
210 info = -1
211 ELSE IF( n.LT.0 ) THEN
212 info = -2
213 ELSE IF( lda.LT.max( 1, n ) ) THEN
214 info = -4
215 ELSE IF ( ltb .LT. 4*n .AND. .NOT.tquery ) THEN
216 info = -6
217 ELSE IF ( lwork .LT. n .AND. .NOT.wquery ) THEN
218 info = -10
219 END IF
220
221 IF( info.NE.0 ) THEN
222 CALL xerbla(
'SSYTRF_AA_2STAGE', -info )
223 RETURN
224 END IF
225
226
227
228 nb =
ilaenv( 1,
'SSYTRF_AA_2STAGE', uplo, n, -1, -1, -1 )
229 IF( info.EQ.0 ) THEN
230 IF( tquery ) THEN
231 tb( 1 ) = (3*nb+1)*n
232 END IF
233 IF( wquery ) THEN
235 END IF
236 END IF
237 IF( tquery .OR. wquery ) THEN
238 RETURN
239 END IF
240
241
242
243 IF ( n.EQ.0 ) THEN
244 RETURN
245 ENDIF
246
247
248
249 ldtb = ltb/n
250 IF( ldtb .LT. 3*nb+1 ) THEN
251 nb = (ldtb-1)/3
252 END IF
253 IF( lwork .LT. nb*n ) THEN
254 nb = lwork/n
255 END IF
256
257
258
259 nt = (n+nb-1)/nb
260 td = 2*nb
261 kb = min(nb, n)
262
263
264
265 DO j = 1, kb
266 ipiv( j ) = j
267 END DO
268
269
270
271 tb( 1 ) = nb
272
273 IF( upper ) THEN
274
275
276
277
278
279 DO j = 0, nt-1
280
281
282
283 kb = min(nb, n-j*nb)
284 DO i = 1, j-1
285 IF( i.EQ.1 ) THEN
286
287 IF( i .EQ. (j-1) ) THEN
288 jb = nb+kb
289 ELSE
290 jb = 2*nb
291 END IF
292 CALL sgemm(
'NoTranspose',
'NoTranspose',
293 $ nb, kb, jb,
294 $ one, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
295 $ a( (i-1)*nb+1, j*nb+1 ), lda,
296 $ zero, work( i*nb+1 ), n )
297 ELSE
298
299 IF( i .EQ. j-1) THEN
300 jb = 2*nb+kb
301 ELSE
302 jb = 3*nb
303 END IF
304 CALL sgemm(
'NoTranspose',
'NoTranspose',
305 $ nb, kb, jb,
306 $ one, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
307 $ ldtb-1,
308 $ a( (i-2)*nb+1, j*nb+1 ), lda,
309 $ zero, work( i*nb+1 ), n )
310 END IF
311 END DO
312
313
314
315 CALL slacpy(
'Upper', kb, kb, a( j*nb+1, j*nb+1 ), lda,
316 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
317 IF( j.GT.1 ) THEN
318
319 CALL sgemm(
'Transpose',
'NoTranspose',
320 $ kb, kb, (j-1)*nb,
321 $ -one, a( 1, j*nb+1 ), lda,
322 $ work( nb+1 ), n,
323 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
324
325 CALL sgemm(
'Transpose',
'NoTranspose',
326 $ kb, nb, kb,
327 $ one, a( (j-1)*nb+1, j*nb+1 ), lda,
328 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
329 $ zero, work( 1 ), n )
330 CALL sgemm(
'NoTranspose',
'NoTranspose',
331 $ kb, kb, nb,
332 $ -one, work( 1 ), n,
333 $ a( (j-2)*nb+1, j*nb+1 ), lda,
334 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
335 END IF
336 IF( j.GT.0 ) THEN
337 CALL ssygst( 1,
'Upper', kb,
338 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1,
339 $ a( (j-1)*nb+1, j*nb+1 ), lda, iinfo )
340 END IF
341
342
343
344 DO i = 1, kb
345 DO k = i+1, kb
346 tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
347 $ = tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
348 END DO
349 END DO
350
351 IF( j.LT.nt-1 ) THEN
352 IF( j.GT.0 ) THEN
353
354
355
356 IF( j.EQ.1 ) THEN
357 CALL sgemm(
'NoTranspose',
'NoTranspose',
358 $ kb, kb, kb,
359 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
360 $ a( (j-1)*nb+1, j*nb+1 ), lda,
361 $ zero, work( j*nb+1 ), n )
362 ELSE
363 CALL sgemm(
'NoTranspose',
'NoTranspose',
364 $ kb, kb, nb+kb,
365 $ one, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
366 $ ldtb-1,
367 $ a( (j-2)*nb+1, j*nb+1 ), lda,
368 $ zero, work( j*nb+1 ), n )
369 END IF
370
371
372
373 CALL sgemm(
'Transpose',
'NoTranspose',
374 $ nb, n-(j+1)*nb, j*nb,
375 $ -one, work( nb+1 ), n,
376 $ a( 1, (j+1)*nb+1 ), lda,
377 $ one, a( j*nb+1, (j+1)*nb+1 ), lda )
378 END IF
379
380
381
382 DO k = 1, nb
383 CALL scopy( n-(j+1)*nb,
384 $ a( j*nb+k, (j+1)*nb+1 ), lda,
385 $ work( 1+(k-1)*n ), 1 )
386 END DO
387
388
389
390 CALL sgetrf( n-(j+1)*nb, nb,
391 $ work, n,
392 $ ipiv( (j+1)*nb+1 ), iinfo )
393
394
395
396
397
398
399 DO k = 1, nb
400 CALL scopy( n-(j+1)*nb,
401 $ work( 1+(k-1)*n ), 1,
402 $ a( j*nb+k, (j+1)*nb+1 ), lda )
403 END DO
404
405
406
407 kb = min(nb, n-(j+1)*nb)
408 CALL slaset(
'Full', kb, nb, zero, zero,
409 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
410 CALL slacpy(
'Upper', kb, nb,
411 $ work, n,
412 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
413 IF( j.GT.0 ) THEN
414 CALL strsm(
'R',
'U',
'N',
'U', kb, nb, one,
415 $ a( (j-1)*nb+1, j*nb+1 ), lda,
416 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
417 END IF
418
419
420
421
422 DO k = 1, nb
423 DO i = 1, kb
424 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb )
425 $ = tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
426 END DO
427 END DO
428 CALL slaset(
'Lower', kb, nb, zero, one,
429 $ a( j*nb+1, (j+1)*nb+1), lda )
430
431
432
433 DO k = 1, kb
434
435 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
436
437 i1 = (j+1)*nb+k
438 i2 = ipiv( (j+1)*nb+k )
439 IF( i1.NE.i2 ) THEN
440
441 CALL sswap( k-1, a( (j+1)*nb+1, i1 ), 1,
442 $ a( (j+1)*nb+1, i2 ), 1 )
443
444 IF( i2.GT.(i1+1) )
445 $
CALL sswap( i2-i1-1, a( i1, i1+1 ), lda,
446 $ a( i1+1, i2 ), 1 )
447
448 IF( i2.LT.n )
449 $
CALL sswap( n-i2, a( i1, i2+1 ), lda,
450 $ a( i2, i2+1 ), lda )
451
452 piv = a( i1, i1 )
453 a( i1, i1 ) = a( i2, i2 )
454 a( i2, i2 ) = piv
455
456 IF( j.GT.0 ) THEN
457 CALL sswap( j*nb, a( 1, i1 ), 1,
458 $ a( 1, i2 ), 1 )
459 END IF
460 ENDIF
461 END DO
462 END IF
463 END DO
464 ELSE
465
466
467
468
469
470 DO j = 0, nt-1
471
472
473
474 kb = min(nb, n-j*nb)
475 DO i = 1, j-1
476 IF( i.EQ.1 ) THEN
477
478 IF( i .EQ. (j-1) ) THEN
479 jb = nb+kb
480 ELSE
481 jb = 2*nb
482 END IF
483 CALL sgemm(
'NoTranspose',
'Transpose',
484 $ nb, kb, jb,
485 $ one, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
486 $ a( j*nb+1, (i-1)*nb+1 ), lda,
487 $ zero, work( i*nb+1 ), n )
488 ELSE
489
490 IF( i .EQ. j-1) THEN
491 jb = 2*nb+kb
492 ELSE
493 jb = 3*nb
494 END IF
495 CALL sgemm(
'NoTranspose',
'Transpose',
496 $ nb, kb, jb,
497 $ one, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
498 $ ldtb-1,
499 $ a( j*nb+1, (i-2)*nb+1 ), lda,
500 $ zero, work( i*nb+1 ), n )
501 END IF
502 END DO
503
504
505
506 CALL slacpy(
'Lower', kb, kb, a( j*nb+1, j*nb+1 ), lda,
507 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
508 IF( j.GT.1 ) THEN
509
510 CALL sgemm(
'NoTranspose',
'NoTranspose',
511 $ kb, kb, (j-1)*nb,
512 $ -one, a( j*nb+1, 1 ), lda,
513 $ work( nb+1 ), n,
514 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
515
516 CALL sgemm(
'NoTranspose',
'NoTranspose',
517 $ kb, nb, kb,
518 $ one, a( j*nb+1, (j-1)*nb+1 ), lda,
519 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
520 $ zero, work( 1 ), n )
521 CALL sgemm(
'NoTranspose',
'Transpose',
522 $ kb, kb, nb,
523 $ -one, work( 1 ), n,
524 $ a( j*nb+1, (j-2)*nb+1 ), lda,
525 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
526 END IF
527 IF( j.GT.0 ) THEN
528 CALL ssygst( 1,
'Lower', kb,
529 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1,
530 $ a( j*nb+1, (j-1)*nb+1 ), lda, iinfo )
531 END IF
532
533
534
535 DO i = 1, kb
536 DO k = i+1, kb
537 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
538 $ = tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
539 END DO
540 END DO
541
542 IF( j.LT.nt-1 ) THEN
543 IF( j.GT.0 ) THEN
544
545
546
547 IF( j.EQ.1 ) THEN
548 CALL sgemm(
'NoTranspose',
'Transpose',
549 $ kb, kb, kb,
550 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
551 $ a( j*nb+1, (j-1)*nb+1 ), lda,
552 $ zero, work( j*nb+1 ), n )
553 ELSE
554 CALL sgemm(
'NoTranspose',
'Transpose',
555 $ kb, kb, nb+kb,
556 $ one, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
557 $ ldtb-1,
558 $ a( j*nb+1, (j-2)*nb+1 ), lda,
559 $ zero, work( j*nb+1 ), n )
560 END IF
561
562
563
564 CALL sgemm(
'NoTranspose',
'NoTranspose',
565 $ n-(j+1)*nb, nb, j*nb,
566 $ -one, a( (j+1)*nb+1, 1 ), lda,
567 $ work( nb+1 ), n,
568 $ one, a( (j+1)*nb+1, j*nb+1 ), lda )
569 END IF
570
571
572
573 CALL sgetrf( n-(j+1)*nb, nb,
574 $ a( (j+1)*nb+1, j*nb+1 ), lda,
575 $ ipiv( (j+1)*nb+1 ), iinfo )
576
577
578
579
580
581
582 kb = min(nb, n-(j+1)*nb)
583 CALL slaset(
'Full', kb, nb, zero, zero,
584 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
585 CALL slacpy(
'Upper', kb, nb,
586 $ a( (j+1)*nb+1, j*nb+1 ), lda,
587 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
588 IF( j.GT.0 ) THEN
589 CALL strsm(
'R',
'L',
'T',
'U', kb, nb, one,
590 $ a( j*nb+1, (j-1)*nb+1 ), lda,
591 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
592 END IF
593
594
595
596
597 DO k = 1, nb
598 DO i = 1, kb
599 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb ) =
600 $ tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
601 END DO
602 END DO
603 CALL slaset(
'Upper', kb, nb, zero, one,
604 $ a( (j+1)*nb+1, j*nb+1), lda )
605
606
607
608 DO k = 1, kb
609
610 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
611
612 i1 = (j+1)*nb+k
613 i2 = ipiv( (j+1)*nb+k )
614 IF( i1.NE.i2 ) THEN
615
616 CALL sswap( k-1, a( i1, (j+1)*nb+1 ), lda,
617 $ a( i2, (j+1)*nb+1 ), lda )
618
619 IF( i2.GT.(i1+1) )
620 $
CALL sswap( i2-i1-1, a( i1+1, i1 ), 1,
621 $ a( i2, i1+1 ), lda )
622
623 IF( i2.LT.n )
624 $
CALL sswap( n-i2, a( i2+1, i1 ), 1,
625 $ a( i2+1, i2 ), 1 )
626
627 piv = a( i1, i1 )
628 a( i1, i1 ) = a( i2, i2 )
629 a( i2, i2 ) = piv
630
631 IF( j.GT.0 ) THEN
632 CALL sswap( j*nb, a( i1, 1 ), lda,
633 $ a( i2, 1 ), lda )
634 END IF
635 ENDIF
636 END DO
637
638
639
640
641
642 END IF
643 END DO
644 END IF
645
646
647 CALL sgbtrf( n, n, nb, nb, tb, ldtb, ipiv2, info )
648
649 RETURN
650
651
652
subroutine xerbla(srname, info)
subroutine scopy(n, sx, incx, sy, incy)
SCOPY
subroutine sgbtrf(m, n, kl, ku, ab, ldab, ipiv, info)
SGBTRF
subroutine sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
SGEMM
subroutine sgetrf(m, n, a, lda, ipiv, info)
SGETRF
subroutine ssygst(itype, uplo, n, a, lda, b, ldb, info)
SSYGST
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
subroutine slacpy(uplo, m, n, a, lda, b, ldb)
SLACPY copies all or part of one two-dimensional array to another.
subroutine slaset(uplo, m, n, alpha, beta, a, lda)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
logical function lsame(ca, cb)
LSAME
real function sroundup_lwork(lwork)
SROUNDUP_LWORK
subroutine sswap(n, sx, incx, sy, incy)
SSWAP
subroutine strsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
STRSM