160
161
162
163
164
165 IMPLICIT NONE
166
167
168 CHARACTER UPLO
169 INTEGER N, LDA, LTB, LWORK, INFO
170
171
172 INTEGER IPIV( * ), IPIV2( * )
173 COMPLEX*16 A( LDA, * ), TB( * ), WORK( * )
174
175
176
177
178 COMPLEX*16 CZERO, CONE
179 parameter( czero = ( 0.0d+0, 0.0d+0 ),
180 $ cone = ( 1.0d+0, 0.0d+0 ) )
181
182
183 LOGICAL UPPER, TQUERY, WQUERY
184 INTEGER I, J, K, I1, I2, TD
185 INTEGER LDTB, NB, KB, JB, NT, IINFO
186 COMPLEX*16 PIV
187
188
189 LOGICAL LSAME
190 INTEGER ILAENV
192
193
196
197
198 INTRINSIC min, max
199
200
201
202
203
204 info = 0
205 upper =
lsame( uplo,
'U' )
206 wquery = ( lwork.EQ.-1 )
207 tquery = ( ltb.EQ.-1 )
208 IF( .NOT.upper .AND. .NOT.
lsame( uplo,
'L' ) )
THEN
209 info = -1
210 ELSE IF( n.LT.0 ) THEN
211 info = -2
212 ELSE IF( lda.LT.max( 1, n ) ) THEN
213 info = -4
214 ELSE IF ( ltb .LT. 4*n .AND. .NOT.tquery ) THEN
215 info = -6
216 ELSE IF ( lwork .LT. n .AND. .NOT.wquery ) THEN
217 info = -10
218 END IF
219
220 IF( info.NE.0 ) THEN
221 CALL xerbla(
'ZSYTRF_AA_2STAGE', -info )
222 RETURN
223 END IF
224
225
226
227 nb =
ilaenv( 1,
'ZSYTRF_AA_2STAGE', uplo, n, -1, -1, -1 )
228 IF( info.EQ.0 ) THEN
229 IF( tquery ) THEN
230 tb( 1 ) = (3*nb+1)*n
231 END IF
232 IF( wquery ) THEN
233 work( 1 ) = n*nb
234 END IF
235 END IF
236 IF( tquery .OR. wquery ) THEN
237 RETURN
238 END IF
239
240
241
242 IF ( n.EQ.0 ) THEN
243 RETURN
244 ENDIF
245
246
247
248 ldtb = ltb/n
249 IF( ldtb .LT. 3*nb+1 ) THEN
250 nb = (ldtb-1)/3
251 END IF
252 IF( lwork .LT. nb*n ) THEN
253 nb = lwork/n
254 END IF
255
256
257
258 nt = (n+nb-1)/nb
259 td = 2*nb
260 kb = min(nb, n)
261
262
263
264 DO j = 1, kb
265 ipiv( j ) = j
266 END DO
267
268
269
270 tb( 1 ) = nb
271
272 IF( upper ) THEN
273
274
275
276
277
278 DO j = 0, nt-1
279
280
281
282 kb = min(nb, n-j*nb)
283 DO i = 1, j-1
284 IF( i.EQ.1 ) THEN
285
286 IF( i .EQ. (j-1) ) THEN
287 jb = nb+kb
288 ELSE
289 jb = 2*nb
290 END IF
291 CALL zgemm(
'NoTranspose',
'NoTranspose',
292 $ nb, kb, jb,
293 $ cone, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
294 $ a( (i-1)*nb+1, j*nb+1 ), lda,
295 $ czero, work( i*nb+1 ), n )
296 ELSE
297
298 IF( i .EQ. (j-1) ) THEN
299 jb = 2*nb+kb
300 ELSE
301 jb = 3*nb
302 END IF
303 CALL zgemm(
'NoTranspose',
'NoTranspose',
304 $ nb, kb, jb,
305 $ cone, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
306 $ ldtb-1,
307 $ a( (i-2)*nb+1, j*nb+1 ), lda,
308 $ czero, work( i*nb+1 ), n )
309 END IF
310 END DO
311
312
313
314 CALL zlacpy(
'Upper', kb, kb, a( j*nb+1, j*nb+1 ), lda,
315 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
316 IF( j.GT.1 ) THEN
317
318 CALL zgemm(
'Transpose',
'NoTranspose',
319 $ kb, kb, (j-1)*nb,
320 $ -cone, a( 1, j*nb+1 ), lda,
321 $ work( nb+1 ), n,
322 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
323
324 CALL zgemm(
'Transpose',
'NoTranspose',
325 $ kb, nb, kb,
326 $ cone, a( (j-1)*nb+1, j*nb+1 ), lda,
327 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
328 $ czero, work( 1 ), n )
329 CALL zgemm(
'NoTranspose',
'NoTranspose',
330 $ kb, kb, nb,
331 $ -cone, work( 1 ), n,
332 $ a( (j-2)*nb+1, j*nb+1 ), lda,
333 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
334 END IF
335
336
337
338 DO i = 1, kb
339 DO k = i+1, kb
340 tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
341 $ = tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
342 END DO
343 END DO
344 IF( j.GT.0 ) THEN
345
346
347
348 CALL ztrsm(
'L',
'U',
'T',
'N', kb, kb, cone,
349 $ a( (j-1)*nb+1, j*nb+1 ), lda,
350 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
351 CALL ztrsm(
'R',
'U',
'N',
'N', kb, kb, cone,
352 $ a( (j-1)*nb+1, j*nb+1 ), lda,
353 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
354 END IF
355
356 IF( j.LT.nt-1 ) THEN
357 IF( j.GT.0 ) THEN
358
359
360
361 IF( j.EQ.1 ) THEN
362 CALL zgemm(
'NoTranspose',
'NoTranspose',
363 $ kb, kb, kb,
364 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
365 $ a( (j-1)*nb+1, j*nb+1 ), lda,
366 $ czero, work( j*nb+1 ), n )
367 ELSE
368 CALL zgemm(
'NoTranspose',
'NoTranspose',
369 $ kb, kb, nb+kb,
370 $ cone, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
371 $ ldtb-1,
372 $ a( (j-2)*nb+1, j*nb+1 ), lda,
373 $ czero, work( j*nb+1 ), n )
374 END IF
375
376
377
378 CALL zgemm(
'Transpose',
'NoTranspose',
379 $ nb, n-(j+1)*nb, j*nb,
380 $ -cone, work( nb+1 ), n,
381 $ a( 1, (j+1)*nb+1 ), lda,
382 $ cone, a( j*nb+1, (j+1)*nb+1 ), lda )
383 END IF
384
385
386
387 DO k = 1, nb
388 CALL zcopy( n-(j+1)*nb,
389 $ a( j*nb+k, (j+1)*nb+1 ), lda,
390 $ work( 1+(k-1)*n ), 1 )
391 END DO
392
393
394
395 CALL zgetrf( n-(j+1)*nb, nb,
396 $ work, n,
397 $ ipiv( (j+1)*nb+1 ), iinfo )
398
399
400
401
402
403
404 DO k = 1, nb
405 CALL zcopy( n-(j+1)*nb,
406 $ work( 1+(k-1)*n ), 1,
407 $ a( j*nb+k, (j+1)*nb+1 ), lda )
408 END DO
409
410
411
412 kb = min(nb, n-(j+1)*nb)
413 CALL zlaset(
'Full', kb, nb, czero, czero,
414 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
415 CALL zlacpy(
'Upper', kb, nb,
416 $ work, n,
417 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
418 IF( j.GT.0 ) THEN
419 CALL ztrsm(
'R',
'U',
'N',
'U', kb, nb, cone,
420 $ a( (j-1)*nb+1, j*nb+1 ), lda,
421 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
422 END IF
423
424
425
426
427 DO k = 1, nb
428 DO i = 1, kb
429 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb )
430 $ = tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
431 END DO
432 END DO
433 CALL zlaset(
'Lower', kb, nb, czero, cone,
434 $ a( j*nb+1, (j+1)*nb+1), lda )
435
436
437
438 DO k = 1, kb
439
440 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
441
442 i1 = (j+1)*nb+k
443 i2 = ipiv( (j+1)*nb+k )
444 IF( i1.NE.i2 ) THEN
445
446 CALL zswap( k-1, a( (j+1)*nb+1, i1 ), 1,
447 $ a( (j+1)*nb+1, i2 ), 1 )
448
449 IF( i2.GT.(i1+1) )
450 $
CALL zswap( i2-i1-1, a( i1, i1+1 ), lda,
451 $ a( i1+1, i2 ), 1 )
452
453 IF( i2.LT.n )
454 $
CALL zswap( n-i2, a( i1, i2+1 ), lda,
455 $ a( i2, i2+1 ), lda )
456
457 piv = a( i1, i1 )
458 a( i1, i1 ) = a( i2, i2 )
459 a( i2, i2 ) = piv
460
461 IF( j.GT.0 ) THEN
462 CALL zswap( j*nb, a( 1, i1 ), 1,
463 $ a( 1, i2 ), 1 )
464 END IF
465 ENDIF
466 END DO
467 END IF
468 END DO
469 ELSE
470
471
472
473
474
475 DO j = 0, nt-1
476
477
478
479 kb = min(nb, n-j*nb)
480 DO i = 1, j-1
481 IF( i.EQ.1 ) THEN
482
483 IF( i .EQ. (j-1) ) THEN
484 jb = nb+kb
485 ELSE
486 jb = 2*nb
487 END IF
488 CALL zgemm(
'NoTranspose',
'Transpose',
489 $ nb, kb, jb,
490 $ cone, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
491 $ a( j*nb+1, (i-1)*nb+1 ), lda,
492 $ czero, work( i*nb+1 ), n )
493 ELSE
494
495 IF( i .EQ. (j-1) ) THEN
496 jb = 2*nb+kb
497 ELSE
498 jb = 3*nb
499 END IF
500 CALL zgemm(
'NoTranspose',
'Transpose',
501 $ nb, kb, jb,
502 $ cone, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
503 $ ldtb-1,
504 $ a( j*nb+1, (i-2)*nb+1 ), lda,
505 $ czero, work( i*nb+1 ), n )
506 END IF
507 END DO
508
509
510
511 CALL zlacpy(
'Lower', kb, kb, a( j*nb+1, j*nb+1 ), lda,
512 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
513 IF( j.GT.1 ) THEN
514
515 CALL zgemm(
'NoTranspose',
'NoTranspose',
516 $ kb, kb, (j-1)*nb,
517 $ -cone, a( j*nb+1, 1 ), lda,
518 $ work( nb+1 ), n,
519 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
520
521 CALL zgemm(
'NoTranspose',
'NoTranspose',
522 $ kb, nb, kb,
523 $ cone, a( j*nb+1, (j-1)*nb+1 ), lda,
524 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
525 $ czero, work( 1 ), n )
526 CALL zgemm(
'NoTranspose',
'Transpose',
527 $ kb, kb, nb,
528 $ -cone, work( 1 ), n,
529 $ a( j*nb+1, (j-2)*nb+1 ), lda,
530 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
531 END IF
532
533
534
535 DO i = 1, kb
536 DO k = i+1, kb
537 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
538 $ = tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
539 END DO
540 END DO
541 IF( j.GT.0 ) THEN
542
543
544
545 CALL ztrsm(
'L',
'L',
'N',
'N', kb, kb, cone,
546 $ a( j*nb+1, (j-1)*nb+1 ), lda,
547 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
548 CALL ztrsm(
'R',
'L',
'T',
'N', kb, kb, cone,
549 $ a( j*nb+1, (j-1)*nb+1 ), lda,
550 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
551 END IF
552
553
554
555 DO i = 1, kb
556 DO k = i+1, kb
557 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
558 $ = tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
559 END DO
560 END DO
561
562 IF( j.LT.nt-1 ) THEN
563 IF( j.GT.0 ) THEN
564
565
566
567 IF( j.EQ.1 ) THEN
568 CALL zgemm(
'NoTranspose',
'Transpose',
569 $ kb, kb, kb,
570 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
571 $ a( j*nb+1, (j-1)*nb+1 ), lda,
572 $ czero, work( j*nb+1 ), n )
573 ELSE
574 CALL zgemm(
'NoTranspose',
'Transpose',
575 $ kb, kb, nb+kb,
576 $ cone, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
577 $ ldtb-1,
578 $ a( j*nb+1, (j-2)*nb+1 ), lda,
579 $ czero, work( j*nb+1 ), n )
580 END IF
581
582
583
584 CALL zgemm(
'NoTranspose',
'NoTranspose',
585 $ n-(j+1)*nb, nb, j*nb,
586 $ -cone, a( (j+1)*nb+1, 1 ), lda,
587 $ work( nb+1 ), n,
588 $ cone, a( (j+1)*nb+1, j*nb+1 ), lda )
589 END IF
590
591
592
593 CALL zgetrf( n-(j+1)*nb, nb,
594 $ a( (j+1)*nb+1, j*nb+1 ), lda,
595 $ ipiv( (j+1)*nb+1 ), iinfo )
596
597
598
599
600
601
602 kb = min(nb, n-(j+1)*nb)
603 CALL zlaset(
'Full', kb, nb, czero, czero,
604 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
605 CALL zlacpy(
'Upper', kb, nb,
606 $ a( (j+1)*nb+1, j*nb+1 ), lda,
607 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
608 IF( j.GT.0 ) THEN
609 CALL ztrsm(
'R',
'L',
'T',
'U', kb, nb, cone,
610 $ a( j*nb+1, (j-1)*nb+1 ), lda,
611 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
612 END IF
613
614
615
616
617 DO k = 1, nb
618 DO i = 1, kb
619 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb ) =
620 $ tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
621 END DO
622 END DO
623 CALL zlaset(
'Upper', kb, nb, czero, cone,
624 $ a( (j+1)*nb+1, j*nb+1 ), lda )
625
626
627
628 DO k = 1, kb
629
630 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
631
632 i1 = (j+1)*nb+k
633 i2 = ipiv( (j+1)*nb+k )
634 IF( i1.NE.i2 ) THEN
635
636 CALL zswap( k-1, a( i1, (j+1)*nb+1 ), lda,
637 $ a( i2, (j+1)*nb+1 ), lda )
638
639 IF( i2.GT.(i1+1) )
640 $
CALL zswap( i2-i1-1, a( i1+1, i1 ), 1,
641 $ a( i2, i1+1 ), lda )
642
643 IF( i2.LT.n )
644 $
CALL zswap( n-i2, a( i2+1, i1 ), 1,
645 $ a( i2+1, i2 ), 1 )
646
647 piv = a( i1, i1 )
648 a( i1, i1 ) = a( i2, i2 )
649 a( i2, i2 ) = piv
650
651 IF( j.GT.0 ) THEN
652 CALL zswap( j*nb, a( i1, 1 ), lda,
653 $ a( i2, 1 ), lda )
654 END IF
655 ENDIF
656 END DO
657
658
659
660
661
662 END IF
663 END DO
664 END IF
665
666
667 CALL zgbtrf( n, n, nb, nb, tb, ldtb, ipiv2, info )
668
669 RETURN
670
671
672
integer function ilaenv(ISPEC, NAME, OPTS, N1, N2, N3, N4)
ILAENV
subroutine xerbla(SRNAME, INFO)
XERBLA
logical function lsame(CA, CB)
LSAME
subroutine zswap(N, ZX, INCX, ZY, INCY)
ZSWAP
subroutine zcopy(N, ZX, INCX, ZY, INCY)
ZCOPY
subroutine zgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
ZGEMM
subroutine ztrsm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
ZTRSM
subroutine zgbtrf(M, N, KL, KU, AB, LDAB, IPIV, INFO)
ZGBTRF
subroutine zlaswp(N, A, LDA, K1, K2, IPIV, INCX)
ZLASWP performs a series of row interchanges on a general rectangular matrix.
subroutine zlacpy(UPLO, M, N, A, LDA, B, LDB)
ZLACPY copies all or part of one two-dimensional array to another.
subroutine zlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
subroutine zgetrf(M, N, A, LDA, IPIV, INFO)
ZGETRF VARIANT: Crout Level 3 BLAS version of the algorithm.