1 SUBROUTINE pzdbtrf( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK,
9 INTEGER BWL, BWU, INFO, JA, LAF, LWORK, N
13 COMPLEX*16 A( * ), AF( * ), WORK( * )
348 DOUBLE PRECISION ONE, ZERO
349 parameter( one = 1.0d+0 )
350 parameter( zero = 0.0d+0 )
351 COMPLEX*16 CONE, CZERO
352 parameter( cone = ( 1.0d+0, 0.0d+0 ) )
353 parameter( czero = ( 0.0d+0, 0.0d+0 ) )
355 parameter( int_one = 1 )
356 INTEGER DESCMULT, BIGNUM
357 parameter(descmult = 100, bignum = descmult * descmult)
358 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
359 $ lld_, mb_, m_, nb_, n_, rsrc_
360 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
361 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
362 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
365 INTEGER COMM_PROC, CSRC, FIRST_PROC, I, ICTXT,
366 $ ictxt_new, ictxt_save, idum3, ja_new, laf_min,
367 $ level_dist, llda, max_bw, mbw2, mycol, myrow,
368 $ my_num_cols, nb, next_tri_size_m,
369 $ next_tri_size_n, np, npcol, nprow, np_save,
370 $ odd_size, ofst, part_offset, part_size,
371 $ prev_tri_size_m, prev_tri_size_n, return_code,
372 $ store_n_a, up_prev_tri_size_m,
373 $ up_prev_tri_size_n, work_size_min, work_u
376 INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 9, 3 )
379 EXTERNAL blacs_get, blacs_gridexit, blacs_gridinfo,
381 $ zgemm, zgerv2d, zgesd2d, zlamov,
zlatcpy,
382 $ zpbtrf, zpotrf, zsyrk, ztbtrs, ztrmm, ztrrv2d,
383 $ ztrsd2d, ztrsm, ztrtrs
388 EXTERNAL lsame, numroc
391 INTRINSIC ichar,
min, mod
406 IF( return_code .NE. 0)
THEN
407 info = -( 6*100 + 2 )
412 ictxt = desca_1xp( 2 )
413 csrc = desca_1xp( 5 )
415 llda = desca_1xp( 6 )
416 store_n_a = desca_1xp( 3 )
423 max_bw =
max(bwl,bwu)
424 mbw2 = max_bw * max_bw
426 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
431 IF( lwork .LT. -1)
THEN
433 ELSE IF ( lwork .EQ. -1 )
THEN
443 IF( n+ja-1 .GT. store_n_a )
THEN
444 info = -( 6*100 + 6 )
447 IF(( bwl .GT. n-1 ) .OR.
448 $ ( bwl .LT. 0 ) )
THEN
452 IF(( bwu .GT. n-1 ) .OR.
453 $ ( bwu .LT. 0 ) )
THEN
457 IF( llda .LT. (bwl+bwu+1) )
THEN
458 info = -( 6*100 + 6 )
462 info = -( 6*100 + 4 )
467 IF( nprow .NE. 1 )
THEN
471 IF( n .GT. np*nb-mod( ja-1, nb ))
THEN
474 $
'PZDBTRF, D&C alg.: only 1 block per proc',
479 IF((ja+n-1.GT.nb) .AND. ( nb.LT.2*
max(bwl,bwu) ))
THEN
482 $
'PZDBTRF, D&C alg.: NB too small',
490 laf_min = nb*(bwl+bwu)+6*
max(bwl,bwu)*
max(bwl,bwu)
492 IF( laf .LT. laf_min )
THEN
497 $
'PZDBTRF: auxiliary storage error ',
504 work_size_min =
max(bwl,bwu)*
max(bwl,bwu)
506 work( 1 ) = work_size_min
508 IF( lwork .LT. work_size_min )
THEN
509 IF( lwork .NE. -1 )
THEN
512 $
'PZDBTRF: worksize error ',
520 param_check( 9, 1 ) = desca(5)
521 param_check( 8, 1 ) = desca(4)
522 param_check( 7, 1 ) = desca(3)
523 param_check( 6, 1 ) = desca(1)
524 param_check( 5, 1 ) = ja
525 param_check( 4, 1 ) = bwu
526 param_check( 3, 1 ) = bwl
527 param_check( 2, 1 ) = n
528 param_check( 1, 1 ) = idum3
530 param_check( 9, 2 ) = 605
531 param_check( 8, 2 ) = 604
532 param_check( 7, 2 ) = 603
533 param_check( 6, 2 ) = 601
534 param_check( 5, 2 ) = 5
535 param_check( 4, 2 ) = 3
536 param_check( 3, 2 ) = 2
537 param_check( 2, 2 ) = 1
538 param_check( 1, 2 ) = 10
546 ELSE IF( info.LT.-descmult )
THEN
549 info = -info * descmult
554 CALL globchk( ictxt, 9, param_check, 9,
555 $ param_check( 1, 3 ), info )
560 IF( info.EQ.bignum )
THEN
562 ELSE IF( mod( info, descmult ) .EQ. 0 )
THEN
563 info = -info / descmult
569 CALL pxerbla( ictxt,
'PZDBTRF', -info )
582 part_offset = nb*( (ja-1)/(npcol*nb) )
584 IF ( (mycol-csrc) .LT. (ja-part_offset-1)/nb )
THEN
585 part_offset = part_offset + nb
588 IF ( mycol .LT. csrc )
THEN
589 part_offset = part_offset - nb
598 first_proc = mod( ( ja-1 )/nb+csrc, npcol )
602 ja_new = mod( ja-1, nb ) + 1
607 np = ( ja_new+n-2 )/nb + 1
611 CALL reshape( ictxt, int_one, ictxt_new, int_one,
612 $ first_proc, int_one, np )
618 desca_1xp( 2 ) = ictxt_new
622 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
626 IF( myrow .LT. 0 )
THEN
639 my_num_cols = numroc( n, part_size, mycol, 0, npcol )
643 IF ( mycol .EQ. 0 )
THEN
644 part_offset = part_offset+mod( ja_new-1, part_size )
645 my_num_cols = my_num_cols - mod(ja_new-1, part_size )
650 ofst = part_offset*llda
654 odd_size = my_num_cols
655 IF ( mycol .LT. np-1 )
THEN
656 odd_size = odd_size - max_bw
661 work_u = bwu*odd_size + 3*mbw2
672 DO 20 i=1, work_size_min
686 IF( mycol .GT. 0 )
THEN
687 prev_tri_size_m=
min( bwl,
688 $ numroc( n, part_size, mycol, 0, npcol ) )
689 prev_tri_size_n=
min( bwl,
690 $ numroc( n, part_size, mycol-1, 0, npcol ) )
693 IF( mycol .GT. 0 )
THEN
694 up_prev_tri_size_m=
min( bwu,
695 $ numroc( n, part_size, mycol, 0, npcol ) )
696 up_prev_tri_size_n=
min( bwu,
697 $ numroc( n, part_size, mycol-1, 0, npcol ) )
700 IF( mycol .LT. npcol-1 )
THEN
701 next_tri_size_m=
min( bwl,
702 $ numroc( n, part_size, mycol+1, 0, npcol ) )
703 next_tri_size_n=
min( bwl,
704 $ numroc( n, part_size, mycol, 0, npcol ) )
707 IF ( mycol .LT. np-1 )
THEN
713 CALL ztrsd2d( ictxt,
'U',
'N', next_tri_size_m,
715 $ a( ofst+(my_num_cols-bwl)*llda+(bwl+bwu+1) ),
716 $ llda-1, 0, mycol+1 )
723 CALL zdbtrf( odd_size, odd_size, bwl, bwu, a( ofst + 1),
732 IF ( mycol .LT. np-1 )
THEN
740 $ a(( ofst+(bwl+bwu+1)+(odd_size-bwl)*llda )),
741 $ llda-1, af( odd_size*bwu+2*mbw2+1+max_bw-bwl ),
743 CALL zlamov(
'L', bwu, bwu, a( ( ofst+1+odd_size*llda ) ),
745 $ af( work_u+odd_size*bwl+2*mbw2+1+max_bw-bwu ),
750 CALL ztbtrs(
'L',
'N',
'U', bwu, bwl, bwu,
751 $ a( ofst+bwu+1+(odd_size-bwu )*llda ), llda,
752 $ af( work_u+odd_size*bwl+2*mbw2+1+max_bw-bwu ),
757 CALL ztbtrs(
'U',
'C',
'N', bwl, bwu, bwl,
758 $ a( ofst+1+(odd_size-bwl)*llda ), llda,
759 $ af( odd_size*bwu+2*mbw2+1+max_bw-bwl ), max_bw,
766 $ af( odd_size*bwu+2*mbw2+1+max_bw-bwl ), max_bw,
767 $ a(( ofst+(bwl+bwu+1)+(odd_size-bwl)*llda )),
772 CALL zlamov(
'L', bwu, bwu,
773 $ af( work_u+odd_size*bwl+2*mbw2+1+max_bw-bwu ),
774 $ max_bw, a(( ofst+1+odd_size*llda )), llda-1 )
784 CALL zgemm(
'C',
'N', max_bw, max_bw, max_bw, -cone ,
785 $ af( odd_size*bwu+2*mbw2+1), max_bw,
786 $ af( work_u+odd_size*bwl+2*mbw2+1), max_bw, cone,
787 $ a( ofst+odd_size*llda+1+bwu ), llda-1 )
796 IF ( mycol .NE. 0 )
THEN
806 CALL ztrrv2d( ictxt,
'U',
'N', prev_tri_size_m,
807 $ prev_tri_size_n, af( work_u+1 ), odd_size, 0,
814 CALL ztbtrs(
'L',
'N',
'U', odd_size, bwl, bwl,
815 $ a( ofst + bwu+1 ), llda, af( work_u+1 ),
824 CALL zlatcpy(
'L', up_prev_tri_size_n, up_prev_tri_size_m,
825 $ a( ofst+1 ), llda-1, af( 1 ), odd_size )
827 CALL ztbtrs(
'U',
'C',
'N', odd_size, bwu, bwu,
828 $ a( ofst + 1 ), llda,
829 $ af( 1 ), odd_size, info )
838 af( odd_size*bwu+2*mbw2+i ) = czero
841 CALL zgemm(
'C',
'N', bwu, bwl, odd_size,
842 $ -cone, af( 1 ), odd_size,
843 $ af( work_u+1 ), odd_size, czero,
844 $ af( 1+
max(0,bwl-bwu)+odd_size*bwu+
845 $ (2*max_bw+
max(0,bwu-bwl))*max_bw),
852 CALL zgesd2d( ictxt, max_bw, max_bw,
853 $ af( odd_size*bwu+2*mbw2+1 ), max_bw, 0,
857 IF ( mycol .LT. np-1 )
THEN
869 $ af( work_u+odd_size-bwl+1 ), odd_size,
870 $ af( (odd_size)*bwu+1+(max_bw-bwl) ),
873 CALL ztrmm(
'R',
'U',
'C',
'N', bwl, bwl, -cone,
874 $ a( ( ofst+(bwl+bwu+1)+(odd_size-bwl)*llda ) ),
875 $ llda-1, af( (odd_size)*bwu+1+(max_bw-bwl) ),
885 $ af( odd_size-bwu+1 ), odd_size,
886 $ af( work_u+(odd_size)*bwl+1+max_bw-bwu ),
889 CALL ztrmm(
'R',
'L',
'N',
'N', bwu, bwu, -cone,
890 $ a( ( ofst+1+odd_size*llda ) ), llda-1,
891 $ af( work_u+(odd_size)*bwl+1+max_bw-bwu ),
905 CALL igamx2d( ictxt,
'A',
' ', 1, 1, info, 1, info, info,
908 IF( mycol.EQ.0 )
THEN
909 CALL igebs2d( ictxt,
'A',
' ', 1, 1, info, 1 )
911 CALL igebr2d( ictxt,
'A',
' ', 1, 1, info, 1, 0, 0 )
914 IF ( info.NE.0 )
THEN
929 IF( mycol .EQ. npcol-1 )
THEN
937 IF( (mod( mycol+1, 2 ) .EQ. 0) .AND. ( mycol .GT. 0 ) )
THEN
939 CALL zgesd2d( ictxt, max_bw, max_bw,
940 $ af( odd_size*bwu+1 ),
941 $ max_bw, 0, mycol-1 )
943 CALL zgesd2d( ictxt, max_bw, max_bw,
944 $ af( work_u+odd_size*bwl+1 ),
945 $ max_bw, 0, mycol-1 )
952 CALL zlamov(
'N', max_bw, max_bw,
953 $ a( ofst+odd_size*llda+bwu+1 ),
954 $ llda-1, af( odd_size*bwu+mbw2+1 ),
959 IF( mycol.LT. npcol-1 )
THEN
961 CALL zgerv2d( ictxt, max_bw, max_bw,
962 $ af( odd_size*bwu+2*mbw2+1 ),
968 CALL zaxpy( mbw2, cone,
969 $ af( odd_size*bwu+2*mbw2+1 ),
970 $ 1, af( odd_size*bwu+mbw2+1 ), 1 )
985 IF( mod( (mycol+1)/level_dist, 2) .NE. 0 )
GOTO 11
989 IF( mycol-level_dist .GE. 0 )
THEN
990 CALL zgerv2d( ictxt, max_bw, max_bw, work( 1 ),
991 $ max_bw, 0, mycol-level_dist )
993 CALL zaxpy( mbw2, cone, work( 1 ), 1,
994 $ af( odd_size*bwu+mbw2+1 ), 1 )
1000 IF( mycol+level_dist .LT. npcol-1 )
THEN
1001 CALL zgerv2d( ictxt, max_bw, max_bw, work( 1 ),
1002 $ max_bw, 0, mycol+level_dist )
1004 CALL zaxpy( mbw2, cone, work( 1 ), 1,
1005 $ af( odd_size*bwu+mbw2+1 ), 1 )
1009 level_dist = level_dist*2
1021 CALL zdbtrf( max_bw, max_bw,
min( max_bw-1, bwl ),
1022 $
min( max_bw-1, bwu ), af( odd_size*bwu+mbw2+1
1023 $ -(
min( max_bw-1, bwu ))), max_bw+1, info )
1025 IF( info.NE.0 )
THEN
1026 info = npcol + mycol
1034 IF( level_dist .EQ. 1 )
THEN
1035 comm_proc = mycol + 1
1040 CALL zlamov(
'N', max_bw, max_bw, af( odd_size*bwu+1 ),
1041 $ max_bw, af( work_u+odd_size*bwl+2*mbw2+1 ),
1044 CALL zlamov(
'N', max_bw, max_bw, af( work_u+odd_size*bwl+1 ),
1045 $ max_bw, af( odd_size*bwu+2*mbw2+1 ), max_bw )
1048 comm_proc = mycol + level_dist/2
1051 IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )
THEN
1053 CALL zgerv2d( ictxt, max_bw, max_bw,
1054 $ af( odd_size*bwu+1 ),
1055 $ max_bw, 0, comm_proc )
1057 CALL zgerv2d( ictxt, max_bw, max_bw,
1058 $ af( work_u+odd_size*bwl+1 ),
1059 $ max_bw, 0, comm_proc )
1061 IF( info .EQ. 0 )
THEN
1068 $
'L',
'N',
'U', bwu,
min( bwl, bwu-1 ), bwu,
1070 $ mbw2+1+(max_bw+1)*(max_bw-bwu)), max_bw+1,
1071 $ af( work_u+odd_size*bwl+1+max_bw-bwu), max_bw, info )
1077 $
'U',
'C',
'N', bwl,
min( bwu, bwl-1 ), bwl,
1079 $ mbw2+1-
min( bwu, bwl-1 )+(max_bw+1)*(max_bw-bwl)), max_bw+1,
1080 $ af( odd_size*bwu+1+max_bw-bwl), max_bw, info )
1087 CALL zgemm(
'C',
'N', max_bw, max_bw, max_bw, -cone,
1088 $ af( (odd_size)*bwu+1 ), max_bw,
1089 $ af( work_u+(odd_size)*bwl+1 ), max_bw, czero,
1090 $ work( 1 ), max_bw )
1094 CALL zgesd2d( ictxt, max_bw, max_bw, work( 1 ), max_bw,
1095 $ 0, mycol+level_dist )
1105 IF( (mycol/level_dist .GT. 0 ).AND.
1106 $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) )
THEN
1108 IF( level_dist .GT. 1)
THEN
1113 CALL zgerv2d( ictxt, max_bw, max_bw,
1114 $ af( work_u+odd_size*bwl+2*mbw2+1 ),
1115 $ max_bw, 0, mycol-level_dist/2 )
1120 CALL zgerv2d( ictxt, max_bw, max_bw,
1121 $ af( odd_size*bwu+2*mbw2+1 ),
1122 $ max_bw, 0, mycol-level_dist/2 )
1127 IF( info.EQ.0 )
THEN
1134 CALL zlatcpy(
'N', max_bw, max_bw, af( work_u+odd_size*bwl+
1135 $ 2*mbw2+1), max_bw, work( 1 ), max_bw )
1138 $
'L',
'N',
'U', max_bw,
min( bwl, max_bw-1 ), bwl,
1139 $ af( odd_size*bwu+mbw2+1), max_bw+1,
1140 $ work( 1+max_bw*(max_bw-bwl) ), max_bw, info )
1145 $
'N', max_bw, max_bw, work( 1 ), max_bw,
1146 $ af( work_u+odd_size*bwl+
1147 $ 2*mbw2+1), max_bw )
1153 CALL zlatcpy(
'N', max_bw, max_bw, af( odd_size*bwu+
1154 $ 2*mbw2+1), max_bw, work( 1 ), max_bw )
1157 $
'U',
'C',
'N', max_bw,
min( bwu, max_bw-1 ), bwu,
1158 $ af( odd_size*bwu+mbw2+1-
min( bwu, max_bw-1 )), max_bw+1,
1159 $ work( 1+max_bw*(max_bw-bwu) ), max_bw, info )
1164 $
'N', max_bw, max_bw, work( 1 ), max_bw,
1166 $ 2*mbw2+1), max_bw )
1175 CALL zgemm(
'N',
'C', max_bw, max_bw, max_bw, -cone,
1176 $ af( (odd_size)*bwu+2*mbw2+1 ), max_bw,
1177 $ af( work_u+(odd_size)*bwl+2*mbw2+1 ), max_bw,
1178 $ czero, work( 1 ), max_bw )
1182 CALL zgesd2d( ictxt, max_bw, max_bw, work( 1 ), max_bw,
1183 $ 0, mycol-level_dist )
1187 IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )
THEN
1191 IF( ( mod( mycol/( 2*level_dist ),2 )) .EQ.0 )
THEN
1192 comm_proc = mycol + level_dist
1194 comm_proc = mycol - level_dist
1201 CALL zgemm(
'N',
'N', max_bw, max_bw, max_bw, -cone,
1202 $ af( work_u+odd_size*bwl+2*mbw2+1 ), max_bw,
1203 $ af( odd_size*bwu+1 ), max_bw, czero,
1204 $ work( 1 ), max_bw )
1208 CALL zgesd2d( ictxt, max_bw, max_bw, work( 1 ), max_bw,
1211 CALL zgemm(
'N',
'N', max_bw, max_bw, max_bw, -cone,
1212 $ af( odd_size*bwu+2*mbw2+1 ), max_bw,
1213 $ af( work_u+odd_size*bwl+1 ), max_bw, czero,
1214 $ work( 1 ), max_bw )
1218 CALL zgesd2d( ictxt, max_bw, max_bw, work( 1 ), max_bw,
1234 IF( ictxt_save .NE. ictxt_new )
THEN
1235 CALL blacs_gridexit( ictxt_new )
1247 work( 1 ) = work_size_min
1251 CALL igamx2d( ictxt,
'A',
' ', 1, 1, info, 1, info, info,
1254 IF( mycol.EQ.0 )
THEN
1255 CALL igebs2d( ictxt,
'A',
' ', 1, 1, info, 1 )
1257 CALL igebr2d( ictxt,
'A',
' ', 1, 1, info, 1, 0, 0 )