1 SUBROUTINE psdbtrf( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK,
9 INTEGER BWL, BWU, INFO, JA, LAF, LWORK, N
13 REAL A( * ), AF( * ), WORK( * )
343 parameter( one = 1.0e+0 )
345 parameter( zero = 0.0e+0 )
347 parameter( int_one = 1 )
348 INTEGER DESCMULT, BIGNUM
349 parameter( descmult = 100, bignum = descmult*descmult )
350 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
351 $ lld_, mb_, m_, nb_, n_, rsrc_
352 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
353 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
354 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
357 INTEGER COMM_PROC, CSRC, FIRST_PROC, I, I1, I2, ICTXT,
358 $ ictxt_new, ictxt_save, idum3, ja_new, laf_min,
359 $ level_dist, llda, max_bw, mbw2, mycol, myrow,
360 $ my_num_cols, nb, next_tri_size_m,
361 $ next_tri_size_n, np, npcol, nprow, np_save,
362 $ odd_size, ofst, part_offset, part_size,
363 $ prev_tri_size_m, prev_tri_size_n, return_code,
364 $ store_n_a, up_prev_tri_size_m,
365 $ up_prev_tri_size_n, work_size_min, work_u
368 INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 9, 3 )
371 EXTERNAL blacs_gridexit, blacs_gridinfo, saxpy,
sdbtrf,
373 $ slamov,
slatcpy, stbtrs, strmm, strrv2d,
374 $ strsd2d,
globchk, igamx2d, igebr2d, igebs2d,
397 IF( return_code.NE.0 )
THEN
403 ictxt = desca_1xp( 2 )
404 csrc = desca_1xp( 5 )
406 llda = desca_1xp( 6 )
407 store_n_a = desca_1xp( 3 )
414 max_bw =
max( bwl, bwu )
417 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
422 IF( lwork.LT.-1 )
THEN
424 ELSE IF( lwork.EQ.-1 )
THEN
434 IF( n+ja-1.GT.store_n_a )
THEN
438 IF( ( bwl.GT.n-1 ) .OR. ( bwl.LT.0 ) )
THEN
442 IF( ( bwu.GT.n-1 ) .OR. ( bwu.LT.0 ) )
THEN
446 IF( llda.LT.( bwl+bwu+1 ) )
THEN
456 IF( nprow.NE.1 )
THEN
460 IF( n.GT.np*nb-mod( ja-1, nb ) )
THEN
462 CALL pxerbla( ictxt,
'PSDBTRF, D&C alg.: only 1 block per proc'
467 IF( ( ja+n-1.GT.nb ) .AND. ( nb.LT.2*
max( bwl, bwu ) ) )
THEN
469 CALL pxerbla( ictxt,
'PSDBTRF, D&C alg.: NB too small', -info )
476 laf_min = nb*( bwl+bwu ) + 6*
max( bwl, bwu )*
max( bwl, bwu )
478 IF( laf.LT.laf_min )
THEN
482 CALL pxerbla( ictxt,
'PSDBTRF: auxiliary storage error ',
489 work_size_min =
max( bwl, bwu )*
max( bwl, bwu )
491 work( 1 ) = work_size_min
493 IF( lwork.LT.work_size_min )
THEN
494 IF( lwork.NE.-1 )
THEN
496 CALL pxerbla( ictxt,
'PSDBTRF: worksize error ', -info )
503 param_check( 9, 1 ) = desca( 5 )
504 param_check( 8, 1 ) = desca( 4 )
505 param_check( 7, 1 ) = desca( 3 )
506 param_check( 6, 1 ) = desca( 1 )
507 param_check( 5, 1 ) = ja
508 param_check( 4, 1 ) = bwu
509 param_check( 3, 1 ) = bwl
510 param_check( 2, 1 ) = n
511 param_check( 1, 1 ) = idum3
513 param_check( 9, 2 ) = 605
514 param_check( 8, 2 ) = 604
515 param_check( 7, 2 ) = 603
516 param_check( 6, 2 ) = 601
517 param_check( 5, 2 ) = 5
518 param_check( 4, 2 ) = 3
519 param_check( 3, 2 ) = 2
520 param_check( 2, 2 ) = 1
521 param_check( 1, 2 ) = 10
529 ELSE IF( info.LT.-descmult )
THEN
532 info = -info*descmult
537 CALL globchk( ictxt, 9, param_check, 9, param_check( 1, 3 ),
543 IF( info.EQ.bignum )
THEN
545 ELSE IF( mod( info, descmult ).EQ.0 )
THEN
546 info = -info / descmult
552 CALL pxerbla( ictxt,
'PSDBTRF', -info )
565 part_offset = nb*( ( ja-1 ) / ( npcol*nb ) )
567 IF( ( mycol-csrc ).LT.( ja-part_offset-1 ) / nb )
THEN
568 part_offset = part_offset + nb
571 IF( mycol.LT.csrc )
THEN
572 part_offset = part_offset - nb
581 first_proc = mod( ( ja-1 ) / nb+csrc, npcol )
585 ja_new = mod( ja-1, nb ) + 1
590 np = ( ja_new+n-2 ) / nb + 1
594 CALL reshape( ictxt, int_one, ictxt_new, int_one, first_proc,
601 desca_1xp( 2 ) = ictxt_new
605 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
609 IF( myrow.LT.0 )
THEN
622 my_num_cols = numroc( n, part_size, mycol, 0, npcol )
626 IF( mycol.EQ.0 )
THEN
627 part_offset = part_offset + mod( ja_new-1, part_size )
628 my_num_cols = my_num_cols - mod( ja_new-1, part_size )
633 ofst = part_offset*llda
637 odd_size = my_num_cols
638 IF( mycol.LT.np-1 )
THEN
639 odd_size = odd_size - max_bw
644 work_u = bwu*odd_size + 3*mbw2
655 DO 20 i = 1, work_size_min
669 IF( mycol.GT.0 )
THEN
670 prev_tri_size_m =
min( bwl, numroc( n, part_size, mycol, 0,
672 prev_tri_size_n =
min( bwl, numroc( n, part_size, mycol-1, 0,
676 IF( mycol.GT.0 )
THEN
677 up_prev_tri_size_m =
min( bwu,
678 $ numroc( n, part_size, mycol, 0, npcol ) )
679 up_prev_tri_size_n =
min( bwu,
680 $ numroc( n, part_size, mycol-1, 0,
684 IF( mycol.LT.npcol-1 )
THEN
685 next_tri_size_m =
min( bwl, numroc( n, part_size, mycol+1, 0,
687 next_tri_size_n =
min( bwl, numroc( n, part_size, mycol, 0,
691 IF( mycol.LT.np-1 )
THEN
697 CALL strsd2d( ictxt,
'U',
'N', next_tri_size_m,
698 $ next_tri_size_n, a( ofst+( my_num_cols-bwl )*
699 $ llda+( bwl+bwu+1 ) ), llda-1, 0, mycol+1 )
706 CALL sdbtrf( odd_size, odd_size, bwl, bwu, a( ofst+1 ), llda,
715 IF( mycol.LT.np-1 )
THEN
722 CALL slatcpy(
'U', bwl, bwl, a( ( ofst+( bwl+bwu+1 )+
723 $ ( odd_size-bwl )*llda ) ), llda-1,
724 $ af( odd_size*bwu+2*mbw2+1+max_bw-bwl ), max_bw )
725 CALL slamov(
'L', bwu, bwu, a( ( ofst+1+odd_size*llda ) ),
726 $ llda-1, af( work_u+odd_size*bwl+2*mbw2+1+max_bw-
731 CALL stbtrs(
'L',
'N',
'U', bwu, bwl, bwu,
732 $ a( ofst+bwu+1+( odd_size-bwu )*llda ), llda,
733 $ af( work_u+odd_size*bwl+2*mbw2+1+max_bw-bwu ),
738 CALL stbtrs(
'U',
'T',
'N', bwl, bwu, bwl,
739 $ a( ofst+1+( odd_size-bwl )*llda ), llda,
740 $ af( odd_size*bwu+2*mbw2+1+max_bw-bwl ), max_bw,
746 CALL slatcpy(
'L', bwl, bwl, af( odd_size*bwu+2*mbw2+1+max_bw-
747 $ bwl ), max_bw, a( ( ofst+( bwl+bwu+1 )+
748 $ ( odd_size-bwl )*llda ) ), llda-1 )
752 CALL slamov(
'L', bwu, bwu, af( work_u+odd_size*bwl+2*mbw2+1+
753 $ max_bw-bwu ), max_bw, a( ( ofst+1+odd_size*
764 CALL sgemm(
'T',
'N', max_bw, max_bw, max_bw, -one,
765 $ af( odd_size*bwu+2*mbw2+1 ), max_bw,
766 $ af( work_u+odd_size*bwl+2*mbw2+1 ), max_bw, one,
767 $ a( ofst+odd_size*llda+1+bwu ), llda-1 )
776 IF( mycol.NE.0 )
THEN
786 CALL strrv2d( ictxt,
'U',
'N', prev_tri_size_m,
787 $ prev_tri_size_n, af( work_u+1 ), bwl, 0,
797 DO 40 i2 = i1 + 1, bwl
798 af( work_u+i2+( i1-1 )*bwl ) = af( work_u+i1+( i2-1 )*
800 af( work_u+i1+( i2-1 )*bwl ) = zero
804 DO 60 i1 = 2, odd_size
805 i2 =
min( i1-1, bwl )
806 CALL sgemv(
'N', bwl, i2, -one,
807 $ af( work_u+1+( i1-1-i2 )*bwl ), bwl,
808 $ a( ofst+bwu+1+i2+( i1-1-i2 )*llda ), llda-1,
809 $ one, af( work_u+1+( i1-1 )*bwl ), 1 )
818 CALL slamov(
'L', up_prev_tri_size_n, up_prev_tri_size_m,
819 $ a( ofst+1 ), llda-1, af( 1 ), bwu )
821 DO 80 i1 = 1, odd_size
822 i2 =
min( bwu, i1-1 )
823 CALL sgemv(
'N', bwu, i2, -one, af( ( i1-1-i2 )*bwu+1 ),
824 $ bwu, a( ofst+bwu+1-i2+( i1-1 )*llda ), 1,
825 $ one, af( ( i1-1 )*bwu+1 ), 1 )
828 af( ( i1-1 )*bwu+i ) = af( ( i1-1 )*bwu+i ) /
829 $ a( ( i1-1 )*llda+bwu+1 )
839 af( odd_size*bwu+2*mbw2+i ) = zero
842 CALL sgemm(
'N',
'T', bwu, bwl, odd_size, -one, af( 1 ),
843 $ bwu, af( work_u+1 ), bwl, zero,
844 $ af( 1+
max( 0, bwl-bwu )+odd_size*bwu+( 2*max_bw+
845 $
max( 0, bwu-bwl ) )*max_bw ), max_bw )
851 CALL sgesd2d( ictxt, max_bw, max_bw,
852 $ af( odd_size*bwu+2*mbw2+1 ), max_bw, 0,
856 IF( mycol.LT.np-1 )
THEN
867 CALL slamov(
'N', bwl, bwl,
868 $ af( work_u+( odd_size-bwl )*bwl+1 ), bwl,
869 $ af( ( odd_size )*bwu+1+( max_bw-bwl ) ),
872 CALL strmm(
'R',
'U',
'T',
'N', bwl, bwl, -one,
873 $ a( ( ofst+( bwl+bwu+1 )+( odd_size-bwl )*
874 $ llda ) ), llda-1, af( ( odd_size )*bwu+1+
875 $ ( max_bw-bwl ) ), max_bw )
883 CALL slamov(
'N', bwu, bwu, af( ( odd_size-bwu )*bwu+1 ),
884 $ bwu, af( work_u+( odd_size )*bwl+1+max_bw-
887 CALL strmm(
'R',
'L',
'N',
'N', bwu, bwu, -one,
888 $ a( ( ofst+1+odd_size*llda ) ), llda-1,
889 $ af( work_u+( odd_size )*bwl+1+max_bw-bwu ),
903 CALL igamx2d( ictxt,
'A',
' ', 1, 1, info, 1, info, info, -1, 0,
906 IF( mycol.EQ.0 )
THEN
907 CALL igebs2d( ictxt,
'A',
' ', 1, 1, info, 1 )
909 CALL igebr2d( ictxt,
'A',
' ', 1, 1, info, 1, 0, 0 )
927 IF( mycol.EQ.npcol-1 )
THEN
935 IF( ( mod( mycol+1, 2 ).EQ.0 ) .AND. ( mycol.GT.0 ) )
THEN
937 CALL sgesd2d( ictxt, max_bw, max_bw, af( odd_size*bwu+1 ),
938 $ max_bw, 0, mycol-1 )
940 CALL sgesd2d( ictxt, max_bw, max_bw,
941 $ af( work_u+odd_size*bwl+1 ), max_bw, 0, mycol-1 )
948 CALL slamov(
'N', max_bw, max_bw, a( ofst+odd_size*llda+bwu+1 ),
949 $ llda-1, af( odd_size*bwu+mbw2+1 ), max_bw )
953 IF( mycol.LT.npcol-1 )
THEN
955 CALL sgerv2d( ictxt, max_bw, max_bw,
956 $ af( odd_size*bwu+2*mbw2+1 ), max_bw, 0, mycol+1 )
960 CALL saxpy( mbw2, one, af( odd_size*bwu+2*mbw2+1 ), 1,
961 $ af( odd_size*bwu+mbw2+1 ), 1 )
976 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
981 IF( mycol-level_dist.GE.0 )
THEN
982 CALL sgerv2d( ictxt, max_bw, max_bw, work( 1 ), max_bw, 0,
985 CALL saxpy( mbw2, one, work( 1 ), 1, af( odd_size*bwu+mbw2+1 ),
992 IF( mycol+level_dist.LT.npcol-1 )
THEN
993 CALL sgerv2d( ictxt, max_bw, max_bw, work( 1 ), max_bw, 0,
996 CALL saxpy( mbw2, one, work( 1 ), 1, af( odd_size*bwu+mbw2+1 ),
1001 level_dist = level_dist*2
1013 CALL sdbtrf( max_bw, max_bw,
min( max_bw-1, bwl ),
1014 $
min( max_bw-1, bwu ), af( odd_size*bwu+mbw2+1-
1015 $ (
min( max_bw-1, bwu ) ) ), max_bw+1, info )
1017 IF( info.NE.0 )
THEN
1018 info = npcol + mycol
1026 IF( level_dist.EQ.1 )
THEN
1027 comm_proc = mycol + 1
1032 CALL slamov(
'N', max_bw, max_bw, af( odd_size*bwu+1 ), max_bw,
1033 $ af( work_u+odd_size*bwl+2*mbw2+1 ), max_bw )
1035 CALL slamov(
'N', max_bw, max_bw, af( work_u+odd_size*bwl+1 ),
1036 $ max_bw, af( odd_size*bwu+2*mbw2+1 ), max_bw )
1039 comm_proc = mycol + level_dist / 2
1042 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 )
THEN
1044 CALL sgerv2d( ictxt, max_bw, max_bw, af( odd_size*bwu+1 ),
1045 $ max_bw, 0, comm_proc )
1047 CALL sgerv2d( ictxt, max_bw, max_bw,
1048 $ af( work_u+odd_size*bwl+1 ), max_bw, 0,
1051 IF( info.EQ.0 )
THEN
1057 CALL stbtrs(
'L',
'N',
'U', bwu,
min( bwl, bwu-1 ), bwu,
1058 $ af( odd_size*bwu+mbw2+1+( max_bw+1 )*( max_bw-
1059 $ bwu ) ), max_bw+1, af( work_u+odd_size*bwl+1+
1060 $ max_bw-bwu ), max_bw, info )
1065 CALL stbtrs(
'U',
'T',
'N', bwl,
min( bwu, bwl-1 ), bwl,
1066 $ af( odd_size*bwu+mbw2+1-
min( bwu,
1067 $ bwl-1 )+( max_bw+1 )*( max_bw-bwl ) ),
1068 $ max_bw+1, af( odd_size*bwu+1+max_bw-bwl ),
1076 CALL sgemm(
'T',
'N', max_bw, max_bw, max_bw, -one,
1077 $ af( ( odd_size )*bwu+1 ), max_bw,
1078 $ af( work_u+( odd_size )*bwl+1 ), max_bw, zero,
1079 $ work( 1 ), max_bw )
1083 CALL sgesd2d( ictxt, max_bw, max_bw, work( 1 ), max_bw, 0,
1084 $ mycol+level_dist )
1094 IF( ( mycol / level_dist.GT.0 ) .AND.
1095 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
THEN
1097 IF( level_dist.GT.1 )
THEN
1102 CALL sgerv2d( ictxt, max_bw, max_bw,
1103 $ af( work_u+odd_size*bwl+2*mbw2+1 ), max_bw, 0,
1104 $ mycol-level_dist / 2 )
1109 CALL sgerv2d( ictxt, max_bw, max_bw,
1110 $ af( odd_size*bwu+2*mbw2+1 ), max_bw, 0,
1111 $ mycol-level_dist / 2 )
1116 IF( info.EQ.0 )
THEN
1123 CALL slatcpy(
'N', max_bw, max_bw,
1124 $ af( work_u+odd_size*bwl+2*mbw2+1 ), max_bw,
1125 $ work( 1 ), max_bw )
1127 CALL stbtrs(
'L',
'N',
'U', max_bw,
min( bwl, max_bw-1 ),
1128 $ bwl, af( odd_size*bwu+mbw2+1 ), max_bw+1,
1129 $ work( 1+max_bw*( max_bw-bwl ) ), max_bw, info )
1133 CALL slatcpy(
'N', max_bw, max_bw, work( 1 ), max_bw,
1134 $ af( work_u+odd_size*bwl+2*mbw2+1 ), max_bw )
1140 CALL slatcpy(
'N', max_bw, max_bw,
1141 $ af( odd_size*bwu+2*mbw2+1 ), max_bw,
1142 $ work( 1 ), max_bw )
1144 CALL stbtrs(
'U',
'T',
'N', max_bw,
min( bwu, max_bw-1 ),
1145 $ bwu, af( odd_size*bwu+mbw2+1-
min( bwu,
1146 $ max_bw-1 ) ), max_bw+1,
1147 $ work( 1+max_bw*( max_bw-bwu ) ), max_bw, info )
1151 CALL slatcpy(
'N', max_bw, max_bw, work( 1 ), max_bw,
1152 $ af( odd_size*bwu+2*mbw2+1 ), max_bw )
1161 CALL sgemm(
'N',
'T', max_bw, max_bw, max_bw, -one,
1162 $ af( ( odd_size )*bwu+2*mbw2+1 ), max_bw,
1163 $ af( work_u+( odd_size )*bwl+2*mbw2+1 ), max_bw,
1164 $ zero, work( 1 ), max_bw )
1168 CALL sgesd2d( ictxt, max_bw, max_bw, work( 1 ), max_bw, 0,
1169 $ mycol-level_dist )
1173 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 )
THEN
1177 IF( ( mod( mycol / ( 2*level_dist ), 2 ) ).EQ.0 )
THEN
1178 comm_proc = mycol + level_dist
1180 comm_proc = mycol - level_dist
1187 CALL sgemm(
'N',
'N', max_bw, max_bw, max_bw, -one,
1188 $ af( work_u+odd_size*bwl+2*mbw2+1 ), max_bw,
1189 $ af( odd_size*bwu+1 ), max_bw, zero, work( 1 ),
1194 CALL sgesd2d( ictxt, max_bw, max_bw, work( 1 ), max_bw, 0,
1197 CALL sgemm(
'N',
'N', max_bw, max_bw, max_bw, -one,
1198 $ af( odd_size*bwu+2*mbw2+1 ), max_bw,
1199 $ af( work_u+odd_size*bwl+1 ), max_bw, zero,
1200 $ work( 1 ), max_bw )
1204 CALL sgesd2d( ictxt, max_bw, max_bw, work( 1 ), max_bw, 0,
1220 IF( ictxt_save.NE.ictxt_new )
THEN
1221 CALL blacs_gridexit( ictxt_new )
1233 work( 1 ) = work_size_min
1237 CALL igamx2d( ictxt,
'A',
' ', 1, 1, info, 1, info, info, -1, 0,
1240 IF( mycol.EQ.0 )
THEN
1241 CALL igebs2d( ictxt,
'A',
' ', 1, 1, info, 1 )
1243 CALL igebr2d( ictxt,
'A',
' ', 1, 1, info, 1, 0, 0 )