1 SUBROUTINE pddbtrsv( UPLO, TRANS, N, BWL, BWU, NRHS, A, JA, DESCA,
2 $ B, IB, DESCB, AF, LAF, WORK, LWORK, INFO )
10 INTEGER BWL, BWU, IB, INFO, JA, LAF, LWORK, N, NRHS
13 INTEGER DESCA( * ), DESCB( * )
14 DOUBLE PRECISION A( * ), AF( * ), B( * ), WORK( * )
374 parameter( one = 1.0d+0 )
375 DOUBLE PRECISION ZERO
376 parameter( zero = 0.0d+0 )
378 parameter( int_one = 1 )
379 INTEGER DESCMULT, BIGNUM
380 parameter( descmult = 100, bignum = descmult*descmult )
381 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
382 $ lld_, mb_, m_, nb_, n_, rsrc_
383 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
384 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
385 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
388 INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE,
389 $ idum1, idum2, idum3, ja_new, level_dist, llda,
390 $ lldb, max_bw, mbw2, mycol, myrow, my_num_cols,
391 $ nb, np, npcol, nprow, np_save, odd_size, ofst,
392 $ part_offset, part_size, return_code, store_m_b,
393 $ store_n_a, work_size_min, work_u
396 INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ),
397 $ param_check( 18, 3 )
401 $ dgemm, dgerv2d, dgesd2d, dlamov,
dmatadd,
407 EXTERNAL lsame, numroc
410 INTRINSIC ichar,
max,
min, mod
426 IF( return_code.NE.0 )
THEN
432 IF( return_code.NE.0 )
THEN
439 IF( desca_1xp( 2 ).NE.descb_px1( 2 ) )
THEN
447 IF( desca_1xp( 4 ).NE.descb_px1( 4 ) )
THEN
453 IF( desca_1xp( 5 ).NE.descb_px1( 5 ) )
THEN
459 ictxt = desca_1xp( 2 )
460 csrc = desca_1xp( 5 )
462 llda = desca_1xp( 6 )
463 store_n_a = desca_1xp( 3 )
464 lldb = descb_px1( 6 )
465 store_m_b = descb_px1( 3 )
472 max_bw =
max( bwl, bwu )
475 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
480 IF( lsame( uplo,
'U' ) )
THEN
482 ELSE IF( lsame( uplo,
'L' ) )
THEN
488 IF( lsame( trans,
'N' ) )
THEN
490 ELSE IF( lsame( trans,
'T' ) )
THEN
492 ELSE IF( lsame( trans,
'C' ) )
THEN
498 IF( lwork.LT.-1 )
THEN
500 ELSE IF( lwork.EQ.-1 )
THEN
510 IF( n+ja-1.GT.store_n_a )
THEN
514 IF( ( bwl.GT.n-1 ) .OR. ( bwl.LT.0 ) )
THEN
518 IF( ( bwu.GT.n-1 ) .OR. ( bwu.LT.0 ) )
THEN
522 IF( llda.LT.( bwl+bwu+1 ) )
THEN
530 IF( n+ib-1.GT.store_m_b )
THEN
534 IF( lldb.LT.nb )
THEN
550 IF( nprow.NE.1 )
THEN
554 IF( n.GT.np*nb-mod( ja-1, nb ) )
THEN
557 $
'PDDBTRSV, D&C alg.: only 1 block per proc',
562 IF( ( ja+n-1.GT.nb ) .AND. ( nb.LT.2*
max( bwl, bwu ) ) )
THEN
564 CALL pxerbla( ictxt,
'PDDBTRSV, D&C alg.: NB too small',
570 work_size_min =
max( bwl, bwu )*nrhs
572 work( 1 ) = work_size_min
574 IF( lwork.LT.work_size_min )
THEN
575 IF( lwork.NE.-1 )
THEN
577 CALL pxerbla( ictxt,
'PDDBTRSV: worksize error', -info )
584 param_check( 18, 1 ) = descb( 5 )
585 param_check( 17, 1 ) = descb( 4 )
586 param_check( 16, 1 ) = descb( 3 )
587 param_check( 15, 1 ) = descb( 2 )
588 param_check( 14, 1 ) = descb( 1 )
589 param_check( 13, 1 ) = ib
590 param_check( 12, 1 ) = desca( 5 )
591 param_check( 11, 1 ) = desca( 4 )
592 param_check( 10, 1 ) = desca( 3 )
593 param_check( 9, 1 ) = desca( 1 )
594 param_check( 8, 1 ) = ja
595 param_check( 7, 1 ) = nrhs
596 param_check( 6, 1 ) = bwu
597 param_check( 5, 1 ) = bwl
598 param_check( 4, 1 ) = n
599 param_check( 3, 1 ) = idum3
600 param_check( 2, 1 ) = idum2
601 param_check( 1, 1 ) = idum1
603 param_check( 18, 2 ) = 1205
604 param_check( 17, 2 ) = 1204
605 param_check( 16, 2 ) = 1203
606 param_check( 15, 2 ) = 1202
607 param_check( 14, 2 ) = 1201
608 param_check( 13, 2 ) = 11
609 param_check( 12, 2 ) = 905
610 param_check( 11, 2 ) = 904
611 param_check( 10, 2 ) = 903
612 param_check( 9, 2 ) = 901
613 param_check( 8, 2 ) = 8
614 param_check( 7, 2 ) = 6
615 param_check( 6, 2 ) = 5
616 param_check( 5, 2 ) = 4
617 param_check( 4, 2 ) = 3
618 param_check( 3, 2 ) = 16
619 param_check( 2, 2 ) = 2
620 param_check( 1, 2 ) = 1
628 ELSE IF( info.LT.-descmult )
THEN
631 info = -info*descmult
636 CALL globchk( ictxt, 18, param_check, 18, param_check( 1, 3 ),
642 IF( info.EQ.bignum )
THEN
644 ELSE IF( mod( info, descmult ).EQ.0 )
THEN
645 info = -info / descmult
651 CALL pxerbla( ictxt,
'PDDBTRSV', -info )
667 part_offset = nb*( ( ja-1 ) / ( npcol*nb ) )
669 IF( ( mycol-csrc ).LT.( ja-part_offset-1 ) / nb )
THEN
670 part_offset = part_offset + nb
673 IF( mycol.LT.csrc )
THEN
674 part_offset = part_offset - nb
683 first_proc = mod( ( ja-1 ) / nb+csrc, npcol )
687 ja_new = mod( ja-1, nb ) + 1
692 np = ( ja_new+n-2 ) / nb + 1
696 CALL reshape( ictxt, int_one, ictxt_new, int_one, first_proc,
703 desca_1xp( 2 ) = ictxt_new
704 descb_px1( 2 ) = ictxt_new
708 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
712 IF( myrow.LT.0 )
THEN
725 my_num_cols = numroc( n, part_size, mycol, 0, npcol )
729 IF( mycol.EQ.0 )
THEN
730 part_offset = part_offset + mod( ja_new-1, part_size )
731 my_num_cols = my_num_cols - mod( ja_new-1, part_size )
736 ofst = part_offset*llda
740 odd_size = my_num_cols
741 IF( mycol.LT.np-1 )
THEN
742 odd_size = odd_size - max_bw
747 work_u = bwu*odd_size + 3*mbw2
753 IF( lsame( uplo,
'L' ) )
THEN
755 IF( lsame( trans,
'N' ) )
THEN
766 CALL dtbtrs( uplo,
'N',
'U', odd_size, bwl, nrhs,
767 $ a( ofst+1+bwu ), llda, b( part_offset+1 ),
771 IF( mycol.LT.np-1 )
THEN
779 CALL dlamov(
'N', bwl, nrhs,
780 $ b( part_offset+odd_size-bwl+1 ), lldb,
781 $ work( 1 ), max_bw )
783 CALL dtrmm(
'L',
'U',
'N',
'N', bwl, nrhs, -one,
784 $ a( ( ofst+( bwl+bwu+1 )+( odd_size-bwl )*
785 $ llda ) ), llda-1, work( 1 ), max_bw )
787 CALL dmatadd( bwl, nrhs, one, work( 1 ), max_bw, one,
788 $ b( part_offset+odd_size+1 ), lldb )
794 DO 10 idum1 = 1, work_size_min
799 IF( mycol.NE.0 )
THEN
804 CALL dgemm(
'N',
'N', bwu, nrhs, odd_size, -one, af( 1 ),
805 $ bwu, b( part_offset+1 ), lldb, zero,
806 $ work( 1+max_bw-bwu ), max_bw )
818 IF( mycol.GT.0 )
THEN
820 CALL dgesd2d( ictxt, max_bw, nrhs, work( 1 ), max_bw, 0,
827 IF( mycol.LT.npcol-1 )
THEN
829 CALL dgerv2d( ictxt, max_bw, nrhs, work( 1 ), max_bw, 0,
834 CALL dmatadd( max_bw, nrhs, one, work( 1 ), max_bw, one,
835 $ b( part_offset+odd_size+1 ), lldb )
842 IF( mycol.EQ.npcol-1 )
THEN
857 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
862 IF( mycol-level_dist.GE.0 )
THEN
864 CALL dgerv2d( ictxt, max_bw, nrhs, work( 1 ), max_bw, 0,
867 CALL dmatadd( max_bw, nrhs, one, work( 1 ), max_bw, one,
868 $ b( part_offset+odd_size+1 ), lldb )
874 IF( mycol+level_dist.LT.npcol-1 )
THEN
876 CALL dgerv2d( ictxt, max_bw, nrhs, work( 1 ), max_bw, 0,
879 CALL dmatadd( max_bw, nrhs, one, work( 1 ), max_bw, one,
880 $ b( part_offset+odd_size+1 ), lldb )
884 level_dist = level_dist*2
897 CALL dtbtrs(
'L',
'N',
'U', max_bw,
min( bwl, max_bw-1 ),
898 $ nrhs, af( odd_size*bwu+mbw2+1 ), max_bw+1,
899 $ b( part_offset+odd_size+1 ), lldb, info )
908 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 )
THEN
912 CALL dgemm(
'T',
'N', max_bw, nrhs, max_bw, -one,
913 $ af( ( odd_size )*bwu+1 ), max_bw,
914 $ b( part_offset+odd_size+1 ), lldb, zero,
915 $ work( 1 ), max_bw )
919 CALL dgesd2d( ictxt, max_bw, nrhs, work( 1 ), max_bw, 0,
926 IF( ( mycol / level_dist.GT.0 ) .AND.
927 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
934 CALL dgemm(
'N',
'N', max_bw, nrhs, max_bw, -one,
935 $ af( odd_size*bwu+2*mbw2+1 ), max_bw,
936 $ b( part_offset+odd_size+1 ), lldb, zero,
937 $ work( 1 ), max_bw )
941 CALL dgesd2d( ictxt, max_bw, nrhs, work( 1 ), max_bw, 0,
961 IF( mycol.EQ.npcol-1 )
THEN
969 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
972 level_dist = level_dist*2
978 IF( ( mycol / level_dist.GT.0 ) .AND.
979 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
984 CALL dgerv2d( ictxt, max_bw, nrhs, work( 1 ), max_bw, 0,
991 CALL dgemm(
'T',
'N', max_bw, nrhs, max_bw, -one,
992 $ af( odd_size*bwu+2*mbw2+1 ), max_bw,
993 $ work( 1 ), max_bw, one,
994 $ b( part_offset+odd_size+1 ), lldb )
999 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 )
THEN
1003 CALL dgerv2d( ictxt, max_bw, nrhs, work( 1 ), max_bw, 0,
1004 $ mycol+level_dist )
1008 CALL dgemm(
'N',
'N', max_bw, nrhs, max_bw, -one,
1009 $ af( ( odd_size )*bwu+1 ), max_bw, work( 1 ),
1010 $ max_bw, one, b( part_offset+odd_size+1 ),
1019 CALL dtbtrs(
'L',
'T',
'U', max_bw,
min( bwl, max_bw-1 ),
1020 $ nrhs, af( odd_size*bwu+mbw2+1 ), max_bw+1,
1021 $ b( part_offset+odd_size+1 ), lldb, info )
1023 IF( info.NE.0 )
THEN
1032 IF( level_dist.EQ.1 )
1035 level_dist = level_dist / 2
1039 IF( mycol+level_dist.LT.npcol-1 )
THEN
1041 CALL dgesd2d( ictxt, max_bw, nrhs,
1042 $ b( part_offset+odd_size+1 ), lldb, 0,
1043 $ mycol+level_dist )
1049 IF( mycol-level_dist.GE.0 )
THEN
1051 CALL dgesd2d( ictxt, max_bw, nrhs,
1052 $ b( part_offset+odd_size+1 ), lldb, 0,
1053 $ mycol-level_dist )
1071 IF( mycol.LT.npcol-1 )
THEN
1073 CALL dgesd2d( ictxt, max_bw, nrhs,
1074 $ b( part_offset+odd_size+1 ), lldb, 0,
1081 IF( mycol.GT.0 )
THEN
1083 CALL dgerv2d( ictxt, max_bw, nrhs, work( 1 ), max_bw, 0,
1094 IF( mycol.NE.0 )
THEN
1098 CALL dgemm(
'T',
'N', odd_size, nrhs, bwu, -one, af( 1 ),
1099 $ bwu, work( 1+max_bw-bwu ), max_bw, one,
1100 $ b( part_offset+1 ), lldb )
1105 IF( mycol.LT.np-1 )
THEN
1113 CALL dlamov(
'N', bwl, nrhs, b( part_offset+odd_size+1 ),
1114 $ lldb, work( 1+max_bw-bwl ), max_bw )
1116 CALL dtrmm(
'L',
'U',
'T',
'N', bwl, nrhs, -one,
1117 $ a( ( ofst+( bwl+bwu+1 )+( odd_size-bwl )*
1118 $ llda ) ), llda-1, work( 1+max_bw-bwl ),
1121 CALL dmatadd( bwl, nrhs, one, work( 1+max_bw-bwl ),
1122 $ max_bw, one, b( part_offset+odd_size-bwl+
1129 CALL dtbtrs( uplo,
'T',
'U', odd_size, bwl, nrhs,
1130 $ a( ofst+1+bwu ), llda, b( part_offset+1 ),
1141 IF( lsame( trans,
'T' ) )
THEN
1152 CALL dtbtrs( uplo,
'T',
'N', odd_size, bwu, nrhs,
1153 $ a( ofst+1 ), llda, b( part_offset+1 ), lldb,
1157 IF( mycol.LT.np-1 )
THEN
1165 CALL dlamov(
'N', bwu, nrhs,
1166 $ b( part_offset+odd_size-bwu+1 ), lldb,
1167 $ work( 1 ), max_bw )
1169 CALL dtrmm(
'L',
'L',
'T',
'N', bwu, nrhs, -one,
1170 $ a( ( ofst+1+odd_size*llda ) ), llda-1,
1171 $ work( 1 ), max_bw )
1173 CALL dmatadd( bwu, nrhs, one, work( 1 ), max_bw, one,
1174 $ b( part_offset+odd_size+1 ), lldb )
1180 DO 100 idum1 = 1, work_size_min
1185 IF( mycol.NE.0 )
THEN
1189 CALL dgemm(
'N',
'N', bwl, nrhs, odd_size, -one,
1190 $ af( work_u+1 ), bwl, b( part_offset+1 ),
1191 $ lldb, zero, work( 1+max_bw-bwl ), max_bw )
1202 IF( mycol.GT.0 )
THEN
1204 CALL dgesd2d( ictxt, max_bw, nrhs, work( 1 ), max_bw, 0,
1211 IF( mycol.LT.npcol-1 )
THEN
1213 CALL dgerv2d( ictxt, max_bw, nrhs, work( 1 ), max_bw, 0,
1218 CALL dmatadd( max_bw, nrhs, one, work( 1 ), max_bw, one,
1219 $ b( part_offset+odd_size+1 ), lldb )
1226 IF( mycol.EQ.npcol-1 )
THEN
1241 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
1246 IF( mycol-level_dist.GE.0 )
THEN
1248 CALL dgerv2d( ictxt, max_bw, nrhs, work( 1 ), max_bw, 0,
1249 $ mycol-level_dist )
1251 CALL dmatadd( max_bw, nrhs, one, work( 1 ), max_bw, one,
1252 $ b( part_offset+odd_size+1 ), lldb )
1258 IF( mycol+level_dist.LT.npcol-1 )
THEN
1260 CALL dgerv2d( ictxt, max_bw, nrhs, work( 1 ), max_bw, 0,
1261 $ mycol+level_dist )
1263 CALL dmatadd( max_bw, nrhs, one, work( 1 ), max_bw, one,
1264 $ b( part_offset+odd_size+1 ), lldb )
1268 level_dist = level_dist*2
1281 CALL dtbtrs(
'U',
'T',
'N', max_bw,
min( bwu, max_bw-1 ),
1282 $ nrhs, af( odd_size*bwu+mbw2+1-
min( bwu,
1283 $ max_bw-1 ) ), max_bw+1,
1284 $ b( part_offset+odd_size+1 ), lldb, info )
1286 IF( info.NE.0 )
THEN
1293 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 )
THEN
1297 CALL dgemm(
'T',
'N', max_bw, nrhs, max_bw, -one,
1298 $ af( work_u+( odd_size )*bwl+1 ), max_bw,
1299 $ b( part_offset+odd_size+1 ), lldb, zero,
1300 $ work( 1 ), max_bw )
1304 CALL dgesd2d( ictxt, max_bw, nrhs, work( 1 ), max_bw, 0,
1305 $ mycol+level_dist )
1311 IF( ( mycol / level_dist.GT.0 ) .AND.
1312 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
1319 CALL dgemm(
'N',
'N', max_bw, nrhs, max_bw, -one,
1320 $ af( work_u+odd_size*bwl+2*mbw2+1 ), max_bw,
1321 $ b( part_offset+odd_size+1 ), lldb, zero,
1322 $ work( 1 ), max_bw )
1326 CALL dgesd2d( ictxt, max_bw, nrhs, work( 1 ), max_bw, 0,
1327 $ mycol-level_dist )
1346 IF( mycol.EQ.npcol-1 )
THEN
1354 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
1357 level_dist = level_dist*2
1363 IF( ( mycol / level_dist.GT.0 ) .AND.
1364 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
1369 CALL dgerv2d( ictxt, max_bw, nrhs, work( 1 ), max_bw, 0,
1370 $ mycol-level_dist )
1376 CALL dgemm(
'T',
'N', max_bw, nrhs, max_bw, -one,
1377 $ af( work_u+odd_size*bwl+2*mbw2+1 ), max_bw,
1378 $ work( 1 ), max_bw, one,
1379 $ b( part_offset+odd_size+1 ), lldb )
1384 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 )
THEN
1388 CALL dgerv2d( ictxt, max_bw, nrhs, work( 1 ), max_bw, 0,
1389 $ mycol+level_dist )
1393 CALL dgemm(
'N',
'N', max_bw, nrhs, max_bw, -one,
1394 $ af( work_u+( odd_size )*bwl+1 ), max_bw,
1395 $ work( 1 ), max_bw, one,
1396 $ b( part_offset+odd_size+1 ), lldb )
1404 CALL dtbtrs(
'U',
'N',
'N', max_bw,
min( bwu, max_bw-1 ),
1405 $ nrhs, af( odd_size*bwu+mbw2+1-
min( bwu,
1406 $ max_bw-1 ) ), max_bw+1,
1407 $ b( part_offset+odd_size+1 ), lldb, info )
1409 IF( info.NE.0 )
THEN
1418 IF( level_dist.EQ.1 )
1421 level_dist = level_dist / 2
1425 IF( mycol+level_dist.LT.npcol-1 )
THEN
1427 CALL dgesd2d( ictxt, max_bw, nrhs,
1428 $ b( part_offset+odd_size+1 ), lldb, 0,
1429 $ mycol+level_dist )
1435 IF( mycol-level_dist.GE.0 )
THEN
1437 CALL dgesd2d( ictxt, max_bw, nrhs,
1438 $ b( part_offset+odd_size+1 ), lldb, 0,
1439 $ mycol-level_dist )
1457 IF( mycol.LT.npcol-1 )
THEN
1459 CALL dgesd2d( ictxt, max_bw, nrhs,
1460 $ b( part_offset+odd_size+1 ), lldb, 0,
1467 IF( mycol.GT.0 )
THEN
1469 CALL dgerv2d( ictxt, max_bw, nrhs, work( 1 ), max_bw, 0,
1480 IF( mycol.NE.0 )
THEN
1484 CALL dgemm(
'T',
'N', odd_size, nrhs, bwl, -one,
1485 $ af( work_u+1 ), bwl, work( 1+max_bw-bwl ),
1486 $ max_bw, one, b( part_offset+1 ), lldb )
1491 IF( mycol.LT.np-1 )
THEN
1499 CALL dlamov(
'N', bwu, nrhs, b( part_offset+odd_size+1 ),
1500 $ lldb, work( 1+max_bw-bwu ), max_bw+bwl )
1502 CALL dtrmm(
'L',
'L',
'N',
'N', bwu, nrhs, -one,
1503 $ a( ( ofst+1+odd_size*llda ) ), llda-1,
1504 $ work( 1+max_bw-bwu ), max_bw+bwl )
1506 CALL dmatadd( bwu, nrhs, one, work( 1+max_bw-bwu ),
1507 $ max_bw+bwl, one, b( part_offset+odd_size-
1514 CALL dtbtrs( uplo,
'N',
'N', odd_size, bwu, nrhs,
1515 $ a( ofst+1 ), llda, b( part_offset+1 ), lldb,
1529 IF( ictxt_save.NE.ictxt_new )
THEN
1530 CALL blacs_gridexit( ictxt_new )
1542 work( 1 ) = work_size_min