1 SUBROUTINE pdpbtrsv( UPLO, TRANS, N, BW, NRHS, A, JA, DESCA, B,
2 $ IB, DESCB, AF, LAF, WORK, LWORK, INFO )
10 INTEGER BW, IB, INFO, JA, LAF, LWORK, N, NRHS
13 INTEGER DESCA( * ), DESCB( * )
14 DOUBLE PRECISION A( * ), AF( * ), B( * ), WORK( * )
369 parameter( one = 1.0d+0 )
370 DOUBLE PRECISION ZERO
371 parameter( zero = 0.0d+0 )
373 parameter( int_one = 1 )
374 INTEGER DESCMULT, BIGNUM
375 parameter( descmult = 100, bignum = descmult*descmult )
376 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
377 $ lld_, mb_, m_, nb_, n_, rsrc_
378 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
379 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
380 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
383 INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE,
384 $ idum1, idum2, idum3, ja_new, level_dist, llda,
385 $ lldb, mbw2, mycol, myrow, my_num_cols, nb, np,
386 $ npcol, nprow, np_save, odd_size, ofst,
387 $ part_offset, part_size, return_code, store_m_b,
388 $ store_n_a, work_size_min
391 INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ),
392 $ param_check( 17, 3 )
396 $ dgemm, dgerv2d, dgesd2d, dlamov,
dmatadd,
403 EXTERNAL lsame, numroc
422 IF( return_code.NE.0 )
THEN
428 IF( return_code.NE.0 )
THEN
435 IF( desca_1xp( 2 ).NE.descb_px1( 2 ) )
THEN
443 IF( desca_1xp( 4 ).NE.descb_px1( 4 ) )
THEN
449 IF( desca_1xp( 5 ).NE.descb_px1( 5 ) )
THEN
455 ictxt = desca_1xp( 2 )
456 csrc = desca_1xp( 5 )
458 llda = desca_1xp( 6 )
459 store_n_a = desca_1xp( 3 )
460 lldb = descb_px1( 6 )
461 store_m_b = descb_px1( 3 )
470 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
475 IF( lsame( uplo,
'U' ) )
THEN
477 ELSE IF( lsame( uplo,
'L' ) )
THEN
483 IF( lsame( trans,
'N' ) )
THEN
485 ELSE IF( lsame( trans,
'T' ) )
THEN
487 ELSE IF( lsame( trans,
'C' ) )
THEN
493 IF( lwork.LT.-1 )
THEN
495 ELSE IF( lwork.EQ.-1 )
THEN
505 IF( n+ja-1.GT.store_n_a )
THEN
509 IF( ( bw.GT.n-1 ) .OR. ( bw.LT.0 ) )
THEN
513 IF( llda.LT.( bw+1 ) )
THEN
521 IF( n+ib-1.GT.store_m_b )
THEN
525 IF( lldb.LT.nb )
THEN
541 IF( nprow.NE.1 )
THEN
545 IF( n.GT.np*nb-mod( ja-1, nb ) )
THEN
548 $
'PDPBTRSV, D&C alg.: only 1 block per proc',
553 IF( ( ja+n-1.GT.nb ) .AND. ( nb.LT.2*bw ) )
THEN
555 CALL pxerbla( ictxt,
'PDPBTRSV, D&C alg.: NB too small',
561 work_size_min = bw*nrhs
563 work( 1 ) = work_size_min
565 IF( lwork.LT.work_size_min )
THEN
566 IF( lwork.NE.-1 )
THEN
568 CALL pxerbla( ictxt,
'PDPBTRSV: worksize error', -info )
575 param_check( 17, 1 ) = descb( 5 )
576 param_check( 16, 1 ) = descb( 4 )
577 param_check( 15, 1 ) = descb( 3 )
578 param_check( 14, 1 ) = descb( 2 )
579 param_check( 13, 1 ) = descb( 1 )
580 param_check( 12, 1 ) = ib
581 param_check( 11, 1 ) = desca( 5 )
582 param_check( 10, 1 ) = desca( 4 )
583 param_check( 9, 1 ) = desca( 3 )
584 param_check( 8, 1 ) = desca( 1 )
585 param_check( 7, 1 ) = ja
586 param_check( 6, 1 ) = nrhs
587 param_check( 5, 1 ) = bw
588 param_check( 4, 1 ) = n
589 param_check( 3, 1 ) = idum3
590 param_check( 2, 1 ) = idum2
591 param_check( 1, 1 ) = idum1
593 param_check( 17, 2 ) = 1105
594 param_check( 16, 2 ) = 1104
595 param_check( 15, 2 ) = 1103
596 param_check( 14, 2 ) = 1102
597 param_check( 13, 2 ) = 1101
598 param_check( 12, 2 ) = 10
599 param_check( 11, 2 ) = 805
600 param_check( 10, 2 ) = 804
601 param_check( 9, 2 ) = 803
602 param_check( 8, 2 ) = 801
603 param_check( 7, 2 ) = 7
604 param_check( 6, 2 ) = 5
605 param_check( 5, 2 ) = 4
606 param_check( 4, 2 ) = 3
607 param_check( 3, 2 ) = 14
608 param_check( 2, 2 ) = 2
609 param_check( 1, 2 ) = 1
617 ELSE IF( info.LT.-descmult )
THEN
620 info = -info*descmult
625 CALL globchk( ictxt, 17, param_check, 17, param_check( 1, 3 ),
631 IF( info.EQ.bignum )
THEN
633 ELSE IF( mod( info, descmult ).EQ.0 )
THEN
634 info = -info / descmult
640 CALL pxerbla( ictxt,
'PDPBTRSV', -info )
656 part_offset = nb*( ( ja-1 ) / ( npcol*nb ) )
658 IF( ( mycol-csrc ).LT.( ja-part_offset-1 ) / nb )
THEN
659 part_offset = part_offset + nb
662 IF( mycol.LT.csrc )
THEN
663 part_offset = part_offset - nb
672 first_proc = mod( ( ja-1 ) / nb+csrc, npcol )
676 ja_new = mod( ja-1, nb ) + 1
681 np = ( ja_new+n-2 ) / nb + 1
685 CALL reshape( ictxt, int_one, ictxt_new, int_one, first_proc,
692 desca_1xp( 2 ) = ictxt_new
693 descb_px1( 2 ) = ictxt_new
697 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
701 IF( myrow.LT.0 )
THEN
714 my_num_cols = numroc( n, part_size, mycol, 0, npcol )
718 IF( mycol.EQ.0 )
THEN
719 part_offset = part_offset + mod( ja_new-1, part_size )
720 my_num_cols = my_num_cols - mod( ja_new-1, part_size )
725 ofst = part_offset*llda
729 odd_size = my_num_cols
730 IF( mycol.LT.np-1 )
THEN
731 odd_size = odd_size - bw
738 IF( lsame( uplo,
'L' ) )
THEN
740 IF( lsame( trans,
'N' ) )
THEN
751 CALL dtbtrs( uplo,
'N',
'N', odd_size, bw, nrhs,
752 $ a( ofst+1 ), llda, b( part_offset+1 ), lldb,
756 IF( mycol.LT.np-1 )
THEN
764 CALL dlamov(
'N', bw, nrhs,
765 $ b( part_offset+odd_size-bw+1 ), lldb,
768 CALL dtrmm(
'L',
'U',
'N',
'N', bw, nrhs, -one,
769 $ a( ( ofst+( bw+1 )+( odd_size-bw )*llda ) ),
770 $ llda-1, work( 1 ), bw )
772 CALL dmatadd( bw, nrhs, one, work( 1 ), bw, one,
773 $ b( part_offset+odd_size+1 ), lldb )
778 IF( mycol.NE.0 )
THEN
782 CALL dgemm(
'T',
'N', bw, nrhs, odd_size, -one, af( 1 ),
783 $ odd_size, b( part_offset+1 ), lldb, zero,
784 $ work( 1+bw-bw ), bw )
795 IF( mycol.GT.0 )
THEN
797 CALL dgesd2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
804 IF( mycol.LT.npcol-1 )
THEN
806 CALL dgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
811 CALL dmatadd( bw, nrhs, one, work( 1 ), bw, one,
812 $ b( part_offset+odd_size+1 ), lldb )
819 IF( mycol.EQ.npcol-1 )
THEN
834 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
839 IF( mycol-level_dist.GE.0 )
THEN
841 CALL dgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
844 CALL dmatadd( bw, nrhs, one, work( 1 ), bw, one,
845 $ b( part_offset+odd_size+1 ), lldb )
851 IF( mycol+level_dist.LT.npcol-1 )
THEN
853 CALL dgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
856 CALL dmatadd( bw, nrhs, one, work( 1 ), bw, one,
857 $ b( part_offset+odd_size+1 ), lldb )
861 level_dist = level_dist*2
874 CALL dtrtrs(
'L',
'N',
'N', bw, nrhs,
875 $ af( odd_size*bw+mbw2+1 ), bw,
876 $ b( part_offset+odd_size+1 ), lldb, info )
885 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 )
THEN
889 CALL dgemm(
'T',
'N', bw, nrhs, bw, -one,
890 $ af( ( odd_size )*bw+1 ), bw,
891 $ b( part_offset+odd_size+1 ), lldb, zero,
896 CALL dgesd2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
903 IF( ( mycol / level_dist.GT.0 ) .AND.
904 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
911 CALL dgemm(
'N',
'N', bw, nrhs, bw, -one,
912 $ af( odd_size*bw+2*mbw2+1 ), bw,
913 $ b( part_offset+odd_size+1 ), lldb, zero,
918 CALL dgesd2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
938 IF( mycol.EQ.npcol-1 )
THEN
946 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
949 level_dist = level_dist*2
955 IF( ( mycol / level_dist.GT.0 ) .AND.
956 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
961 CALL dgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
968 CALL dgemm(
'T',
'N', bw, nrhs, bw, -one,
969 $ af( odd_size*bw+2*mbw2+1 ), bw, work( 1 ),
970 $ bw, one, b( part_offset+odd_size+1 ), lldb )
975 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 )
THEN
979 CALL dgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
984 CALL dgemm(
'N',
'N', bw, nrhs, bw, -one,
985 $ af( ( odd_size )*bw+1 ), bw, work( 1 ), bw,
986 $ one, b( part_offset+odd_size+1 ), lldb )
994 CALL dtrtrs(
'L',
'T',
'N', bw, nrhs,
995 $ af( odd_size*bw+mbw2+1 ), bw,
996 $ b( part_offset+odd_size+1 ), lldb, info )
1007 IF( level_dist.EQ.1 )
1010 level_dist = level_dist / 2
1014 IF( mycol+level_dist.LT.npcol-1 )
THEN
1016 CALL dgesd2d( ictxt, bw, nrhs,
1017 $ b( part_offset+odd_size+1 ), lldb, 0,
1018 $ mycol+level_dist )
1024 IF( mycol-level_dist.GE.0 )
THEN
1026 CALL dgesd2d( ictxt, bw, nrhs,
1027 $ b( part_offset+odd_size+1 ), lldb, 0,
1028 $ mycol-level_dist )
1046 IF( mycol.LT.npcol-1 )
THEN
1048 CALL dgesd2d( ictxt, bw, nrhs,
1049 $ b( part_offset+odd_size+1 ), lldb, 0,
1056 IF( mycol.GT.0 )
THEN
1058 CALL dgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
1069 IF( mycol.NE.0 )
THEN
1073 CALL dgemm(
'N',
'N', odd_size, nrhs, bw, -one, af( 1 ),
1074 $ odd_size, work( 1+bw-bw ), bw, one,
1075 $ b( part_offset+1 ), lldb )
1080 IF( mycol.LT.np-1 )
THEN
1088 CALL dlamov(
'N', bw, nrhs, b( part_offset+odd_size+1 ),
1089 $ lldb, work( 1+bw-bw ), bw )
1091 CALL dtrmm(
'L',
'U',
'T',
'N', bw, nrhs, -one,
1092 $ a( ( ofst+( bw+1 )+( odd_size-bw )*llda ) ),
1093 $ llda-1, work( 1+bw-bw ), bw )
1095 CALL dmatadd( bw, nrhs, one, work( 1+bw-bw ), bw, one,
1096 $ b( part_offset+odd_size-bw+1 ), lldb )
1102 CALL dtbtrs( uplo,
'T',
'N', odd_size, bw, nrhs,
1103 $ a( ofst+1 ), llda, b( part_offset+1 ), lldb,
1114 IF( lsame( trans,
'T' ) )
THEN
1125 CALL dtbtrs( uplo,
'T',
'N', odd_size, bw, nrhs,
1126 $ a( ofst+1 ), llda, b( part_offset+1 ), lldb,
1130 IF( mycol.LT.np-1 )
THEN
1138 CALL dlamov(
'N', bw, nrhs,
1139 $ b( part_offset+odd_size-bw+1 ), lldb,
1142 CALL dtrmm(
'L',
'L',
'T',
'N', bw, nrhs, -one,
1143 $ a( ( ofst+1+odd_size*llda ) ), llda-1,
1146 CALL dmatadd( bw, nrhs, one, work( 1 ), bw, one,
1147 $ b( part_offset+odd_size+1 ), lldb )
1152 IF( mycol.NE.0 )
THEN
1156 CALL dgemm(
'T',
'N', bw, nrhs, odd_size, -one, af( 1 ),
1157 $ odd_size, b( part_offset+1 ), lldb, zero,
1158 $ work( 1+bw-bw ), bw )
1169 IF( mycol.GT.0 )
THEN
1171 CALL dgesd2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
1178 IF( mycol.LT.npcol-1 )
THEN
1180 CALL dgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
1185 CALL dmatadd( bw, nrhs, one, work( 1 ), bw, one,
1186 $ b( part_offset+odd_size+1 ), lldb )
1193 IF( mycol.EQ.npcol-1 )
THEN
1208 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
1213 IF( mycol-level_dist.GE.0 )
THEN
1215 CALL dgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
1216 $ mycol-level_dist )
1218 CALL dmatadd( bw, nrhs, one, work( 1 ), bw, one,
1219 $ b( part_offset+odd_size+1 ), lldb )
1225 IF( mycol+level_dist.LT.npcol-1 )
THEN
1227 CALL dgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
1228 $ mycol+level_dist )
1230 CALL dmatadd( bw, nrhs, one, work( 1 ), bw, one,
1231 $ b( part_offset+odd_size+1 ), lldb )
1235 level_dist = level_dist*2
1248 CALL dtrtrs(
'L',
'N',
'N', bw, nrhs,
1249 $ af( odd_size*bw+mbw2+1 ), bw,
1250 $ b( part_offset+odd_size+1 ), lldb, info )
1252 IF( info.NE.0 )
THEN
1259 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 )
THEN
1263 CALL dgemm(
'T',
'N', bw, nrhs, bw, -one,
1264 $ af( ( odd_size )*bw+1 ), bw,
1265 $ b( part_offset+odd_size+1 ), lldb, zero,
1270 CALL dgesd2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
1271 $ mycol+level_dist )
1277 IF( ( mycol / level_dist.GT.0 ) .AND.
1278 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
1285 CALL dgemm(
'N',
'N', bw, nrhs, bw, -one,
1286 $ af( odd_size*bw+2*mbw2+1 ), bw,
1287 $ b( part_offset+odd_size+1 ), lldb, zero,
1292 CALL dgesd2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
1293 $ mycol-level_dist )
1312 IF( mycol.EQ.npcol-1 )
THEN
1320 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
1323 level_dist = level_dist*2
1329 IF( ( mycol / level_dist.GT.0 ) .AND.
1330 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
1335 CALL dgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
1336 $ mycol-level_dist )
1342 CALL dgemm(
'T',
'N', bw, nrhs, bw, -one,
1343 $ af( odd_size*bw+2*mbw2+1 ), bw, work( 1 ),
1344 $ bw, one, b( part_offset+odd_size+1 ), lldb )
1349 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 )
THEN
1353 CALL dgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
1354 $ mycol+level_dist )
1358 CALL dgemm(
'N',
'N', bw, nrhs, bw, -one,
1359 $ af( ( odd_size )*bw+1 ), bw, work( 1 ), bw,
1360 $ one, b( part_offset+odd_size+1 ), lldb )
1368 CALL dtrtrs(
'L',
'T',
'N', bw, nrhs,
1369 $ af( odd_size*bw+mbw2+1 ), bw,
1370 $ b( part_offset+odd_size+1 ), lldb, info )
1372 IF( info.NE.0 )
THEN
1381 IF( level_dist.EQ.1 )
1384 level_dist = level_dist / 2
1388 IF( mycol+level_dist.LT.npcol-1 )
THEN
1390 CALL dgesd2d( ictxt, bw, nrhs,
1391 $ b( part_offset+odd_size+1 ), lldb, 0,
1392 $ mycol+level_dist )
1398 IF( mycol-level_dist.GE.0 )
THEN
1400 CALL dgesd2d( ictxt, bw, nrhs,
1401 $ b( part_offset+odd_size+1 ), lldb, 0,
1402 $ mycol-level_dist )
1420 IF( mycol.LT.npcol-1 )
THEN
1422 CALL dgesd2d( ictxt, bw, nrhs,
1423 $ b( part_offset+odd_size+1 ), lldb, 0,
1430 IF( mycol.GT.0 )
THEN
1432 CALL dgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
1443 IF( mycol.NE.0 )
THEN
1447 CALL dgemm(
'N',
'N', odd_size, nrhs, bw, -one, af( 1 ),
1448 $ odd_size, work( 1+bw-bw ), bw, one,
1449 $ b( part_offset+1 ), lldb )
1454 IF( mycol.LT.np-1 )
THEN
1462 CALL dlamov(
'N', bw, nrhs, b( part_offset+odd_size+1 ),
1463 $ lldb, work( 1+bw-bw ), bw )
1465 CALL dtrmm(
'L',
'L',
'N',
'N', bw, nrhs, -one,
1466 $ a( ( ofst+1+odd_size*llda ) ), llda-1,
1467 $ work( 1+bw-bw ), bw )
1469 CALL dmatadd( bw, nrhs, one, work( 1+bw-bw ), bw, one,
1470 $ b( part_offset+odd_size-bw+1 ), lldb )
1476 CALL dtbtrs( uplo,
'N',
'N', odd_size, bw, nrhs,
1477 $ a( ofst+1 ), llda, b( part_offset+1 ), lldb,
1491 IF( ictxt_save.NE.ictxt_new )
THEN
1492 CALL blacs_gridexit( ictxt_new )
1504 work( 1 ) = work_size_min