1 SUBROUTINE pcdbtrsv( UPLO, TRANS, N, BWL, BWU, NRHS, A, JA, DESCA,
2 $ B, IB, DESCB, AF, LAF, WORK, LWORK, INFO )
10 INTEGER BWL, BWU, IB, INFO, JA, LAF, LWORK, N, NRHS
13 INTEGER DESCA( * ), DESCB( * )
14 COMPLEX A( * ), AF( * ), B( * ), WORK( * )
374 parameter( one = 1.0e+0 )
375 parameter( zero = 0.0e+0 )
377 parameter( cone = ( 1.0e+0, 0.0e+0 ) )
378 parameter( czero = ( 0.0e+0, 0.0e+0 ) )
380 parameter( int_one = 1 )
381 INTEGER DESCMULT, BIGNUM
382 parameter(descmult = 100, bignum = descmult * descmult)
383 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
384 $ lld_, mb_, m_, nb_, n_, rsrc_
385 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
386 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
387 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
390 INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE,
391 $ idum1, idum2, idum3, ja_new, level_dist, llda,
392 $ lldb, max_bw, mbw2, mycol, myrow, my_num_cols,
393 $ nb, np, npcol, nprow, np_save, odd_size, ofst,
394 $ part_offset, part_size, return_code, store_m_b,
395 $ store_n_a, work_size_min, work_u
398 INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ),
399 $ param_check( 18, 3 )
402 EXTERNAL blacs_get, blacs_gridexit, blacs_gridinfo,
403 $ cgemm, cgerv2d, cgesd2d, clamov,
cmatadd,
410 EXTERNAL lsame, numroc
413 INTRINSIC ichar,
min, mod
429 IF( return_code .NE. 0)
THEN
430 info = -( 9*100 + 2 )
435 IF( return_code .NE. 0)
THEN
436 info = -( 12*100 + 2 )
442 IF( desca_1xp( 2 ) .NE. descb_px1( 2 ) )
THEN
443 info = -( 12*100 + 2 )
450 IF( desca_1xp( 4 ) .NE. descb_px1( 4 ) )
THEN
451 info = -( 12*100 + 4 )
456 IF( desca_1xp( 5 ) .NE. descb_px1( 5 ) )
THEN
457 info = -( 12*100 + 5 )
462 ictxt = desca_1xp( 2 )
463 csrc = desca_1xp( 5 )
465 llda = desca_1xp( 6 )
466 store_n_a = desca_1xp( 3 )
467 lldb = descb_px1( 6 )
468 store_m_b = descb_px1( 3 )
475 max_bw =
max(bwl,bwu)
476 mbw2 = max_bw * max_bw
478 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
483 IF( lsame( uplo,
'U' ) )
THEN
485 ELSE IF ( lsame( uplo,
'L' ) )
THEN
491 IF( lsame( trans,
'N' ) )
THEN
493 ELSE IF ( lsame( trans,
'C' ) )
THEN
499 IF( lwork .LT. -1)
THEN
501 ELSE IF ( lwork .EQ. -1 )
THEN
511 IF( n+ja-1 .GT. store_n_a )
THEN
512 info = -( 9*100 + 6 )
515 IF(( bwl .GT. n-1 ) .OR.
516 $ ( bwl .LT. 0 ) )
THEN
520 IF(( bwu .GT. n-1 ) .OR.
521 $ ( bwu .LT. 0 ) )
THEN
525 IF( llda .LT. (bwl+bwu+1) )
THEN
526 info = -( 9*100 + 6 )
530 info = -( 9*100 + 4 )
533 IF( n+ib-1 .GT. store_m_b )
THEN
534 info = -( 12*100 + 3 )
537 IF( lldb .LT. nb )
THEN
538 info = -( 12*100 + 6 )
541 IF( nrhs .LT. 0 )
THEN
553 IF( nprow .NE. 1 )
THEN
557 IF( n .GT. np*nb-mod( ja-1, nb ))
THEN
560 $
'PCDBTRSV, D&C alg.: only 1 block per proc',
565 IF((ja+n-1.GT.nb) .AND. ( nb.LT.2*
max(bwl,bwu) ))
THEN
568 $
'PCDBTRSV, D&C alg.: NB too small',
577 work( 1 ) = work_size_min
579 IF( lwork .LT. work_size_min )
THEN
580 IF( lwork .NE. -1 )
THEN
583 $
'PCDBTRSV: worksize error',
591 param_check( 18, 1 ) = descb(5)
592 param_check( 17, 1 ) = descb(4)
593 param_check( 16, 1 ) = descb(3)
594 param_check( 15, 1 ) = descb(2)
595 param_check( 14, 1 ) = descb(1)
596 param_check( 13, 1 ) = ib
597 param_check( 12, 1 ) = desca(5)
598 param_check( 11, 1 ) = desca(4)
599 param_check( 10, 1 ) = desca(3)
600 param_check( 9, 1 ) = desca(1)
601 param_check( 8, 1 ) = ja
602 param_check( 7, 1 ) = nrhs
603 param_check( 6, 1 ) = bwu
604 param_check( 5, 1 ) = bwl
605 param_check( 4, 1 ) = n
606 param_check( 3, 1 ) = idum3
607 param_check( 2, 1 ) = idum2
608 param_check( 1, 1 ) = idum1
610 param_check( 18, 2 ) = 1205
611 param_check( 17, 2 ) = 1204
612 param_check( 16, 2 ) = 1203
613 param_check( 15, 2 ) = 1202
614 param_check( 14, 2 ) = 1201
615 param_check( 13, 2 ) = 11
616 param_check( 12, 2 ) = 905
617 param_check( 11, 2 ) = 904
618 param_check( 10, 2 ) = 903
619 param_check( 9, 2 ) = 901
620 param_check( 8, 2 ) = 8
621 param_check( 7, 2 ) = 6
622 param_check( 6, 2 ) = 5
623 param_check( 5, 2 ) = 4
624 param_check( 4, 2 ) = 3
625 param_check( 3, 2 ) = 16
626 param_check( 2, 2 ) = 2
627 param_check( 1, 2 ) = 1
635 ELSE IF( info.LT.-descmult )
THEN
638 info = -info * descmult
643 CALL globchk( ictxt, 18, param_check, 18,
644 $ param_check( 1, 3 ), info )
649 IF( info.EQ.bignum )
THEN
651 ELSE IF( mod( info, descmult ) .EQ. 0 )
THEN
652 info = -info / descmult
658 CALL pxerbla( ictxt,
'PCDBTRSV', -info )
674 part_offset = nb*( (ja-1)/(npcol*nb) )
676 IF ( (mycol-csrc) .LT. (ja-part_offset-1)/nb )
THEN
677 part_offset = part_offset + nb
680 IF ( mycol .LT. csrc )
THEN
681 part_offset = part_offset - nb
690 first_proc = mod( ( ja-1 )/nb+csrc, npcol )
694 ja_new = mod( ja-1, nb ) + 1
699 np = ( ja_new+n-2 )/nb + 1
703 CALL reshape( ictxt, int_one, ictxt_new, int_one,
704 $ first_proc, int_one, np )
710 desca_1xp( 2 ) = ictxt_new
711 descb_px1( 2 ) = ictxt_new
715 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
719 IF( myrow .LT. 0 )
THEN
732 my_num_cols = numroc( n, part_size, mycol, 0, npcol )
736 IF ( mycol .EQ. 0 )
THEN
737 part_offset = part_offset+mod( ja_new-1, part_size )
738 my_num_cols = my_num_cols - mod(ja_new-1, part_size )
743 ofst = part_offset*llda
747 odd_size = my_num_cols
748 IF ( mycol .LT. np-1 )
THEN
749 odd_size = odd_size - max_bw
754 work_u = bwu*odd_size + 3*mbw2
760 IF ( lsame( uplo,
'L' ) )
THEN
762 IF ( lsame( trans,
'N' ) )
THEN
773 CALL ctbtrs( uplo,
'N',
'U', odd_size,
775 $ a( ofst+1+bwu ), llda,
776 $ b( part_offset+1 ), lldb, info )
779 IF ( mycol .LT. np-1 )
THEN
787 CALL clamov(
'N', bwl, nrhs,
788 $ b( part_offset+odd_size-bwl+1), lldb,
789 $ work( 1 ), max_bw )
791 CALL ctrmm(
'L',
'U',
'N',
'N', bwl, nrhs, -cone,
792 $ a(( ofst+(bwl+bwu+1)+(odd_size-bwl)*llda )),
793 $ llda-1, work( 1 ), max_bw )
795 CALL cmatadd( bwl, nrhs, cone, work( 1 ), max_bw,
796 $ cone, b( part_offset+odd_size+1 ), lldb )
802 DO 10 idum1=1, work_size_min
807 IF ( mycol .NE. 0 )
THEN
811 CALL cgemm(
'C',
'N', bwu, nrhs, odd_size, -cone, af( 1 ),
812 $ odd_size, b( part_offset+1 ), lldb, czero,
813 $ work( 1+max_bw-bwu ), max_bw )
824 IF( mycol .GT. 0)
THEN
826 CALL cgesd2d( ictxt, max_bw, nrhs,
834 IF( mycol .LT. npcol-1)
THEN
836 CALL cgerv2d( ictxt, max_bw, nrhs,
842 CALL cmatadd( max_bw, nrhs, cone,
843 $ work( 1 ), max_bw, cone,
844 $ b( part_offset+odd_size + 1 ), lldb )
851 IF( mycol .EQ. npcol-1 )
THEN
866 IF( mod( (mycol+1)/level_dist, 2) .NE. 0 )
GOTO 11
870 IF( mycol-level_dist .GE. 0 )
THEN
872 CALL cgerv2d( ictxt, max_bw, nrhs,
874 $ max_bw, 0, mycol-level_dist )
876 CALL cmatadd( max_bw, nrhs, cone,
877 $ work( 1 ), max_bw, cone,
878 $ b( part_offset+odd_size + 1 ), lldb )
884 IF( mycol+level_dist .LT. npcol-1 )
THEN
886 CALL cgerv2d( ictxt, max_bw, nrhs,
888 $ max_bw, 0, mycol+level_dist )
890 CALL cmatadd( max_bw, nrhs, cone,
891 $ work( 1 ), max_bw, cone,
892 $ b( part_offset+odd_size + 1 ), lldb )
896 level_dist = level_dist*2
909 CALL ctbtrs(
'L',
'N',
'U', max_bw,
min( bwl, max_bw-1 ), nrhs,
910 $ af( odd_size*bwu+mbw2+1 ), max_bw+1,
911 $ b( part_offset+odd_size+1 ), lldb, info )
920 IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )
THEN
924 CALL cgemm(
'C',
'N', max_bw, nrhs, max_bw, -cone,
925 $ af( (odd_size)*bwu+1 ),
927 $ b( part_offset+odd_size+1 ),
934 CALL cgesd2d( ictxt, max_bw, nrhs,
936 $ max_bw, 0, mycol+level_dist )
942 IF( (mycol/level_dist .GT. 0 ).AND.
943 $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) )
THEN
949 CALL cgemm(
'N',
'N', max_bw, nrhs, max_bw, -cone,
950 $ af( odd_size*bwu+2*mbw2+1 ),
952 $ b( part_offset+odd_size+1 ),
959 CALL cgesd2d( ictxt, max_bw, nrhs,
961 $ max_bw, 0, mycol-level_dist )
980 IF( mycol .EQ. npcol-1 )
THEN
988 IF( mod( (mycol+1)/level_dist, 2) .NE. 0 )
GOTO 26
990 level_dist = level_dist*2
996 IF( (mycol/level_dist .GT. 0 ).AND.
997 $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) )
THEN
1001 CALL cgerv2d( ictxt, max_bw, nrhs,
1003 $ max_bw, 0, mycol-level_dist )
1009 CALL cgemm(
'C',
'N', max_bw, nrhs, max_bw, -cone,
1010 $ af( odd_size*bwu+2*mbw2+1 ),
1014 $ b( part_offset+odd_size+1 ),
1020 IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )
THEN
1024 CALL cgerv2d( ictxt, max_bw, nrhs,
1026 $ max_bw, 0, mycol+level_dist )
1030 CALL cgemm(
'N',
'N', max_bw, nrhs, max_bw, -cone,
1031 $ af( (odd_size)*bwu+1 ),
1035 $ b( part_offset+odd_size+1 ),
1044 CALL ctbtrs(
'L',
'C',
'U', max_bw,
min( bwl, max_bw-1 ), nrhs,
1045 $ af( odd_size*bwu+mbw2+1 ), max_bw+1,
1046 $ b( part_offset+odd_size+1 ), lldb, info )
1048 IF( info.NE.0 )
THEN
1057 IF( level_dist .EQ. 1 )
GOTO 21
1059 level_dist = level_dist/2
1063 IF( mycol+level_dist .LT. npcol-1 )
THEN
1065 CALL cgesd2d( ictxt, max_bw, nrhs,
1066 $ b( part_offset+odd_size+1 ),
1067 $ lldb, 0, mycol+level_dist )
1073 IF( mycol-level_dist .GE. 0 )
THEN
1075 CALL cgesd2d( ictxt, max_bw, nrhs,
1076 $ b( part_offset+odd_size+1 ),
1077 $ lldb, 0, mycol-level_dist )
1095 IF( mycol .LT. npcol-1)
THEN
1097 CALL cgesd2d( ictxt, max_bw, nrhs,
1098 $ b( part_offset+odd_size+1 ), lldb,
1105 IF( mycol .GT. 0)
THEN
1107 CALL cgerv2d( ictxt, max_bw, nrhs,
1108 $ work( 1 ), max_bw,
1119 IF ( mycol .NE. 0 )
THEN
1123 CALL cgemm(
'N',
'N', odd_size, nrhs, bwu, -cone, af( 1 ),
1124 $ odd_size, work( 1+max_bw-bwu ), max_bw, cone,
1125 $ b( part_offset+1 ), lldb )
1130 IF ( mycol .LT. np-1 )
THEN
1138 CALL clamov(
'N', bwl, nrhs, b( part_offset+odd_size+1), lldb,
1139 $ work( 1+max_bw-bwl ), max_bw )
1141 CALL ctrmm(
'L',
'U',
'C',
'N', bwl, nrhs, -cone,
1142 $ a(( ofst+(bwl+bwu+1)+(odd_size-bwl)*llda )),
1143 $ llda-1, work( 1+max_bw-bwl ), max_bw )
1145 CALL cmatadd( bwl, nrhs, cone, work( 1+max_bw-bwl ), max_bw,
1146 $ cone, b( part_offset+odd_size-bwl+1 ), lldb )
1152 CALL ctbtrs( uplo,
'C',
'U', odd_size,
1155 $ llda, b( part_offset+1 ),
1166 IF ( lsame( trans,
'C' ) )
THEN
1177 CALL ctbtrs( uplo,
'C',
'N', odd_size,
1179 $ a( ofst+1 ), llda,
1180 $ b( part_offset+1 ), lldb, info )
1183 IF ( mycol .LT. np-1 )
THEN
1191 CALL clamov(
'N', bwu, nrhs,
1192 $ b( part_offset+odd_size-bwu+1), lldb,
1193 $ work( 1 ), max_bw )
1195 CALL ctrmm(
'L',
'L',
'C',
'N', bwu, nrhs, -cone,
1196 $ a(( ofst+1+odd_size*llda )), llda-1, work( 1 ),
1199 CALL cmatadd( bwu, nrhs, cone, work( 1 ), max_bw,
1200 $ cone, b( part_offset+odd_size+1 ), lldb )
1206 DO 20 idum1=1, work_size_min
1211 IF ( mycol .NE. 0 )
THEN
1215 CALL cgemm(
'C',
'N', bwl, nrhs, odd_size, -cone,
1216 $ af( work_u+1 ), odd_size, b( part_offset+1 ),
1217 $ lldb, czero, work( 1+max_bw-bwl ), max_bw )
1228 IF( mycol .GT. 0)
THEN
1230 CALL cgesd2d( ictxt, max_bw, nrhs,
1231 $ work( 1 ), max_bw,
1238 IF( mycol .LT. npcol-1)
THEN
1240 CALL cgerv2d( ictxt, max_bw, nrhs,
1241 $ work( 1 ), max_bw,
1246 CALL cmatadd( max_bw, nrhs, cone,
1247 $ work( 1 ), max_bw, cone,
1248 $ b( part_offset+odd_size + 1 ), lldb )
1255 IF( mycol .EQ. npcol-1 )
THEN
1270 IF( mod( (mycol+1)/level_dist, 2) .NE. 0 )
GOTO 41
1274 IF( mycol-level_dist .GE. 0 )
THEN
1276 CALL cgerv2d( ictxt, max_bw, nrhs,
1278 $ max_bw, 0, mycol-level_dist )
1280 CALL cmatadd( max_bw, nrhs, cone,
1281 $ work( 1 ), max_bw, cone,
1282 $ b( part_offset+odd_size + 1 ), lldb )
1288 IF( mycol+level_dist .LT. npcol-1 )
THEN
1290 CALL cgerv2d( ictxt, max_bw, nrhs,
1292 $ max_bw, 0, mycol+level_dist )
1294 CALL cmatadd( max_bw, nrhs, cone,
1295 $ work( 1 ), max_bw, cone,
1296 $ b( part_offset+odd_size + 1 ), lldb )
1300 level_dist = level_dist*2
1313 CALL ctbtrs(
'U',
'C',
'N', max_bw,
min( bwu, max_bw-1 ), nrhs,
1314 $ af( odd_size*bwu+mbw2+1-
min( bwu, max_bw-1 ) ),
1315 $ max_bw+1, b( part_offset+odd_size+1 ), lldb,
1318 IF( info.NE.0 )
THEN
1325 IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )
THEN
1329 CALL cgemm(
'C',
'N', max_bw, nrhs, max_bw, -cone,
1330 $ af( work_u+(odd_size)*bwl+1 ),
1332 $ b( part_offset+odd_size+1 ),
1339 CALL cgesd2d( ictxt, max_bw, nrhs,
1341 $ max_bw, 0, mycol+level_dist )
1347 IF( (mycol/level_dist .GT. 0 ).AND.
1348 $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) )
THEN
1354 CALL cgemm(
'N',
'N', max_bw, nrhs, max_bw, -cone,
1355 $ af( work_u+odd_size*bwl+2*mbw2+1 ),
1357 $ b( part_offset+odd_size+1 ),
1364 CALL cgesd2d( ictxt, max_bw, nrhs,
1366 $ max_bw, 0, mycol-level_dist )
1385 IF( mycol .EQ. npcol-1 )
THEN
1393 IF( mod( (mycol+1)/level_dist, 2) .NE. 0 )
GOTO 56
1395 level_dist = level_dist*2
1401 IF( (mycol/level_dist .GT. 0 ).AND.
1402 $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) )
THEN
1406 CALL cgerv2d( ictxt, max_bw, nrhs,
1408 $ max_bw, 0, mycol-level_dist )
1414 CALL cgemm(
'C',
'N', max_bw, nrhs, max_bw, -cone,
1415 $ af( work_u+odd_size*bwl+2*mbw2+1 ),
1419 $ b( part_offset+odd_size+1 ),
1425 IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )
THEN
1429 CALL cgerv2d( ictxt, max_bw, nrhs,
1431 $ max_bw, 0, mycol+level_dist )
1435 CALL cgemm(
'N',
'N', max_bw, nrhs, max_bw, -cone,
1436 $ af( work_u+(odd_size)*bwl+1 ),
1440 $ b( part_offset+odd_size+1 ),
1449 CALL ctbtrs(
'U',
'N',
'N', max_bw,
min( bwu, max_bw-1 ), nrhs,
1450 $ af( odd_size*bwu+mbw2+1-
min( bwu, max_bw-1 ) ),
1451 $ max_bw+1, b( part_offset+odd_size+1 ), lldb,
1454 IF( info.NE.0 )
THEN
1463 IF( level_dist .EQ. 1 )
GOTO 51
1465 level_dist = level_dist/2
1469 IF( mycol+level_dist .LT. npcol-1 )
THEN
1471 CALL cgesd2d( ictxt, max_bw, nrhs,
1472 $ b( part_offset+odd_size+1 ),
1473 $ lldb, 0, mycol+level_dist )
1479 IF( mycol-level_dist .GE. 0 )
THEN
1481 CALL cgesd2d( ictxt, max_bw, nrhs,
1482 $ b( part_offset+odd_size+1 ),
1483 $ lldb, 0, mycol-level_dist )
1501 IF( mycol .LT. npcol-1)
THEN
1503 CALL cgesd2d( ictxt, max_bw, nrhs,
1504 $ b( part_offset+odd_size+1 ), lldb,
1511 IF( mycol .GT. 0)
THEN
1513 CALL cgerv2d( ictxt, max_bw, nrhs,
1514 $ work( 1 ), max_bw,
1525 IF ( mycol .NE. 0 )
THEN
1529 CALL cgemm(
'N',
'N', odd_size, nrhs, bwl, -cone,
1530 $ af( work_u+1 ), odd_size, work( 1+max_bw-bwl ),
1531 $ max_bw, cone, b( part_offset+1 ), lldb )
1536 IF ( mycol .LT. np-1 )
THEN
1544 CALL clamov(
'N', bwu, nrhs, b( part_offset+odd_size+1), lldb,
1545 $ work( 1+max_bw-bwu ), max_bw+bwl )
1547 CALL ctrmm(
'L',
'L',
'N',
'N', bwu, nrhs, -cone,
1548 $ a(( ofst+1+odd_size*llda )), llda-1,
1549 $ work( 1+max_bw-bwu ), max_bw+bwl )
1551 CALL cmatadd( bwu, nrhs, cone, work( 1+max_bw-bwu ),
1553 $ b( part_offset+odd_size-bwu+1 ), lldb )
1559 CALL ctbtrs( uplo,
'N',
'N', odd_size,
1562 $ llda, b( part_offset+1 ),
1576 IF( ictxt_save .NE. ictxt_new )
THEN
1577 CALL blacs_gridexit( ictxt_new )
1589 work( 1 ) = work_size_min