1 SUBROUTINE pspbtrsv( UPLO, TRANS, N, BW, NRHS, A, JA, DESCA, B,
2 $ IB, DESCB, AF, LAF, WORK, LWORK, INFO )
10 INTEGER BW, IB, INFO, JA, LAF, LWORK, N, NRHS
13 INTEGER DESCA( * ), DESCB( * )
14 REAL A( * ), AF( * ), B( * ), WORK( * )
369 parameter( one = 1.0e+0 )
371 parameter( zero = 0.0e+0 )
373 parameter( int_one = 1 )
374 INTEGER DESCMULT, BIGNUM
375 parameter( descmult = 100, bignum = descmult*descmult )
376 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
377 $ lld_, mb_, m_, nb_, n_, rsrc_
378 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
379 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
380 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
383 INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE,
384 $ idum1, idum2, idum3, ja_new, level_dist, llda,
385 $ lldb, mbw2, mycol, myrow, my_num_cols, nb, np,
386 $ npcol, nprow, np_save, odd_size, ofst,
387 $ part_offset, part_size, return_code, store_m_b,
388 $ store_n_a, work_size_min
391 INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ),
392 $ param_check( 17, 3 )
397 $ sgesd2d, slamov,
smatadd, stbtrs, strmm, strtrs
402 EXTERNAL lsame, numroc
421 IF( return_code.NE.0 )
THEN
427 IF( return_code.NE.0 )
THEN
434 IF( desca_1xp( 2 ).NE.descb_px1( 2 ) )
THEN
442 IF( desca_1xp( 4 ).NE.descb_px1( 4 ) )
THEN
448 IF( desca_1xp( 5 ).NE.descb_px1( 5 ) )
THEN
454 ictxt = desca_1xp( 2 )
455 csrc = desca_1xp( 5 )
457 llda = desca_1xp( 6 )
458 store_n_a = desca_1xp( 3 )
459 lldb = descb_px1( 6 )
460 store_m_b = descb_px1( 3 )
469 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
474 IF( lsame( uplo,
'U' ) )
THEN
476 ELSE IF( lsame( uplo,
'L' ) )
THEN
482 IF( lsame( trans,
'N' ) )
THEN
484 ELSE IF( lsame( trans,
'T' ) )
THEN
486 ELSE IF( lsame( trans,
'C' ) )
THEN
492 IF( lwork.LT.-1 )
THEN
494 ELSE IF( lwork.EQ.-1 )
THEN
504 IF( n+ja-1.GT.store_n_a )
THEN
508 IF( ( bw.GT.n-1 ) .OR. ( bw.LT.0 ) )
THEN
512 IF( llda.LT.( bw+1 ) )
THEN
520 IF( n+ib-1.GT.store_m_b )
THEN
524 IF( lldb.LT.nb )
THEN
540 IF( nprow.NE.1 )
THEN
544 IF( n.GT.np*nb-mod( ja-1, nb ) )
THEN
547 $
'PSPBTRSV, D&C alg.: only 1 block per proc',
552 IF( ( ja+n-1.GT.nb ) .AND. ( nb.LT.2*bw ) )
THEN
554 CALL pxerbla( ictxt,
'PSPBTRSV, D&C alg.: NB too small',
560 work_size_min = bw*nrhs
562 work( 1 ) = work_size_min
564 IF( lwork.LT.work_size_min )
THEN
565 IF( lwork.NE.-1 )
THEN
567 CALL pxerbla( ictxt,
'PSPBTRSV: worksize error', -info )
574 param_check( 17, 1 ) = descb( 5 )
575 param_check( 16, 1 ) = descb( 4 )
576 param_check( 15, 1 ) = descb( 3 )
577 param_check( 14, 1 ) = descb( 2 )
578 param_check( 13, 1 ) = descb( 1 )
579 param_check( 12, 1 ) = ib
580 param_check( 11, 1 ) = desca( 5 )
581 param_check( 10, 1 ) = desca( 4 )
582 param_check( 9, 1 ) = desca( 3 )
583 param_check( 8, 1 ) = desca( 1 )
584 param_check( 7, 1 ) = ja
585 param_check( 6, 1 ) = nrhs
586 param_check( 5, 1 ) = bw
587 param_check( 4, 1 ) = n
588 param_check( 3, 1 ) = idum3
589 param_check( 2, 1 ) = idum2
590 param_check( 1, 1 ) = idum1
592 param_check( 17, 2 ) = 1105
593 param_check( 16, 2 ) = 1104
594 param_check( 15, 2 ) = 1103
595 param_check( 14, 2 ) = 1102
596 param_check( 13, 2 ) = 1101
597 param_check( 12, 2 ) = 10
598 param_check( 11, 2 ) = 805
599 param_check( 10, 2 ) = 804
600 param_check( 9, 2 ) = 803
601 param_check( 8, 2 ) = 801
602 param_check( 7, 2 ) = 7
603 param_check( 6, 2 ) = 5
604 param_check( 5, 2 ) = 4
605 param_check( 4, 2 ) = 3
606 param_check( 3, 2 ) = 14
607 param_check( 2, 2 ) = 2
608 param_check( 1, 2 ) = 1
616 ELSE IF( info.LT.-descmult )
THEN
619 info = -info*descmult
624 CALL globchk( ictxt, 17, param_check, 17, param_check( 1, 3 ),
630 IF( info.EQ.bignum )
THEN
632 ELSE IF( mod( info, descmult ).EQ.0 )
THEN
633 info = -info / descmult
639 CALL pxerbla( ictxt,
'PSPBTRSV', -info )
655 part_offset = nb*( ( ja-1 ) / ( npcol*nb ) )
657 IF( ( mycol-csrc ).LT.( ja-part_offset-1 ) / nb )
THEN
658 part_offset = part_offset + nb
661 IF( mycol.LT.csrc )
THEN
662 part_offset = part_offset - nb
671 first_proc = mod( ( ja-1 ) / nb+csrc, npcol )
675 ja_new = mod( ja-1, nb ) + 1
680 np = ( ja_new+n-2 ) / nb + 1
684 CALL reshape( ictxt, int_one, ictxt_new, int_one, first_proc,
691 desca_1xp( 2 ) = ictxt_new
692 descb_px1( 2 ) = ictxt_new
696 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
700 IF( myrow.LT.0 )
THEN
713 my_num_cols = numroc( n, part_size, mycol, 0, npcol )
717 IF( mycol.EQ.0 )
THEN
718 part_offset = part_offset + mod( ja_new-1, part_size )
719 my_num_cols = my_num_cols - mod( ja_new-1, part_size )
724 ofst = part_offset*llda
728 odd_size = my_num_cols
729 IF( mycol.LT.np-1 )
THEN
730 odd_size = odd_size - bw
737 IF( lsame( uplo,
'L' ) )
THEN
739 IF( lsame( trans,
'N' ) )
THEN
750 CALL stbtrs( uplo,
'N',
'N', odd_size, bw, nrhs,
751 $ a( ofst+1 ), llda, b( part_offset+1 ), lldb,
755 IF( mycol.LT.np-1 )
THEN
763 CALL slamov(
'N', bw, nrhs,
764 $ b( part_offset+odd_size-bw+1 ), lldb,
767 CALL strmm(
'L',
'U',
'N',
'N', bw, nrhs, -one,
768 $ a( ( ofst+( bw+1 )+( odd_size-bw )*llda ) ),
769 $ llda-1, work( 1 ), bw )
771 CALL smatadd( bw, nrhs, one, work( 1 ), bw, one,
772 $ b( part_offset+odd_size+1 ), lldb )
777 IF( mycol.NE.0 )
THEN
781 CALL sgemm(
'T',
'N', bw, nrhs, odd_size, -one, af( 1 ),
782 $ odd_size, b( part_offset+1 ), lldb, zero,
783 $ work( 1+bw-bw ), bw )
794 IF( mycol.GT.0 )
THEN
796 CALL sgesd2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
803 IF( mycol.LT.npcol-1 )
THEN
805 CALL sgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
810 CALL smatadd( bw, nrhs, one, work( 1 ), bw, one,
811 $ b( part_offset+odd_size+1 ), lldb )
818 IF( mycol.EQ.npcol-1 )
THEN
833 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
838 IF( mycol-level_dist.GE.0 )
THEN
840 CALL sgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
843 CALL smatadd( bw, nrhs, one, work( 1 ), bw, one,
844 $ b( part_offset+odd_size+1 ), lldb )
850 IF( mycol+level_dist.LT.npcol-1 )
THEN
852 CALL sgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
855 CALL smatadd( bw, nrhs, one, work( 1 ), bw, one,
856 $ b( part_offset+odd_size+1 ), lldb )
860 level_dist = level_dist*2
873 CALL strtrs(
'L',
'N',
'N', bw, nrhs,
874 $ af( odd_size*bw+mbw2+1 ), bw,
875 $ b( part_offset+odd_size+1 ), lldb, info )
884 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 )
THEN
888 CALL sgemm(
'T',
'N', bw, nrhs, bw, -one,
889 $ af( ( odd_size )*bw+1 ), bw,
890 $ b( part_offset+odd_size+1 ), lldb, zero,
895 CALL sgesd2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
902 IF( ( mycol / level_dist.GT.0 ) .AND.
903 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
910 CALL sgemm(
'N',
'N', bw, nrhs, bw, -one,
911 $ af( odd_size*bw+2*mbw2+1 ), bw,
912 $ b( part_offset+odd_size+1 ), lldb, zero,
917 CALL sgesd2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
937 IF( mycol.EQ.npcol-1 )
THEN
945 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
948 level_dist = level_dist*2
954 IF( ( mycol / level_dist.GT.0 ) .AND.
955 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
960 CALL sgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
967 CALL sgemm(
'T',
'N', bw, nrhs, bw, -one,
968 $ af( odd_size*bw+2*mbw2+1 ), bw, work( 1 ),
969 $ bw, one, b( part_offset+odd_size+1 ), lldb )
974 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 )
THEN
978 CALL sgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
983 CALL sgemm(
'N',
'N', bw, nrhs, bw, -one,
984 $ af( ( odd_size )*bw+1 ), bw, work( 1 ), bw,
985 $ one, b( part_offset+odd_size+1 ), lldb )
993 CALL strtrs(
'L',
'T',
'N', bw, nrhs,
994 $ af( odd_size*bw+mbw2+1 ), bw,
995 $ b( part_offset+odd_size+1 ), lldb, info )
1006 IF( level_dist.EQ.1 )
1009 level_dist = level_dist / 2
1013 IF( mycol+level_dist.LT.npcol-1 )
THEN
1015 CALL sgesd2d( ictxt, bw, nrhs,
1016 $ b( part_offset+odd_size+1 ), lldb, 0,
1017 $ mycol+level_dist )
1023 IF( mycol-level_dist.GE.0 )
THEN
1025 CALL sgesd2d( ictxt, bw, nrhs,
1026 $ b( part_offset+odd_size+1 ), lldb, 0,
1027 $ mycol-level_dist )
1045 IF( mycol.LT.npcol-1 )
THEN
1047 CALL sgesd2d( ictxt, bw, nrhs,
1048 $ b( part_offset+odd_size+1 ), lldb, 0,
1055 IF( mycol.GT.0 )
THEN
1057 CALL sgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
1068 IF( mycol.NE.0 )
THEN
1072 CALL sgemm(
'N',
'N', odd_size, nrhs, bw, -one, af( 1 ),
1073 $ odd_size, work( 1+bw-bw ), bw, one,
1074 $ b( part_offset+1 ), lldb )
1079 IF( mycol.LT.np-1 )
THEN
1087 CALL slamov(
'N', bw, nrhs, b( part_offset+odd_size+1 ),
1088 $ lldb, work( 1+bw-bw ), bw )
1090 CALL strmm(
'L',
'U',
'T',
'N', bw, nrhs, -one,
1091 $ a( ( ofst+( bw+1 )+( odd_size-bw )*llda ) ),
1092 $ llda-1, work( 1+bw-bw ), bw )
1094 CALL smatadd( bw, nrhs, one, work( 1+bw-bw ), bw, one,
1095 $ b( part_offset+odd_size-bw+1 ), lldb )
1101 CALL stbtrs( uplo,
'T',
'N', odd_size, bw, nrhs,
1102 $ a( ofst+1 ), llda, b( part_offset+1 ), lldb,
1113 IF( lsame( trans,
'T' ) )
THEN
1124 CALL stbtrs( uplo,
'T',
'N', odd_size, bw, nrhs,
1125 $ a( ofst+1 ), llda, b( part_offset+1 ), lldb,
1129 IF( mycol.LT.np-1 )
THEN
1137 CALL slamov(
'N', bw, nrhs,
1138 $ b( part_offset+odd_size-bw+1 ), lldb,
1141 CALL strmm(
'L',
'L',
'T',
'N', bw, nrhs, -one,
1142 $ a( ( ofst+1+odd_size*llda ) ), llda-1,
1145 CALL smatadd( bw, nrhs, one, work( 1 ), bw, one,
1146 $ b( part_offset+odd_size+1 ), lldb )
1151 IF( mycol.NE.0 )
THEN
1155 CALL sgemm(
'T',
'N', bw, nrhs, odd_size, -one, af( 1 ),
1156 $ odd_size, b( part_offset+1 ), lldb, zero,
1157 $ work( 1+bw-bw ), bw )
1168 IF( mycol.GT.0 )
THEN
1170 CALL sgesd2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
1177 IF( mycol.LT.npcol-1 )
THEN
1179 CALL sgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
1184 CALL smatadd( bw, nrhs, one, work( 1 ), bw, one,
1185 $ b( part_offset+odd_size+1 ), lldb )
1192 IF( mycol.EQ.npcol-1 )
THEN
1207 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
1212 IF( mycol-level_dist.GE.0 )
THEN
1214 CALL sgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
1215 $ mycol-level_dist )
1217 CALL smatadd( bw, nrhs, one, work( 1 ), bw, one,
1218 $ b( part_offset+odd_size+1 ), lldb )
1224 IF( mycol+level_dist.LT.npcol-1 )
THEN
1226 CALL sgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
1227 $ mycol+level_dist )
1229 CALL smatadd( bw, nrhs, one, work( 1 ), bw, one,
1230 $ b( part_offset+odd_size+1 ), lldb )
1234 level_dist = level_dist*2
1247 CALL strtrs(
'L',
'N',
'N', bw, nrhs,
1248 $ af( odd_size*bw+mbw2+1 ), bw,
1249 $ b( part_offset+odd_size+1 ), lldb, info )
1251 IF( info.NE.0 )
THEN
1258 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 )
THEN
1262 CALL sgemm(
'T',
'N', bw, nrhs, bw, -one,
1263 $ af( ( odd_size )*bw+1 ), bw,
1264 $ b( part_offset+odd_size+1 ), lldb, zero,
1269 CALL sgesd2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
1270 $ mycol+level_dist )
1276 IF( ( mycol / level_dist.GT.0 ) .AND.
1277 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
1284 CALL sgemm(
'N',
'N', bw, nrhs, bw, -one,
1285 $ af( odd_size*bw+2*mbw2+1 ), bw,
1286 $ b( part_offset+odd_size+1 ), lldb, zero,
1291 CALL sgesd2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
1292 $ mycol-level_dist )
1311 IF( mycol.EQ.npcol-1 )
THEN
1319 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
1322 level_dist = level_dist*2
1328 IF( ( mycol / level_dist.GT.0 ) .AND.
1329 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
1334 CALL sgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
1335 $ mycol-level_dist )
1341 CALL sgemm(
'T',
'N', bw, nrhs, bw, -one,
1342 $ af( odd_size*bw+2*mbw2+1 ), bw, work( 1 ),
1343 $ bw, one, b( part_offset+odd_size+1 ), lldb )
1348 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 )
THEN
1352 CALL sgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
1353 $ mycol+level_dist )
1357 CALL sgemm(
'N',
'N', bw, nrhs, bw, -one,
1358 $ af( ( odd_size )*bw+1 ), bw, work( 1 ), bw,
1359 $ one, b( part_offset+odd_size+1 ), lldb )
1367 CALL strtrs(
'L',
'T',
'N', bw, nrhs,
1368 $ af( odd_size*bw+mbw2+1 ), bw,
1369 $ b( part_offset+odd_size+1 ), lldb, info )
1371 IF( info.NE.0 )
THEN
1380 IF( level_dist.EQ.1 )
1383 level_dist = level_dist / 2
1387 IF( mycol+level_dist.LT.npcol-1 )
THEN
1389 CALL sgesd2d( ictxt, bw, nrhs,
1390 $ b( part_offset+odd_size+1 ), lldb, 0,
1391 $ mycol+level_dist )
1397 IF( mycol-level_dist.GE.0 )
THEN
1399 CALL sgesd2d( ictxt, bw, nrhs,
1400 $ b( part_offset+odd_size+1 ), lldb, 0,
1401 $ mycol-level_dist )
1419 IF( mycol.LT.npcol-1 )
THEN
1421 CALL sgesd2d( ictxt, bw, nrhs,
1422 $ b( part_offset+odd_size+1 ), lldb, 0,
1429 IF( mycol.GT.0 )
THEN
1431 CALL sgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
1442 IF( mycol.NE.0 )
THEN
1446 CALL sgemm(
'N',
'N', odd_size, nrhs, bw, -one, af( 1 ),
1447 $ odd_size, work( 1+bw-bw ), bw, one,
1448 $ b( part_offset+1 ), lldb )
1453 IF( mycol.LT.np-1 )
THEN
1461 CALL slamov(
'N', bw, nrhs, b( part_offset+odd_size+1 ),
1462 $ lldb, work( 1+bw-bw ), bw )
1464 CALL strmm(
'L',
'L',
'N',
'N', bw, nrhs, -one,
1465 $ a( ( ofst+1+odd_size*llda ) ), llda-1,
1466 $ work( 1+bw-bw ), bw )
1468 CALL smatadd( bw, nrhs, one, work( 1+bw-bw ), bw, one,
1469 $ b( part_offset+odd_size-bw+1 ), lldb )
1475 CALL stbtrs( uplo,
'N',
'N', odd_size, bw, nrhs,
1476 $ a( ofst+1 ), llda, b( part_offset+1 ), lldb,
1490 IF( ictxt_save.NE.ictxt_new )
THEN
1491 CALL blacs_gridexit( ictxt_new )
1503 work( 1 ) = work_size_min