1 SUBROUTINE psdttrsv( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA,
2 $ B, IB, DESCB, AF, LAF, WORK, LWORK, INFO )
11 INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS
14 INTEGER DESCA( * ), DESCB( * )
15 REAL AF( * ), B( * ), D( * ), DL( * ), DU( * ),
387 parameter( one = 1.0e+0 )
389 parameter( zero = 0.0e+0 )
391 parameter( int_one = 1 )
392 INTEGER DESCMULT, BIGNUM
393 parameter( descmult = 100, bignum = descmult*descmult )
394 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
395 $ lld_, mb_, m_, nb_, n_, rsrc_
396 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
397 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
398 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
401 INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE,
402 $ idum1, idum2, idum3, ja_new, level_dist, llda,
403 $ lldb, mycol, myrow, my_num_cols, nb, np, npcol,
404 $ nprow, np_save, odd_size, part_offset,
405 $ part_size, return_code, store_m_b, store_n_a,
406 $ temp, work_size_min, work_u
409 INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ),
410 $ param_check( 16, 3 )
415 $ sgemm, sgerv2d, sgesd2d,
smatadd, stbtrs
420 EXTERNAL lsame, numroc
423 INTRINSIC ichar,
min, mod
437 temp = desca( dtype_ )
438 IF( temp.EQ.502 )
THEN
440 desca( dtype_ ) = 501
445 desca( dtype_ ) = temp
447 IF( return_code.NE.0 )
THEN
453 IF( return_code.NE.0 )
THEN
460 IF( desca_1xp( 2 ).NE.descb_px1( 2 ) )
THEN
468 IF( desca_1xp( 4 ).NE.descb_px1( 4 ) )
THEN
474 IF( desca_1xp( 5 ).NE.descb_px1( 5 ) )
THEN
480 ictxt = desca_1xp( 2 )
481 csrc = desca_1xp( 5 )
483 llda = desca_1xp( 6 )
484 store_n_a = desca_1xp( 3 )
485 lldb = descb_px1( 6 )
486 store_m_b = descb_px1( 3 )
491 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
496 IF( lsame( uplo,
'U' ) )
THEN
498 ELSE IF( lsame( uplo,
'L' ) )
THEN
504 IF( lsame( trans,
'N' ) )
THEN
506 ELSE IF( lsame( trans,
'T' ) )
THEN
508 ELSE IF( lsame( trans,
'C' ) )
THEN
514 IF( lwork.LT.-1 )
THEN
516 ELSE IF( lwork.EQ.-1 )
THEN
526 IF( n+ja-1.GT.store_n_a )
THEN
530 IF( n+ib-1.GT.store_m_b )
THEN
534 IF( lldb.LT.nb )
THEN
550 IF( nprow.NE.1 )
THEN
554 IF( n.GT.np*nb-mod( ja-1, nb ) )
THEN
557 $
'PSDTTRSV, D&C alg.: only 1 block per proc',
562 IF( ( ja+n-1.GT.nb ) .AND. ( nb.LT.2*int_one ) )
THEN
564 CALL pxerbla( ictxt,
'PSDTTRSV, D&C alg.: NB too small',
570 work_size_min = int_one*nrhs
572 work( 1 ) = work_size_min
574 IF( lwork.LT.work_size_min )
THEN
575 IF( lwork.NE.-1 )
THEN
577 CALL pxerbla( ictxt,
'PSDTTRSV: worksize error', -info )
584 param_check( 16, 1 ) = descb( 5 )
585 param_check( 15, 1 ) = descb( 4 )
586 param_check( 14, 1 ) = descb( 3 )
587 param_check( 13, 1 ) = descb( 2 )
588 param_check( 12, 1 ) = descb( 1 )
589 param_check( 11, 1 ) = ib
590 param_check( 10, 1 ) = desca( 5 )
591 param_check( 9, 1 ) = desca( 4 )
592 param_check( 8, 1 ) = desca( 3 )
593 param_check( 7, 1 ) = desca( 1 )
594 param_check( 6, 1 ) = ja
595 param_check( 5, 1 ) = nrhs
596 param_check( 4, 1 ) = n
597 param_check( 3, 1 ) = idum3
598 param_check( 2, 1 ) = idum2
599 param_check( 1, 1 ) = idum1
601 param_check( 16, 2 ) = 1205
602 param_check( 15, 2 ) = 1204
603 param_check( 14, 2 ) = 1203
604 param_check( 13, 2 ) = 1202
605 param_check( 12, 2 ) = 1201
606 param_check( 11, 2 ) = 11
607 param_check( 10, 2 ) = 905
608 param_check( 9, 2 ) = 904
609 param_check( 8, 2 ) = 903
610 param_check( 7, 2 ) = 901
611 param_check( 6, 2 ) = 8
612 param_check( 5, 2 ) = 4
613 param_check( 4, 2 ) = 3
614 param_check( 3, 2 ) = 16
615 param_check( 2, 2 ) = 2
616 param_check( 1, 2 ) = 1
624 ELSE IF( info.LT.-descmult )
THEN
627 info = -info*descmult
632 CALL globchk( ictxt, 16, param_check, 16, param_check( 1, 3 ),
638 IF( info.EQ.bignum )
THEN
640 ELSE IF( mod( info, descmult ).EQ.0 )
THEN
641 info = -info / descmult
647 CALL pxerbla( ictxt,
'PSDTTRSV', -info )
663 part_offset = nb*( ( ja-1 ) / ( npcol*nb ) )
665 IF( ( mycol-csrc ).LT.( ja-part_offset-1 ) / nb )
THEN
666 part_offset = part_offset + nb
669 IF( mycol.LT.csrc )
THEN
670 part_offset = part_offset - nb
679 first_proc = mod( ( ja-1 ) / nb+csrc, npcol )
683 ja_new = mod( ja-1, nb ) + 1
688 np = ( ja_new+n-2 ) / nb + 1
692 CALL reshape( ictxt, int_one, ictxt_new, int_one, first_proc,
699 desca_1xp( 2 ) = ictxt_new
700 descb_px1( 2 ) = ictxt_new
704 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
708 IF( myrow.LT.0 )
THEN
721 my_num_cols = numroc( n, part_size, mycol, 0, npcol )
725 IF( mycol.EQ.0 )
THEN
726 part_offset = part_offset + mod( ja_new-1, part_size )
727 my_num_cols = my_num_cols - mod( ja_new-1, part_size )
732 odd_size = my_num_cols
733 IF( mycol.LT.np-1 )
THEN
734 odd_size = odd_size - int_one
739 work_u = int_one*odd_size + 3
745 IF( lsame( uplo,
'L' ) )
THEN
747 IF( lsame( trans,
'N' ) )
THEN
758 CALL sdttrsv( uplo,
'N', odd_size, nrhs,
759 $ dl( part_offset+2 ), d( part_offset+1 ),
760 $ du( part_offset+1 ), b( part_offset+1 ), lldb,
764 IF( mycol.LT.np-1 )
THEN
768 CALL saxpy( nrhs, -dl( part_offset+odd_size+1 ),
769 $ b( part_offset+odd_size ), lldb,
770 $ b( part_offset+odd_size+1 ), lldb )
775 IF( mycol.NE.0 )
THEN
779 CALL sgemm(
'T',
'N', int_one, nrhs, odd_size, -one,
780 $ af( 1 ), odd_size, b( part_offset+1 ), lldb,
781 $ zero, work( 1+int_one-int_one ), int_one )
792 IF( mycol.GT.0 )
THEN
794 CALL sgesd2d( ictxt, int_one, nrhs, work( 1 ), int_one,
801 IF( mycol.LT.npcol-1 )
THEN
803 CALL sgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
808 CALL smatadd( int_one, nrhs, one, work( 1 ), int_one,
809 $ one, b( part_offset+odd_size+1 ), lldb )
816 IF( mycol.EQ.npcol-1 )
THEN
831 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
836 IF( mycol-level_dist.GE.0 )
THEN
838 CALL sgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
839 $ 0, mycol-level_dist )
841 CALL smatadd( int_one, nrhs, one, work( 1 ), int_one,
842 $ one, b( part_offset+odd_size+1 ), lldb )
848 IF( mycol+level_dist.LT.npcol-1 )
THEN
850 CALL sgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
851 $ 0, mycol+level_dist )
853 CALL smatadd( int_one, nrhs, one, work( 1 ), int_one,
854 $ one, b( part_offset+odd_size+1 ), lldb )
858 level_dist = level_dist*2
871 CALL stbtrs(
'L',
'N',
'U', int_one,
872 $
min( int_one, int_one-1 ), nrhs,
873 $ af( odd_size+2 ), int_one+1,
874 $ b( part_offset+odd_size+1 ), lldb, info )
883 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 )
THEN
887 CALL sgemm(
'T',
'N', int_one, nrhs, int_one, -one,
888 $ af( ( odd_size )*int_one+1 ), int_one,
889 $ b( part_offset+odd_size+1 ), lldb, zero,
890 $ work( 1 ), int_one )
894 CALL sgesd2d( ictxt, int_one, nrhs, work( 1 ), int_one,
895 $ 0, mycol+level_dist )
901 IF( ( mycol / level_dist.GT.0 ) .AND.
902 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
909 CALL sgemm(
'N',
'N', int_one, nrhs, int_one, -one,
910 $ af( odd_size*int_one+2+1 ), int_one,
911 $ b( part_offset+odd_size+1 ), lldb, zero,
912 $ work( 1 ), int_one )
916 CALL sgesd2d( ictxt, int_one, nrhs, work( 1 ), int_one,
917 $ 0, mycol-level_dist )
936 IF( mycol.EQ.npcol-1 )
THEN
944 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
947 level_dist = level_dist*2
953 IF( ( mycol / level_dist.GT.0 ) .AND.
954 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
959 CALL sgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
960 $ 0, mycol-level_dist )
966 CALL sgemm(
'T',
'N', int_one, nrhs, int_one, -one,
967 $ af( odd_size*int_one+2+1 ), int_one,
968 $ work( 1 ), int_one, one,
969 $ b( part_offset+odd_size+1 ), lldb )
974 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 )
THEN
978 CALL sgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
979 $ 0, mycol+level_dist )
983 CALL sgemm(
'N',
'N', int_one, nrhs, int_one, -one,
984 $ af( ( odd_size )*int_one+1 ), int_one,
985 $ work( 1 ), int_one, one,
986 $ b( part_offset+odd_size+1 ), lldb )
994 CALL stbtrs(
'L',
'T',
'U', int_one,
995 $
min( int_one, int_one-1 ), nrhs,
996 $ af( odd_size+2 ), int_one+1,
997 $ b( part_offset+odd_size+1 ), lldb, info )
1008 IF( level_dist.EQ.1 )
1011 level_dist = level_dist / 2
1015 IF( mycol+level_dist.LT.npcol-1 )
THEN
1017 CALL sgesd2d( ictxt, int_one, nrhs,
1018 $ b( part_offset+odd_size+1 ), lldb, 0,
1019 $ mycol+level_dist )
1025 IF( mycol-level_dist.GE.0 )
THEN
1027 CALL sgesd2d( ictxt, int_one, nrhs,
1028 $ b( part_offset+odd_size+1 ), lldb, 0,
1029 $ mycol-level_dist )
1047 IF( mycol.LT.npcol-1 )
THEN
1049 CALL sgesd2d( ictxt, int_one, nrhs,
1050 $ b( part_offset+odd_size+1 ), lldb, 0,
1057 IF( mycol.GT.0 )
THEN
1059 CALL sgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
1070 IF( mycol.NE.0 )
THEN
1074 CALL sgemm(
'N',
'N', odd_size, nrhs, int_one, -one,
1075 $ af( 1 ), odd_size, work( 1+int_one-int_one ),
1076 $ int_one, one, b( part_offset+1 ), lldb )
1081 IF( mycol.LT.np-1 )
THEN
1085 CALL saxpy( nrhs, -( dl( part_offset+odd_size+1 ) ),
1086 $ b( part_offset+odd_size+1 ), lldb,
1087 $ b( part_offset+odd_size ), lldb )
1093 CALL sdttrsv( uplo,
'T', odd_size, nrhs,
1094 $ dl( part_offset+2 ), d( part_offset+1 ),
1095 $ du( part_offset+1 ), b( part_offset+1 ), lldb,
1106 IF( lsame( trans,
'T' ) )
THEN
1117 CALL sdttrsv( uplo,
'T', odd_size, nrhs,
1118 $ dl( part_offset+2 ), d( part_offset+1 ),
1119 $ du( part_offset+1 ), b( part_offset+1 ), lldb,
1123 IF( mycol.LT.np-1 )
THEN
1127 CALL saxpy( nrhs, -( du( part_offset+odd_size ) ),
1128 $ b( part_offset+odd_size ), lldb,
1129 $ b( part_offset+odd_size+1 ), lldb )
1134 IF( mycol.NE.0 )
THEN
1138 CALL sgemm(
'T',
'N', int_one, nrhs, odd_size, -one,
1139 $ af( work_u+1 ), odd_size, b( part_offset+1 ),
1140 $ lldb, zero, work( 1+int_one-int_one ),
1152 IF( mycol.GT.0 )
THEN
1154 CALL sgesd2d( ictxt, int_one, nrhs, work( 1 ), int_one,
1161 IF( mycol.LT.npcol-1 )
THEN
1163 CALL sgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
1168 CALL smatadd( int_one, nrhs, one, work( 1 ), int_one,
1169 $ one, b( part_offset+odd_size+1 ), lldb )
1176 IF( mycol.EQ.npcol-1 )
THEN
1191 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
1196 IF( mycol-level_dist.GE.0 )
THEN
1198 CALL sgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
1199 $ 0, mycol-level_dist )
1201 CALL smatadd( int_one, nrhs, one, work( 1 ), int_one,
1202 $ one, b( part_offset+odd_size+1 ), lldb )
1208 IF( mycol+level_dist.LT.npcol-1 )
THEN
1210 CALL sgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
1211 $ 0, mycol+level_dist )
1213 CALL smatadd( int_one, nrhs, one, work( 1 ), int_one,
1214 $ one, b( part_offset+odd_size+1 ), lldb )
1218 level_dist = level_dist*2
1231 CALL stbtrs(
'U',
'T',
'N', int_one,
1232 $
min( int_one, int_one-1 ), nrhs,
1233 $ af( odd_size+2 ), int_one+1,
1234 $ b( part_offset+odd_size+1 ), lldb, info )
1236 IF( info.NE.0 )
THEN
1243 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 )
THEN
1247 CALL sgemm(
'T',
'N', int_one, nrhs, int_one, -one,
1248 $ af( work_u+( odd_size )*int_one+1 ), int_one,
1249 $ b( part_offset+odd_size+1 ), lldb, zero,
1250 $ work( 1 ), int_one )
1254 CALL sgesd2d( ictxt, int_one, nrhs, work( 1 ), int_one,
1255 $ 0, mycol+level_dist )
1261 IF( ( mycol / level_dist.GT.0 ) .AND.
1262 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
1269 CALL sgemm(
'N',
'N', int_one, nrhs, int_one, -one,
1270 $ af( work_u+odd_size*int_one+2+1 ), int_one,
1271 $ b( part_offset+odd_size+1 ), lldb, zero,
1272 $ work( 1 ), int_one )
1276 CALL sgesd2d( ictxt, int_one, nrhs, work( 1 ), int_one,
1277 $ 0, mycol-level_dist )
1296 IF( mycol.EQ.npcol-1 )
THEN
1304 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
1307 level_dist = level_dist*2
1313 IF( ( mycol / level_dist.GT.0 ) .AND.
1314 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
1319 CALL sgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
1320 $ 0, mycol-level_dist )
1326 CALL sgemm(
'T',
'N', int_one, nrhs, int_one, -one,
1327 $ af( work_u+odd_size*int_one+2+1 ), int_one,
1328 $ work( 1 ), int_one, one,
1329 $ b( part_offset+odd_size+1 ), lldb )
1334 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 )
THEN
1338 CALL sgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
1339 $ 0, mycol+level_dist )
1343 CALL sgemm(
'N',
'N', int_one, nrhs, int_one, -one,
1344 $ af( work_u+( odd_size )*int_one+1 ), int_one,
1345 $ work( 1 ), int_one, one,
1346 $ b( part_offset+odd_size+1 ), lldb )
1354 CALL stbtrs(
'U',
'N',
'N', int_one,
1355 $
min( int_one, int_one-1 ), nrhs,
1356 $ af( odd_size+2 ), int_one+1,
1357 $ b( part_offset+odd_size+1 ), lldb, info )
1359 IF( info.NE.0 )
THEN
1368 IF( level_dist.EQ.1 )
1371 level_dist = level_dist / 2
1375 IF( mycol+level_dist.LT.npcol-1 )
THEN
1377 CALL sgesd2d( ictxt, int_one, nrhs,
1378 $ b( part_offset+odd_size+1 ), lldb, 0,
1379 $ mycol+level_dist )
1385 IF( mycol-level_dist.GE.0 )
THEN
1387 CALL sgesd2d( ictxt, int_one, nrhs,
1388 $ b( part_offset+odd_size+1 ), lldb, 0,
1389 $ mycol-level_dist )
1407 IF( mycol.LT.npcol-1 )
THEN
1409 CALL sgesd2d( ictxt, int_one, nrhs,
1410 $ b( part_offset+odd_size+1 ), lldb, 0,
1417 IF( mycol.GT.0 )
THEN
1419 CALL sgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
1430 IF( mycol.NE.0 )
THEN
1434 CALL sgemm(
'N',
'N', odd_size, nrhs, int_one, -one,
1435 $ af( work_u+1 ), odd_size,
1436 $ work( 1+int_one-int_one ), int_one, one,
1437 $ b( part_offset+1 ), lldb )
1442 IF( mycol.LT.np-1 )
THEN
1446 CALL saxpy( nrhs, -( du( part_offset+odd_size ) ),
1447 $ b( part_offset+odd_size+1 ), lldb,
1448 $ b( part_offset+odd_size ), lldb )
1454 CALL sdttrsv( uplo,
'N', odd_size, nrhs,
1455 $ du( part_offset+2 ), d( part_offset+1 ),
1456 $ du( part_offset+1 ), b( part_offset+1 ), lldb,
1470 IF( ictxt_save.NE.ictxt_new )
THEN
1471 CALL blacs_gridexit( ictxt_new )
1483 work( 1 ) = work_size_min