1 SUBROUTINE psgbtrs( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, IPIV,
2 $ B, IB, DESCB, AF, LAF, WORK, LWORK, INFO )
10 INTEGER BWL, BWU, IB, INFO, JA, LAF, LWORK, N, NRHS
13 INTEGER DESCA( * ), DESCB( * ), IPIV( * )
14 REAL A( * ), AF( * ), B( * ), WORK( * )
369 parameter( one = 1.0e+0 )
371 parameter( zero = 0.0e+0 )
373 parameter( int_one = 1 )
374 INTEGER DESCMULT, BIGNUM
375 parameter( descmult = 100, bignum = descmult*descmult )
376 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
377 $ lld_, mb_, m_, nb_, n_, rsrc_
378 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
379 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
380 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
383 INTEGER APTR, BBPTR, BM, BMN, BN, BNN, BW, CSRC,
384 $ first_proc, ictxt, ictxt_new, ictxt_save,
385 $ idum2, idum3, j, ja_new, l, lbwl, lbwu, ldbb,
386 $ ldw, llda, lldb, lm, lmj, ln, lptr, mycol,
387 $ myrow, nb, neicol, np, npact, npcol, nprow,
388 $ npstr, np_save, odd_size, part_offset,
389 $ recovery_val, return_code, store_m_b,
390 $ store_n_a, work_size_min, wptr
393 INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ),
394 $ param_check( 17, 3 )
397 EXTERNAL blacs_gridexit, blacs_gridinfo, scopy,
399 $ sgesd2d, sgetrs, slamov, slaswp, sscal, sswap,
405 EXTERNAL lsame, numroc
408 INTRINSIC ichar,
max,
min, mod
425 IF( return_code.NE.0 )
THEN
431 IF( return_code.NE.0 )
THEN
438 IF( desca_1xp( 2 ).NE.descb_px1( 2 ) )
THEN
446 IF( desca_1xp( 4 ).NE.descb_px1( 4 ) )
THEN
452 IF( desca_1xp( 5 ).NE.descb_px1( 5 ) )
THEN
458 ictxt = desca_1xp( 2 )
459 csrc = desca_1xp( 5 )
461 llda = desca_1xp( 6 )
462 store_n_a = desca_1xp( 3 )
463 lldb = descb_px1( 6 )
464 store_m_b = descb_px1( 3 )
469 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
474 IF( lsame( trans,
'N' ) )
THEN
476 ELSE IF( lsame( trans,
'T' ) )
THEN
478 ELSE IF( lsame( trans,
'C' ) )
THEN
484 IF( lwork.LT.-1 )
THEN
486 ELSE IF( lwork.EQ.-1 )
THEN
496 IF( n+ja-1.GT.store_n_a )
THEN
500 IF( ( bwl.GT.n-1 ) .OR. ( bwl.LT.0 ) )
THEN
504 IF( ( bwu.GT.n-1 ) .OR. ( bwu.LT.0 ) )
THEN
508 IF( llda.LT.( 2*bwl+2*bwu+1 ) )
THEN
518 IF( n+ib-1.GT.store_m_b )
THEN
522 IF( lldb.LT.nb )
THEN
538 IF( nprow.NE.1 )
THEN
542 IF( n.GT.np*nb-mod( ja-1, nb ) )
THEN
544 CALL pxerbla( ictxt,
'PSGBTRS, D&C alg.: only 1 block per proc'
549 IF( ( ja+n-1.GT.nb ) .AND. ( nb.LT.( bwl+bwu+1 ) ) )
THEN
551 CALL pxerbla( ictxt,
'PSGBTRS, D&C alg.: NB too small', -info )
558 work_size_min = nrhs*( nb+2*bwl+4*bwu )
560 work( 1 ) = work_size_min
562 IF( lwork.LT.work_size_min )
THEN
563 IF( lwork.NE.-1 )
THEN
565 CALL pxerbla( ictxt,
'PSGBTRS: worksize error ', -info )
572 param_check( 17, 1 ) = descb( 5 )
573 param_check( 16, 1 ) = descb( 4 )
574 param_check( 15, 1 ) = descb( 3 )
575 param_check( 14, 1 ) = descb( 2 )
576 param_check( 13, 1 ) = descb( 1 )
577 param_check( 12, 1 ) = ib
578 param_check( 11, 1 ) = desca( 5 )
579 param_check( 10, 1 ) = desca( 4 )
580 param_check( 9, 1 ) = desca( 3 )
581 param_check( 8, 1 ) = desca( 1 )
582 param_check( 7, 1 ) = ja
583 param_check( 6, 1 ) = nrhs
584 param_check( 5, 1 ) = bwu
585 param_check( 4, 1 ) = bwl
586 param_check( 3, 1 ) = n
587 param_check( 2, 1 ) = idum3
588 param_check( 1, 1 ) = idum2
590 param_check( 17, 2 ) = 1105
591 param_check( 16, 2 ) = 1104
592 param_check( 15, 2 ) = 1103
593 param_check( 14, 2 ) = 1102
594 param_check( 13, 2 ) = 1101
595 param_check( 12, 2 ) = 10
596 param_check( 11, 2 ) = 805
597 param_check( 10, 2 ) = 804
598 param_check( 9, 2 ) = 803
599 param_check( 8, 2 ) = 801
600 param_check( 7, 2 ) = 7
601 param_check( 6, 2 ) = 5
602 param_check( 5, 2 ) = 4
603 param_check( 4, 2 ) = 3
604 param_check( 3, 2 ) = 2
605 param_check( 2, 2 ) = 16
606 param_check( 1, 2 ) = 1
614 ELSE IF( info.LT.-descmult )
THEN
617 info = -info*descmult
622 CALL globchk( ictxt, 17, param_check, 17, param_check( 1, 3 ),
628 IF( info.EQ.bignum )
THEN
630 ELSE IF( mod( info, descmult ).EQ.0 )
THEN
631 info = -info / descmult
637 CALL pxerbla( ictxt,
'PSGBTRS', -info )
653 part_offset = nb*( ( ja-1 ) / ( npcol*nb ) )
655 IF( ( mycol-csrc ).LT.( ja-part_offset-1 ) / nb )
THEN
656 part_offset = part_offset + nb
659 IF( mycol.LT.csrc )
THEN
660 part_offset = part_offset - nb
669 first_proc = mod( ( ja-1 ) / nb+csrc, npcol )
673 ja_new = mod( ja-1, nb ) + 1
678 np = ( ja_new+n-2 ) / nb + 1
682 CALL reshape( ictxt, int_one, ictxt_new, int_one, first_proc,
689 desca_1xp( 2 ) = ictxt_new
690 descb_px1( 2 ) = ictxt_new
694 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
698 IF( myrow.LT.0 )
THEN
708 IF( mycol.LT.npcol-1 )
THEN
709 CALL sgesd2d( ictxt, bwu, nrhs, b( nb-bwu+1 ), lldb, 0,
713 IF( mycol.LT.npcol-1 )
THEN
719 IF( mycol.GT.0 )
THEN
725 ldw = nb + bwu + 2*bw + bwu
727 CALL slamov(
'G', lm, nrhs, b( 1 ), lldb, work( wptr ), ldw )
732 DO 10 l = wptr + lm, ldw
733 work( ( j-1 )*ldw+l ) = zero
737 IF( mycol.GT.0 )
THEN
738 CALL sgerv2d( ictxt, bwu, nrhs, work( 1 ), ldw, 0, mycol-1 )
747 odd_size = numroc( n, nb, mycol, 0, npcol )
749 IF( mycol.NE.0 )
THEN
759 IF( mycol.NE.npcol-1 )
THEN
762 ELSE IF( mycol.NE.0 )
THEN
764 ln =
max( odd_size-bw, 0 )
772 lmj =
min( lbwl, lm-j )
776 CALL sswap( nrhs, work( l ), ldw, work( j ), ldw )
779 lptr = bw + 1 + ( j-1 )*llda + aptr
781 CALL sger( lmj, nrhs, -one, a( lptr ), 1, work( j ), ldw,
793 IF( mycol.NE.npcol-1 )
THEN
797 bm =
min( bw, odd_size ) + bwu
798 bn =
min( bw, odd_size )
804 bbptr = ( nb+bwu )*bw + 1
807 IF( npcol.EQ.1 )
THEN
811 CALL sgetrs(
'N', n-ln, nrhs, af( bbptr+bw*ldbb ), ldbb,
812 $ ipiv( ln+1 ), work( ln+1 ), ldw, info )
831 IF( mod( mycol, npstr ).EQ.0 )
THEN
835 IF( mod( mycol, 2*npstr ).EQ.0 )
THEN
837 neicol = mycol + npstr
839 IF( neicol / npstr.LE.npact-1 )
THEN
841 IF( neicol / npstr.LT.npact-1 )
THEN
844 bmn =
min( bw, numroc( n, nb, neicol, 0, npcol ) ) +
848 CALL sgesd2d( ictxt, bm, nrhs, work( ln+1 ), ldw, 0,
851 IF( npact.NE.2 )
THEN
855 CALL sgerv2d( ictxt, bm+bmn-bw, nrhs, work( ln+1 ),
866 neicol = mycol - npstr
868 IF( neicol.EQ.0 )
THEN
874 CALL slamov(
'G', bm, nrhs, work( ln+1 ), ldw,
875 $ work( nb+bwu+bmn+1 ), ldw )
877 CALL sgerv2d( ictxt, bmn, nrhs, work( nb+bwu+1 ), ldw, 0,
882 IF( npact.NE.2 )
THEN
886 CALL slaswp( nrhs, work( nb+bwu+1 ), ldw, 1, bw,
889 CALL strsm(
'L',
'L',
'N',
'U', bw, nrhs, one,
890 $ af( bbptr+bw*ldbb ), ldbb, work( nb+bwu+1 ),
895 CALL sgemm(
'N',
'N', bm+bmn-bw, nrhs, bw, -one,
896 $ af( bbptr+bw*ldbb+bw ), ldbb,
897 $ work( nb+bwu+1 ), ldw, one,
898 $ work( nb+bwu+1+bw ), ldw )
902 CALL sgesd2d( ictxt, bm+bmn-bw, nrhs,
903 $ work( nb+bwu+1+bw ), ldw, 0, neicol )
909 CALL slaswp( nrhs, work( nb+bwu+1 ), ldw, 1, bm+bmn,
912 CALL strsm(
'L',
'L',
'N',
'U', bm+bmn, nrhs, one,
913 $ af( bbptr+bw*ldbb ), ldbb, work( nb+bwu+1 ),
919 npact = ( npact+1 ) / 2
934 IF( npcol.EQ.1 )
THEN
946 recovery_val = npact*npstr - npcol
961 npact = npact - mod( ( recovery_val / npstr ), 2 )
965 IF( mycol / npstr.LT.npact-1 )
THEN
968 bn =
min( bw, numroc( n, nb, npcol-1, 0, npcol ) )
973 IF( mod( mycol, 2*npstr ).EQ.0 )
THEN
975 neicol = mycol + npstr
977 IF( neicol / npstr.LE.npact-1 )
THEN
979 IF( neicol / npstr.LT.npact-1 )
THEN
983 bmn =
min( bw, numroc( n, nb, neicol, 0, npcol ) ) + bwu
984 bnn =
min( bw, numroc( n, nb, neicol, 0, npcol ) )
987 IF( npact.GT.2 )
THEN
989 CALL sgesd2d( ictxt, 2*bw, nrhs, work( ln+1 ), ldw, 0,
992 CALL sgerv2d( ictxt, bw, nrhs, work( ln+1 ), ldw, 0,
997 CALL sgerv2d( ictxt, bw, nrhs, work( ln+1 ), ldw, 0,
1007 neicol = mycol - npstr
1009 IF( neicol.EQ.0 )
THEN
1015 IF( neicol.LT.npcol-1 )
THEN
1018 bnn =
min( bw, numroc( n, nb, neicol, 0, npcol ) )
1021 IF( npact.GT.2 )
THEN
1025 CALL slamov(
'G', bw, nrhs, work( nb+bwu+1 ), ldw,
1026 $ work( nb+bwu+bw+1 ), ldw )
1028 CALL sgerv2d( ictxt, 2*bw, nrhs, work( ln+1 ), ldw, 0,
1031 CALL sgemm(
'N',
'N', bw, nrhs, bn, -one, af( bbptr ), ldbb,
1032 $ work( ln+1 ), ldw, one, work( nb+bwu+bw+1 ),
1036 IF( mycol.GT.npstr )
THEN
1038 CALL sgemm(
'N',
'N', bw, nrhs, bw, -one,
1039 $ af( bbptr+2*bw*ldbb ), ldbb, work( ln+bw+1 ),
1040 $ ldw, one, work( nb+bwu+bw+1 ), ldw )
1044 CALL strsm(
'L',
'U',
'N',
'N', bw, nrhs, one,
1045 $ af( bbptr+bw*ldbb ), ldbb, work( nb+bwu+bw+1 ),
1050 CALL sgesd2d( ictxt, bw, nrhs, work( nb+bwu+bw+1 ), ldw, 0,
1055 CALL slamov(
'G', bw, nrhs, work( nb+bwu+1+bw ), ldw,
1056 $ work( ln+bw+1 ), ldw )
1062 CALL strsm(
'L',
'U',
'N',
'N', bn+bnn, nrhs, one,
1063 $ af( bbptr+bw*ldbb ), ldbb, work( nb+bwu+1 ),
1068 CALL sgesd2d( ictxt, bw, nrhs, work( nb+bwu+1 ), ldw, 0,
1073 CALL slamov(
'G', bnn+bn-bw, nrhs, work( nb+bwu+1+bw ), ldw,
1074 $ work( ln+1 ), ldw )
1077 IF( ( nb+bwu+1 ).NE.( ln+1+bw ) )
THEN
1082 CALL scopy( nrhs, work( nb+bwu+j ), ldw,
1083 $ work( ln+bw+j ), ldw )
1103 IF( mycol.NE.npcol-1 )
THEN
1106 bm =
min( bw, odd_size ) + bwu
1111 IF( mycol.LT.npcol-1 )
THEN
1113 CALL sgesd2d( ictxt, bw, nrhs, work( nb-bw+1 ), ldw, 0,
1118 IF( mycol.GT.0 )
THEN
1120 CALL sgerv2d( ictxt, bw, nrhs, work( nb+bwu+1 ), ldw, 0,
1125 CALL sgemm(
'T',
'N', lm-bm, nrhs, bw, -one, af( 1 ), bw,
1126 $ work( nb+bwu+1 ), ldw, one, work( 1 ), ldw )
1132 lmj =
min( bw, odd_size-1 )
1134 lptr = bw - 1 + j*llda + aptr
1139 CALL sgemv(
'T', lmj, nrhs, -one, work( j+1 ), ldw, a( lptr ),
1140 $ llda-1, one, work( j ), ldw )
1144 CALL sscal( nrhs, one / a( lptr-llda+1 ), work( j ), ldw )
1149 CALL slamov(
'G', odd_size, nrhs, work( 1 ), ldw, b( 1 ), lldb )
1154 IF( ictxt.NE.ictxt_new )
THEN
1155 CALL blacs_gridexit( ictxt_new )
1166 work( 1 ) = work_size_min