1 SUBROUTINE pcgbtrs( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, IPIV,
2 $ B, IB, DESCB, AF, LAF, WORK, LWORK, INFO )
10 INTEGER BWU, BWL, IB, INFO, JA, LAF, LWORK, N, NRHS
13 INTEGER DESCA( * ), DESCB( * ), IPIV(*)
14 COMPLEX A( * ), AF( * ), B( * ), WORK( * )
373 parameter( one = 1.0e+0 )
374 parameter( zero = 0.0e+0 )
376 parameter( cone = ( 1.0e+0, 0.0e+0 ) )
377 parameter( czero = ( 0.0e+0, 0.0e+0 ) )
379 parameter( int_one = 1 )
380 INTEGER DESCMULT, BIGNUM
381 parameter( descmult = 100, bignum = descmult*descmult )
382 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
383 $ lld_, mb_, m_, nb_, n_, rsrc_
384 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
385 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
386 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
389 INTEGER APTR, BBPTR, BM, BMN, BN, BNN, BW, CSRC,
390 $ first_proc, ictxt, ictxt_new, ictxt_save,
391 $ idum2, idum3, j, ja_new, l, lbwl, lbwu, ldbb,
392 $ ldw, llda, lldb, lm, lmj, ln, lptr, mycol,
393 $ myrow, nb, neicol, np, npact, npcol, nprow,
394 $ npstr, np_save, odd_size, part_offset,
395 $ recovery_val, return_code, store_m_b,
396 $ store_n_a, work_size_min, wptr
399 INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ),
400 $ param_check( 17, 3 )
430 IF( return_code .NE. 0)
THEN
431 info = -( 8*100 + 2 )
436 IF( return_code .NE. 0)
THEN
437 info = -( 11*100 + 2 )
443 IF( desca_1xp( 2 ) .NE. descb_px1( 2 ) )
THEN
444 info = -( 11*100 + 2 )
451 IF( desca_1xp( 4 ) .NE. descb_px1( 4 ) )
THEN
452 info = -( 11*100 + 4 )
457 IF( desca_1xp( 5 ) .NE. descb_px1( 5 ) )
THEN
458 info = -( 11*100 + 5 )
463 ictxt = desca_1xp( 2 )
464 csrc = desca_1xp( 5 )
466 llda = desca_1xp( 6 )
467 store_n_a = desca_1xp( 3 )
468 lldb = descb_px1( 6 )
469 store_m_b = descb_px1( 3 )
474 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
479 IF( lsame( trans,
'N' ) )
THEN
481 ELSE IF ( lsame( trans,
'C' ) )
THEN
487 IF( lwork .LT. -1)
THEN
489 ELSE IF ( lwork .EQ. -1 )
THEN
499 IF( n+ja-1 .GT. store_n_a )
THEN
500 info = -( 8*100 + 6 )
503 IF(( bwl .GT. n-1 ) .OR.
504 $ ( bwl .LT. 0 ) )
THEN
508 IF(( bwu .GT. n-1 ) .OR.
509 $ ( bwu .LT. 0 ) )
THEN
513 IF( llda .LT. (2*bwl+2*bwu+1) )
THEN
514 info = -( 8*100 + 6 )
518 info = -( 8*100 + 4 )
523 IF( n+ib-1 .GT. store_m_b )
THEN
524 info = -( 11*100 + 3 )
527 IF( lldb .LT. nb )
THEN
528 info = -( 11*100 + 6 )
531 IF( nrhs .LT. 0 )
THEN
543 IF( nprow .NE. 1 )
THEN
547 IF( n .GT. np*nb-mod( ja-1, nb ))
THEN
550 $
'PCGBTRS, D&C alg.: only 1 block per proc',
555 IF((ja+n-1.GT.nb) .AND. ( nb.LT.(bwl+bwu+1) ))
THEN
558 $
'PCGBTRS, D&C alg.: NB too small',
566 work_size_min = nrhs*(nb+2*bwl+4*bwu)
568 work( 1 ) = work_size_min
570 IF( lwork .LT. work_size_min )
THEN
571 IF( lwork .NE. -1 )
THEN
574 $
'PCGBTRS: worksize error ',
582 param_check( 17, 1 ) = descb(5)
583 param_check( 16, 1 ) = descb(4)
584 param_check( 15, 1 ) = descb(3)
585 param_check( 14, 1 ) = descb(2)
586 param_check( 13, 1 ) = descb(1)
587 param_check( 12, 1 ) = ib
588 param_check( 11, 1 ) = desca(5)
589 param_check( 10, 1 ) = desca(4)
590 param_check( 9, 1 ) = desca(3)
591 param_check( 8, 1 ) = desca(1)
592 param_check( 7, 1 ) = ja
593 param_check( 6, 1 ) = nrhs
594 param_check( 5, 1 ) = bwu
595 param_check( 4, 1 ) = bwl
596 param_check( 3, 1 ) = n
597 param_check( 2, 1 ) = idum3
598 param_check( 1, 1 ) = idum2
600 param_check( 17, 2 ) = 1105
601 param_check( 16, 2 ) = 1104
602 param_check( 15, 2 ) = 1103
603 param_check( 14, 2 ) = 1102
604 param_check( 13, 2 ) = 1101
605 param_check( 12, 2 ) = 10
606 param_check( 11, 2 ) = 805
607 param_check( 10, 2 ) = 804
608 param_check( 9, 2 ) = 803
609 param_check( 8, 2 ) = 801
610 param_check( 7, 2 ) = 7
611 param_check( 6, 2 ) = 5
612 param_check( 5, 2 ) = 4
613 param_check( 4, 2 ) = 3
614 param_check( 3, 2 ) = 2
615 param_check( 2, 2 ) = 16
616 param_check( 1, 2 ) = 1
624 ELSE IF( info.LT.-descmult )
THEN
627 info = -info * descmult
632 CALL globchk( ictxt, 17, param_check, 17,
633 $ param_check( 1, 3 ), info )
638 IF( info.EQ.bignum )
THEN
640 ELSE IF( mod( info, descmult ) .EQ. 0 )
THEN
641 info = -info / descmult
647 CALL pxerbla( ictxt,
'PCGBTRS', -info )
663 part_offset = nb*( (ja-1)/(npcol*nb) )
665 IF ( (mycol-csrc) .LT. (ja-part_offset-1)/nb )
THEN
666 part_offset = part_offset + nb
669 IF ( mycol .LT. csrc )
THEN
670 part_offset = part_offset - nb
679 first_proc = mod( ( ja-1 )/nb+csrc, npcol )
683 ja_new = mod( ja-1, nb ) + 1
688 np = ( ja_new+n-2 )/nb + 1
692 CALL reshape( ictxt, int_one, ictxt_new, int_one,
693 $ first_proc, int_one, np )
699 desca_1xp( 2 ) = ictxt_new
700 descb_px1( 2 ) = ictxt_new
704 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
708 IF( myrow .LT. 0 )
THEN
718 IF (mycol .LT. npcol-1)
THEN
719 CALL cgesd2d( ictxt, bwu, nrhs, b(nb-bwu+1), lldb,
723 IF (mycol .LT. npcol-1)
THEN
729 IF (mycol .GT. 0)
THEN
735 ldw = nb+bwu + 2*bw+bwu
737 CALL clamov(
'G', lm, nrhs, b(1), lldb, work( wptr ), ldw )
742 DO 1502 l=wptr+lm, ldw
743 work( (j-1)*ldw+l ) = czero
747 IF (mycol .GT. 0)
THEN
748 CALL cgerv2d( ictxt, bwu, nrhs, work(1), ldw,
758 odd_size = numroc( n, nb, mycol, 0, npcol )
760 IF (mycol .NE. 0)
THEN
770 IF (mycol .NE. npcol-1)
THEN
773 ELSE IF (mycol .NE. 0)
THEN
775 ln =
max(odd_size-bw,0)
787 CALL cswap(nrhs, work(l), ldw, work(j), ldw)
790 lptr = bw+1 + (j-1)*llda + aptr
792 CALL cgeru(lmj,nrhs,-cone, a(lptr),1, work(j),ldw,
804 IF (mycol .NE. npcol-1)
THEN
808 bm =
min(bw,odd_size) + bwu
809 bn =
min(bw,odd_size)
815 bbptr = (nb+bwu)*bw + 1
822 CALL cgetrs(
'N', n-ln, nrhs, af(bbptr+bw*ldbb), ldbb,
823 $ ipiv(ln+1), work( ln+1 ), ldw, info)
837 200
IF (npact .LE. 1)
GOTO 300
840 IF (mod(mycol,npstr) .EQ. 0)
THEN
844 IF (mod(mycol,2*npstr) .EQ. 0)
THEN
846 neicol = mycol + npstr
848 IF (neicol/npstr .LE. npact-1)
THEN
850 IF (neicol/npstr .LT. npact-1)
THEN
853 bmn =
min(bw,numroc(n, nb, neicol, 0, npcol))+bwu
856 CALL cgesd2d( ictxt, bm, nrhs,
857 $ work(ln+1), ldw, 0, neicol )
859 IF( npact .NE. 2 )
THEN
863 CALL cgerv2d(ictxt, bm+bmn-bw, nrhs,
864 $ work( ln+1 ), ldw, 0, neicol )
874 neicol = mycol - npstr
876 IF (neicol .EQ. 0)
THEN
882 CALL clamov(
'G', bm, nrhs, work(ln+1), ldw,
883 $ work(nb+bwu+bmn+1), ldw )
885 CALL cgerv2d( ictxt, bmn, nrhs, work( nb+bwu+1 ),
890 IF (npact .NE. 2)
THEN
894 CALL claswp( nrhs, work(nb+bwu+1), ldw, 1, bw,
897 CALL ctrsm(
'L',
'L',
'N',
'U', bw, nrhs, cone,
898 $ af(bbptr+bw*ldbb), ldbb, work(nb+bwu+1), ldw)
902 CALL cgemm(
'N',
'N', bm+bmn-bw, nrhs, bw,
903 $ -cone, af(bbptr+bw*ldbb+bw), ldbb,
904 $ work(nb+bwu+1), ldw,
905 $ cone, work(nb+bwu+1+bw), ldw )
909 CALL cgesd2d( ictxt, bm+bmn-bw, nrhs,
910 $ work(nb+bwu+1+bw), ldw, 0, neicol )
916 CALL claswp( nrhs, work(nb+bwu+1), ldw, 1, bm+bmn,
919 CALL ctrsm(
'L',
'L',
'N',
'U', bm+bmn, nrhs, cone,
920 $ af(bbptr+bw*ldbb), ldbb, work(nb+bwu+1), ldw)
925 npact = (npact + 1)/2
952 recovery_val = npact*npstr - npcol
957 2200
IF( npact .GE. npcol )
GOTO 2300
965 npact = npact-mod( (recovery_val/npstr), 2 )
969 IF( mycol/npstr .LT. npact-1 )
THEN
972 bn =
min(bw, numroc(n, nb, npcol-1, 0, npcol) )
977 IF( mod( mycol, 2*npstr ) .EQ. 0 )
THEN
981 IF( neicol/npstr .LE. npact-1 )
THEN
983 IF( neicol/npstr .LT. npact-1 )
THEN
987 bmn =
min(bw,numroc(n, nb, neicol, 0, npcol))+bwu
988 bnn =
min(bw, numroc(n, nb, neicol, 0, npcol) )
991 IF( npact .GT. 2 )
THEN
993 CALL cgesd2d( ictxt, 2*bw, nrhs, work( ln+1 ),
996 CALL cgerv2d( ictxt, bw, nrhs, work( ln+1 ),
1001 CALL cgerv2d( ictxt, bw, nrhs, work( ln+1 ),
1011 neicol = mycol - npstr
1013 IF (neicol .EQ. 0)
THEN
1019 IF( neicol .LT. npcol-1 )
THEN
1022 bnn =
min(bw, numroc(n, nb, neicol, 0, npcol) )
1025 IF( npact .GT. 2 )
THEN
1029 CALL clamov(
'G', bw, nrhs, work(nb+bwu+1),
1030 $ ldw, work(nb+bwu+bw+1), ldw )
1032 CALL cgerv2d( ictxt, 2*bw, nrhs, work( ln+1 ),
1035 CALL cgemm(
'N',
'N', bw, nrhs, bn,
1036 $ -cone, af(bbptr), ldbb,
1038 $ cone, work(nb+bwu+bw+1), ldw )
1041 IF( mycol .GT. npstr )
THEN
1043 CALL cgemm(
'N',
'N', bw, nrhs, bw,
1044 $ -cone, af(bbptr+2*bw*ldbb), ldbb,
1045 $ work(ln+bw+1), ldw,
1046 $ cone, work(nb+bwu+bw+1), ldw )
1050 CALL ctrsm(
'L',
'U',
'N',
'N', bw, nrhs, cone,
1051 $ af(bbptr+bw*ldbb), ldbb, work(nb+bwu+bw+1), ldw)
1055 CALL cgesd2d( ictxt, bw, nrhs,
1056 $ work( nb+bwu+bw+1 ), ldw, 0, neicol )
1060 CALL clamov(
'G', bw, nrhs, work(nb+bwu+1+bw),
1061 $ ldw, work(ln+bw+1), ldw )
1067 CALL ctrsm(
'L',
'U',
'N',
'N', bn+bnn, nrhs, cone,
1068 $ af(bbptr+bw*ldbb), ldbb, work(nb+bwu+1), ldw)
1072 CALL cgesd2d( ictxt, bw, nrhs,
1073 $ work(nb+bwu+1), ldw, 0, neicol )
1077 CALL clamov(
'G', bnn+bn-bw, nrhs, work(nb+bwu+1+bw),
1078 $ ldw, work(ln+1), ldw )
1081 IF( (nb+bwu+1) .NE. (ln+1+bw) )
THEN
1086 CALL ccopy( nrhs, work(nb+bwu+j), ldw,
1087 $ work(ln+bw+j), ldw )
1107 IF (mycol .NE. npcol-1)
THEN
1110 bm =
min(bw,odd_size) + bwu
1115 IF( mycol .LT. npcol-1 )
THEN
1117 CALL cgesd2d( ictxt, bw, nrhs, work( nb-bw+1 ),
1122 IF( mycol .GT. 0 )
THEN
1124 CALL cgerv2d( ictxt, bw, nrhs, work( nb+bwu+1 ),
1129 CALL cgemm(
'N',
'N', lm-bm, nrhs, bw, -cone,
1130 $ af( 1 ), lm, work( nb+bwu+1 ), ldw, cone,
1135 DO 2021 j = ln, 1, -1
1137 lmj =
min( bw, odd_size-1 )
1139 lptr = bw-1+j*llda+aptr
1144 CALL cgemv(
'T', lmj, nrhs, -cone, work( j+1), ldw,
1145 $ a( lptr ), llda-1, cone, work( j ), ldw )
1149 CALL cscal( nrhs, cone/a( lptr-llda+1 ),
1155 CALL clamov(
'G', odd_size, nrhs, work( 1 ), ldw,
1161 IF( ictxt .NE. ictxt_new )
THEN
1162 CALL blacs_gridexit( ictxt_new )
1173 work( 1 ) = work_size_min