1 SUBROUTINE pdgbtrf( N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF,
9 INTEGER BWL, BWU, INFO, JA, LAF, LWORK, N
12 INTEGER DESCA( * ), IPIV( * )
13 DOUBLE PRECISION A( * ), AF( * ), WORK( * )
357 parameter( one = 1.0d+0 )
358 DOUBLE PRECISION ZERO
359 parameter( zero = 0.0d+0 )
361 parameter( int_one = 1 )
362 INTEGER DESCMULT, BIGNUM
363 parameter( descmult = 100, bignum = descmult*descmult )
364 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
365 $ lld_, mb_, m_, nb_, n_, rsrc_
366 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
367 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
368 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
371 INTEGER APTR, BBPTR, BIPTR, BM, BM1, BM2, BMN, BN, BW,
372 $ csrc, dbptr, first_proc, i, i1, i2, ictxt,
373 $ ictxt_new, ictxt_save, idum3, j, ja_new, jptr,
374 $ l, laf_min, lbwl, lbwu, ldb, ldbb, llda, lm,
375 $ lmj, ln, lnj, lptr, mycol, myrow, my_num_cols,
376 $ nb, neicol, np, npact, npcol, nprow, npstr,
377 $ np_save, nrhs, odd_n, odd_size, odptr, ofst,
378 $ part_offset, part_size, return_code, store_n_a,
382 INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 9, 3 )
386 $ dgbtrf, dgemm, dger, dgerv2d, dgesd2d, dgetrf,
387 $ dlamov, dlaswp,
dlatcpy, dswap, dtrrv2d,
388 $ dtrsd2d, dtrsm,
globchk, igamx2d, igebr2d,
412 IF( return_code.NE.0 )
THEN
418 ictxt = desca_1xp( 2 )
419 csrc = desca_1xp( 5 )
421 llda = desca_1xp( 6 )
422 store_n_a = desca_1xp( 3 )
427 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
432 IF( lwork.LT.-1 )
THEN
434 ELSE IF( lwork.EQ.-1 )
THEN
444 IF( n+ja-1.GT.store_n_a )
THEN
448 IF( ( bwl.GT.n-1 ) .OR. ( bwl.LT.0 ) )
THEN
452 IF( ( bwu.GT.n-1 ) .OR. ( bwu.LT.0 ) )
THEN
456 IF( llda.LT.( 2*bwl+2*bwu+1 ) )
THEN
468 IF( nprow.NE.1 )
THEN
472 IF( n.GT.np*nb-mod( ja-1, nb ) )
THEN
474 CALL pxerbla( ictxt,
'PDGBTRF, D&C alg.: only 1 block per proc'
479 IF( ( ja+n-1.GT.nb ) .AND. ( nb.LT.( bwl+bwu+1 ) ) )
THEN
481 CALL pxerbla( ictxt,
'PDGBTRF, D&C alg.: NB too small', -info )
488 laf_min = ( nb+bwu )*( bwl+bwu ) + 6*( bwl+bwu )*( bwl+2*bwu )
490 IF( laf.LT.laf_min )
THEN
494 CALL pxerbla( ictxt,
'PDGBTRF: auxiliary storage error ',
503 work( 1 ) = work_size_min
505 IF( lwork.LT.work_size_min )
THEN
506 IF( lwork.NE.-1 )
THEN
509 work( 1 ) = work_size_min
510 CALL pxerbla( ictxt,
'PDGBTRF: worksize error ', -info )
517 param_check( 9, 1 ) = desca( 5 )
518 param_check( 8, 1 ) = desca( 4 )
519 param_check( 7, 1 ) = desca( 3 )
520 param_check( 6, 1 ) = desca( 1 )
521 param_check( 5, 1 ) = ja
522 param_check( 4, 1 ) = bwu
523 param_check( 3, 1 ) = bwl
524 param_check( 2, 1 ) = n
525 param_check( 1, 1 ) = idum3
527 param_check( 9, 2 ) = 605
528 param_check( 8, 2 ) = 604
529 param_check( 7, 2 ) = 603
530 param_check( 6, 2 ) = 601
531 param_check( 5, 2 ) = 5
532 param_check( 4, 2 ) = 3
533 param_check( 3, 2 ) = 2
534 param_check( 2, 2 ) = 1
535 param_check( 1, 2 ) = 11
543 ELSE IF( info.LT.-descmult )
THEN
546 info = -info*descmult
551 CALL globchk( ictxt, 9, param_check, 9, param_check( 1, 3 ),
557 IF( info.EQ.bignum )
THEN
559 ELSE IF( mod( info, descmult ).EQ.0 )
THEN
560 info = -info / descmult
566 CALL pxerbla( ictxt,
'PDGBTRF', -info )
579 part_offset = nb*( ( ja-1 ) / ( npcol*nb ) )
581 IF( ( mycol-csrc ).LT.( ja-part_offset-1 ) / nb )
THEN
582 part_offset = part_offset + nb
585 IF( mycol.LT.csrc )
THEN
586 part_offset = part_offset - nb
595 first_proc = mod( ( ja-1 ) / nb+csrc, npcol )
599 ja_new = mod( ja-1, nb ) + 1
604 np = ( ja_new+n-2 ) / nb + 1
608 CALL reshape( ictxt, int_one, ictxt_new, int_one, first_proc,
615 desca_1xp( 2 ) = ictxt_new
619 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
623 IF( myrow.LT.0 )
THEN
636 my_num_cols = numroc( n, part_size, mycol, 0, npcol )
640 IF( mycol.EQ.0 )
THEN
641 part_offset = part_offset + mod( ja_new-1, part_size )
642 my_num_cols = my_num_cols - mod( ja_new-1, part_size )
647 ofst = part_offset*llda
651 odd_size = numroc( n, part_size, mycol, 0, npcol )
660 DO 30 j = 1, odd_size
662 a( i+( j-1 )*llda ) = zero
676 IF( mycol.LE.npcol-2 )
THEN
680 biptr = ( nb-bw )*llda + 2*bw + 1
682 CALL dtrsd2d( ictxt,
'U',
'N',
683 $
min( bw, bwu+numroc( n, nb, mycol+1, 0,
684 $ npcol ) ), bw, a( biptr ), llda-1, 0, mycol+1 )
699 IF( mycol.NE.0 )
THEN
709 IF( mycol.NE.npcol-1 )
THEN
712 ELSE IF( mycol.NE.0 )
THEN
714 ln =
max( odd_size-bw, 0 )
722 CALL dgbtrf( lm, ln, lbwl, lbwu, a( aptr ), llda, ipiv, info )
725 info = info + nb*mycol
737 DO 40 j =
max( ln-bw+1, 1 ), ln
739 lmj =
min( lbwl, lm-j )
740 lnj =
min( bw, j+bw-ln+aptr-1 )
744 jptr = j - ( ln+1 ) + 2*bw + 1 - lbwl + ln*llda
753 lptr = l - ( ln+1 ) + 2*bw + 1 - lbwl + ln*llda
755 CALL dswap( lnj, a( lptr ), ldb, a( jptr ), ldb )
762 lptr = bw + 1 + aptr + ( j-1 )*llda
764 CALL dger( lmj, lnj, -one, a( lptr ), 1, a( jptr ), ldb,
774 IF( mycol.GT.0 )
THEN
775 CALL dtrrv2d( ictxt,
'U',
'N',
min( bw, lm ), bw, af( 1 ), bw,
780 DO 60 i2 = 1,
min( bw, lm )
781 DO 50 i1 = i2 + 1, bw
782 af( i1+( i2-1 )*bw ) = af( i2+( i1-1 )*bw )
783 af( i2+( i1-1 )*bw ) = zero
791 lmj =
min( lbwl, lm-j )
795 CALL dswap( bw, af( ( l-1 )*bw+1 ), 1,
796 $ af( ( j-1 )*bw+1 ), 1 )
799 lptr = bw + 1 + aptr + ( j-1 )*llda
801 CALL dger( nrhs, lmj, -one, af( ( j-1 )*bw+1 ), 1,
802 $ a( lptr ), 1, af( j*bw+1 ), bw )
817 IF( mycol.NE.npcol-1 )
THEN
821 bm =
min( bw, odd_size ) + bwu
822 bn =
min( bw, odd_size )
828 bbptr = ( nb+bwu )*bw + 1
834 dbptr = bw + 1 + lbwu + ln*llda
836 CALL dlamov(
'G', bm, bn, a( dbptr ), llda-1, af( bbptr+bw*ldbb ),
842 DO 90 i = j + lbwl, bm - 1
843 af( bbptr+bw*ldbb+( j-1 )*ldbb+i ) = zero
847 IF( mycol.NE.0 )
THEN
851 odptr = ( lm-bm )*bw + 1
852 CALL dlatcpy(
'G', bw, bm, af( odptr ), bw,
853 $ af( bbptr+2*bw*ldbb ), ldbb )
856 IF( npcol.EQ.1 )
THEN
860 CALL dgetrf( n-ln, n-ln, af( bbptr+bw*ldbb ), ldbb,
861 $ ipiv( ln+1 ), info )
882 IF( mod( mycol, npstr ).EQ.0 )
THEN
887 IF( mod( mycol, 2*npstr ).EQ.0 )
THEN
891 neicol = mycol + npstr
893 IF( neicol / npstr.LT.npact-1 )
THEN
895 ELSE IF( neicol / npstr.EQ.npact-1 )
THEN
896 odd_n = numroc( n, nb, npcol-1, 0, npcol )
897 bmn =
min( bw, odd_n ) + bwu
909 IF( neicol / npstr.LE.npact-1 )
THEN
911 CALL dgesd2d( ictxt, bm, 2*bw, af( bbptr+bw*ldbb ), ldbb,
914 CALL dgerv2d( ictxt, bmn, 2*bw, af( bbptr+bm ), ldbb, 0,
917 IF( npact.EQ.2 )
THEN
921 CALL dlamov(
'G', bmn, bw, af( bbptr+bm ), ldbb,
922 $ af( bbptr+2*bw*ldbb+bm ), ldbb )
933 neicol = mycol - npstr
935 IF( neicol.EQ.0 )
THEN
944 CALL dgesd2d( ictxt, bm, 2*bw, af( bbptr+bw*ldbb ), ldbb, 0,
947 CALL dlamov(
'G', bm, 2*bw, af( bbptr+bw*ldbb ), ldbb,
948 $ af( bbptr+bmn ), ldbb )
950 DO 130 j = bbptr + 2*bw*ldbb, bbptr + 3*bw*ldbb - 1, ldbb
951 DO 120 i = 0, ldbb - 1
956 CALL dgerv2d( ictxt, bmn, 2*bw, af( bbptr+bw*ldbb ), ldbb,
959 IF( npact.EQ.2 )
THEN
963 CALL dlamov(
'G', bm, bw, af( bbptr+bmn ), ldbb,
964 $ af( bbptr+2*bw*ldbb+bmn ), ldbb )
971 IF( npact.NE.2 )
THEN
973 CALL dgetrf( bm+bmn, bw, af( bbptr+bw*ldbb ), ldbb,
974 $ ipiv( ln+1 ), info )
978 DO 150 j = bbptr, bbptr + bw*ldbb - 1, ldbb
979 DO 140 i = 0, bm1 - 1
984 CALL dlaswp( bw, af( bbptr ), ldbb, 1, bw, ipiv( ln+1 ), 1 )
986 CALL dtrsm(
'L',
'L',
'N',
'U', bw, bw, one,
987 $ af( bbptr+bw*ldbb ), ldbb, af( bbptr ), ldbb )
991 CALL dgemm(
'N',
'N', bm+bmn-bw, bw, bw, -one,
992 $ af( bbptr+bw*ldbb+bw ), ldbb, af( bbptr ), ldbb,
993 $ one, af( bbptr+bw ), ldbb )
999 CALL dlaswp( nrhs, af( bbptr+2*bw*ldbb ), ldbb, 1, bw,
1002 CALL dtrsm(
'L',
'L',
'N',
'U', bw, nrhs, one,
1003 $ af( bbptr+bw*ldbb ), ldbb,
1004 $ af( bbptr+2*bw*ldbb ), ldbb )
1008 CALL dgemm(
'N',
'N', bm+bmn-bw, nrhs, bw, -one,
1009 $ af( bbptr+bw*ldbb+bw ), ldbb,
1010 $ af( bbptr+2*bw*ldbb ), ldbb, one,
1011 $ af( bbptr+2*bw*ldbb+bw ), ldbb )
1016 IF( mod( mycol, 2*npstr ).EQ.0 )
THEN
1025 CALL dlamov(
'G', bm, bw, af( bbptr+bw ), ldbb,
1026 $ af( bbptr+bw*ldbb ), ldbb )
1027 CALL dlamov(
'G', bm, bw, af( bbptr+2*bw*ldbb+bw ), ldbb,
1028 $ af( bbptr+2*bw*ldbb ), ldbb )
1032 DO 170 j = 0, bw - 1
1033 DO 160 i = 0, bm - 1
1034 af( bbptr+2*bw*ldbb+bw+j*ldbb+i ) = zero
1044 CALL dgetrf( bm+bmn, bm+bmn, af( bbptr+bw*ldbb ), ldbb,
1045 $ ipiv( ln+1 ), info )
1054 npact = ( npact+1 ) / 2
1067 IF( ictxt.NE.ictxt_new )
THEN
1068 CALL blacs_gridexit( ictxt_new )
1082 work( 1 ) = work_size_min
1086 CALL igamx2d( ictxt,
'A',
' ', 1, 1, info, 1, info, info, -1, 0,
1089 IF( mycol.EQ.0 )
THEN
1090 CALL igebs2d( ictxt,
'A',
' ', 1, 1, info, 1 )
1092 CALL igebr2d( ictxt,
'A',
' ', 1, 1, info, 1, 0, 0 )