1 SUBROUTINE pdlarfb( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, IV,
2 $ JV, DESCV, T, C, IC, JC, DESCC, WORK )
9 CHARACTER SIDE, TRANS, DIRECT, STOREV
10 INTEGER IC, IV, JC, JV, K, M, N
13 INTEGER DESCC( * ), DESCV( * )
14 DOUBLE PRECISION C( * ), T( * ), V( * ), WORK( * )
216 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
217 $ lld_, mb_, m_, nb_, n_, rsrc_
218 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
219 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
220 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
221 DOUBLE PRECISION ONE, ZERO
222 parameter( one = 1.0d+0, zero = 0.0d+0 )
226 CHARACTER COLBTOP, ROWBTOP, TRANST, UPLO
227 INTEGER HEIGHT, IBASE, ICCOL, ICOFFC, ICOFFV, ICROW,
228 $ ictxt, ii, iibeg, iic, iiend, iinxt, iiv,
229 $ ilastcol, ilastrow, ileft, ioff, ioffc, ioffv,
230 $ ipt, ipv, ipw, ipw1, iright, iroffc, iroffv,
231 $ itop, ivcol, ivrow, jj, jjbeg, jjc, jjend,
232 $ jjnxt, jjv, kp, kq, ldc, ldv, lv, lw, mbv, mpc,
233 $ mpc0, mqv, mqv0, mycol, mydist, myrow, nbv,
234 $ npv, npv0, npcol, nprow, nqc, nqc0, wide
237 EXTERNAL blacs_gridinfo, dgebr2d, dgebs2d,dgemm,
238 $ dgsum2d, dlamov, dlaset, dtrbr2d,
247 INTEGER ICEIL, NUMROC
248 EXTERNAL iceil, lsame, numroc
254 IF( m.LE.0 .OR. n.LE.0 .OR. k.LE.0 )
259 ictxt = descc( ctxt_ )
260 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
262 IF( lsame( trans,
'N' ) )
THEN
267 forward = lsame( direct,
'F' )
274 CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol, iiv, jjv,
276 CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol, iic, jjc,
280 iic =
min( iic, ldc )
281 iiv =
min( iiv, ldv )
282 iroffc = mod( ic-1, descc( mb_ ) )
283 icoffc = mod( jc-1, descc( nb_ ) )
286 iroffv = mod( iv-1, mbv )
287 icoffv = mod( jv-1, nbv )
288 mpc = numroc( m+iroffc, descc( mb_ ), myrow, icrow, nprow )
289 nqc = numroc( n+icoffc, descc( nb_ ), mycol, iccol, npcol )
294 jjc =
min( jjc,
max( 1, jjc+nqc-1 ) )
295 jjv =
min( jjv,
max( 1, numroc( descv( n_ ), nbv, mycol,
296 $ descv( csrc_ ), npcol ) ) )
297 ioffc = iic + ( jjc-1 ) * ldc
298 ioffv = iiv + ( jjv-1 ) * ldv
300 IF( lsame( storev,
'C' ) )
THEN
304 IF( lsame( side,
'L' ) )
THEN
319 CALL pb_topget( ictxt,
'Broadcast',
'Rowwise', rowbtop )
320 IF( mycol.EQ.ivcol )
THEN
321 CALL dgebs2d( ictxt,
'Rowwise', rowbtop, mpc, k,
324 $
CALL dtrbs2d( ictxt,
'Rowwise', rowbtop, uplo,
325 $
'Non unit', k, k, t, nbv )
326 CALL dlamov(
'All', mpc, k, v( ioffv ), ldv, work( ipv ),
329 CALL dgebr2d( ictxt,
'Rowwise', rowbtop, mpc, k,
330 $ work( ipv ), lv, myrow, ivcol )
332 $
CALL dtrbr2d( ictxt,
'Rowwise', rowbtop, uplo,
333 $
'Non unit', k, k, t, nbv, myrow, ivcol )
341 mydist = mod( myrow-ivrow+nprow, nprow )
342 itop =
max( 0, mydist*mbv - iroffv )
344 iiend = iibeg + mpc - 1
345 iinxt =
min( iceil( iibeg, mbv )*mbv, iiend )
348 IF( k-itop .GT.0 )
THEN
349 CALL dlaset(
'Upper', iinxt-iibeg+1, k-itop, zero,
350 $ one, work( ipv+iibeg-iiv+itop*lv ), lv )
351 mydist = mydist + nprow
352 itop = mydist * mbv - iroffv
354 iinxt =
min( iinxt+mbv, iiend )
364 ioff = mod( iv+m-k-1, mbv )
365 CALL infog1l( iv+m-k, mbv, nprow, myrow, descv( rsrc_ ),
367 kp = numroc( k+ioff, mbv, myrow, ilastrow, nprow )
368 IF( myrow.EQ.ilastrow )
370 mydist = mod( myrow-ilastrow+nprow, nprow )
371 itop = mydist * mbv - ioff
372 ibase =
min( itop+mbv, k )
373 itop =
min(
max( 0, itop ), k )
376 IF( jj.LE.( jjv+k-1 ) )
THEN
377 height = ibase - itop
378 CALL dlaset(
'All', kp, itop-jj+jjv, zero, zero,
379 $ work( ipv+ii-iiv+(jj-jjv)*lv ), lv )
380 CALL dlaset(
'Lower', kp, height, zero, one,
381 $ work( ipv+ii-iiv+itop*lv ), lv )
382 kp =
max( 0, kp - height )
385 mydist = mydist + nprow
386 itop = mydist * mbv - ioff
387 ibase =
min( itop + mbv, k )
388 itop =
min( itop, k )
397 CALL dgemm(
'Transpose',
'No transpose', nqc, k, mpc,
398 $ one, c( ioffc ), ldc, work( ipv ), lv, zero,
401 CALL dlaset(
'All', nqc, k, zero, zero, work( ipw ), lw )
404 CALL dgsum2d( ictxt,
'Columnwise',
' ', nqc, k, work( ipw ),
407 IF( myrow.EQ.ivrow )
THEN
411 CALL dtrmm(
'Right', uplo, transt,
'Non unit', nqc, k,
412 $ one, t, nbv, work( ipw ), lw )
413 CALL dgebs2d( ictxt,
'Columnwise',
' ', nqc, k,
416 CALL dgebr2d( ictxt,
'Columnwise',
' ', nqc, k,
417 $ work( ipw ), lw, ivrow, mycol )
424 CALL dgemm(
'No transpose',
'Transpose', mpc, nqc, k, -one,
425 $ work( ipv ), lv, work( ipw ), lw, one,
435 npv0 = numroc( n+iroffv, mbv, myrow, ivrow, nprow )
436 IF( myrow.EQ.ivrow )
THEN
441 IF( mycol.EQ.iccol )
THEN
458 IF( mycol.EQ.ivcol )
THEN
459 IF( myrow.EQ.ivrow )
THEN
460 CALL dlaset(
'All', iroffv, k, zero, zero,
463 CALL dlamov(
'All', npv, k, v( ioffv ), ldv,
467 CALL dlamov(
'All', npv, k, v( ioffv ), ldv,
476 mydist = mod( myrow-ivrow+nprow, nprow )
477 itop =
max( 0, mydist*mbv - iroffv )
479 iiend = iibeg + npv - 1
480 iinxt =
min( iceil( iibeg, mbv )*mbv, iiend )
483 IF( ( k-itop ).GT.0 )
THEN
484 CALL dlaset(
'Upper', iinxt-iibeg+1, k-itop, zero,
485 $ one, work( ipw1+iibeg-iiv+itop*lw ),
487 mydist = mydist + nprow
488 itop = mydist * mbv - iroffv
490 iinxt =
min( iinxt+mbv, iiend )
500 CALL infog1l( iv+n-k, mbv, nprow, myrow,
501 $ descv( rsrc_ ), ii, ilastrow )
502 ioff = mod( iv+n-k-1, mbv )
503 kp = numroc( k+ioff, mbv, myrow, ilastrow, nprow )
504 IF( myrow.EQ.ilastrow )
506 mydist = mod( myrow-ilastrow+nprow, nprow )
507 itop = mydist * mbv - ioff
508 ibase =
min( itop+mbv, k )
509 itop =
min(
max( 0, itop ), k )
512 IF( jj.LE.( jjv+k-1 ) )
THEN
513 height = ibase - itop
514 CALL dlaset(
'All', kp, itop-jj+jjv, zero, zero,
515 $ work( ipw1+ii-iiv+(jj-jjv)*lw ), lw )
516 CALL dlaset(
'Lower', kp, height, zero, one,
517 $ work( ipw1+ii-iiv+itop*lw ), lw )
518 kp =
max( 0, kp - height )
521 mydist = mydist + nprow
522 itop = mydist * mbv - ioff
523 ibase =
min( itop + mbv, k )
524 itop =
min( itop, k )
530 CALL pbdtran( ictxt,
'Columnwise',
'Transpose', n+iroffv, k,
531 $ mbv, work( ipw ), lw, zero, work( ipv ), lv,
532 $ ivrow, ivcol, -1, iccol, work( ipt ) )
537 $ ipv = ipv + icoffc * lv
545 CALL dgemm(
'No transpose',
'Transpose', mpc, k, nqc,
546 $ one, c( ioffc ), ldc, work( ipv ), lv, zero,
549 CALL dlaset(
'All', mpc, k, zero, zero, work( ipw ), lw )
552 CALL dgsum2d( ictxt,
'Rowwise',
' ', mpc, k, work( ipw ),
557 IF( mycol.EQ.ivcol )
THEN
558 IF( myrow.EQ.ivrow )
THEN
562 CALL dtrbs2d( ictxt,
'Columnwise',
' ', uplo,
563 $
'Non unit', k, k, t, nbv )
565 CALL dtrbr2d( ictxt,
'Columnwise',
' ', uplo,
566 $
'Non unit', k, k, t, nbv, ivrow, mycol )
568 CALL dtrmm(
'Right', uplo, trans,
'Non unit', mpc, k,
569 $ one, t, nbv, work( ipw ), lw )
571 CALL dgebs2d( ictxt,
'Rowwise',
' ', mpc, k, work( ipw ),
574 CALL dgebr2d( ictxt,
'Rowwise',
' ', mpc, k, work( ipw ),
582 CALL dgemm(
'No transpose',
'No transpose', mpc, nqc, k,
583 $ -one, work( ipw ), lw, work( ipv ), lv, one,
591 IF( lsame( side,
'L' ) )
THEN
598 mqv0 = numroc( m+icoffv, nbv, mycol, ivcol, npcol )
599 IF( mycol.EQ.ivcol )
THEN
604 IF( myrow.EQ.icrow )
THEN
621 IF( myrow.EQ.ivrow )
THEN
622 IF( mycol.EQ.ivcol )
THEN
623 CALL dlaset(
'All', k, icoffv, zero, zero,
625 ipw1 = ipw + icoffv * lw
626 CALL dlamov(
'All', k, mqv, v( ioffv ), ldv,
630 CALL dlamov(
'All', k, mqv, v( ioffv ), ldv,
639 mydist = mod( mycol-ivcol+npcol, npcol )
640 ileft =
max( 0, mydist * nbv - icoffv )
642 jjend = jjv + mqv - 1
643 jjnxt =
min( iceil( jjbeg, nbv ) * nbv, jjend )
646 IF( ( k-ileft ).GT.0 )
THEN
647 CALL dlaset(
'Lower', k-ileft, jjnxt-jjbeg+1, zero,
649 $ work( ipw1+ileft+(jjbeg-jjv)*lw ),
651 mydist = mydist + npcol
652 ileft = mydist * nbv - icoffv
654 jjnxt =
min( jjnxt+nbv, jjend )
664 CALL infog1l( jv+m-k, nbv, npcol, mycol,
665 $ descv( csrc_ ), jj, ilastcol )
666 ioff = mod( jv+m-k-1, nbv )
667 kq = numroc( k+ioff, nbv, mycol, ilastcol, npcol )
668 IF( mycol.EQ.ilastcol )
670 mydist = mod( mycol-ilastcol+npcol, npcol )
671 ileft = mydist * nbv - ioff
672 iright =
min( ileft+nbv, k )
673 ileft =
min(
max( 0, ileft ), k )
676 IF( ii.LE.( iiv+k-1 ) )
THEN
677 wide = iright - ileft
678 CALL dlaset(
'All', ileft-ii+iiv, kq, zero, zero,
679 $ work( ipw1+ii-iiv+(jj-jjv)*lw ), lw )
680 CALL dlaset(
'Upper', wide, kq, zero, one,
681 $ work( ipw1+ileft+(jj-jjv)*lw ), lw )
682 kq =
max( 0, kq - wide )
685 mydist = mydist + npcol
686 ileft = mydist * nbv - ioff
687 iright =
min( ileft + nbv, k )
688 ileft =
min( ileft, k )
696 CALL pbdtran( ictxt,
'Rowwise',
'Transpose', k, m+icoffv,
697 $ nbv, work( ipw ), lw, zero, work( ipv ), lv,
698 $ ivrow, ivcol, icrow, -1, work( ipt ) )
711 CALL dgemm(
'Transpose',
'No transpose', nqc, k, mpc,
712 $ one, c( ioffc ), ldc, work( ipv ), lv, zero,
715 CALL dlaset(
'All', nqc, k, zero, zero, work( ipw ), lw )
718 CALL dgsum2d( ictxt,
'Columnwise',
' ', nqc, k, work( ipw ),
723 IF( myrow.EQ.ivrow )
THEN
724 IF( mycol.EQ.ivcol )
THEN
728 CALL dtrbs2d( ictxt,
'Rowwise',
' ', uplo,
'Non unit',
731 CALL dtrbr2d( ictxt,
'Rowwise',
' ', uplo,
'Non unit',
732 $ k, k, t, mbv, myrow, ivcol )
734 CALL dtrmm(
'Right', uplo, transt,
'Non unit', nqc, k,
735 $ one, t, mbv, work( ipw ), lw )
737 CALL dgebs2d( ictxt,
'Columnwise',
' ', nqc, k,
740 CALL dgebr2d( ictxt,
'Columnwise',
' ', nqc, k,
741 $ work( ipw ), lw, ivrow, mycol )
748 CALL dgemm(
'No transpose',
'Transpose', mpc, nqc, k, -one,
749 $ work( ipv ), lv, work( ipw ), lw, one,
767 CALL pb_topget( ictxt,
'Broadcast',
'Columnwise', colbtop )
768 IF( myrow.EQ.ivrow )
THEN
769 CALL dgebs2d( ictxt,
'Columnwise', colbtop, k, nqc,
772 $
CALL dtrbs2d( ictxt,
'Columnwise', colbtop, uplo,
773 $
'Non unit', k, k, t, mbv )
774 CALL dlamov(
'All', k, nqc, v( ioffv ), ldv, work( ipv ),
777 CALL dgebr2d( ictxt,
'Columnwise', colbtop, k, nqc,
778 $ work( ipv ), lv, ivrow, mycol )
780 $
CALL dtrbr2d( ictxt,
'Columnwise', colbtop, uplo,
781 $
'Non unit', k, k, t, mbv, ivrow, mycol )
789 mydist = mod( mycol-ivcol+npcol, npcol )
790 ileft =
max( 0, mydist * nbv - icoffv )
792 jjend = jjv + nqc - 1
793 jjnxt =
min( iceil( jjbeg, nbv ) * nbv, jjend )
796 IF( ( k-ileft ).GT.0 )
THEN
797 CALL dlaset(
'Lower', k-ileft, jjnxt-jjbeg+1, zero,
798 $ one, work( ipv+ileft+(jjbeg-jjv)*lv ),
800 mydist = mydist + npcol
801 ileft = mydist * nbv - icoffv
803 jjnxt =
min( jjnxt+nbv, jjend )
813 CALL infog1l( jv+n-k, nbv, npcol, mycol, descv( csrc_ ),
815 ioff = mod( jv+n-k-1, nbv )
816 kq = numroc( k+ioff, nbv, mycol, ilastcol, npcol )
817 IF( mycol.EQ.ilastcol )
819 mydist = mod( mycol-ilastcol+npcol, npcol )
820 ileft = mydist * nbv - ioff
821 iright =
min( ileft+nbv, k )
822 ileft =
min(
max( 0, ileft ), k )
825 IF( ii.LE.( iiv+k-1 ) )
THEN
826 wide = iright - ileft
827 CALL dlaset(
'All', ileft-ii+iiv, kq, zero, zero,
828 $ work( ipv+ii-iiv+(jj-jjv)*lv ), lv )
829 CALL dlaset(
'Upper', wide, kq, zero, one,
830 $ work( ipv+ileft+(jj-jjv)*lv ), lv )
831 kq =
max( 0, kq - wide )
834 mydist = mydist + npcol
835 ileft = mydist * nbv - ioff
836 iright =
min( ileft + nbv, k )
837 ileft =
min( ileft, k )
847 CALL dgemm(
'No Transpose',
'Transpose', mpc, k, nqc,
848 $ one, c( ioffc ), ldc, work( ipv ), lv, zero,
851 CALL dlaset(
'All', mpc, k, zero, zero, work( ipw ), lw )
854 CALL dgsum2d( ictxt,
'Rowwise',
' ', mpc, k, work( ipw ),
859 IF( mycol.EQ.ivcol )
THEN
860 CALL dtrmm(
'Right', uplo, trans,
'Non unit', mpc, k,
861 $ one, t, mbv, work( ipw ), lw )
862 CALL dgebs2d( ictxt,
'Rowwise',
' ', mpc, k, work( ipw ),
865 CALL dgebr2d( ictxt,
'Rowwise',
' ', mpc, k, work( ipw ),
873 CALL dgemm(
'No transpose',
'No transpose', mpc, nqc, k,
874 $ -one, work( ipw ), lw, work( ipv ), lv, one,