1 SUBROUTINE pslarfb( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, IV,
2 $ JV, DESCV, T, C, IC, JC, DESCC, WORK )
9 CHARACTER SIDE, TRANS, DIRECT, STOREV
10 INTEGER IC, IV, JC, JV, K, M, N
13 INTEGER DESCC( * ), DESCV( * )
14 REAL C( * ), T( * ), V( * ), WORK( * )
216 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
217 $ lld_, mb_, m_, nb_, n_, rsrc_
218 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
219 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
220 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
222 parameter( one = 1.0e+0, zero = 0.0e+0 )
226 CHARACTER COLBTOP, ROWBTOP, TRANST, UPLO
227 INTEGER HEIGHT, IBASE, ICCOL, ICOFFC, ICOFFV, ICROW,
228 $ ictxt, ii, iibeg, iic, iiend, iinxt, iiv,
229 $ ilastcol, ilastrow, ileft, ioff, ioffc, ioffv,
230 $ ipt, ipv, ipw, ipw1, iright, iroffc, iroffv,
231 $ itop, ivcol, ivrow, jj, jjbeg, jjc, jjend,
232 $ jjnxt, jjv, kp, kq, ldc, ldv, lv, lw, mbv, mpc,
233 $ mpc0, mqv, mqv0, mycol, mydist, myrow, nbv,
234 $ npv, npv0, npcol, nprow, nqc, nqc0, wide
238 $
pbstran, sgebr2d, sgebs2d, sgemm,
239 $ sgsum2d, slamov, slaset, strbr2d,
247 INTEGER ICEIL, NUMROC
248 EXTERNAL iceil, lsame, numroc
254 IF( m.LE.0 .OR. n.LE.0 .OR. k.LE.0 )
259 ictxt = descc( ctxt_ )
260 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
262 IF( lsame( trans,
'N' ) )
THEN
267 forward = lsame( direct,
'F' )
274 CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol, iiv, jjv,
276 CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol, iic, jjc,
280 iic =
min( iic, ldc )
281 iiv =
min( iiv, ldv )
282 iroffc = mod( ic-1, descc( mb_ ) )
283 icoffc = mod( jc-1, descc( nb_ ) )
286 iroffv = mod( iv-1, mbv )
287 icoffv = mod( jv-1, nbv )
288 mpc = numroc( m+iroffc, descc( mb_ ), myrow, icrow, nprow )
289 nqc = numroc( n+icoffc, descc( nb_ ), mycol, iccol, npcol )
294 jjc =
min( jjc,
max( 1, jjc+nqc-1 ) )
295 jjv =
min( jjv,
max( 1, numroc( descv( n_ ), nbv, mycol,
296 $ descv( csrc_ ), npcol ) ) )
297 ioffc = iic + ( jjc-1 ) * ldc
298 ioffv = iiv + ( jjv-1 ) * ldv
300 IF( lsame( storev,
'C' ) )
THEN
304 IF( lsame( side,
'L' ) )
THEN
319 CALL pb_topget( ictxt,
'Broadcast',
'Rowwise', rowbtop )
320 IF( mycol.EQ.ivcol )
THEN
321 CALL sgebs2d( ictxt,
'Rowwise', rowbtop, mpc, k,
324 $
CALL strbs2d( ictxt,
'Rowwise', rowbtop, uplo,
325 $
'Non unit', k, k, t, nbv )
326 CALL slamov(
'All', mpc, k, v( ioffv ), ldv, work( ipv ),
329 CALL sgebr2d( ictxt,
'Rowwise', rowbtop, mpc, k,
330 $ work( ipv ), lv, myrow, ivcol )
332 $
CALL strbr2d( ictxt,
'Rowwise', rowbtop, uplo,
333 $
'Non unit', k, k, t, nbv, myrow, ivcol )
341 mydist = mod( myrow-ivrow+nprow, nprow )
342 itop =
max( 0, mydist*mbv - iroffv )
344 iiend = iibeg + mpc - 1
345 iinxt =
min( iceil( iibeg, mbv )*mbv, iiend )
348 IF( k-itop .GT.0 )
THEN
349 CALL slaset(
'Upper', iinxt-iibeg+1, k-itop, zero,
350 $ one, work( ipv+iibeg-iiv+itop*lv ), lv )
351 mydist = mydist + nprow
352 itop = mydist * mbv - iroffv
354 iinxt =
min( iinxt+mbv, iiend )
364 ioff = mod( iv+m-k-1, mbv )
365 CALL infog1l( iv+m-k, mbv, nprow, myrow, descv( rsrc_ ),
367 kp = numroc( k+ioff, mbv, myrow, ilastrow, nprow )
368 IF( myrow.EQ.ilastrow )
370 mydist = mod( myrow-ilastrow+nprow, nprow )
371 itop = mydist * mbv - ioff
372 ibase =
min( itop+mbv, k )
373 itop =
min(
max( 0, itop ), k )
376 IF( jj.LE.( jjv+k-1 ) )
THEN
377 height = ibase - itop
378 CALL slaset(
'All', kp, itop-jj+jjv, zero, zero,
379 $ work( ipv+ii-iiv+(jj-jjv)*lv ), lv )
380 CALL slaset(
'Lower', kp, height, zero, one,
381 $ work( ipv+ii-iiv+itop*lv ), lv )
382 kp =
max( 0, kp - height )
385 mydist = mydist + nprow
386 itop = mydist * mbv - ioff
387 ibase =
min( itop + mbv, k )
388 itop =
min( itop, k )
397 CALL sgemm(
'Transpose',
'No transpose', nqc, k, mpc,
398 $ one, c( ioffc ), ldc, work( ipv ), lv, zero,
401 CALL slaset(
'All', nqc, k, zero, zero, work( ipw ), lw )
404 CALL sgsum2d( ictxt,
'Columnwise',
' ', nqc, k, work( ipw ),
407 IF( myrow.EQ.ivrow )
THEN
411 CALL strmm(
'Right', uplo, transt,
'Non unit', nqc, k,
412 $ one, t, nbv, work( ipw ), lw )
413 CALL sgebs2d( ictxt,
'Columnwise',
' ', nqc, k,
416 CALL sgebr2d( ictxt,
'Columnwise',
' ', nqc, k,
417 $ work( ipw ), lw, ivrow, mycol )
424 CALL sgemm(
'No transpose',
'Transpose', mpc, nqc, k, -one,
425 $ work( ipv ), lv, work( ipw ), lw, one,
435 npv0 = numroc( n+iroffv, mbv, myrow, ivrow, nprow )
436 IF( myrow.EQ.ivrow )
THEN
441 IF( mycol.EQ.iccol )
THEN
458 IF( mycol.EQ.ivcol )
THEN
459 IF( myrow.EQ.ivrow )
THEN
460 CALL slaset(
'All', iroffv, k, zero, zero,
463 CALL slamov(
'All', npv, k, v( ioffv ), ldv,
467 CALL slamov(
'All', npv, k, v( ioffv ), ldv,
476 mydist = mod( myrow-ivrow+nprow, nprow )
477 itop =
max( 0, mydist*mbv - iroffv )
479 iiend = iibeg + npv - 1
480 iinxt =
min( iceil( iibeg, mbv )*mbv, iiend )
483 IF( ( k-itop ).GT.0 )
THEN
484 CALL slaset(
'Upper', iinxt-iibeg+1, k-itop, zero,
485 $ one, work( ipw1+iibeg-iiv+itop*lw ),
487 mydist = mydist + nprow
488 itop = mydist * mbv - iroffv
490 iinxt =
min( iinxt+mbv, iiend )
500 CALL infog1l( iv+n-k, mbv, nprow, myrow,
501 $ descv( rsrc_ ), ii, ilastrow )
502 ioff = mod( iv+n-k-1, mbv )
503 kp = numroc( k+ioff, mbv, myrow, ilastrow, nprow )
504 IF( myrow.EQ.ilastrow )
506 mydist = mod( myrow-ilastrow+nprow, nprow )
507 itop = mydist * mbv - ioff
508 ibase =
min( itop+mbv, k )
509 itop =
min(
max( 0, itop ), k )
512 IF( jj.LE.( jjv+k-1 ) )
THEN
513 height = ibase - itop
514 CALL slaset(
'All', kp, itop-jj+jjv, zero, zero,
515 $ work( ipw1+ii-iiv+(jj-jjv)*lw ), lw )
516 CALL slaset(
'Lower', kp, height, zero, one,
517 $ work( ipw1+ii-iiv+itop*lw ), lw )
518 kp =
max( 0, kp - height )
521 mydist = mydist + nprow
522 itop = mydist * mbv - ioff
523 ibase =
min( itop + mbv, k )
524 itop =
min( itop, k )
530 CALL pbstran( ictxt,
'Columnwise',
'Transpose', n+iroffv, k,
531 $ mbv, work( ipw ), lw, zero, work( ipv ), lv,
532 $ ivrow, ivcol, -1, iccol, work( ipt ) )
537 $ ipv = ipv + icoffc * lv
545 CALL sgemm(
'No transpose',
'Transpose', mpc, k, nqc,
546 $ one, c( ioffc ), ldc, work( ipv ), lv, zero,
549 CALL slaset(
'All', mpc, k, zero, zero, work( ipw ), lw )
552 CALL sgsum2d( ictxt,
'Rowwise',
' ', mpc, k, work( ipw ),
557 IF( mycol.EQ.ivcol )
THEN
558 IF( myrow.EQ.ivrow )
THEN
562 CALL strbs2d( ictxt,
'Columnwise',
' ', uplo,
563 $
'Non unit', k, k, t, nbv )
565 CALL strbr2d( ictxt,
'Columnwise',
' ', uplo,
566 $
'Non unit', k, k, t, nbv, ivrow, mycol )
568 CALL strmm(
'Right', uplo, trans,
'Non unit', mpc, k,
569 $ one, t, nbv, work( ipw ), lw )
571 CALL sgebs2d( ictxt,
'Rowwise',
' ', mpc, k, work( ipw ),
574 CALL sgebr2d( ictxt,
'Rowwise',
' ', mpc, k, work( ipw ),
582 CALL sgemm(
'No transpose',
'No transpose', mpc, nqc, k,
583 $ -one, work( ipw ), lw, work( ipv ), lv, one,
591 IF( lsame( side,
'L' ) )
THEN
598 mqv0 = numroc( m+icoffv, nbv, mycol, ivcol, npcol )
599 IF( mycol.EQ.ivcol )
THEN
604 IF( myrow.EQ.icrow )
THEN
621 IF( myrow.EQ.ivrow )
THEN
622 IF( mycol.EQ.ivcol )
THEN
623 CALL slaset(
'All', k, icoffv, zero, zero,
625 ipw1 = ipw + icoffv * lw
626 CALL slamov(
'All', k, mqv, v( ioffv ), ldv,
630 CALL slamov(
'All', k, mqv, v( ioffv ), ldv,
639 mydist = mod( mycol-ivcol+npcol, npcol )
640 ileft =
max( 0, mydist * nbv - icoffv )
642 jjend = jjv + mqv - 1
643 jjnxt =
min( iceil( jjbeg, nbv ) * nbv, jjend )
646 IF( ( k-ileft ).GT.0 )
THEN
647 CALL slaset(
'Lower', k-ileft, jjnxt-jjbeg+1, zero,
649 $ work( ipw1+ileft+(jjbeg-jjv)*lw ),
651 mydist = mydist + npcol
652 ileft = mydist * nbv - icoffv
654 jjnxt =
min( jjnxt+nbv, jjend )
664 CALL infog1l( jv+m-k, nbv, npcol, mycol,
665 $ descv( csrc_ ), jj, ilastcol )
666 ioff = mod( jv+m-k-1, nbv )
667 kq = numroc( k+ioff, nbv, mycol, ilastcol, npcol )
668 IF( mycol.EQ.ilastcol )
670 mydist = mod( mycol-ilastcol+npcol, npcol )
671 ileft = mydist * nbv - ioff
672 iright =
min( ileft+nbv, k )
673 ileft =
min(
max( 0, ileft ), k )
676 IF( ii.LE.( iiv+k-1 ) )
THEN
677 wide = iright - ileft
678 CALL slaset(
'All', ileft-ii+iiv, kq, zero, zero,
679 $ work( ipw1+ii-iiv+(jj-jjv)*lw ), lw )
680 CALL slaset(
'Upper', wide, kq, zero, one,
681 $ work( ipw1+ileft+(jj-jjv)*lw ), lw )
682 kq =
max( 0, kq - wide )
685 mydist = mydist + npcol
686 ileft = mydist * nbv - ioff
687 iright =
min( ileft + nbv, k )
688 ileft =
min( ileft, k )
696 CALL pbstran( ictxt,
'Rowwise',
'Transpose', k, m+icoffv,
697 $ nbv, work( ipw ), lw, zero, work( ipv ), lv,
698 $ ivrow, ivcol, icrow, -1, work( ipt ) )
711 CALL sgemm(
'Transpose',
'No transpose', nqc, k, mpc,
712 $ one, c( ioffc ), ldc, work( ipv ), lv, zero,
715 CALL slaset(
'All', nqc, k, zero, zero, work( ipw ), lw )
718 CALL sgsum2d( ictxt,
'Columnwise',
' ', nqc, k, work( ipw ),
723 IF( myrow.EQ.ivrow )
THEN
724 IF( mycol.EQ.ivcol )
THEN
728 CALL strbs2d( ictxt,
'Rowwise',
' ', uplo,
'Non unit',
731 CALL strbr2d( ictxt,
'Rowwise',
' ', uplo,
'Non unit',
732 $ k, k, t, mbv, myrow, ivcol )
734 CALL strmm(
'Right', uplo, transt,
'Non unit', nqc, k,
735 $ one, t, mbv, work( ipw ), lw )
737 CALL sgebs2d( ictxt,
'Columnwise',
' ', nqc, k,
740 CALL sgebr2d( ictxt,
'Columnwise',
' ', nqc, k,
741 $ work( ipw ), lw, ivrow, mycol )
748 CALL sgemm(
'No transpose',
'Transpose', mpc, nqc, k, -one,
749 $ work( ipv ), lv, work( ipw ), lw, one,
767 CALL pb_topget( ictxt,
'Broadcast',
'Columnwise', colbtop )
768 IF( myrow.EQ.ivrow )
THEN
769 CALL sgebs2d( ictxt,
'Columnwise', colbtop, k, nqc,
772 $
CALL strbs2d( ictxt,
'Columnwise', colbtop, uplo,
773 $
'Non unit', k, k, t, mbv )
774 CALL slamov(
'All', k, nqc, v( ioffv ), ldv, work( ipv ),
777 CALL sgebr2d( ictxt,
'Columnwise', colbtop, k, nqc,
778 $ work( ipv ), lv, ivrow, mycol )
780 $
CALL strbr2d( ictxt,
'Columnwise', colbtop, uplo,
781 $
'Non unit', k, k, t, mbv, ivrow, mycol )
789 mydist = mod( mycol-ivcol+npcol, npcol )
790 ileft =
max( 0, mydist * nbv - icoffv )
792 jjend = jjv + nqc - 1
793 jjnxt =
min( iceil( jjbeg, nbv ) * nbv, jjend )
796 IF( ( k-ileft ).GT.0 )
THEN
797 CALL slaset(
'Lower', k-ileft, jjnxt-jjbeg+1, zero,
798 $ one, work( ipv+ileft+(jjbeg-jjv)*lv ),
800 mydist = mydist + npcol
801 ileft = mydist * nbv - icoffv
803 jjnxt =
min( jjnxt+nbv, jjend )
813 CALL infog1l( jv+n-k, nbv, npcol, mycol, descv( csrc_ ),
815 ioff = mod( jv+n-k-1, nbv )
816 kq = numroc( k+ioff, nbv, mycol, ilastcol, npcol )
817 IF( mycol.EQ.ilastcol )
819 mydist = mod( mycol-ilastcol+npcol, npcol )
820 ileft = mydist * nbv - ioff
821 iright =
min( ileft+nbv, k )
822 ileft =
min(
max( 0, ileft ), k )
825 IF( ii.LE.( iiv+k-1 ) )
THEN
826 wide = iright - ileft
827 CALL slaset(
'All', ileft-ii+iiv, kq, zero, zero,
828 $ work( ipv+ii-iiv+(jj-jjv)*lv ), lv )
829 CALL slaset(
'Upper', wide, kq, zero, one,
830 $ work( ipv+ileft+(jj-jjv)*lv ), lv )
831 kq =
max( 0, kq - wide )
834 mydist = mydist + npcol
835 ileft = mydist * nbv - ioff
836 iright =
min( ileft + nbv, k )
837 ileft =
min( ileft, k )
847 CALL sgemm(
'No Transpose',
'Transpose', mpc, k, nqc,
848 $ one, c( ioffc ), ldc, work( ipv ), lv, zero,
851 CALL slaset(
'All', mpc, k, zero, zero, work( ipw ), lw )
854 CALL sgsum2d( ictxt,
'Rowwise',
' ', mpc, k, work( ipw ),
859 IF( mycol.EQ.ivcol )
THEN
860 CALL strmm(
'Right', uplo, trans,
'Non unit', mpc, k,
861 $ one, t, mbv, work( ipw ), lw )
862 CALL sgebs2d( ictxt,
'Rowwise',
' ', mpc, k, work( ipw ),
865 CALL sgebr2d( ictxt,
'Rowwise',
' ', mpc, k, work( ipw ),
873 CALL sgemm(
'No transpose',
'No transpose', mpc, nqc, k,
874 $ -one, work( ipw ), lw, work( ipv ), lv, one,