1 SUBROUTINE pzlarfb( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, IV,
2 $ JV, DESCV, T, C, IC, JC, DESCC, WORK )
9 CHARACTER SIDE, TRANS, DIRECT, STOREV
10 INTEGER IC, IV, JC, JV, K, M, N
13 INTEGER DESCC( * ), DESCV( * )
14 COMPLEX*16 C( * ), T( * ), V( * ), WORK( * )
216 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
217 $ lld_, mb_, m_, nb_, n_, rsrc_
218 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
219 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
220 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
222 parameter( one = ( 1.0d+0, 0.0d+0 ),
223 $ zero = ( 0.0d+0, 0.0d+0 ) )
227 CHARACTER COLBTOP, ROWBTOP, TRANST, UPLO
228 INTEGER HEIGHT, IBASE, ICCOL, ICOFFC, ICOFFV, ICROW,
229 $ ictxt, ii, iibeg, iic, iiend, iinxt, iiv,
230 $ ilastcol, ilastrow, ileft, ioff, ioffc, ioffv,
231 $ ipt, ipv, ipw, ipw1, iright, iroffc, iroffv,
232 $ itop, ivcol, ivrow, jj, jjbeg, jjc, jjend,
233 $ jjnxt, jjv, kp, kq, ldc, ldv, lv, lw, mbv, mpc,
234 $ mpc0, mqv, mqv0, mycol, mydist, myrow, nbv,
235 $ npv, npv0, npcol, nprow, nqc, nqc0, wide
239 $
pbztran, zgebr2d, zgebs2d, zgemm,
240 $ zgsum2d, zlamov, zlaset, ztrbr2d,
248 INTEGER ICEIL, NUMROC
249 EXTERNAL iceil, lsame, numroc
255 IF( m.LE.0 .OR. n.LE.0 .OR. k.LE.0 )
260 ictxt = descc( ctxt_ )
261 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
263 IF( lsame( trans,
'N' ) )
THEN
268 forward = lsame( direct,
'F' )
275 CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol, iiv, jjv,
277 CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol, iic, jjc,
281 iic =
min( iic, ldc )
282 iiv =
min( iiv, ldv )
283 iroffc = mod( ic-1, descc( mb_ ) )
284 icoffc = mod( jc-1, descc( nb_ ) )
287 iroffv = mod( iv-1, mbv )
288 icoffv = mod( jv-1, nbv )
289 mpc = numroc( m+iroffc, descc( mb_ ), myrow, icrow, nprow )
290 nqc = numroc( n+icoffc, descc( nb_ ), mycol, iccol, npcol )
295 jjc =
min( jjc,
max( 1, jjc+nqc-1 ) )
296 jjv =
min( jjv,
max( 1, numroc( descv( n_ ), nbv, mycol,
297 $ descv( csrc_ ), npcol ) ) )
298 ioffc = iic + ( jjc-1 ) * ldc
299 ioffv = iiv + ( jjv-1 ) * ldv
301 IF( lsame( storev,
'C' ) )
THEN
305 IF( lsame( side,
'L' ) )
THEN
320 CALL pb_topget( ictxt,
'Broadcast',
'Rowwise', rowbtop )
321 IF( mycol.EQ.ivcol )
THEN
322 CALL zgebs2d( ictxt,
'Rowwise', rowbtop, mpc, k,
325 $
CALL ztrbs2d( ictxt,
'Rowwise', rowbtop, uplo,
326 $
'Non unit', k, k, t, nbv )
327 CALL zlamov(
'All', mpc, k, v( ioffv ), ldv, work( ipv ),
330 CALL zgebr2d( ictxt,
'Rowwise', rowbtop, mpc, k,
331 $ work( ipv ), lv, myrow, ivcol )
333 $
CALL ztrbr2d( ictxt,
'Rowwise', rowbtop, uplo,
334 $
'Non unit', k, k, t, nbv, myrow, ivcol )
342 mydist = mod( myrow-ivrow+nprow, nprow )
343 itop =
max( 0, mydist*mbv - iroffv )
345 iiend = iibeg + mpc - 1
346 iinxt =
min( iceil( iibeg, mbv )*mbv, iiend )
349 IF( k-itop .GT.0 )
THEN
350 CALL zlaset(
'Upper', iinxt-iibeg+1, k-itop, zero,
351 $ one, work( ipv+iibeg-iiv+itop*lv ), lv )
352 mydist = mydist + nprow
353 itop = mydist * mbv - iroffv
355 iinxt =
min( iinxt+mbv, iiend )
365 ioff = mod( iv+m-k-1, mbv )
366 CALL infog1l( iv+m-k, mbv, nprow, myrow, descv( rsrc_ ),
368 kp = numroc( k+ioff, mbv, myrow, ilastrow, nprow )
369 IF( myrow.EQ.ilastrow )
371 mydist = mod( myrow-ilastrow+nprow, nprow )
372 itop = mydist * mbv - ioff
373 ibase =
min( itop+mbv, k )
374 itop =
min(
max( 0, itop ), k )
377 IF( jj.LE.( jjv+k-1 ) )
THEN
378 height = ibase - itop
379 CALL zlaset(
'All', kp, itop-jj+jjv, zero, zero,
380 $ work( ipv+ii-iiv+(jj-jjv)*lv ), lv )
381 CALL zlaset(
'Lower', kp, height, zero, one,
382 $ work( ipv+ii-iiv+itop*lv ), lv )
383 kp =
max( 0, kp - height )
386 mydist = mydist + nprow
387 itop = mydist * mbv - ioff
388 ibase =
min( itop + mbv, k )
389 itop =
min( itop, k )
398 CALL zgemm(
'Conjugate transpose',
'No transpose', nqc,
399 $ k, mpc, one, c( ioffc ), ldc, work( ipv ), lv,
400 $ zero, work( ipw ), lw )
402 CALL zlaset(
'All', nqc, k, zero, zero, work( ipw ), lw )
405 CALL zgsum2d( ictxt,
'Columnwise',
' ', nqc, k, work( ipw ),
408 IF( myrow.EQ.ivrow )
THEN
412 CALL ztrmm(
'Right', uplo, transt,
'Non unit', nqc, k,
413 $ one, t, nbv, work( ipw ), lw )
414 CALL zgebs2d( ictxt,
'Columnwise',
' ', nqc, k,
417 CALL zgebr2d( ictxt,
'Columnwise',
' ', nqc, k,
418 $ work( ipw ), lw, ivrow, mycol )
425 CALL zgemm(
'No transpose',
'Conjugate transpose', mpc, nqc,
426 $ k, -one, work( ipv ), lv, work( ipw ), lw, one,
436 npv0 = numroc( n+iroffv, mbv, myrow, ivrow, nprow )
437 IF( myrow.EQ.ivrow )
THEN
442 IF( mycol.EQ.iccol )
THEN
459 IF( mycol.EQ.ivcol )
THEN
460 IF( myrow.EQ.ivrow )
THEN
461 CALL zlaset(
'All', iroffv, k, zero, zero,
464 CALL zlamov(
'All', npv, k, v( ioffv ), ldv,
468 CALL zlamov(
'All', npv, k, v( ioffv ), ldv,
477 mydist = mod( myrow-ivrow+nprow, nprow )
478 itop =
max( 0, mydist*mbv - iroffv )
480 iiend = iibeg + npv - 1
481 iinxt =
min( iceil( iibeg, mbv )*mbv, iiend )
484 IF( ( k-itop ).GT.0 )
THEN
485 CALL zlaset(
'Upper', iinxt-iibeg+1, k-itop, zero,
486 $ one, work( ipw1+iibeg-iiv+itop*lw ),
488 mydist = mydist + nprow
489 itop = mydist * mbv - iroffv
491 iinxt =
min( iinxt+mbv, iiend )
501 CALL infog1l( iv+n-k, mbv, nprow, myrow,
502 $ descv( rsrc_ ), ii, ilastrow )
503 ioff = mod( iv+n-k-1, mbv )
504 kp = numroc( k+ioff, mbv, myrow, ilastrow, nprow )
505 IF( myrow.EQ.ilastrow )
507 mydist = mod( myrow-ilastrow+nprow, nprow )
508 itop = mydist * mbv - ioff
509 ibase =
min( itop+mbv, k )
510 itop =
min(
max( 0, itop ), k )
513 IF( jj.LE.( jjv+k-1 ) )
THEN
514 height = ibase - itop
515 CALL zlaset(
'All', kp, itop-jj+jjv, zero, zero,
516 $ work( ipw1+ii-iiv+(jj-jjv)*lw ), lw )
517 CALL zlaset(
'Lower', kp, height, zero, one,
518 $ work( ipw1+ii-iiv+itop*lw ), lw )
519 kp =
max( 0, kp - height )
522 mydist = mydist + nprow
523 itop = mydist * mbv - ioff
524 ibase =
min( itop + mbv, k )
525 itop =
min( itop, k )
531 CALL pbztran( ictxt,
'Columnwise',
'Conjugate transpose',
532 $ n+iroffv, k, mbv, work( ipw ), lw, zero,
533 $ work( ipv ), lv, ivrow, ivcol, -1, iccol,
539 $ ipv = ipv + icoffc * lv
547 CALL zgemm(
'No transpose',
'Conjugate transpose', mpc,
548 $ k, nqc, one, c( ioffc ), ldc, work( ipv ),
549 $ lv, zero, work( ipw ), lw )
551 CALL zlaset(
'All', mpc, k, zero, zero, work( ipw ), lw )
554 CALL zgsum2d( ictxt,
'Rowwise',
' ', mpc, k, work( ipw ),
559 IF( mycol.EQ.ivcol )
THEN
560 IF( myrow.EQ.ivrow )
THEN
564 CALL ztrbs2d( ictxt,
'Columnwise',
' ', uplo,
565 $
'Non unit', k, k, t, nbv )
567 CALL ztrbr2d( ictxt,
'Columnwise',
' ', uplo,
568 $
'Non unit', k, k, t, nbv, ivrow, mycol )
570 CALL ztrmm(
'Right', uplo, trans,
'Non unit', mpc, k,
571 $ one, t, nbv, work( ipw ), lw )
573 CALL zgebs2d( ictxt,
'Rowwise',
' ', mpc, k, work( ipw ),
576 CALL zgebr2d( ictxt,
'Rowwise',
' ', mpc, k, work( ipw ),
584 CALL zgemm(
'No transpose',
'No transpose', mpc, nqc, k,
585 $ -one, work( ipw ), lw, work( ipv ), lv, one,
593 IF( lsame( side,
'L' ) )
THEN
600 mqv0 = numroc( m+icoffv, nbv, mycol, ivcol, npcol )
601 IF( mycol.EQ.ivcol )
THEN
606 IF( myrow.EQ.icrow )
THEN
623 IF( myrow.EQ.ivrow )
THEN
624 IF( mycol.EQ.ivcol )
THEN
625 CALL zlaset(
'All', k, icoffv, zero, zero,
627 ipw1 = ipw + icoffv * lw
628 CALL zlamov(
'All', k, mqv, v( ioffv ), ldv,
632 CALL zlamov(
'All', k, mqv, v( ioffv ), ldv,
641 mydist = mod( mycol-ivcol+npcol, npcol )
642 ileft =
max( 0, mydist * nbv - icoffv )
644 jjend = jjv + mqv - 1
645 jjnxt =
min( iceil( jjbeg, nbv ) * nbv, jjend )
648 IF( ( k-ileft ).GT.0 )
THEN
649 CALL zlaset(
'Lower', k-ileft, jjnxt-jjbeg+1, zero,
651 $ work( ipw1+ileft+(jjbeg-jjv)*lw ),
653 mydist = mydist + npcol
654 ileft = mydist * nbv - icoffv
656 jjnxt =
min( jjnxt+nbv, jjend )
666 CALL infog1l( jv+m-k, nbv, npcol, mycol,
667 $ descv( csrc_ ), jj, ilastcol )
668 ioff = mod( jv+m-k-1, nbv )
669 kq = numroc( k+ioff, nbv, mycol, ilastcol, npcol )
670 IF( mycol.EQ.ilastcol )
672 mydist = mod( mycol-ilastcol+npcol, npcol )
673 ileft = mydist * nbv - ioff
674 iright =
min( ileft+nbv, k )
675 ileft =
min(
max( 0, ileft ), k )
678 IF( ii.LE.( iiv+k-1 ) )
THEN
679 wide = iright - ileft
680 CALL zlaset(
'All', ileft-ii+iiv, kq, zero, zero,
681 $ work( ipw1+ii-iiv+(jj-jjv)*lw ), lw )
682 CALL zlaset(
'Upper', wide, kq, zero, one,
683 $ work( ipw1+ileft+(jj-jjv)*lw ), lw )
684 kq =
max( 0, kq - wide )
687 mydist = mydist + npcol
688 ileft = mydist * nbv - ioff
689 iright =
min( ileft + nbv, k )
690 ileft =
min( ileft, k )
698 CALL pbztran( ictxt,
'Rowwise',
'Conjugate transpose', k,
699 $ m+icoffv, nbv, work( ipw ), lw, zero,
700 $ work( ipv ), lv, ivrow, ivcol, icrow, -1,
714 CALL zgemm(
'Conjugate transpose',
'No transpose', nqc,
715 $ k, mpc, one, c( ioffc ), ldc, work( ipv ),
716 $ lv, zero, work( ipw ), lw )
718 CALL zlaset(
'All', nqc, k, zero, zero, work( ipw ), lw )
721 CALL zgsum2d( ictxt,
'Columnwise',
' ', nqc, k, work( ipw ),
726 IF( myrow.EQ.ivrow )
THEN
727 IF( mycol.EQ.ivcol )
THEN
731 CALL ztrbs2d( ictxt,
'Rowwise',
' ', uplo,
'Non unit',
734 CALL ztrbr2d( ictxt,
'Rowwise',
' ', uplo,
'Non unit',
735 $ k, k, t, mbv, myrow, ivcol )
737 CALL ztrmm(
'Right', uplo, transt,
'Non unit', nqc, k,
738 $ one, t, mbv, work( ipw ), lw )
740 CALL zgebs2d( ictxt,
'Columnwise',
' ', nqc, k,
743 CALL zgebr2d( ictxt,
'Columnwise',
' ', nqc, k,
744 $ work( ipw ), lw, ivrow, mycol )
751 CALL zgemm(
'No transpose',
'Conjugate transpose', mpc, nqc,
752 $ k, -one, work( ipv ), lv, work( ipw ), lw, one,
770 CALL pb_topget( ictxt,
'Broadcast',
'Columnwise', colbtop )
771 IF( myrow.EQ.ivrow )
THEN
772 CALL zgebs2d( ictxt,
'Columnwise', colbtop, k, nqc,
775 $
CALL ztrbs2d( ictxt,
'Columnwise', colbtop, uplo,
776 $
'Non unit', k, k, t, mbv )
777 CALL zlamov(
'All', k, nqc, v( ioffv ), ldv, work( ipv ),
780 CALL zgebr2d( ictxt,
'Columnwise', colbtop, k, nqc,
781 $ work( ipv ), lv, ivrow, mycol )
783 $
CALL ztrbr2d( ictxt,
'Columnwise', colbtop, uplo,
784 $
'Non unit', k, k, t, mbv, ivrow, mycol )
792 mydist = mod( mycol-ivcol+npcol, npcol )
793 ileft =
max( 0, mydist * nbv - icoffv )
795 jjend = jjv + nqc - 1
796 jjnxt =
min( iceil( jjbeg, nbv ) * nbv, jjend )
799 IF( ( k-ileft ).GT.0 )
THEN
800 CALL zlaset(
'Lower', k-ileft, jjnxt-jjbeg+1, zero,
801 $ one, work( ipv+ileft+(jjbeg-jjv)*lv ),
803 mydist = mydist + npcol
804 ileft = mydist * nbv - icoffv
806 jjnxt =
min( jjnxt+nbv, jjend )
816 CALL infog1l( jv+n-k, nbv, npcol, mycol, descv( csrc_ ),
818 ioff = mod( jv+n-k-1, nbv )
819 kq = numroc( k+ioff, nbv, mycol, ilastcol, npcol )
820 IF( mycol.EQ.ilastcol )
822 mydist = mod( mycol-ilastcol+npcol, npcol )
823 ileft = mydist * nbv - ioff
824 iright =
min( ileft+nbv, k )
825 ileft =
min(
max( 0, ileft ), k )
828 IF( ii.LE.( iiv+k-1 ) )
THEN
829 wide = iright - ileft
830 CALL zlaset(
'All', ileft-ii+iiv, kq, zero, zero,
831 $ work( ipv+ii-iiv+(jj-jjv)*lv ), lv )
832 CALL zlaset(
'Upper', wide, kq, zero, one,
833 $ work( ipv+ileft+(jj-jjv)*lv ), lv )
834 kq =
max( 0, kq - wide )
837 mydist = mydist + npcol
838 ileft = mydist * nbv - ioff
839 iright =
min( ileft + nbv, k )
840 ileft =
min( ileft, k )
850 CALL zgemm(
'No transpose',
'Conjugate transpose', mpc,
851 $ k, nqc, one, c( ioffc ), ldc, work( ipv ),
852 $ lv, zero, work( ipw ), lw )
854 CALL zlaset(
'All', mpc, k, zero, zero, work( ipw ), lw )
857 CALL zgsum2d( ictxt,
'Rowwise',
' ', mpc, k, work( ipw ),
862 IF( mycol.EQ.ivcol )
THEN
863 CALL ztrmm(
'Right', uplo, trans,
'Non unit', mpc, k,
864 $ one, t, mbv, work( ipw ), lw )
865 CALL zgebs2d( ictxt,
'Rowwise',
' ', mpc, k, work( ipw ),
868 CALL zgebr2d( ictxt,
'Rowwise',
' ', mpc, k, work( ipw ),
876 CALL zgemm(
'No transpose',
'No transpose', mpc, nqc, k,
877 $ -one, work( ipw ), lw, work( ipv ), lv, one,