*  db/d0b/pzlarf_8f_source.html

      SUBROUTINE pzlarf( SIDE, M, N, V, IV, JV, DESCV, INCV, TAU,
     $                   C, IC, JC, DESCC, WORK )
*
*  -- ScaLAPACK auxiliary routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     May 25, 2001
*
*     .. Scalar Arguments ..
      CHARACTER          SIDE
      INTEGER            IC, INCV, IV, JC, JV, M, N
*     ..
*     .. Array Arguments ..
      INTEGER            DESCC( * ), DESCV( * )
      COMPLEX*16         C( * ), TAU( * ), V( * ), WORK( * )
*     ..
*
*  Purpose
*  =======
*
*  PZLARF applies a complex elementary reflector Q to a complex M-by-N
*  distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), from either the
*  left or the right. Q is represented in the form
*
*        Q = I - tau * v * v'
*
*  where tau is a complex scalar and v is a complex vector.
*
*  If tau = 0, then Q is taken to be the unit matrix.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclically distributed
*  array.  Such a global array has an associated description vector
*  DESCA.  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Because vectors may be viewed as a subclass of matrices, a
*  distributed vector is considered to be a distributed matrix.
*
*  Restrictions
*  ============
*
*  If SIDE = 'Left' and INCV = 1, then the process row having the first
*  entry V(IV,JV) must also have the first row of sub( C ). Moreover,
*  MOD(IV-1,MB_V) must be equal to MOD(IC-1,MB_C). If INCV = M_V, only
*  the last equality must be satisfied.
*
*  If SIDE = 'Right' and INCV = M_V, then the process column having the
*  first entry V(IV,JV) must also have the first column of sub( C ) and
*  MOD(JV-1,NB_V) must be equal to MOD(JC-1,NB_C). If INCV = 1, only
*  the last equality must be satisfied.
*
*  Arguments
*  =========
*
*  SIDE    (global input) CHARACTER
*          = 'L': form  Q * sub( C ),
*          = 'R': form  sub( C ) * Q.
*          The code only tests for 'L'; any other value is handled as
*          'R'.
*
*  M       (global input) INTEGER
*          The number of rows to be operated on, i.e. the number of
*          rows of the distributed submatrix sub( C ). M >= 0.
*
*  N       (global input) INTEGER
*          The number of columns to be operated on, i.e. the number of
*          columns of the distributed submatrix sub( C ). N >= 0.
*
*  V       (local input) COMPLEX*16 pointer into the local memory
*          to an array of dimension (LLD_V,*) containing the local
*          pieces of the distributed vectors V representing the
*          Householder transformation Q,
*             V(IV:IV+M-1,JV) if SIDE = 'L' and INCV = 1,
*             V(IV,JV:JV+M-1) if SIDE = 'L' and INCV = M_V,
*             V(IV:IV+N-1,JV) if SIDE = 'R' and INCV = 1,
*             V(IV,JV:JV+N-1) if SIDE = 'R' and INCV = M_V,
*
*          The vector v in the representation of Q. V is not used if
*          TAU = 0.
*
*  IV      (global input) INTEGER
*          The row index in the global array V indicating the first
*          row of sub( V ).
*
*  JV      (global input) INTEGER
*          The column index in the global array V indicating the
*          first column of sub( V ).
*
*  DESCV   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix V.
*
*  INCV    (global input) INTEGER
*          The global increment for the elements of V. Only two values
*          of INCV are supported in this version, namely 1 and M_V.
*          INCV must not be zero.
*
*  TAU     (local input) COMPLEX*16, array, dimension  LOCc(JV) if
*          INCV = 1, and LOCr(IV) otherwise. This array contains the
*          Householder scalars related to the Householder vectors.
*          TAU is tied to the distributed matrix V.
*
*  C       (local input/local output) COMPLEX*16 pointer into the
*          local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ),
*          containing the local pieces of sub( C ). On exit, sub( C )
*          is overwritten by Q * sub( C ) if SIDE = 'L', or
*          sub( C ) * Q if SIDE = 'R'.
*
*  IC      (global input) INTEGER
*          The row index in the global array C indicating the first
*          row of sub( C ).
*
*  JC      (global input) INTEGER
*          The column index in the global array C indicating the
*          first column of sub( C ).
*
*  DESCC   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix C.
*
*  WORK    (local workspace) COMPLEX*16 array, dimension (LWORK)
*          If INCV = 1,
*            if SIDE = 'L',
*              if IVCOL = ICCOL,
*                LWORK >= NqC0
*              else
*                LWORK >= MpC0 + MAX( 1, NqC0 )
*              end if
*            else if SIDE = 'R',
*              LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC(
*                       N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) )
*            end if
*          else if INCV = M_V,
*            if SIDE = 'L',
*              LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC(
*                       M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) )
*            else if SIDE = 'R',
*              if IVROW = ICROW,
*                LWORK >= MpC0
*              else
*                LWORK >= NqC0 + MAX( 1, MpC0 )
*              end if
*            end if
*          end if
*
*          where LCM is the least common multiple of NPROW and NPCOL and
*          LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW,
*          LCMQ = LCM / NPCOL,
*
*          IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
*          ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
*          ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
*          MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
*          NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
*
*          ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
*          MYROW, MYCOL, NPROW and NPCOL can be determined by calling
*          the subroutine BLACS_GRIDINFO.
*
*  Alignment requirements
*  ======================
*
*  The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
*  must verify some alignment properties, namely the following
*  expressions should be true:
*
*  MB_V = NB_V,
*
*  If INCV = 1,
*    If SIDE = 'Left',
*      ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
*    If SIDE = 'Right',
*      ( MB_V.EQ.NB_V .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
*  else if INCV = M_V,
*    If SIDE = 'Left',
*      ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
*    If SIDE = 'Right',
*      ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
*  end if
*
*  Implementation notes
*  ====================
*
*  The routine performs no argument checking.  It dispatches on
*    - SIDE,
*    - whether sub( C ) fits in a single column block (CCBLCK) or in a
*      single row block (CRBLCK) of C,
*    - whether V is a row vector (INCV = M_V) or a column vector.
*
*  Every branch follows the same three steps: v and tau are made
*  available to the processes that update sub( C ); a local ZGEMV is
*  combined with ZGSUM2D to form w; a ZGERC rank-one update with the
*  scalar -tau is applied to the local part of sub( C ).
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $                   lld_, mb_, m_, nb_, n_, rsrc_
      parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
     $                     ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
     $                     rsrc_ = 7, csrc_ = 8, lld_ = 9 )
      COMPLEX*16         ONE, ZERO
      parameter( one  = ( 1.0d+0, 0.0d+0 ),
     $                     zero = ( 0.0d+0, 0.0d+0 ) )
*     ..
*     .. Local Scalars ..
      LOGICAL            CCBLCK, CRBLCK
      CHARACTER          COLBTOP, ROWBTOP
      INTEGER            ICCOL, ICOFF, ICROW, ICTXT, IIC, IIV, IOFFC,
     $                   ioffv, ipw, iroff, ivcol, ivrow, jjc, jjv, ldc,
     $                   ldv, mycol, myrow, mp, ncc, ncv, npcol, nprow,
     $                   nq, rdest
      COMPLEX*16         TAULOC( 1 )
*     ..
*     .. External Subroutines ..
      EXTERNAL           blacs_gridinfo, infog2l, pb_topget, pbztrnv,
     $                   zcopy, zgebr2d, zgebs2d, zgemv,
     $                   zgerc, zgerv2d, zgesd2d, zgsum2d,
     $                   zlaset
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            NUMROC
      EXTERNAL           lsame, numroc
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          max, min, mod
*     ..
*     .. Executable Statements ..
*
*     Quick return if possible
*
      IF( m.LE.0 .OR. n.LE.0 )
     $   RETURN
*
*     Get grid parameters.
*
      ictxt = descc( ctxt_ )
      CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
*
*     Figure local indexes
*
      CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol, iic, jjc,
     $              icrow, iccol )
      CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol, iiv, jjv,
     $              ivrow, ivcol )
      ncc = numroc( descc( n_ ), descc( nb_ ), mycol, descc( csrc_ ),
     $              npcol )
      ncv = numroc( descv( n_ ), descv( nb_ ), mycol, descv( csrc_ ),
     $              npcol )
      ldc = descc( lld_ )
      ldv = descv( lld_ )
*
*     Clamp the local indexes to the local array extents before turning
*     them into linear offsets into C and V.  On a process with NCC = 0
*     (resp. NCV = 0) the offset IOFFC (resp. IOFFV) is not positive,
*     which is why some calls below are guarded by IOFFC.GT.0 or
*     IOFFV.GT.0.
*
      iic = min( iic, ldc )
      iiv = min( iiv, ldv )
      jjc = min( jjc, ncc )
      jjv = min( jjv, ncv )
      ioffc = iic+(jjc-1)*ldc
      ioffv = iiv+(jjv-1)*ldv
*
*     MP and NQ are first computed for sub( C ) extended by the IROFF
*     leading rows and ICOFF leading columns of its first block; these
*     are then removed on process row ICROW and process column ICCOL,
*     so that C( IOFFC ) is used as an MP-by-NQ local matrix below.
*
      iroff = mod( ic-1, descc( mb_ ) )
      icoff = mod( jc-1, descc( nb_ ) )
      mp = numroc( m+iroff, descc( mb_ ), myrow, icrow, nprow )
      nq = numroc( n+icoff, descc( nb_ ), mycol, iccol, npcol )
      IF( myrow.EQ.icrow )
     $   mp = mp - iroff
      IF( mycol.EQ.iccol )
     $   nq = nq - icoff
*
*     Is sub( C ) only distributed over a process row ?
*
      crblck = ( m.LE.(descc( mb_ )-iroff) )
*
*     Is sub( C ) only distributed over a process column ?
*
      ccblck = ( n.LE.(descc( nb_ )-icoff) )
*
      IF( lsame( side, 'L' ) ) THEN
*
*        Form Q * sub( C ).
*
*        RDEST is the row destination of the column-wise ZGSUM2D calls
*        below: ICROW when sub( C ) lies in one process row, otherwise
*        -1 (in the BLACS a destination of -1 leaves the result on
*        every process of the scope).
*
         IF( crblck ) THEN
            rdest = icrow
         ELSE
            rdest = -1
         END IF
*
         IF( ccblck ) THEN
*
*           sub( C ) is distributed over a process column
*
            IF( descv( m_ ).EQ.incv ) THEN
*
*              Transpose row vector V into WORK( 1:MP ) of process
*              column ICCOL; WORK( IPW ) is scratch space for PBZTRNV
*              and later receives w.
*
               ipw = mp+1
               CALL pbztrnv( ictxt, 'Rowwise', 'Transpose', m,
     $                       descv( nb_ ), iroff, v( ioffv ), ldv, zero,
     $                       work, 1, ivrow, ivcol, icrow, iccol,
     $                       work( ipw ) )
*
*              Perform the local computation within a process column
*
               IF( mycol.EQ.iccol ) THEN
*
*                 Broadcast TAU from process row IVROW down the
*                 process column.
*
                  IF( myrow.EQ.ivrow ) THEN
*
                     CALL zgebs2d( ictxt, 'Columnwise', ' ', 1, 1,
     $                             tau( iiv ), 1 )
                     tauloc( 1 ) = tau( iiv )
*
                  ELSE
*
                     CALL zgebr2d( ictxt, 'Columnwise', ' ', 1, 1,
     $                             tauloc, 1, ivrow, mycol )
*
                  END IF
*
                  IF( tauloc( 1 ).NE.zero ) THEN
*
*                    w := sub( C )' * v
*
                     IF( mp.GT.0 ) THEN
                        CALL zgemv( 'Conjugate transpose', mp, nq, one,
     $                              c( ioffc ), ldc, work, 1, zero,
     $                              work( ipw ), 1 )
                     ELSE
                        CALL zlaset( 'All', nq, 1, zero, zero,
     $                               work( ipw ), max( 1, nq ) )
                     END IF
                     CALL zgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
     $                             work( ipw ), max( 1, nq ), rdest,
     $                             mycol )
*
*                    sub( C ) := sub( C ) - tau * v * w'
*
                     CALL zgerc( mp, nq, -tauloc( 1 ), work, 1,
     $                           work( ipw ), 1, c( ioffc ), ldc )
                  END IF
*
               END IF
*
            ELSE
*
*              V is a column vector
*
               IF( ivcol.EQ.iccol ) THEN
*
*                 Perform the local computation within a process column
*                 (V and TAU are used in place, WORK only holds w)
*
                  IF( mycol.EQ.iccol ) THEN
*
                     tauloc( 1 ) = tau( jjv )
*
                     IF( tauloc( 1 ).NE.zero ) THEN
*
*                       w := sub( C )' * v
*
                        IF( mp.GT.0 ) THEN
                           CALL zgemv( 'Conjugate transpose', mp, nq,
     $                              one, c( ioffc ), ldc, v( ioffv ), 1,
     $                              zero, work, 1 )
                        ELSE
                           CALL zlaset( 'All', nq, 1, zero, zero,
     $                                  work, max( 1, nq ) )
                        END IF
                        CALL zgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
     $                                work, max( 1, nq ), rdest, mycol )
*
*                       sub( C ) := sub( C ) - tau * v * w'
*
                        CALL zgerc( mp, nq, -tauloc( 1 ), v( ioffv ), 1,
     $                              work, 1, c( ioffc ), ldc )
                     END IF
*
                  END IF
*
               ELSE
*
*                 Send V and TAU to the process column ICCOL as one
*                 message of length MP+1: V in WORK( 1:MP ), TAU in
*                 WORK( MP+1 ).
*
                  IF( mycol.EQ.ivcol ) THEN
*
                     ipw = mp+1
                     CALL zcopy( mp, v( ioffv ), 1, work, 1 )
                     work( ipw ) = tau( jjv )
                     CALL zgesd2d( ictxt, ipw, 1, work, ipw, myrow,
     $                             iccol )
*
                  ELSE IF( mycol.EQ.iccol ) THEN
*
*                    TAU is copied out of WORK( IPW ) first because
*                    WORK( IPW ) is then reused to hold w.
*
                     ipw = mp+1
                     CALL zgerv2d( ictxt, ipw, 1, work, ipw, myrow,
     $                             ivcol )
                     tauloc( 1 ) = work( ipw )
*
                     IF( tauloc( 1 ).NE.zero ) THEN
*
*                       w := sub( C )' * v
*
                        IF( mp.GT.0 ) THEN
                           CALL zgemv( 'Conjugate transpose', mp, nq,
     $                                 one, c( ioffc ), ldc, work, 1,
     $                                 zero, work( ipw ), 1 )
                        ELSE
                           CALL zlaset( 'All', nq, 1, zero, zero,
     $                                  work( ipw ), max( 1, nq ) )
                        END IF
                        CALL zgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
     $                                work( ipw ), max( 1, nq ), rdest,
     $                                mycol )
*
*                       sub( C ) := sub( C ) - tau * v * w'
*
                        CALL zgerc( mp, nq, -tauloc( 1 ), work, 1,
     $                              work( ipw ), 1, c( ioffc ), ldc )
                     END IF
*
                  END IF
*
               END IF
*
            END IF
*
         ELSE
*
*           sub( C ) is a proper distributed matrix
*
            IF( descv( m_ ).EQ.incv ) THEN
*
*              Transpose and broadcast row vector V
*
               ipw = mp+1
               CALL pbztrnv( ictxt, 'Rowwise', 'Transpose', m,
     $                       descv( nb_ ), iroff, v( ioffv ), ldv, zero,
     $                       work, 1, ivrow, ivcol, icrow, -1,
     $                       work( ipw ) )
*
*              Broadcast TAU from process row IVROW down every process
*              column, then perform the local computation.
*
               IF( myrow.EQ.ivrow ) THEN
*
                  CALL zgebs2d( ictxt, 'Columnwise', ' ', 1, 1,
     $                          tau( iiv ), 1 )
                  tauloc( 1 ) = tau( iiv )
*
               ELSE
*
                  CALL zgebr2d( ictxt, 'Columnwise', ' ', 1, 1, tauloc,
     $                          1, ivrow, mycol )
*
               END IF
*
               IF( tauloc( 1 ).NE.zero ) THEN
*
*                 w := sub( C )' * v
*
*                 The IOFFC.GT.0 guards keep C( IOFFC ) from being
*                 referenced on a process that owns no column of C
*                 (presumably NQ = 0 there, so nothing is lost).
*
                  IF( mp.GT.0 ) THEN
                     IF( ioffc.GT.0 )
     $                  CALL zgemv( 'Conjugate transpose', mp, nq, one,
     $                              c( ioffc ), ldc, work, 1, zero,
     $                              work( ipw ), 1 )
                  ELSE
                     CALL zlaset( 'All', nq, 1, zero, zero,
     $                            work( ipw ), max( 1, nq ) )
                  END IF
                  CALL zgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
     $                          work( ipw ), max( 1, nq ), rdest,
     $                          mycol )
*
*                 sub( C ) := sub( C ) - tau * v * w'
*
                  IF( ioffc.GT.0 )
     $               CALL zgerc( mp, nq, -tauloc( 1 ), work, 1,
     $                           work( ipw ), 1, c( ioffc ), ldc )
               END IF
*
            ELSE
*
*              Broadcast column vector V together with TAU along the
*              process rows, as one message of length MP+1 (V in
*              WORK( 1:MP ), TAU in WORK( MP+1 )).
*
               CALL pb_topget( ictxt, 'Broadcast', 'Rowwise', rowbtop )
               IF( mycol.EQ.ivcol ) THEN
*
                  ipw = mp+1
                  CALL zcopy( mp, v( ioffv ), 1, work, 1 )
                  work(ipw) = tau( jjv )
                  CALL zgebs2d( ictxt, 'Rowwise', rowbtop, ipw, 1,
     $                          work, ipw )
                  tauloc( 1 ) = tau( jjv )
*
               ELSE
*
                  ipw = mp+1
                  CALL zgebr2d( ictxt, 'Rowwise', rowbtop, ipw, 1, work,
     $                          ipw, myrow, ivcol )
                  tauloc( 1 ) = work( ipw )
*
               END IF
*
               IF( tauloc( 1 ).NE.zero ) THEN
*
*                 w := sub( C )' * v   (WORK( IPW ) is reused for w)
*
                  IF( mp.GT.0 ) THEN
                     IF( ioffc.GT.0 )
     $                  CALL zgemv( 'Conjugate transpose', mp, nq, one,
     $                              c( ioffc ), ldc, work, 1, zero,
     $                              work( ipw ), 1 )
                  ELSE
                     CALL zlaset( 'All', nq, 1, zero, zero,
     $                            work( ipw ), max( 1, nq ) )
                  END IF
                  CALL zgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
     $                          work( ipw ), max( 1, nq ), rdest,
     $                          mycol )
*
*                 sub( C ) := sub( C ) - tau * v * w'
*
                  IF( ioffc.GT.0 )
     $               CALL zgerc( mp, nq, -tauloc( 1 ), work, 1,
     $                           work( ipw ), 1, c( ioffc ), ldc )
               END IF
*
            END IF
*
         END IF
*
      ELSE
*
*        Form sub( C ) * Q.
*
*        RDEST is the row destination of the row-wise ZGSUM2D calls
*        below (the column destination is ICCOL): MYROW when sub( C )
*        lies in one process column, otherwise -1 (result left on
*        every process of the scope).
*
         IF( ccblck ) THEN
            rdest = myrow
         ELSE
            rdest = -1
         END IF
*
         IF( crblck ) THEN
*
*           sub( C ) is distributed over a process row
*
            IF( descv( m_ ).EQ.incv ) THEN
*
*              V is a row vector
*
               IF( ivrow.EQ.icrow ) THEN
*
*                 Perform the local computation within a process row
*                 (V and TAU are used in place, WORK only holds w)
*
                  IF( myrow.EQ.icrow ) THEN
*
                     tauloc( 1 ) = tau( iiv )
*
                     IF( tauloc( 1 ).NE.zero ) THEN
*
*                       w := sub( C ) * v
*
                        IF( nq.GT.0 ) THEN
                           CALL zgemv( 'No transpose', mp, nq, one,
     $                                 c( ioffc ), ldc, v( ioffv ), ldv,
     $                                 zero, work, 1 )
                        ELSE
                           CALL zlaset( 'All', mp, 1, zero, zero,
     $                                  work, max( 1, mp ) )
                        END IF
                        CALL zgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
     $                                work, max( 1, mp ), rdest, iccol )
*
*                       sub( C ) := sub( C ) - tau * w * v'
*
                        IF( ioffv.GT.0 .AND. ioffc.GT.0 )
     $                     CALL zgerc( mp, nq, -tauloc( 1 ), work, 1,
     $                                 v( ioffv ), ldv, c( ioffc ),
     $                                 ldc )
                     END IF
*
                  END IF
*
               ELSE
*
*                 Send V and TAU to the process row ICROW as one
*                 message of length NQ+1: V in WORK( 1:NQ ), TAU in
*                 WORK( NQ+1 ).
*
                  IF( myrow.EQ.ivrow ) THEN
*
                     ipw = nq+1
                     CALL zcopy( nq, v( ioffv ), ldv, work, 1 )
                     work(ipw) = tau( iiv )
                     CALL zgesd2d( ictxt, ipw, 1, work, ipw, icrow,
     $                             mycol )
*
                  ELSE IF( myrow.EQ.icrow ) THEN
*
*                    TAU is copied out of WORK( IPW ) first because
*                    WORK( IPW ) is then reused to hold w.
*
                     ipw = nq+1
                     CALL zgerv2d( ictxt, ipw, 1, work, ipw, ivrow,
     $                             mycol )
                     tauloc( 1 ) = work( ipw )
*
                     IF( tauloc( 1 ).NE.zero ) THEN
*
*                       w := sub( C ) * v
*
                        IF( nq.GT.0 ) THEN
                           CALL zgemv( 'No transpose', mp, nq, one,
     $                                 c( ioffc ), ldc, work, 1, zero,
     $                                 work( ipw ), 1 )
                        ELSE
                           CALL zlaset( 'All', mp, 1, zero, zero,
     $                                  work( ipw ), max( 1, mp ) )
                        END IF
                        CALL zgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
     $                                work( ipw ), max( 1, mp ), rdest,
     $                                iccol )
*
*                       sub( C ) := sub( C ) - tau * w * v'
*
                        CALL zgerc( mp, nq, -tauloc( 1 ), work( ipw ),
     $                              1, work, 1, c( ioffc ), ldc )
                     END IF
*
                  END IF
*
               END IF
*
            ELSE
*
*              Transpose column vector V into WORK( 1:NQ ) of process
*              row ICROW; WORK( IPW ) is scratch space for PBZTRNV and
*              later receives w.
*
               ipw = nq+1
               CALL pbztrnv( ictxt, 'Columnwise', 'Transpose', n,
     $                       descv( mb_ ), icoff, v( ioffv ), 1, zero,
     $                       work, 1, ivrow, ivcol, icrow, iccol,
     $                       work( ipw ) )
*
*              Perform the local computation within a process row
*
               IF( myrow.EQ.icrow ) THEN
*
*                 Broadcast TAU from process column IVCOL along the
*                 process row.
*
                  IF( mycol.EQ.ivcol ) THEN
*
                     CALL zgebs2d( ictxt, 'Rowwise', ' ', 1, 1,
     $                             tau( jjv ), 1 )
                     tauloc( 1 ) = tau( jjv )
*
                  ELSE
*
                     CALL zgebr2d( ictxt, 'Rowwise', ' ', 1, 1, tauloc,
     $                             1, myrow, ivcol )
*
                  END IF
*
                  IF( tauloc( 1 ).NE.zero ) THEN
*
*                    w := sub( C ) * v
*
                     IF( nq.GT.0 ) THEN
                        CALL zgemv( 'No transpose', mp, nq, one,
     $                              c( ioffc ), ldc, work, 1, zero,
     $                              work( ipw ), 1 )
                     ELSE
                        CALL zlaset( 'All', mp, 1, zero, zero,
     $                               work( ipw ), max( 1, mp ) )
                     END IF
                     CALL zgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
     $                             work( ipw ), max( 1, mp ), rdest,
     $                             iccol )
*
*                    sub( C ) := sub( C ) - tau * w * v'
*
                     CALL zgerc( mp, nq, -tauloc( 1 ), work( ipw ), 1,
     $                           work, 1, c( ioffc ), ldc )
                  END IF
*
               END IF
*
            END IF
*
         ELSE
*
*           sub( C ) is a proper distributed matrix
*
            IF( descv( m_ ).EQ.incv ) THEN
*
*              Broadcast row vector V together with TAU down the
*              process columns, as one message of length NQ+1 (V in
*              WORK( 1:NQ ), TAU in WORK( NQ+1 )).
*
               CALL pb_topget( ictxt, 'Broadcast', 'Columnwise',
     $                       colbtop )
               IF( myrow.EQ.ivrow ) THEN
*
*                 The copy is skipped when IOFFV is not positive, so
*                 that V( IOFFV ) is never referenced in that case.
*
                  ipw = nq+1
                  IF( ioffv.GT.0 )
     $               CALL zcopy( nq, v( ioffv ), ldv, work, 1 )
                  work(ipw) = tau( iiv )
                  CALL zgebs2d( ictxt, 'Columnwise', colbtop, ipw, 1,
     $                          work, ipw )
                  tauloc( 1 ) = tau( iiv )
*
               ELSE
*
                  ipw = nq+1
                  CALL zgebr2d( ictxt, 'Columnwise', colbtop, ipw, 1,
     $                          work, ipw, ivrow, mycol )
                  tauloc( 1 ) = work( ipw )
*
               END IF
*
               IF( tauloc( 1 ).NE.zero ) THEN
*
*                 w := sub( C ) * v   (WORK( IPW ) is reused for w)
*
                  IF( nq.GT.0 ) THEN
                     CALL zgemv( 'No Transpose', mp, nq, one,
     $                           c( ioffc ), ldc, work, 1, zero,
     $                           work( ipw ), 1 )
                  ELSE
                     CALL zlaset( 'All', mp, 1, zero, zero,
     $                            work( ipw ), max( 1, mp ) )
                  END IF
                  CALL zgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
     $                          work( ipw ), max( 1, mp ), rdest,
     $                          iccol )
*
*                 sub( C ) := sub( C ) - tau * w * v'
*
                  IF( ioffc.GT.0 )
     $               CALL zgerc( mp, nq, -tauloc( 1 ), work( ipw ), 1,
     $                           work, 1, c( ioffc ), ldc )
               END IF
*
            ELSE
*
*              Transpose and broadcast column vector V
*
               ipw = nq+1
               CALL pbztrnv( ictxt, 'Columnwise', 'Transpose', n,
     $                       descv( mb_ ), icoff, v( ioffv ), 1, zero,
     $                       work, 1, ivrow, ivcol, -1, iccol,
     $                       work( ipw ) )
*
*              Broadcast TAU from process column IVCOL along every
*              process row, then perform the local computation.
*
               IF( mycol.EQ.ivcol ) THEN
*
                  CALL zgebs2d( ictxt, 'Rowwise', ' ', 1, 1, tau( jjv ),
     $                          1 )
                  tauloc( 1 ) = tau( jjv )
*
               ELSE
*
                  CALL zgebr2d( ictxt, 'Rowwise', ' ', 1, 1, tauloc, 1,
     $                          myrow, ivcol )
*
               END IF
*
               IF( tauloc( 1 ).NE.zero ) THEN
*
*                 w := sub( C ) * v
*
                  IF( nq.GT.0 ) THEN
                     CALL zgemv( 'No transpose', mp, nq, one,
     $                           c( ioffc ), ldc, work, 1, zero,
     $                           work( ipw ), 1 )
                  ELSE
                     CALL zlaset( 'All', mp, 1, zero, zero, work( ipw ),
     $                            max( 1, mp ) )
                  END IF
                  CALL zgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
     $                          work( ipw ), max( 1, mp ), rdest,
     $                          iccol )
*
*                 sub( C ) := sub( C ) - tau * w * v'
*
                  CALL zgerc( mp, nq, -tauloc( 1 ), work( ipw ), 1,
     $                        work, 1, c( ioffc ), ldc )
               END IF
*
            END IF
*
         END IF
*
      END IF
*
      RETURN
*
*     End of PZLARF
*
      END

*  Cross-reference index left over from the HTML source listing
*  (not part of the Fortran source):
*
*  infog2l
*  subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
*  Definition infog2l.f:3
*
*  pbztrnv
*  subroutine pbztrnv(icontxt, xdist, trans, n, nb, nz, x, incx, beta, y, incy, ixrow, ixcol, iyrow, iycol, work)
*  Definition pbztrnv.f:4
*
*  max
*  #define max(A, B)
*  Definition pcgemr.c:180
*
*  min
*  #define min(A, B)
*  Definition pcgemr.c:181
*
*  pzlarf
*  subroutine pzlarf(side, m, n, v, iv, jv, descv, incv, tau, c, ic, jc, descc, work)
*  Definition pzlarf.f:3