*     d3/d1b/pbstrnv_8f_source.html

      SUBROUTINE pbstrnv( ICONTXT, XDIST, TRANS, N, NB, NZ, X, INCX,

     $                    BETA, Y, INCY, IXROW, IXCOL, IYROW, IYCOL,

     $                    WORK )

*

*  -- PB-BLAS routine (version 2.1) --

*     University of Tennessee, Knoxville, Oak Ridge National Laboratory.

*     April 28, 1996

*

*     Jaeyoung Choi, Oak Ridge National Laboratory

*     Jack Dongarra, University of Tennessee and Oak Ridge National Lab.

*     David Walker,  Oak Ridge National Laboratory

*

*     .. Scalar Arguments ..

      CHARACTER*1        TRANS, XDIST

      INTEGER            ICONTXT, INCX, INCY, IXCOL, IXROW, IYCOL,

     $                   iyrow, n, nb, nz

      REAL               BETA

*     ..

*     .. Array Arguments ..

      REAL               WORK( * ), X( * ), Y( * )

*     ..

*

*  Purpose

*  =======

*

*  PBSTRNV transposes a column vector to row vector, or a row vector to

*  column vector by reallocating data distribution.

*

*     Y := X'

*

*  where X and Y are N vectors.

*

*  NOTE(review): BETA is forwarded together with Y to PBSTR2B1,

*  PBSTR2A1, PBSTRST1 and PBSVECADD, so the update actually performed

*  appears to be  Y := X' + BETA * Y  -- confirm against those routines.

*

*  Parameters

*  ==========

*

*  ICONTXT (input) INTEGER

*          ICONTXT is the BLACS mechanism for partitioning communication

*          space.  A defining property of a context is that a message in

*          a context cannot be sent or received in another context.  The

*          BLACS context includes the definition of a grid, and each

*          process' coordinates in it.

*

*  XDIST   (input) CHARACTER*1

*          XDIST specifies whether X is a column vector or a row vector,

*

*            XDIST = 'C',  X is a column vector (distributed columnwise)

*            XDIST = 'R',  X is a row vector    (distributed rowwise)

*

*  TRANS   (input) CHARACTER*1

*          TRANS specifies whether the transposed format is transpose

*          or conjugate transpose.  If the vectors X and Y are real,

*          the argument is ignored.

*

*             TRANS = 'T',  transpose

*             TRANS = 'C',  conjugate transpose

*

*  N       (input) INTEGER

*          N specifies the (global) number of elements of the vector X

*          and the vector Y.  N >= 0.

*

*  NB      (input) INTEGER

*          NB specifies the block size of vectors X and Y.  NB >= 1.

*

*  NZ      (input) INTEGER

*          NZ is the column offset to specify the column distance from

*          the beginning of the block to the first element of the

*          vector X, and the row offset to the first element of the

*          vector Y if XDIST = 'C'.

*          Otherwise, it is row offset to specify the row distance

*          from the beginning of the block to the first element of the

*          vector X, and the column offset to the first element of the

*          vector Y.  0 <= NZ < NB.

*

*  X       (input) REAL array of dimension  at least

*          ( 1 + (Np-1) * abs(INCX)) in IXCOL if XDIST = 'C', or

*          ( 1 + (Nq-1) * abs(INCX)) in IXROW if XDIST = 'R'.

*          The incremented array X must contain the vector X.

*

*  INCX    (input) INTEGER

*          INCX specifies the increment for the elements of X.

*          INCX <> 0.

*

*  BETA    (input) REAL

*          BETA specifies scalar beta.

*

*  Y       (input/output) REAL array of dimension at least

*          ( 1 + (Nq-1) * abs(INCY)) in IYROW if XDIST = 'C', or

*          ( 1 + (Np-1) * abs(INCY)) in IYCOL if XDIST = 'R'.

*          The incremented array Y must contain the vector Y.

*          Y will not be referenced if beta is zero.

*

*  INCY    (input) INTEGER

*          INCY specifies the increment for the elements of Y.

*          INCY <> 0.

*

*  IXROW   (input) INTEGER

*          IXROW specifies a row of the process template, which holds

*          the first element of the vector X. If X is a row vector and

*          all rows of processes have a copy of X, then set IXROW = -1.

*

*  IXCOL   (input) INTEGER

*          IXCOL specifies  a column of the process template,

*          which holds the first element of the vector X.  If  X is  a

*          column block and all columns of processes have a copy of X,

*          then set IXCOL = -1.

*

*  IYROW   (input) INTEGER

*          IYROW specifies the current row process which holds the

*          first element of the vector Y, which is transposed of X.

*          If X  is a column vector and the transposed  row vector Y is

*          distributed over all rows of processes, set IYROW = -1.

*

*  IYCOL   (input) INTEGER

*          IYCOL specifies  the current column process  which holds

*          the first element of the vector Y, which is transposed of X.

*          If X is a row block and the transposed column vector Y is

*          distributed over all columns of processes, set IYCOL = -1.

*

*  WORK    (workspace) REAL array of dimension Size(WORK).

*          It needs extra working space of x**T or x**H.

*

*  Parameters Details

*  ==================

*

*  Nx      It is a local portion  of N owned by a process, where x is

*          replaced by  either p (=NPROW) or q (=NPCOL).  The value is

*          determined by N, NB, NZ, x, and MI, where NB is a block size,

*          NZ is an offset from the beginning of the block, and MI is a

*          row or column position  in a process template. Nx is equal

*          to  or less than Nx0 = CEIL( N+NZ, NB*x ) * NB.

*

*  Communication Scheme

*  ====================

*

*  The communication scheme of the routine is set to '1-tree', which is

*  fan-out.  (For details, see BLACS user's guide.)

*

*  Memory Requirement of WORK

*  ==========================

*

*  NN   = N + NZ

*  Npb  = CEIL( NN, NB*NPROW )

*  Nqb  = CEIL( NN, NB*NPCOL )

*  LCMP = LCM / NPROW

*  LCMQ = LCM / NPCOL

*

*   (1) XDIST = 'C'

*     (a) IXCOL != -1

*         Size(WORK) = CEIL(Nqb,LCMQ)*NB

*     (b) IXCOL = -1

*         Size(WORK) = CEIL(Nqb,LCMQ)*NB * MIN(LCMQ,CEIL(NN,NB))

*

*   (2) XDIST = 'R'

*     (a) IXROW != -1

*         Size(WORK) = CEIL(Npb,LCMP)*NB

*     (b) IXROW = -1

*         Size(WORK) = CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(NN,NB))

*

*  Notes

*  -----

*  More precise space can be computed as

*

*  CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(NN,NB,0,0,NPROW), NB, 0, 0, LCMP)

*  CEIL(Nqb,LCMQ)*NB => NUMROC( NUMROC(NN,NB,0,0,NPCOL), NB, 0, 0, LCMQ)

*

*  =====================================================================

*

*     .. Parameters ..

      REAL               ONE, ZERO

      PARAMETER          ( ONE = 1.0e+0, zero = 0.0e+0 )

*     ..

*     .. Local Scalars ..

      LOGICAL            COLFORM, ROWFORM

      INTEGER            I, IDEX, IGD, INFO, JDEX, JYCOL, JYROW, JZ, KZ,

     $                   lcm, lcmp, lcmq, mccol, mcrow, mrcol, mrrow,

     $                   mycol, myrow, nn, np, np0, np1, npcol, nprow,

     $                   nq, nq0, nq1

      REAL               TBETA

*     ..

*     .. External Functions ..

      LOGICAL            LSAME

      INTEGER            ILCM, ICEIL, NUMROC

      EXTERNAL           lsame, ilcm, iceil, numroc

*     ..

*     .. External Subroutines ..

      EXTERNAL           blacs_gridinfo, pbstr2a1, pbstr2b1, pbstrget,

     $                   pbstrst1, pbsvecadd, pxerbla, sgebr2d, sgebs2d,

     $                   sgerv2d, sgesd2d

*     ..

*     .. Intrinsic Functions ..

      INTRINSIC          max, min, mod

*     ..

*     .. Executable Statements ..

*

*     Quick return if possible.  This happens before the argument

*     checks, so no parameter is validated when N = 0.

*

      IF( n.EQ.0 ) RETURN

*

      CALL blacs_gridinfo( icontxt, nprow, npcol, myrow, mycol )

*

      colform = lsame( xdist, 'C' )

      rowform = lsame( xdist, 'R' )

*

*     Test the input parameters.  INFO is set to the position of the

*     first offending argument in the argument list.  The value -1 is

*     rejected for IXROW and IYCOL when X is a column vector, and for

*     IXCOL and IYROW when X is a row vector.

*

      info = 0

      IF( ( .NOT.colform ) .AND. ( .NOT.rowform ) ) THEN

         info = 2

      ELSE IF( n   .LT.0                          ) THEN

         info = 4

      ELSE IF( nb  .LT.1                          ) THEN

         info = 5

      ELSE IF( nz  .LT.0 .OR. nz.GE.nb            ) THEN

         info = 6

      ELSE IF( incx.EQ.0                          ) THEN

         info = 8

      ELSE IF( incy.EQ.0                          ) THEN

         info = 11

      ELSE IF( ixrow.LT.-1 .OR. ixrow.GE.nprow .OR.

     $       ( ixrow.EQ.-1 .AND. colform )        ) THEN

         info = 12

      ELSE IF( ixcol.LT.-1 .OR. ixcol.GE.npcol .OR.

     $       ( ixcol.EQ.-1 .AND. rowform )        ) THEN

         info = 13

      ELSE IF( iyrow.LT.-1 .OR. iyrow.GE.nprow .OR.

     $       ( iyrow.EQ.-1 .AND. rowform )        ) THEN

         info = 14

      ELSE IF( iycol.LT.-1 .OR. iycol.GE.npcol .OR.

     $       ( iycol.EQ.-1 .AND. colform )        ) THEN

         info = 15

      END IF

*

*     Error exit: report INFO through PXERBLA and return.  Label 10 is

*     also the target of the GO TO statements in the two branches below.

*

   10 CONTINUE

      IF( info.NE.0 ) THEN

         CALL pxerbla( icontxt, 'PBSTRNV ', info )

         RETURN

      END IF

*

*     Start the operations.

*

*     LCM  : the least common multiple of NPROW and NPCOL

*     LCMP : LCM / NPROW,   LCMQ : LCM / NPCOL

*     IGD  : NPCOL / LCMP (only passed on to PBSTRGET)

*     NN   : N plus the leading offset NZ

*

      lcm  = ilcm( nprow, npcol )

      lcmp = lcm   / nprow

      lcmq = lcm   / npcol

      igd  = npcol / lcmp

      nn   = n + nz

*

*     When x is a column vector

*

      IF( colform ) THEN

*

*       Form  y <== x'  ( x is a column vector )

*

*                                        ||

*                                        ||

*            _____________               ||

*            -----(y)-----      <==     (x)

*                                        ||

*                                        ||

*                                        ||

*

*       These tests are a subset of the ones made above for a column

*       vector, so INFO is still zero here and the GO TO 10 below is

*       not taken once the first set of tests has passed.

*

        IF(      ixrow.LT.0  .OR. ixrow.GE.nprow ) THEN

          info = 12

        ELSE IF( ixcol.LT.-1 .OR. ixcol.GE.npcol ) THEN

          info = 13

        ELSE IF( iyrow.LT.-1 .OR. iyrow.GE.nprow ) THEN

          info = 14

        ELSE IF( iycol.LT.0  .OR. iycol.GE.npcol ) THEN

          info = 15

        END IF

        IF( info.NE.0 ) GO TO 10

*

*       MRROW : row relative position in template from IXROW

*       MRCOL : column relative position in template from IYCOL

*       JYROW : IYROW, or IXROW when IYROW = -1

*

        mrrow = mod( nprow+myrow-ixrow, nprow )

        mrcol = mod( npcol+mycol-iycol, npcol )

        jyrow = iyrow

        IF( iyrow.EQ.-1 ) jyrow = ixrow

*

*       NP, NQ : NUMROC results for NN over the process rows and

*       columns; NZ is subtracted where MRROW = 0 / MRCOL = 0.

*       NQ0 : NUMROC applied twice (over NPCOL, then over LCMQ).

*

        np  = numroc( nn, nb, myrow, ixrow, nprow )

        IF( mrrow.EQ.0 ) np = np - nz

        nq  = numroc( nn, nb, mycol, iycol, npcol )

        IF( mrcol.EQ.0 ) nq = nq - nz

        nq0 = numroc( numroc(nn, nb, 0, 0, npcol), nb, 0, 0, lcmq )

*

*       When process column IXCOL holds the column vector X,

*

        IF( ixcol .GE. 0 ) THEN

*

*         TBETA is BETA on process row JYROW and ZERO elsewhere.

*         KZ is NZ for the first pass of the loop (I = 0) only; it is

*         reset to 0 at the end of every iteration.

*

          tbeta = zero

          IF( myrow.EQ.jyrow ) tbeta = beta

          kz = nz

*

          DO 20 i = 0, min( lcm, iceil(nn,nb) ) - 1

            mcrow = mod( mod(i, nprow) + ixrow, nprow )

            mccol = mod( mod(i, npcol) + iycol, npcol )

            IF( lcmq.EQ.1 )  nq0 = numroc( nn, nb, i, 0, npcol )

            jdex  = (i/npcol) * nb

            IF( mrcol.EQ.0 ) jdex = max(0, jdex-nz)

*

*           A source node copies the blocks to WORK, and send it

*

            IF( myrow.EQ.mcrow .AND. mycol.EQ.ixcol ) THEN

*

*             The source node is a destination node

*

              idex = (i/nprow) * nb

              IF( mrrow.EQ.0 ) idex = max( 0, idex-nz )

              IF( myrow.EQ.jyrow .AND. mycol.EQ.mccol ) THEN

                CALL pbstr2b1( icontxt, trans, np-idex, nb, kz,

     $                          x(idex*incx+1), incx, tbeta,

     $                          y(jdex*incy+1), incy, lcmp, lcmq )

*

*             The source node sends blocks to a destination node

*

              ELSE

                CALL pbstr2b1( icontxt, trans, np-idex, nb, kz,

     $                         x(idex*incx+1), incx, zero, work, 1,

     $                         lcmp, 1 )

                CALL sgesd2d( icontxt, 1, nq0-kz, work, 1,

     $                        jyrow, mccol )

              END IF

*

*           A destination node receives the copied vector.  With

*           LCMQ = 1 and TBETA = 0 it is received straight into Y;

*           otherwise it goes to WORK and PBSTR2A1 moves it into Y.

*

            ELSE IF( myrow.EQ.jyrow .AND. mycol.EQ.mccol ) THEN

              IF( lcmq.EQ.1 .AND. tbeta.EQ.zero ) THEN

                CALL sgerv2d( icontxt, 1, nq0-kz, y, incy,

     $                        mcrow, ixcol )

              ELSE

                CALL sgerv2d( icontxt, 1, nq0-kz, work, 1,

     $                        mcrow, ixcol )

                CALL pbstr2a1( icontxt, nq-jdex, nb, kz, work, 1, tbeta,

     $                         y(jdex*incy+1), incy, lcmq*nb )

              END IF

            END IF

            kz = 0

   20     CONTINUE

*

*         Broadcast the row vector Y in each column of the template

*         (only when IYROW = -1); process row JYROW is the source.

*

          IF( iyrow.EQ.-1 ) THEN

            IF( myrow.EQ.jyrow ) THEN

              CALL sgebs2d( icontxt, 'Col', '1-tree', 1, nq, y, incy )

            ELSE

              CALL sgebr2d( icontxt, 'Col', '1-tree', 1, nq, y, incy,

     $                     jyrow, mycol )

             END IF

          END IF

*

*       When all process columns have a copy of the column vector X

*       (IXCOL = -1),

*

        ELSE

          IF( lcmq.EQ.1 ) nq0 = nq

*

*         Processors, which have diagonal blocks of X, copy them to

*         WORK array in transposed form (or directly into Y when

*         LCMQ = 1 and this process row is a destination of Y).

*

          kz = 0

          IF( mrrow.EQ.0 ) kz = nz

          jz = 0

          IF( mrrow.EQ.0 .AND. mycol.EQ.iycol ) jz = nz

*

          DO 30 i = 0, lcmp - 1

            IF( mrcol.EQ.mod(nprow*i+mrrow, npcol) ) THEN

              idex = max( 0, i*nb-kz )

              IF( lcmq.EQ.1 .AND. (iyrow.EQ.-1.OR.iyrow.EQ.myrow) ) THEN

                 CALL pbstr2b1( icontxt, trans, np-idex, nb, jz,

     $                          x(idex*incx+1), incx, beta, y, incy,

     $                          lcmp, 1 )

              ELSE

                 CALL pbstr2b1( icontxt, trans, np-idex, nb, jz,

     $                          x(idex*incx+1), incx, zero, work, 1,

     $                          lcmp, 1 )

              END IF

            END IF

   30     CONTINUE

*

*         Get diagonal blocks of A for each column of the template

*         (PBSTRGET is only called when LCMQ > 1; MCCOL is set only

*         in that case and is used again for NQ1 below).

*

          mcrow = mod( mod(mrcol, nprow) + ixrow, nprow )

          IF( lcmq.GT.1 ) THEN

            mccol = mod( npcol+mycol-iycol, npcol )

            CALL pbstrget( icontxt, 'Row', 1, nq0, iceil( nn, nb ),

     $                     work, 1, mcrow, mccol, igd, myrow, mycol,

     $                     nprow, npcol )

          END IF

*

*         Broadcast the row vector Y in every column of the template;

*         process row MCROW is the source.  When LCMQ > 1, PBSTRST1 is

*         first called there with WORK, BETA and Y.

*

          IF( iyrow.EQ.-1 ) THEN

            IF( myrow.EQ.mcrow ) THEN

              IF( lcmq.GT.1 ) THEN

                kz = 0

                IF( mycol.EQ.iycol ) kz = nz

                CALL pbstrst1( icontxt, 'Row', nq, nb, kz, work, 1,

     $                         beta, y, incy, lcmp, lcmq, nq0 )

              END IF

              CALL sgebs2d( icontxt, 'Col', '1-tree', 1, nq, y, incy )

            ELSE

              CALL sgebr2d( icontxt, 'Col', '1-tree', 1, nq, y, incy,

     $                      mcrow, mycol )

            END IF

*

*         Send a row block of WORK to the destination row IYROW

*

          ELSE

            IF( lcmq.EQ.1 ) THEN

              IF( myrow.EQ.mcrow ) THEN

                IF( myrow.NE.iyrow )

     $            CALL sgesd2d( icontxt, 1, nq0, work, 1, iyrow, mycol )

              ELSE IF( myrow.EQ.iyrow ) THEN

                IF( beta.EQ.zero ) THEN

                  CALL sgerv2d( icontxt, 1, nq0, y, incy, mcrow, mycol )

                ELSE

                  CALL sgerv2d( icontxt, 1, nq0, work, 1, mcrow, mycol )

                  CALL pbsvecadd( icontxt, 'G', nq0, one, work, 1,

     $                            beta, y, incy )

                END IF

              END IF

*

            ELSE

              nq1 = nq0 * min( lcmq, max( 0, iceil(nn,nb)-mccol ) )

              IF( myrow.EQ.mcrow ) THEN

                IF( myrow.NE.iyrow )

     $            CALL sgesd2d( icontxt, 1, nq1, work, 1, iyrow, mycol )

              ELSE IF( myrow.EQ.iyrow ) THEN

                CALL sgerv2d( icontxt, 1, nq1, work, 1, mcrow, mycol )

              END IF

*

              IF( myrow.EQ.iyrow ) THEN

                kz = 0

                IF( mycol.EQ.iycol ) kz = nz

                CALL pbstrst1( icontxt, 'Row', nq, nb, kz, work, 1,

     $                         beta, y, incy, lcmp, lcmq, nq0 )

              END IF

            END IF

          END IF

        END IF

*

*     When x is a row vector

*

      ELSE

*

*       Form  y <== x'  ( x is a row block )

*

*           ||

*           ||

*           ||               _____________

*          (y)      <==      -----(x)-----

*           ||

*           ||

*           ||

*

*       These tests are a subset of the ones made above for a row

*       vector, so INFO is still zero here and the GO TO 10 below is

*       not taken once the first set of tests has passed.

*

        IF(      ixrow.LT.-1 .OR. ixrow.GE.nprow ) THEN

          info = 12

        ELSE IF( ixcol.LT.0  .OR. ixcol.GE.npcol ) THEN

          info = 13

        ELSE IF( iyrow.LT.0  .OR. iyrow.GE.nprow ) THEN

          info = 14

        ELSE IF( iycol.LT.-1 .OR. iycol.GE.npcol ) THEN

          info = 15

        END IF

        IF( info.NE.0 ) GO TO 10

*

*       MRROW : row relative position in template from IYROW

*       MRCOL : column relative position in template from IXCOL

*       JYCOL : IYCOL, or IXCOL when IYCOL = -1

*

        mrrow = mod( nprow+myrow-iyrow, nprow )

        mrcol = mod( npcol+mycol-ixcol, npcol )

        jycol = iycol

        IF( iycol.EQ.-1 ) jycol = ixcol

*

*       NP, NQ : NUMROC results for NN over the process rows and

*       columns; NZ is subtracted where MRROW = 0 / MRCOL = 0.

*       NP0 : NUMROC applied twice (over NPROW, then over LCMP).

*

        np  = numroc( nn, nb, myrow, iyrow, nprow )

        IF( mrrow.EQ.0 ) np = np - nz

        nq  = numroc( nn, nb, mycol, ixcol, npcol )

        IF( mrcol.EQ.0 ) nq = nq - nz

        np0 = numroc( numroc(nn, nb, 0, 0, nprow), nb, 0, 0, lcmp )

*

*       When process row IXROW holds the row vector X,

*

        IF( ixrow .GE. 0 ) THEN

*

*         TBETA is BETA on process column JYCOL and ZERO elsewhere.

*         KZ is NZ for the first pass of the loop (I = 0) only; it is

*         reset to 0 at the end of every iteration.

*

          tbeta = zero

          IF( mycol.EQ.jycol ) tbeta = beta

          kz = nz

*

          DO 40 i = 0, min( lcm, iceil(nn,nb) ) - 1

            mcrow = mod( mod(i, nprow) + iyrow, nprow )

            mccol = mod( mod(i, npcol) + ixcol, npcol )

            IF( lcmp.EQ.1 ) np0 = numroc( nn, nb, i, 0, nprow )

            jdex  = (i/nprow) * nb

            IF( mrrow.EQ.0 ) jdex = max(0, jdex-nz)

*

*           A source node copies the blocks to WORK, and send it

*

            IF( myrow.EQ.ixrow .AND. mycol.EQ.mccol ) THEN

*

*             The source node is a destination node

*

              idex = (i/npcol) * nb

              IF( mrcol.EQ.0 ) idex = max( 0, idex-nz )

              IF( myrow.EQ.mcrow .AND. mycol.EQ.jycol ) THEN

                CALL pbstr2b1( icontxt, trans, nq-idex, nb, kz,

     $                         x(idex*incx+1), incx, tbeta,

     $                         y(jdex*incy+1), incy, lcmq, lcmp )

*

*             The source node sends blocks to a destination node

*

              ELSE

                CALL pbstr2b1( icontxt, trans, nq-idex, nb, kz,

     $                         x(idex*incx+1), incx, zero, work, 1,

     $                         lcmq, 1 )

                CALL sgesd2d( icontxt, 1, np0-kz, work, 1,

     $                        mcrow, jycol )

              END IF

*

*           A destination node receives the copied blocks.  With

*           LCMP = 1 and TBETA = 0 they are received straight into Y;

*           otherwise they go to WORK and PBSTR2A1 moves them into Y.

*

            ELSE IF( myrow.EQ.mcrow .AND. mycol.EQ.jycol ) THEN

              IF( lcmp.EQ.1 .AND. tbeta.EQ.zero ) THEN

                CALL sgerv2d( icontxt, 1, np0-kz, y, incy,

     $                        ixrow, mccol )

              ELSE

                CALL sgerv2d( icontxt, 1, np0-kz, work, 1,

     $                        ixrow, mccol )

                CALL pbstr2a1( icontxt, np-jdex, nb, kz, work, 1, tbeta,

     $                         y(jdex*incy+1), incy, lcmp*nb )

              END IF

            END IF

            kz = 0

   40     CONTINUE

*

*         Broadcast a column vector Y in each row of template

*         (only when IYCOL = -1); process column JYCOL is the source.

*

          IF( iycol.EQ.-1 ) THEN

            IF( mycol.EQ.jycol ) THEN

              CALL sgebs2d( icontxt, 'Row', '1-tree', 1, np, y, incy )

            ELSE

              CALL sgebr2d( icontxt, 'Row', '1-tree', 1, np, y, incy,

     $                      myrow, jycol )

            END IF

          END IF

*

*       When all process rows have a copy of the row vector X

*       (IXROW = -1),

*

        ELSE

          IF( lcmp.EQ.1 ) np0 = np

*

*         Processors, which have diagonal blocks of A, copy them to

*         WORK array in transposed form (or directly into Y when

*         LCMP = 1 and this process column is a destination of Y).

*

          kz = 0

          IF( mrcol.EQ.0 ) kz = nz

          jz = 0

          IF( mrcol.EQ.0 .AND. myrow.EQ.iyrow ) jz = nz

*

          DO 50 i = 0, lcmq-1

            IF( mrrow.EQ.mod(npcol*i+mrcol, nprow) ) THEN

              idex = max( 0, i*nb-kz )

              IF( lcmp.EQ.1 .AND. (iycol.EQ.-1.OR.iycol.EQ.mycol) ) THEN

                CALL pbstr2b1( icontxt, trans, nq-idex, nb, jz,

     $                          x(idex*incx+1), incx, beta, y, incy,

     $                          lcmq, 1 )

              ELSE

                CALL pbstr2b1( icontxt, trans, nq-idex, nb, jz,

     $                         x(idex*incx+1), incx, zero, work, 1,

     $                         lcmq, 1 )

              END IF

            END IF

   50     CONTINUE

*

*         Get diagonal blocks of A for each row of the template

*         (PBSTRGET is only called when LCMP > 1; MCROW is set only

*         in that case and is used again for NP1 below).

*

          mccol = mod( mod(mrrow, npcol) + ixcol, npcol )

          IF( lcmp.GT.1 ) THEN

            mcrow = mod( nprow+myrow-iyrow, nprow )

            CALL pbstrget( icontxt, 'Col', 1, np0, iceil( nn, nb ),

     $                     work, 1, mcrow, mccol, igd, myrow, mycol,

     $                     nprow, npcol )

          END IF

*

*         Broadcast the column vector Y in every row of the template;

*         process column MCCOL is the source.  When LCMP > 1, PBSTRST1

*         is first called there with WORK, BETA and Y.

*

          IF( iycol.EQ.-1 ) THEN

            IF( mycol.EQ.mccol ) THEN

              IF( lcmp.GT.1 ) THEN

                kz = 0

                IF( myrow.EQ.iyrow ) kz = nz

                CALL pbstrst1( icontxt, 'Col', np, nb, kz, work, 1,

     $                         beta, y, incy, lcmp, lcmq, np0 )

              END IF

              CALL sgebs2d( icontxt, 'Row', '1-tree', 1, np, y, incy )

            ELSE

              CALL sgebr2d( icontxt, 'Row', '1-tree', 1, np, y, incy,

     $                      myrow, mccol )

            END IF

*

*         Send a column block of WORK to the destination column IYCOL

*

          ELSE

            IF( lcmp.EQ.1 ) THEN

              IF( mycol.EQ.mccol ) THEN

                IF( mycol.NE.iycol )

     $            CALL sgesd2d( icontxt, 1, np, work, 1, myrow, iycol )

              ELSE IF( mycol.EQ.iycol ) THEN

                IF( beta.EQ.zero ) THEN

                  CALL sgerv2d( icontxt, 1, np, y, incy, myrow, mccol )

                ELSE

                  CALL sgerv2d( icontxt, 1, np, work, 1, myrow, mccol )

                  CALL pbsvecadd( icontxt, 'G', np, one, work, 1, beta,

     $                            y, incy )

                END IF

              END IF

*

            ELSE

              np1 = np0 * min( lcmp, max( 0, iceil(nn,nb)-mcrow ) )

              IF( mycol.EQ.mccol ) THEN

                IF( mycol.NE.iycol )

     $            CALL sgesd2d( icontxt, 1, np1, work, 1, myrow, iycol )

              ELSE IF( mycol.EQ.iycol ) THEN

                CALL sgerv2d( icontxt, 1, np1, work, 1, myrow, mccol )

              END IF

*

              IF( mycol.EQ.iycol ) THEN

                kz = 0

                IF( myrow.EQ.iyrow ) kz = nz

                CALL pbstrst1( icontxt, 'Col', np, nb, kz, work, 1,

     $                         beta, y, incy, lcmp, lcmq, np0 )

              END IF

            END IF

          END IF

        END IF

      END IF

*

      RETURN

*

*     End of PBSTRNV

*

      END

*

*=======================================================================

*     SUBROUTINE PBSTR2A1

*=======================================================================

*

      SUBROUTINE pbstr2a1( ICONTXT, N, NB, NZ, X, INCX, BETA, Y, INCY,
     $                     INTV )
*
*  -- PB-BLAS routine (version 2.1) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory.
*     April 28, 1996
*
*  Purpose
*  =======
*
*     y <== x
*
*  PBSTR2A1 walks the condensed vector x in consecutive pieces and
*  combines each piece with the matching piece of the scattered
*  vector y, whose pieces start INTV elements apart.  Every piece is
*  handed to PBSVECADD with mode 'G' and the factors ONE (with x) and
*  BETA (with y); presumably that computes y := x + beta*y on the
*  piece -- confirm against PBSVECADD.
*
*  Arguments (as used below)
*  =========
*
*  ICONTXT  passed through unchanged to PBSVECADD.
*  N, NZ    N+NZ and INTV fix the number of pieces,
*           ICEIL( N+NZ, INTV ).  NZ also shortens the first piece and
*           the first stride when there is more than one piece.
*  NB       length of a full piece.
*  X, INCX  condensed vector and its increment.
*  BETA     forwarded to PBSVECADD.
*  Y, INCY  scattered vector and its increment.
*  INTV     distance, in elements, between the starts of two
*           consecutive pieces of y.
*
*     .. Scalar Arguments ..
      INTEGER              ICONTXT, INCX, INCY, INTV, N, NB, NZ
      REAL                 BETA
*     ..
*     .. Array Arguments ..
      REAL                 X( * ), Y( * )
*     ..
*     .. Parameters ..
      REAL                 ONE
      PARAMETER          ( ONE = 1.0E+0 )
*     ..
*     .. Local Scalars ..
*     JX, JY : elements of x and y already stepped over
*     NSKIP  : NZ while the first piece is pending, 0 afterwards
      INTEGER              IPIECE, JX, JY, NPIECE, NSKIP
*     ..
*     .. External Functions ..
      INTEGER              ICEIL
      EXTERNAL             ICEIL
*     ..
*     .. External Subroutines ..
      EXTERNAL             PBSVECADD
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC            MIN
*     ..
*     .. Executable Statements ..
*
      NPIECE = ICEIL( N+NZ, INTV )
      NSKIP = NZ
      JX = 0
      JY = 0
*
*     Every piece except the last one.  The loop body is skipped
*     entirely when NPIECE <= 1, in which case NSKIP keeps the value NZ
*     for the final call.
*
      DO 10 IPIECE = 1, NPIECE - 1
         CALL PBSVECADD( ICONTXT, 'G', NB-NSKIP, ONE, X( JX*INCX+1 ),
     $                   INCX, BETA, Y( JY*INCY+1 ), INCY )
         JX = JX + NB - NSKIP
         JY = JY + INTV - NSKIP
         NSKIP = 0
   10 CONTINUE
*
*     Last piece: whatever is left of N, but at most one block.
*
      CALL PBSVECADD( ICONTXT, 'G', MIN( N-JY, NB-NSKIP ), ONE,
     $                X( JX*INCX+1 ), INCX, BETA,
     $                Y( JY*INCY+1 ), INCY )
*
      RETURN
*
*     End of PBSTR2A1
*
      END

*

*=======================================================================

*     SUBROUTINE PBSTR2B1

*=======================================================================

*

      SUBROUTINE pbstr2b1( ICONTXT, TRANS, N, NB, NZ, X, INCX, BETA, Y,
     $                     INCY, JINX, JINY )
*
*  -- PB-BLAS routine (version 2.1) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory.
*     April 28, 1996
*
*  Purpose
*  =======
*
*     y <== x + beta * y
*
*  PBSTR2B1 combines pieces of the scattered vector x with pieces of
*  the vector y.  Pieces of x start NB*JINX elements apart and pieces
*  of y start NB*JINY elements apart.  When JINX = JINY = 1 the whole
*  vector is processed by a single PBSVECADD call; otherwise PBSVECADD
*  is called once per piece.  TRANS is passed to PBSVECADD as its mode
*  argument.
*
*  Arguments (as used below)
*  =========
*
*  ICONTXT  passed through unchanged to PBSVECADD.
*  TRANS    forwarded to PBSVECADD.
*  N, NZ    N+NZ and NB*JINX fix the number of pieces,
*           ICEIL( N+NZ, NB*JINX ).  NZ also shortens the first piece
*           and the first strides when there is more than one piece.
*  NB       length of a full piece.
*  X, INCX  source vector and its increment.
*  BETA     forwarded to PBSVECADD.
*  Y, INCY  target vector and its increment.
*  JINX     stride between pieces of x, in units of NB.
*  JINY     stride between pieces of y, in units of NB.
*
*     .. Scalar Arguments ..
      CHARACTER*1          TRANS
      INTEGER              ICONTXT, INCX, INCY, JINX, JINY, N, NB, NZ
      REAL                 BETA
*     ..
*     .. Array Arguments ..
      REAL                 X( * ), Y( * )
*     ..
*     .. Parameters ..
      REAL                 ONE
      PARAMETER          ( ONE = 1.0E+0 )
*     ..
*     .. Local Scalars ..
*     JX, JY         : elements of x and y already stepped over
*     KXSTEP, KYSTEP : piece-to-piece distance in x and in y
*     NSKIP          : NZ while the first piece is pending, 0 afterwards
      INTEGER              IPIECE, JX, JY, KXSTEP, KYSTEP, NPIECE,
     $                     NSKIP
*     ..
*     .. External Functions ..
      INTEGER              ICEIL
      EXTERNAL             ICEIL
*     ..
*     .. External Subroutines ..
      EXTERNAL             PBSVECADD
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC            MIN
*     ..
*     .. Executable Statements ..
*
*     Both vectors contiguous in block terms: one call does it all.
*
      IF( JINX.EQ.1 .AND. JINY.EQ.1 ) THEN
         CALL PBSVECADD( ICONTXT, TRANS, N, ONE, X, INCX, BETA,
     $                   Y, INCY )
         RETURN
      END IF
*
      KXSTEP = NB*JINX
      KYSTEP = NB*JINY
      NPIECE = ICEIL( N+NZ, KXSTEP )
      NSKIP = NZ
      JX = 0
      JY = 0
*
*     Every piece except the last one.  The loop body is skipped
*     entirely when NPIECE <= 1, in which case NSKIP keeps the value NZ
*     for the final call.
*
      DO 10 IPIECE = 1, NPIECE - 1
         CALL PBSVECADD( ICONTXT, TRANS, NB-NSKIP, ONE,
     $                   X( JX*INCX+1 ), INCX, BETA,
     $                   Y( JY*INCY+1 ), INCY )
         JX = JX + KXSTEP - NSKIP
         JY = JY + KYSTEP - NSKIP
         NSKIP = 0
   10 CONTINUE
*
*     Last piece: whatever is left of N, but at most one block.
*
      CALL PBSVECADD( ICONTXT, TRANS, MIN( N-JX, NB-NSKIP ), ONE,
     $                X( JX*INCX+1 ), INCX, BETA,
     $                Y( JY*INCY+1 ), INCY )
*
      RETURN
*
*     End of PBSTR2B1
*
      END