ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pbdtrnv.f
Go to the documentation of this file.
1  SUBROUTINE pbdtrnv( ICONTXT, XDIST, TRANS, N, NB, NZ, X, INCX,
2  $ BETA, Y, INCY, IXROW, IXCOL, IYROW, IYCOL,
3  $ WORK )
4 *
5 * -- PB-BLAS routine (version 2.1) --
6 * University of Tennessee, Knoxville, Oak Ridge National Laboratory.
7 * April 28, 1996
8 *
9 * Jaeyoung Choi, Oak Ridge National Laboratory
10 * Jack Dongarra, University of Tennessee and Oak Ridge National Lab.
11 * David Walker, Oak Ridge National Laboratory
12 *
13 * .. Scalar Arguments ..
14  CHARACTER*1 TRANS, XDIST
15  INTEGER ICONTXT, INCX, INCY, IXCOL, IXROW, IYCOL,
16  $ iyrow, n, nb, nz
17  DOUBLE PRECISION BETA
18 * ..
19 * .. Array Arguments ..
20  DOUBLE PRECISION WORK( * ), X( * ), Y( * )
21 * ..
22 *
23 * Purpose
24 * =======
25 *
26 * PBDTRNV transposes a column vector to row vector, or a row vector to
27 * column vector by reallocating data distribution.
28 *
29 * Y := X'
30 *
31 * where X and Y are N vectors.
32 *
33 * Parameters
34 * ==========
35 *
36 * ICONTXT (input) INTEGER
37 * ICONTXT is the BLACS mechanism for partitioning communication
38 * space. A defining property of a context is that a message in
39 * a context cannot be sent or received in another context. The
40 * BLACS context includes the definition of a grid, and each
41 * process' coordinates in it.
42 *
43 * XDIST (input) CHARACTER*1
44 * XDIST specifies whether X is a column vector or a row vector,
45 *
46 * XDIST = 'C', X is a column vector (distributed columnwise)
47 * XDIST = 'R', X is a row vector (distributed rowwise)
48 *
49 * TRANS (input) CHARACTER*1
50 * TRANS specifies whether the transposed format is transpose
51 * or conjugate transpose. If the vectors X and Y are real,
52 * the argument is ignored.
53 *
54 * TRANS = 'T', transpose
55 * TRANS = 'C', conjugate transpose
56 *
57 * N (input) INTEGER
58 * N specifies the (global) number of the vector X and the
59 * vector Y. N >= 0.
60 *
61 * NB (input) INTEGER
62 * NB specifies the block size of vectors X and Y. NB >= 0.
63 *
64 * NZ (input) INTEGER
65 * NZ is the column offset to specify the column distance from
66 * the beginning of the block to the first element of the
67 * vector X, and the row offset to the first element of the
68 * vector Y if XDIST = 'C'.
69 * Otherwise, it is row offset to specify the row distance
70 * from the beginning of the block to the first element of the
71 * vector X, and the column offset to the first element of the
72 * vector Y. 0 < NZ <= NB.
73 *
74 * X (input) DOUBLE PRECISION array of dimension at least
75 * ( 1 + (Np-1) * abs(INCX)) in IXCOL if XDIST = 'C', or
76 * ( 1 + (Nq-1) * abs(INCX)) in IXROW if XDIST = 'R'.
77 * The incremented array X must contain the vector X.
78 *
79 * INCX (input) INTEGER
80 * INCX specifies the increment for the elements of X.
81 * INCX <> 0.
82 *
83 * BETA (input) DOUBLE PRECISION
84 * BETA specifies scaler beta.
85 *
86 * Y (input/output) DOUBLE PRECISION array of dimension at least
87 * ( 1 + (Nq-1) * abs(INCY)) in IYROW if XDIST = 'C', or
88 * ( 1 + (Np-1) * abs(INCY)) in IYCOL if XDIST = 'R', or
89 * The incremented array Y must contain the vector Y.
90 * Y will not be referenced if beta is zero.
91 *
92 * INCY (input) INTEGER
93 * INCY specifies the increment for the elements of Y.
94 * INCY <> 0.
95 *
96 * IXROW (input) INTEGER
97 * IXROW specifies a row of the process template, which holds
98 * the first element of the vector X. If X is a row vector and
99 * all rows of processes have a copy of X, then set IXROW = -1.
100 *
101 * IXCOL (input) INTEGER
102 * IXCOL specifies a column of the process template,
103 * which holds the first element of the vector X. If X is a
104 * column block and all columns of processes have a copy of X,
105 * then set IXCOL = -1.
106 *
107 * IYROW (input) INTEGER
108 * IYROW specifies the current row process which holds the
109 * first element of the vector Y, which is transposed of X.
110 * If X is a column vector and the transposed row vector Y is
111 * distributed all rows of processes, set IYROW = -1.
112 *
113 * IYCOL (input) INTEGER
114 * IYCOL specifies the current column process which holds
115 * the first element of the vector Y, which is transposed of Y.
116 * If X is a row block and the transposed column vector Y is
117 * distributed all columns of processes, set IYCOL = -1.
118 *
119 * WORK (workspace) DOUBLE PRECISION array of dimension Size(WORK).
120 * It needs extra working space of x**T or x**H.
121 *
122 * Parameters Details
123 * ==================
124 *
125 * Nx It is a local portion of N owned by a process, where x is
126 * replaced by either p (=NPROW) or q (=NPCOL)). The value is
127 * determined by N, NB, NZ, x, and MI, where NB is a block size,
128 * NZ is a offset from the beginning of the block, and MI is a
129 * row or column position in a process template. Nx is equal
130 * to or less than Nx0 = CEIL( N+NZ, NB*x ) * NB.
131 *
132 * Communication Scheme
133 * ====================
134 *
135 * The communication scheme of the routine is set to '1-tree', which is
136 * fan-out. (For details, see BLACS user's guide.)
137 *
138 * Memory Requirement of WORK
139 * ==========================
140 *
141 * NN = N + NZ
142 * Npb = CEIL( NN, NB*NPROW )
143 * Nqb = CEIL( NN, NB*NPCOL )
144 * LCMP = LCM / NPROW
145 * LCMQ = LCM / NPCOL
146 *
147 * (1) XDIST = 'C'
148 * (a) IXCOL != -1
149 * Size(WORK) = CEIL(Nqb,LCMQ)*NB
150 * (b) IXCOL = -1
151 * Size(WORK) = CEIL(Nqb,LCMQ)*NB * MIN(LCMQ,CEIL(NN,NB))
152 *
153 * (2) XDIST = 'R'
154 * (a) IXROW != -1
155 * Size(WORK) = CEIL(Npb,LCMP)*NB
156 * (b) IXROW = -1
157 * Size(WORK) = CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(NN,NB))
158 *
159 * Notes
160 * -----
161 * More precise space can be computed as
162 *
163 * CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(NN,NB,0,0,NPROW), NB, 0, 0, LCMP)
164 * CEIL(Nqb,LCMQ)*NB => NUMROC( NUMROC(NN,NB,0,0,NPCOL), NB, 0, 0, LCMQ)
165 *
166 * =====================================================================
167 *
168 * .. Parameters ..
169  DOUBLE PRECISION ONE, ZERO
170  PARAMETER ( ONE = 1.0d+0, zero = 0.0d+0 )
171 * ..
172 * .. Local Scalars ..
173  LOGICAL COLFORM, ROWFORM
174  INTEGER I, IDEX, IGD, INFO, JDEX, JYCOL, JYROW, JZ, KZ,
175  $ lcm, lcmp, lcmq, mccol, mcrow, mrcol, mrrow,
176  $ mycol, myrow, nn, np, np0, np1, npcol, nprow,
177  $ nq, nq0, nq1
178  DOUBLE PRECISION TBETA
179 * ..
180 * .. External Functions ..
181  LOGICAL LSAME
182  INTEGER ILCM, ICEIL, NUMROC
183  EXTERNAL lsame, ilcm, iceil, numroc
184 * ..
185 * .. External Subroutines ..
186  EXTERNAL blacs_gridinfo, dgebr2d, dgebs2d, dgerv2d,
187  $ dgesd2d, pbdtr2a1, pbdtr2b1, pbdtrget,
189 * ..
190 * .. Intrinsic Functions ..
191  INTRINSIC max, min, mod
192 * ..
193 * .. Executable Statements ..
194 *
195 * Quick return if possible.
196 *
197  IF( n.EQ.0 ) RETURN
198 *
199  CALL blacs_gridinfo( icontxt, nprow, npcol, myrow, mycol )
200 *
201  colform = lsame( xdist, 'C' )
202  rowform = lsame( xdist, 'R' )
203 *
204 * Test the input parameters.
205 *
206  info = 0
207  IF( ( .NOT.colform ) .AND. ( .NOT.rowform ) ) THEN
208  info = 2
209  ELSE IF( n .LT.0 ) THEN
210  info = 4
211  ELSE IF( nb .LT.1 ) THEN
212  info = 5
213  ELSE IF( nz .LT.0 .OR. nz.GE.nb ) THEN
214  info = 6
215  ELSE IF( incx.EQ.0 ) THEN
216  info = 8
217  ELSE IF( incy.EQ.0 ) THEN
218  info = 11
219  ELSE IF( ixrow.LT.-1 .OR. ixrow.GE.nprow .OR.
220  $ ( ixrow.EQ.-1 .AND. colform ) ) THEN
221  info = 12
222  ELSE IF( ixcol.LT.-1 .OR. ixcol.GE.npcol .OR.
223  $ ( ixcol.EQ.-1 .AND. rowform ) ) THEN
224  info = 13
225  ELSE IF( iyrow.LT.-1 .OR. iyrow.GE.nprow .OR.
226  $ ( iyrow.EQ.-1 .AND. rowform ) ) THEN
227  info = 14
228  ELSE IF( iycol.LT.-1 .OR. iycol.GE.npcol .OR.
229  $ ( iycol.EQ.-1 .AND. colform ) ) THEN
230  info = 15
231  END IF
232 *
233  10 CONTINUE
234  IF( info.NE.0 ) THEN
235  CALL pxerbla( icontxt, 'PBDTRNV ', info )
236  RETURN
237  END IF
238 *
239 * Start the operations.
240 *
241 * LCM : the least common multiple of NPROW and NPCOL
242 *
243  lcm = ilcm( nprow, npcol )
244  lcmp = lcm / nprow
245  lcmq = lcm / npcol
246  igd = npcol / lcmp
247  nn = n + nz
248 *
249 * When x is a column vector
250 *
251  IF( colform ) THEN
252 *
253 * Form y <== x' ( x is a column vector )
254 *
255 * ||
256 * ||
257 * _____________ ||
258 * -----(y)----- <== (x)
259 * ||
260 * ||
261 * ||
262 *
263  IF( ixrow.LT.0 .OR. ixrow.GE.nprow ) THEN
264  info = 12
265  ELSE IF( ixcol.LT.-1 .OR. ixcol.GE.npcol ) THEN
266  info = 13
267  ELSE IF( iyrow.LT.-1 .OR. iyrow.GE.nprow ) THEN
268  info = 14
269  ELSE IF( iycol.LT.0 .OR. iycol.GE.npcol ) THEN
270  info = 15
271  END IF
272  IF( info.NE.0 ) GO TO 10
273 *
274 * MRROW : row relative position in template from IXROW
275 * MRCOL : column relative position in template from IYCOL
276 *
277  mrrow = mod( nprow+myrow-ixrow, nprow )
278  mrcol = mod( npcol+mycol-iycol, npcol )
279  jyrow = iyrow
280  IF( iyrow.EQ.-1 ) jyrow = ixrow
281 *
282  np = numroc( nn, nb, myrow, ixrow, nprow )
283  IF( mrrow.EQ.0 ) np = np - nz
284  nq = numroc( nn, nb, mycol, iycol, npcol )
285  IF( mrcol.EQ.0 ) nq = nq - nz
286  nq0 = numroc( numroc(nn, nb, 0, 0, npcol), nb, 0, 0, lcmq )
287 *
288 * When a column process of IXCOL has a column block A,
289 *
290  IF( ixcol .GE. 0 ) THEN
291  tbeta = zero
292  IF( myrow.EQ.jyrow ) tbeta = beta
293  kz = nz
294 *
295  DO 20 i = 0, min( lcm, iceil(nn,nb) ) - 1
296  mcrow = mod( mod(i, nprow) + ixrow, nprow )
297  mccol = mod( mod(i, npcol) + iycol, npcol )
298  IF( lcmq.EQ.1 ) nq0 = numroc( nn, nb, i, 0, npcol )
299  jdex = (i/npcol) * nb
300  IF( mrcol.EQ.0 ) jdex = max(0, jdex-nz)
301 *
302 * A source node copies the blocks to WORK, and send it
303 *
304  IF( myrow.EQ.mcrow .AND. mycol.EQ.ixcol ) THEN
305 *
306 * The source node is a destination node
307 *
308  idex = (i/nprow) * nb
309  IF( mrrow.EQ.0 ) idex = max( 0, idex-nz )
310  IF( myrow.EQ.jyrow .AND. mycol.EQ.mccol ) THEN
311  CALL pbdtr2b1( icontxt, trans, np-idex, nb, kz,
312  $ x(idex*incx+1), incx, tbeta,
313  $ y(jdex*incy+1), incy, lcmp, lcmq )
314 *
315 * The source node sends blocks to a destination node
316 *
317  ELSE
318  CALL pbdtr2b1( icontxt, trans, np-idex, nb, kz,
319  $ x(idex*incx+1), incx, zero, work, 1,
320  $ lcmp, 1 )
321  CALL dgesd2d( icontxt, 1, nq0-kz, work, 1,
322  $ jyrow, mccol )
323  END IF
324 *
325 * A destination node receives the copied vector
326 *
327  ELSE IF( myrow.EQ.jyrow .AND. mycol.EQ.mccol ) THEN
328  IF( lcmq.EQ.1 .AND. tbeta.EQ.zero ) THEN
329  CALL dgerv2d( icontxt, 1, nq0-kz, y, incy,
330  $ mcrow, ixcol )
331  ELSE
332  CALL dgerv2d( icontxt, 1, nq0-kz, work, 1,
333  $ mcrow, ixcol )
334  CALL pbdtr2a1( icontxt, nq-jdex, nb, kz, work, 1, tbeta,
335  $ y(jdex*incy+1), incy, lcmq*nb )
336  END IF
337  END IF
338  kz = 0
339  20 CONTINUE
340 *
341 * Broadcast a row block of WORK in each column of template
342 *
343  IF( iyrow.EQ.-1 ) THEN
344  IF( myrow.EQ.jyrow ) THEN
345  CALL dgebs2d( icontxt, 'Col', '1-tree', 1, nq, y, incy )
346  ELSE
347  CALL dgebr2d( icontxt, 'Col', '1-tree', 1, nq, y, incy,
348  $ jyrow, mycol )
349  END IF
350  END IF
351 *
352 * When all column procesors have a copy of the column block A,
353 *
354  ELSE
355  IF( lcmq.EQ.1 ) nq0 = nq
356 *
357 * Processors, which have diagonal blocks of X, copy them to
358 * WORK array in transposed form
359 *
360  kz = 0
361  IF( mrrow.EQ.0 ) kz = nz
362  jz = 0
363  IF( mrrow.EQ.0 .AND. mycol.EQ.iycol ) jz = nz
364 *
365  DO 30 i = 0, lcmp - 1
366  IF( mrcol.EQ.mod(nprow*i+mrrow, npcol) ) THEN
367  idex = max( 0, i*nb-kz )
368  IF( lcmq.EQ.1 .AND. (iyrow.EQ.-1.OR.iyrow.EQ.myrow) ) THEN
369  CALL pbdtr2b1( icontxt, trans, np-idex, nb, jz,
370  $ x(idex*incx+1), incx, beta, y, incy,
371  $ lcmp, 1 )
372  ELSE
373  CALL pbdtr2b1( icontxt, trans, np-idex, nb, jz,
374  $ x(idex*incx+1), incx, zero, work, 1,
375  $ lcmp, 1 )
376  END IF
377  END IF
378  30 CONTINUE
379 *
380 * Get diagonal blocks of A for each column of the template
381 *
382  mcrow = mod( mod(mrcol, nprow) + ixrow, nprow )
383  IF( lcmq.GT.1 ) THEN
384  mccol = mod( npcol+mycol-iycol, npcol )
385  CALL pbdtrget( icontxt, 'Row', 1, nq0, iceil( nn, nb ),
386  $ work, 1, mcrow, mccol, igd, myrow, mycol,
387  $ nprow, npcol )
388  END IF
389 *
390 * Broadcast a row block of WORK in every row of template
391 *
392  IF( iyrow.EQ.-1 ) THEN
393  IF( myrow.EQ.mcrow ) THEN
394  IF( lcmq.GT.1 ) THEN
395  kz = 0
396  IF( mycol.EQ.iycol ) kz = nz
397  CALL pbdtrst1( icontxt, 'Row', nq, nb, kz, work, 1,
398  $ beta, y, incy, lcmp, lcmq, nq0 )
399  END IF
400  CALL dgebs2d( icontxt, 'Col', '1-tree', 1, nq, y, incy )
401  ELSE
402  CALL dgebr2d( icontxt, 'Col', '1-tree', 1, nq, y, incy,
403  $ mcrow, mycol )
404  END IF
405 *
406 * Send a row block of WORK to the destination row
407 *
408  ELSE
409  IF( lcmq.EQ.1 ) THEN
410  IF( myrow.EQ.mcrow ) THEN
411  IF( myrow.NE.iyrow )
412  $ CALL dgesd2d( icontxt, 1, nq0, work, 1, iyrow, mycol )
413  ELSE IF( myrow.EQ.iyrow ) THEN
414  IF( beta.EQ.zero ) THEN
415  CALL dgerv2d( icontxt, 1, nq0, y, incy, mcrow, mycol )
416  ELSE
417  CALL dgerv2d( icontxt, 1, nq0, work, 1, mcrow, mycol )
418  CALL pbdvecadd( icontxt, 'G', nq0, one, work, 1,
419  $ beta, y, incy )
420  END IF
421  END IF
422 *
423  ELSE
424  nq1 = nq0 * min( lcmq, max( 0, iceil(nn,nb)-mccol ) )
425  IF( myrow.EQ.mcrow ) THEN
426  IF( myrow.NE.iyrow )
427  $ CALL dgesd2d( icontxt, 1, nq1, work, 1, iyrow, mycol )
428  ELSE IF( myrow.EQ.iyrow ) THEN
429  CALL dgerv2d( icontxt, 1, nq1, work, 1, mcrow, mycol )
430  END IF
431 *
432  IF( myrow.EQ.iyrow ) THEN
433  kz = 0
434  IF( mycol.EQ.iycol ) kz = nz
435  CALL pbdtrst1( icontxt, 'Row', nq, nb, kz, work, 1,
436  $ beta, y, incy, lcmp, lcmq, nq0 )
437  END IF
438  END IF
439  END IF
440  END IF
441 *
442 * When x is a row vector
443 *
444  ELSE
445 *
446 * Form y <== x' ( x is a row block )
447 *
448 * ||
449 * ||
450 * || _____________
451 * (y) <== -----(x)-----
452 * ||
453 * ||
454 * ||
455 *
456  IF( ixrow.LT.-1 .OR. ixrow.GE.nprow ) THEN
457  info = 12
458  ELSE IF( ixcol.LT.0 .OR. ixcol.GE.npcol ) THEN
459  info = 13
460  ELSE IF( iyrow.LT.0 .OR. iyrow.GE.nprow ) THEN
461  info = 14
462  ELSE IF( iycol.LT.-1 .OR. iycol.GE.npcol ) THEN
463  info = 15
464  END IF
465  IF( info.NE.0 ) GO TO 10
466 *
467 * MRROW : row relative position in template from IYROW
468 * MRCOL : column relative position in template from IXCOL
469 *
470  mrrow = mod( nprow+myrow-iyrow, nprow )
471  mrcol = mod( npcol+mycol-ixcol, npcol )
472  jycol = iycol
473  IF( iycol.EQ.-1 ) jycol = ixcol
474 *
475  np = numroc( nn, nb, myrow, iyrow, nprow )
476  IF( mrrow.EQ.0 ) np = np - nz
477  nq = numroc( nn, nb, mycol, ixcol, npcol )
478  IF( mrcol.EQ.0 ) nq = nq - nz
479  np0 = numroc( numroc(nn, nb, 0, 0, nprow), nb, 0, 0, lcmp )
480 *
481 * When a row process of IXROW has a row block A,
482 *
483  IF( ixrow .GE. 0 ) THEN
484  tbeta = zero
485  IF( mycol.EQ.jycol ) tbeta = beta
486  kz = nz
487 *
488  DO 40 i = 0, min( lcm, iceil(nn,nb) ) - 1
489  mcrow = mod( mod(i, nprow) + iyrow, nprow )
490  mccol = mod( mod(i, npcol) + ixcol, npcol )
491  IF( lcmp.EQ.1 ) np0 = numroc( nn, nb, i, 0, nprow )
492  jdex = (i/nprow) * nb
493  IF( mrrow.EQ.0 ) jdex = max(0, jdex-nz)
494 *
495 * A source node copies the blocks to WORK, and send it
496 *
497  IF( myrow.EQ.ixrow .AND. mycol.EQ.mccol ) THEN
498 *
499 * The source node is a destination node
500 *
501  idex = (i/npcol) * nb
502  IF( mrcol.EQ.0 ) idex = max( 0, idex-nz )
503  IF( myrow.EQ.mcrow .AND. mycol.EQ.jycol ) THEN
504  CALL pbdtr2b1( icontxt, trans, nq-idex, nb, kz,
505  $ x(idex*incx+1), incx, tbeta,
506  $ y(jdex*incy+1), incy, lcmq, lcmp )
507 *
508 * The source node sends blocks to a destination node
509 *
510  ELSE
511  CALL pbdtr2b1( icontxt, trans, nq-idex, nb, kz,
512  $ x(idex*incx+1), incx, zero, work, 1,
513  $ lcmq, 1 )
514  CALL dgesd2d( icontxt, 1, np0-kz, work, 1,
515  $ mcrow, jycol )
516  END IF
517 *
518 * A destination node receives the copied blocks
519 *
520  ELSE IF( myrow.EQ.mcrow .AND. mycol.EQ.jycol ) THEN
521  IF( lcmp.EQ.1 .AND. tbeta.EQ.zero ) THEN
522  CALL dgerv2d( icontxt, 1, np0-kz, y, incy,
523  $ ixrow, mccol )
524  ELSE
525  CALL dgerv2d( icontxt, 1, np0-kz, work, 1,
526  $ ixrow, mccol )
527  CALL pbdtr2a1( icontxt, np-jdex, nb, kz, work, 1, tbeta,
528  $ y(jdex*incy+1), incy, lcmp*nb )
529  END IF
530  END IF
531  kz = 0
532  40 CONTINUE
533 *
534 * Broadcast a column vector Y in each row of template
535 *
536  IF( iycol.EQ.-1 ) THEN
537  IF( mycol.EQ.jycol ) THEN
538  CALL dgebs2d( icontxt, 'Row', '1-tree', 1, np, y, incy )
539  ELSE
540  CALL dgebr2d( icontxt, 'Row', '1-tree', 1, np, y, incy,
541  $ myrow, jycol )
542  END IF
543  END IF
544 *
545 * When all row procesors have a copy of the row block A,
546 *
547  ELSE
548  IF( lcmp.EQ.1 ) np0 = np
549 *
550 * Processors, which have diagonal blocks of A, copy them to
551 * WORK array in transposed form
552 *
553  kz = 0
554  IF( mrcol.EQ.0 ) kz = nz
555  jz = 0
556  IF( mrcol.EQ.0 .AND. myrow.EQ.iyrow ) jz = nz
557 *
558  DO 50 i = 0, lcmq-1
559  IF( mrrow.EQ.mod(npcol*i+mrcol, nprow) ) THEN
560  idex = max( 0, i*nb-kz )
561  IF( lcmp.EQ.1 .AND. (iycol.EQ.-1.OR.iycol.EQ.mycol) ) THEN
562  CALL pbdtr2b1( icontxt, trans, nq-idex, nb, jz,
563  $ x(idex*incx+1), incx, beta, y, incy,
564  $ lcmq, 1 )
565  ELSE
566  CALL pbdtr2b1( icontxt, trans, nq-idex, nb, jz,
567  $ x(idex*incx+1), incx, zero, work, 1,
568  $ lcmq, 1 )
569  END IF
570  END IF
571  50 CONTINUE
572 *
573 * Get diagonal blocks of A for each row of the template
574 *
575  mccol = mod( mod(mrrow, npcol) + ixcol, npcol )
576  IF( lcmp.GT.1 ) THEN
577  mcrow = mod( nprow+myrow-iyrow, nprow )
578  CALL pbdtrget( icontxt, 'Col', 1, np0, iceil( nn, nb ),
579  $ work, 1, mcrow, mccol, igd, myrow, mycol,
580  $ nprow, npcol )
581  END IF
582 *
583 * Broadcast a column block of WORK in every column of template
584 *
585  IF( iycol.EQ.-1 ) THEN
586  IF( mycol.EQ.mccol ) THEN
587  IF( lcmp.GT.1 ) THEN
588  kz = 0
589  IF( myrow.EQ.iyrow ) kz = nz
590  CALL pbdtrst1( icontxt, 'Col', np, nb, kz, work, 1,
591  $ beta, y, incy, lcmp, lcmq, np0 )
592  END IF
593  CALL dgebs2d( icontxt, 'Row', '1-tree', 1, np, y, incy )
594  ELSE
595  CALL dgebr2d( icontxt, 'Row', '1-tree', 1, np, y, incy,
596  $ myrow, mccol )
597  END IF
598 *
599 * Send a column block of WORK to the destination column
600 *
601  ELSE
602  IF( lcmp.EQ.1 ) THEN
603  IF( mycol.EQ.mccol ) THEN
604  IF( mycol.NE.iycol )
605  $ CALL dgesd2d( icontxt, 1, np, work, 1, myrow, iycol )
606  ELSE IF( mycol.EQ.iycol ) THEN
607  IF( beta.EQ.zero ) THEN
608  CALL dgerv2d( icontxt, 1, np, y, incy, myrow, mccol )
609  ELSE
610  CALL dgerv2d( icontxt, 1, np, work, 1, myrow, mccol )
611  CALL pbdvecadd( icontxt, 'G', np, one, work, 1, beta,
612  $ y, incy )
613  END IF
614  END IF
615 *
616  ELSE
617  np1 = np0 * min( lcmp, max( 0, iceil(nn,nb)-mcrow ) )
618  IF( mycol.EQ.mccol ) THEN
619  IF( mycol.NE.iycol )
620  $ CALL dgesd2d( icontxt, 1, np1, work, 1, myrow, iycol )
621  ELSE IF( mycol.EQ.iycol ) THEN
622  CALL dgerv2d( icontxt, 1, np1, work, 1, myrow, mccol )
623  END IF
624 *
625  IF( mycol.EQ.iycol ) THEN
626  kz = 0
627  IF( myrow.EQ.iyrow ) kz = nz
628  CALL pbdtrst1( icontxt, 'Col', np, nb, kz, work, 1,
629  $ beta, y, incy, lcmp, lcmq, np0 )
630  END IF
631  END IF
632  END IF
633  END IF
634  END IF
635 *
636  RETURN
637 *
638 * End of PBDTRNV
639 *
640  END
641 *
642 *=======================================================================
643 * SUBROUTINE PBDTR2A1
644 *=======================================================================
645 *
646  SUBROUTINE pbdtr2a1( ICONTXT, N, NB, NZ, X, INCX, BETA, Y, INCY,
647  $ INTV )
648 *
649 * -- PB-BLAS routine (version 2.1) --
650 * University of Tennessee, Knoxville, Oak Ridge National Laboratory.
651 * April 28, 1996
652 *
653 * .. Scalar Arguments ..
654  INTEGER ICONTXT, N, NB, NZ, INCX, INCY, INTV
655  DOUBLE PRECISION BETA
656 * ..
657 * .. Array Arguments ..
658  DOUBLE PRECISION X( * ), Y( * )
659 * ..
660 *
661 * Purpose
662 * =======
663 *
664 * y <== x
665 * y is a scattered vector, copied from a condensed vector x.
666 *
667 * ..
668 * .. Intrinsic Functions ..
669  INTRINSIC min
670 * ..
671 * .. External Functions ..
672  INTEGER ICEIL
673  EXTERNAL ICEIL
674 * ..
675 * .. External Subroutines ..
676  EXTERNAL pbdvecadd
677 * ..
678 * .. Parameters ..
679  DOUBLE PRECISION ONE
680  PARAMETER ( ONE = 1.0d+0 )
681 * ..
682 * .. Local Variables ..
683  INTEGER IX, IY, JZ, K, ITER
684 *
685  IX = 0
686  iy = 0
687  jz = nz
688  iter = iceil( n+nz, intv )
689 *
690  IF( iter.GT.1 ) THEN
691  CALL pbdvecadd( icontxt, 'G', nb-jz, one, x(ix*incx+1), incx,
692  $ beta, y(iy*incy+1), incy )
693  ix = ix + nb - jz
694  iy = iy + intv - jz
695  jz = 0
696 *
697  DO 10 k = 2, iter-1
698  CALL pbdvecadd( icontxt, 'G', nb, one, x(ix*incx+1), incx,
699  $ beta, y(iy*incy+1), incy )
700  ix = ix + nb
701  iy = iy + intv
702  10 CONTINUE
703  END IF
704 *
705  CALL pbdvecadd( icontxt, 'G', min( n-iy, nb-jz ), one,
706  $ x(ix*incx+1), incx, beta, y(iy*incy+1), incy )
707 *
708  RETURN
709 *
710 * End of PBDTR2A1
711 *
712  END
713 *
714 *=======================================================================
715 * SUBROUTINE PBDTR2B1
716 *=======================================================================
717 *
718  SUBROUTINE pbdtr2b1( ICONTXT, TRANS, N, NB, NZ, X, INCX, BETA, Y,
719  $ INCY, JINX, JINY )
720 *
721 * -- PB-BLAS routine (version 2.1) --
722 * University of Tennessee, Knoxville, Oak Ridge National Laboratory.
723 * April 28, 1996
724 *
725 * .. Scalar Arguments ..
726  CHARACTER*1 TRANS
727  INTEGER ICONTXT, N, NB, NZ, INCX, INCY, JINX, JINY
728  DOUBLE PRECISION BETA
729 * ..
730 * .. Array Arguments ..
731  DOUBLE PRECISION X( * ), Y( * )
732 * ..
733 *
734 * Purpose
735 * =======
736 *
737 * y <== x + beta * y
738 * y is a condensed vector, copied from a scattered vector x
739 *
740 * ..
741 * .. Intrinsic Functions ..
742  INTRINSIC min
743 * ..
744 * .. External Functions ..
745  INTEGER ICEIL
746  EXTERNAL iceil
747 * ..
748 * .. External Subroutines ..
749  EXTERNAL pbdvecadd
750 * ..
751 * .. Parameters ..
752  DOUBLE PRECISION ONE
753  parameter( one = 1.0d+0 )
754 * ..
755 * .. Local Variables ..
756  INTEGER IX, IY, JZ, K, ITER, LENX, LENY
757 *
758  IF( jinx.EQ.1 .AND. jiny.EQ.1 ) THEN
759  CALL pbdvecadd( icontxt, trans, n, one, x, incx, beta,
760  $ y, incy )
761 *
762  ELSE
763  ix = 0
764  iy = 0
765  jz = nz
766  lenx = nb * jinx
767  leny = nb * jiny
768  iter = iceil( n+nz, lenx )
769 *
770  IF( iter.GT.1 ) THEN
771  CALL pbdvecadd( icontxt, trans, nb-jz, one, x(ix*incx+1),
772  $ incx, beta, y(iy*incy+1), incy )
773  ix = ix + lenx - jz
774  iy = iy + leny - jz
775  jz = 0
776 *
777  DO 10 k = 2, iter-1
778  CALL pbdvecadd( icontxt, trans, nb, one, x(ix*incx+1),
779  $ incx, beta, y(iy*incy+1), incy )
780  ix = ix + lenx
781  iy = iy + leny
782  10 CONTINUE
783  END IF
784 *
785  CALL pbdvecadd( icontxt, trans, min( n-ix, nb-jz ), one,
786  $ x(ix*incx+1), incx, beta, y(iy*incy+1), incy )
787  END IF
788 *
789  RETURN
790 *
791 * End of PBDTR2B1
792 *
793  END
pbdtrget
subroutine pbdtrget(ICONTXT, ADIST, M, N, MNB, A, LDA, MCROW, MCCOL, IGD, MYROW, MYCOL, NPROW, NPCOL)
Definition: pbdtrget.f:3
pbdvecadd
subroutine pbdvecadd(ICONTXT, MODE, N, ALPHA, X, INCX, BETA, Y, INCY)
Definition: pbdvecadd.f:3
max
#define max(A, B)
Definition: pcgemr.c:180
pbdtrst1
subroutine pbdtrst1(ICONTXT, XDIST, N, NB, NZ, X, INCX, BETA, Y, INCY, LCMP, LCMQ, NINT)
Definition: pbdtrst1.f:3
pbdtr2b1
subroutine pbdtr2b1(ICONTXT, TRANS, N, NB, NZ, X, INCX, BETA, Y, INCY, JINX, JINY)
Definition: pbdtrnv.f:720
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
pbdtr2a1
subroutine pbdtr2a1(ICONTXT, N, NB, NZ, X, INCX, BETA, Y, INCY, INTV)
Definition: pbdtrnv.f:648
pbdtrnv
subroutine pbdtrnv(ICONTXT, XDIST, TRANS, N, NB, NZ, X, INCX, BETA, Y, INCY, IXROW, IXCOL, IYROW, IYCOL, WORK)
Definition: pbdtrnv.f:4
min
#define min(A, B)
Definition: pcgemr.c:181