ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pbztrnv.f
Go to the documentation of this file.
1  SUBROUTINE pbztrnv( ICONTXT, XDIST, TRANS, N, NB, NZ, X, INCX,
2  $ BETA, Y, INCY, IXROW, IXCOL, IYROW, IYCOL,
3  $ WORK )
4 *
5 * -- PB-BLAS routine (version 2.1) --
6 * University of Tennessee, Knoxville, Oak Ridge National Laboratory.
7 * April 28, 1996
8 *
9 * Jaeyoung Choi, Oak Ridge National Laboratory
10 * Jack Dongarra, University of Tennessee and Oak Ridge National Lab.
11 * David Walker, Oak Ridge National Laboratory
12 *
13 * .. Scalar Arguments ..
14  CHARACTER*1 TRANS, XDIST
15  INTEGER ICONTXT, INCX, INCY, IXCOL, IXROW, IYCOL,
16  $ iyrow, n, nb, nz
17  COMPLEX*16 BETA
18 * ..
19 * .. Array Arguments ..
20  COMPLEX*16 WORK( * ), X( * ), Y( * )
21 * ..
22 *
23 * Purpose
24 * =======
25 *
26 * PBZTRNV transposes a column vector to row vector, or a row vector to
27 * column vector by reallocating data distribution.
28 *
29 * Y := X'
30 *
31 * where X and Y are N vectors.
32 *
33 * Parameters
34 * ==========
35 *
36 * ICONTXT (input) INTEGER
37 * ICONTXT is the BLACS mechanism for partitioning communication
38 * space. A defining property of a context is that a message in
39 * a context cannot be sent or received in another context. The
40 * BLACS context includes the definition of a grid, and each
41 * process' coordinates in it.
42 *
43 * XDIST (input) CHARACTER*1
44 * XDIST specifies whether X is a column vector or a row vector,
45 *
46 * XDIST = 'C', X is a column vector (distributed columnwise)
47 * XDIST = 'R', X is a row vector (distributed rowwise)
48 *
49 * TRANS (input) CHARACTER*1
50 * TRANS specifies whether the transposed format is transpose
51 * or conjugate transpose. If the vectors X and Y are real,
52 * the argument is ignored.
53 *
54 * TRANS = 'T', transpose
55 * TRANS = 'C', conjugate transpose
56 *
57 * N (input) INTEGER
58 * N specifies the (global) number of the vector X and the
59 * vector Y. N >= 0.
60 *
61 * NB (input) INTEGER
62 * NB specifies the block size of vectors X and Y. NB >= 0.
63 *
64 * NZ (input) INTEGER
65 * NZ is the column offset to specify the column distance from
66 * the beginning of the block to the first element of the
67 * vector X, and the row offset to the first element of the
68 * vector Y if XDIST = 'C'.
69 * Otherwise, it is row offset to specify the row distance
70 * from the beginning of the block to the first element of the
71 * vector X, and the column offset to the first element of the
72 * vector Y. 0 < NZ <= NB.
73 *
74 * X (input) COMPLEX*16 array of dimension at least
75 * ( 1 + (Np-1) * abs(INCX)) in IXCOL if XDIST = 'C', or
76 * ( 1 + (Nq-1) * abs(INCX)) in IXROW if XDIST = 'R'.
77 * The incremented array X must contain the vector X.
78 *
79 * INCX (input) INTEGER
80 * INCX specifies the increment for the elements of X.
81 * INCX <> 0.
82 *
83 * BETA (input) COMPLEX*16
84 * BETA specifies scaler beta.
85 *
86 * Y (input/output) COMPLEX*16 array of dimension at least
87 * ( 1 + (Nq-1) * abs(INCY)) in IYROW if XDIST = 'C', or
88 * ( 1 + (Np-1) * abs(INCY)) in IYCOL if XDIST = 'R', or
89 * The incremented array Y must contain the vector Y.
90 * Y will not be referenced if beta is zero.
91 *
92 * INCY (input) INTEGER
93 * INCY specifies the increment for the elements of Y.
94 * INCY <> 0.
95 *
96 * IXROW (input) INTEGER
97 * IXROW specifies a row of the process template, which holds
98 * the first element of the vector X. If X is a row vector and
99 * all rows of processes have a copy of X, then set IXROW = -1.
100 *
101 * IXCOL (input) INTEGER
102 * IXCOL specifies a column of the process template,
103 * which holds the first element of the vector X. If X is a
104 * column block and all columns of processes have a copy of X,
105 * then set IXCOL = -1.
106 *
107 * IYROW (input) INTEGER
108 * IYROW specifies the current row process which holds the
109 * first element of the vector Y, which is transposed of X.
110 * If X is a column vector and the transposed row vector Y is
111 * distributed all rows of processes, set IYROW = -1.
112 *
113 * IYCOL (input) INTEGER
114 * IYCOL specifies the current column process which holds
115 * the first element of the vector Y, which is transposed of Y.
116 * If X is a row block and the transposed column vector Y is
117 * distributed all columns of processes, set IYCOL = -1.
118 *
119 * WORK (workspace) COMPLEX*16 array of dimension Size(WORK).
120 * It needs extra working space of x**T or x**H.
121 *
122 * Parameters Details
123 * ==================
124 *
125 * Nx It is a local portion of N owned by a process, where x is
126 * replaced by either p (=NPROW) or q (=NPCOL)). The value is
127 * determined by N, NB, NZ, x, and MI, where NB is a block size,
128 * NZ is a offset from the beginning of the block, and MI is a
129 * row or column position in a process template. Nx is equal
130 * to or less than Nx0 = CEIL( N+NZ, NB*x ) * NB.
131 *
132 * Communication Scheme
133 * ====================
134 *
135 * The communication scheme of the routine is set to '1-tree', which is
136 * fan-out. (For details, see BLACS user's guide.)
137 *
138 * Memory Requirement of WORK
139 * ==========================
140 *
141 * NN = N + NZ
142 * Npb = CEIL( NN, NB*NPROW )
143 * Nqb = CEIL( NN, NB*NPCOL )
144 * LCMP = LCM / NPROW
145 * LCMQ = LCM / NPCOL
146 *
147 * (1) XDIST = 'C'
148 * (a) IXCOL != -1
149 * Size(WORK) = CEIL(Nqb,LCMQ)*NB
150 * (b) IXCOL = -1
151 * Size(WORK) = CEIL(Nqb,LCMQ)*NB * MIN(LCMQ,CEIL(NN,NB))
152 *
153 * (2) XDIST = 'R'
154 * (a) IXROW != -1
155 * Size(WORK) = CEIL(Npb,LCMP)*NB
156 * (b) IXROW = -1
157 * Size(WORK) = CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(NN,NB))
158 *
159 * Notes
160 * -----
161 * More precise space can be computed as
162 *
163 * CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(NN,NB,0,0,NPROW), NB, 0, 0, LCMP)
164 * CEIL(Nqb,LCMQ)*NB => NUMROC( NUMROC(NN,NB,0,0,NPCOL), NB, 0, 0, LCMQ)
165 *
166 * =====================================================================
167 *
168 * .. Parameters ..
169  COMPLEX*16 ONE, ZERO
170  PARAMETER ( ONE = ( 1.0d+0, 0.0d+0 ),
171  $ zero = ( 0.0d+0, 0.0d+0 ) )
172 * ..
173 * .. Local Scalars ..
174  LOGICAL COLFORM, ROWFORM
175  INTEGER I, IDEX, IGD, INFO, JDEX, JYCOL, JYROW, JZ, KZ,
176  $ lcm, lcmp, lcmq, mccol, mcrow, mrcol, mrrow,
177  $ mycol, myrow, nn, np, np0, np1, npcol, nprow,
178  $ nq, nq0, nq1
179  COMPLEX*16 TBETA
180 * ..
181 * .. External Functions ..
182  LOGICAL LSAME
183  INTEGER ILCM, ICEIL, NUMROC
184  EXTERNAL lsame, ilcm, iceil, numroc
185 * ..
186 * .. External Subroutines ..
187  EXTERNAL blacs_gridinfo, pbztr2a1, pbztr2b1, pbztrget,
188  $ pbztrst1, pbzvecadd, pxerbla, zgebr2d, zgebs2d,
189  $ zgerv2d, zgesd2d
190 * ..
191 * .. Intrinsic Functions ..
192  INTRINSIC max, min, mod
193 * ..
194 * .. Executable Statements ..
195 *
196 * Quick return if possible.
197 *
198  IF( n.EQ.0 ) RETURN
199 *
200  CALL blacs_gridinfo( icontxt, nprow, npcol, myrow, mycol )
201 *
202  colform = lsame( xdist, 'C' )
203  rowform = lsame( xdist, 'R' )
204 *
205 * Test the input parameters.
206 *
207  info = 0
208  IF( ( .NOT.colform ) .AND. ( .NOT.rowform ) ) THEN
209  info = 2
210  ELSE IF( n .LT.0 ) THEN
211  info = 4
212  ELSE IF( nb .LT.1 ) THEN
213  info = 5
214  ELSE IF( nz .LT.0 .OR. nz.GE.nb ) THEN
215  info = 6
216  ELSE IF( incx.EQ.0 ) THEN
217  info = 8
218  ELSE IF( incy.EQ.0 ) THEN
219  info = 11
220  ELSE IF( ixrow.LT.-1 .OR. ixrow.GE.nprow .OR.
221  $ ( ixrow.EQ.-1 .AND. colform ) ) THEN
222  info = 12
223  ELSE IF( ixcol.LT.-1 .OR. ixcol.GE.npcol .OR.
224  $ ( ixcol.EQ.-1 .AND. rowform ) ) THEN
225  info = 13
226  ELSE IF( iyrow.LT.-1 .OR. iyrow.GE.nprow .OR.
227  $ ( iyrow.EQ.-1 .AND. rowform ) ) THEN
228  info = 14
229  ELSE IF( iycol.LT.-1 .OR. iycol.GE.npcol .OR.
230  $ ( iycol.EQ.-1 .AND. colform ) ) THEN
231  info = 15
232  END IF
233 *
234  10 CONTINUE
235  IF( info.NE.0 ) THEN
236  CALL pxerbla( icontxt, 'PBZTRNV ', info )
237  RETURN
238  END IF
239 *
240 * Start the operations.
241 *
242 * LCM : the least common multiple of NPROW and NPCOL
243 *
244  lcm = ilcm( nprow, npcol )
245  lcmp = lcm / nprow
246  lcmq = lcm / npcol
247  igd = npcol / lcmp
248  nn = n + nz
249 *
250 * When x is a column vector
251 *
252  IF( colform ) THEN
253 *
254 * Form y <== x' ( x is a column vector )
255 *
256 * ||
257 * ||
258 * _____________ ||
259 * -----(y)----- <== (x)
260 * ||
261 * ||
262 * ||
263 *
264  IF( ixrow.LT.0 .OR. ixrow.GE.nprow ) THEN
265  info = 12
266  ELSE IF( ixcol.LT.-1 .OR. ixcol.GE.npcol ) THEN
267  info = 13
268  ELSE IF( iyrow.LT.-1 .OR. iyrow.GE.nprow ) THEN
269  info = 14
270  ELSE IF( iycol.LT.0 .OR. iycol.GE.npcol ) THEN
271  info = 15
272  END IF
273  IF( info.NE.0 ) GO TO 10
274 *
275 * MRROW : row relative position in template from IXROW
276 * MRCOL : column relative position in template from IYCOL
277 *
278  mrrow = mod( nprow+myrow-ixrow, nprow )
279  mrcol = mod( npcol+mycol-iycol, npcol )
280  jyrow = iyrow
281  IF( iyrow.EQ.-1 ) jyrow = ixrow
282 *
283  np = numroc( nn, nb, myrow, ixrow, nprow )
284  IF( mrrow.EQ.0 ) np = np - nz
285  nq = numroc( nn, nb, mycol, iycol, npcol )
286  IF( mrcol.EQ.0 ) nq = nq - nz
287  nq0 = numroc( numroc(nn, nb, 0, 0, npcol), nb, 0, 0, lcmq )
288 *
289 * When a column process of IXCOL has a column block A,
290 *
291  IF( ixcol .GE. 0 ) THEN
292  tbeta = zero
293  IF( myrow.EQ.jyrow ) tbeta = beta
294  kz = nz
295 *
296  DO 20 i = 0, min( lcm, iceil(nn,nb) ) - 1
297  mcrow = mod( mod(i, nprow) + ixrow, nprow )
298  mccol = mod( mod(i, npcol) + iycol, npcol )
299  IF( lcmq.EQ.1 ) nq0 = numroc( nn, nb, i, 0, npcol )
300  jdex = (i/npcol) * nb
301  IF( mrcol.EQ.0 ) jdex = max(0, jdex-nz)
302 *
303 * A source node copies the blocks to WORK, and send it
304 *
305  IF( myrow.EQ.mcrow .AND. mycol.EQ.ixcol ) THEN
306 *
307 * The source node is a destination node
308 *
309  idex = (i/nprow) * nb
310  IF( mrrow.EQ.0 ) idex = max( 0, idex-nz )
311  IF( myrow.EQ.jyrow .AND. mycol.EQ.mccol ) THEN
312  CALL pbztr2b1( icontxt, trans, np-idex, nb, kz,
313  $ x(idex*incx+1), incx, tbeta,
314  $ y(jdex*incy+1), incy, lcmp, lcmq )
315 *
316 * The source node sends blocks to a destination node
317 *
318  ELSE
319  CALL pbztr2b1( icontxt, trans, np-idex, nb, kz,
320  $ x(idex*incx+1), incx, zero, work, 1,
321  $ lcmp, 1 )
322  CALL zgesd2d( icontxt, 1, nq0-kz, work, 1,
323  $ jyrow, mccol )
324  END IF
325 *
326 * A destination node receives the copied vector
327 *
328  ELSE IF( myrow.EQ.jyrow .AND. mycol.EQ.mccol ) THEN
329  IF( lcmq.EQ.1 .AND. tbeta.EQ.zero ) THEN
330  CALL zgerv2d( icontxt, 1, nq0-kz, y, incy,
331  $ mcrow, ixcol )
332  ELSE
333  CALL zgerv2d( icontxt, 1, nq0-kz, work, 1,
334  $ mcrow, ixcol )
335  CALL pbztr2a1( icontxt, nq-jdex, nb, kz, work, 1, tbeta,
336  $ y(jdex*incy+1), incy, lcmq*nb )
337  END IF
338  END IF
339  kz = 0
340  20 CONTINUE
341 *
342 * Broadcast a row block of WORK in each column of template
343 *
344  IF( iyrow.EQ.-1 ) THEN
345  IF( myrow.EQ.jyrow ) THEN
346  CALL zgebs2d( icontxt, 'Col', '1-tree', 1, nq, y, incy )
347  ELSE
348  CALL zgebr2d( icontxt, 'Col', '1-tree', 1, nq, y, incy,
349  $ jyrow, mycol )
350  END IF
351  END IF
352 *
353 * When all column procesors have a copy of the column block A,
354 *
355  ELSE
356  IF( lcmq.EQ.1 ) nq0 = nq
357 *
358 * Processors, which have diagonal blocks of X, copy them to
359 * WORK array in transposed form
360 *
361  kz = 0
362  IF( mrrow.EQ.0 ) kz = nz
363  jz = 0
364  IF( mrrow.EQ.0 .AND. mycol.EQ.iycol ) jz = nz
365 *
366  DO 30 i = 0, lcmp - 1
367  IF( mrcol.EQ.mod(nprow*i+mrrow, npcol) ) THEN
368  idex = max( 0, i*nb-kz )
369  IF( lcmq.EQ.1 .AND. (iyrow.EQ.-1.OR.iyrow.EQ.myrow) ) THEN
370  CALL pbztr2b1( icontxt, trans, np-idex, nb, jz,
371  $ x(idex*incx+1), incx, beta, y, incy,
372  $ lcmp, 1 )
373  ELSE
374  CALL pbztr2b1( icontxt, trans, np-idex, nb, jz,
375  $ x(idex*incx+1), incx, zero, work, 1,
376  $ lcmp, 1 )
377  END IF
378  END IF
379  30 CONTINUE
380 *
381 * Get diagonal blocks of A for each column of the template
382 *
383  mcrow = mod( mod(mrcol, nprow) + ixrow, nprow )
384  IF( lcmq.GT.1 ) THEN
385  mccol = mod( npcol+mycol-iycol, npcol )
386  CALL pbztrget( icontxt, 'Row', 1, nq0, iceil( nn, nb ),
387  $ work, 1, mcrow, mccol, igd, myrow, mycol,
388  $ nprow, npcol )
389  END IF
390 *
391 * Broadcast a row block of WORK in every row of template
392 *
393  IF( iyrow.EQ.-1 ) THEN
394  IF( myrow.EQ.mcrow ) THEN
395  IF( lcmq.GT.1 ) THEN
396  kz = 0
397  IF( mycol.EQ.iycol ) kz = nz
398  CALL pbztrst1( icontxt, 'Row', nq, nb, kz, work, 1,
399  $ beta, y, incy, lcmp, lcmq, nq0 )
400  END IF
401  CALL zgebs2d( icontxt, 'Col', '1-tree', 1, nq, y, incy )
402  ELSE
403  CALL zgebr2d( icontxt, 'Col', '1-tree', 1, nq, y, incy,
404  $ mcrow, mycol )
405  END IF
406 *
407 * Send a row block of WORK to the destination row
408 *
409  ELSE
410  IF( lcmq.EQ.1 ) THEN
411  IF( myrow.EQ.mcrow ) THEN
412  IF( myrow.NE.iyrow )
413  $ CALL zgesd2d( icontxt, 1, nq0, work, 1, iyrow, mycol )
414  ELSE IF( myrow.EQ.iyrow ) THEN
415  IF( beta.EQ.zero ) THEN
416  CALL zgerv2d( icontxt, 1, nq0, y, incy, mcrow, mycol )
417  ELSE
418  CALL zgerv2d( icontxt, 1, nq0, work, 1, mcrow, mycol )
419  CALL pbzvecadd( icontxt, 'G', nq0, one, work, 1,
420  $ beta, y, incy )
421  END IF
422  END IF
423 *
424  ELSE
425  nq1 = nq0 * min( lcmq, max( 0, iceil(nn,nb)-mccol ) )
426  IF( myrow.EQ.mcrow ) THEN
427  IF( myrow.NE.iyrow )
428  $ CALL zgesd2d( icontxt, 1, nq1, work, 1, iyrow, mycol )
429  ELSE IF( myrow.EQ.iyrow ) THEN
430  CALL zgerv2d( icontxt, 1, nq1, work, 1, mcrow, mycol )
431  END IF
432 *
433  IF( myrow.EQ.iyrow ) THEN
434  kz = 0
435  IF( mycol.EQ.iycol ) kz = nz
436  CALL pbztrst1( icontxt, 'Row', nq, nb, kz, work, 1,
437  $ beta, y, incy, lcmp, lcmq, nq0 )
438  END IF
439  END IF
440  END IF
441  END IF
442 *
443 * When x is a row vector
444 *
445  ELSE
446 *
447 * Form y <== x' ( x is a row block )
448 *
449 * ||
450 * ||
451 * || _____________
452 * (y) <== -----(x)-----
453 * ||
454 * ||
455 * ||
456 *
457  IF( ixrow.LT.-1 .OR. ixrow.GE.nprow ) THEN
458  info = 12
459  ELSE IF( ixcol.LT.0 .OR. ixcol.GE.npcol ) THEN
460  info = 13
461  ELSE IF( iyrow.LT.0 .OR. iyrow.GE.nprow ) THEN
462  info = 14
463  ELSE IF( iycol.LT.-1 .OR. iycol.GE.npcol ) THEN
464  info = 15
465  END IF
466  IF( info.NE.0 ) GO TO 10
467 *
468 * MRROW : row relative position in template from IYROW
469 * MRCOL : column relative position in template from IXCOL
470 *
471  mrrow = mod( nprow+myrow-iyrow, nprow )
472  mrcol = mod( npcol+mycol-ixcol, npcol )
473  jycol = iycol
474  IF( iycol.EQ.-1 ) jycol = ixcol
475 *
476  np = numroc( nn, nb, myrow, iyrow, nprow )
477  IF( mrrow.EQ.0 ) np = np - nz
478  nq = numroc( nn, nb, mycol, ixcol, npcol )
479  IF( mrcol.EQ.0 ) nq = nq - nz
480  np0 = numroc( numroc(nn, nb, 0, 0, nprow), nb, 0, 0, lcmp )
481 *
482 * When a row process of IXROW has a row block A,
483 *
484  IF( ixrow .GE. 0 ) THEN
485  tbeta = zero
486  IF( mycol.EQ.jycol ) tbeta = beta
487  kz = nz
488 *
489  DO 40 i = 0, min( lcm, iceil(nn,nb) ) - 1
490  mcrow = mod( mod(i, nprow) + iyrow, nprow )
491  mccol = mod( mod(i, npcol) + ixcol, npcol )
492  IF( lcmp.EQ.1 ) np0 = numroc( nn, nb, i, 0, nprow )
493  jdex = (i/nprow) * nb
494  IF( mrrow.EQ.0 ) jdex = max(0, jdex-nz)
495 *
496 * A source node copies the blocks to WORK, and send it
497 *
498  IF( myrow.EQ.ixrow .AND. mycol.EQ.mccol ) THEN
499 *
500 * The source node is a destination node
501 *
502  idex = (i/npcol) * nb
503  IF( mrcol.EQ.0 ) idex = max( 0, idex-nz )
504  IF( myrow.EQ.mcrow .AND. mycol.EQ.jycol ) THEN
505  CALL pbztr2b1( icontxt, trans, nq-idex, nb, kz,
506  $ x(idex*incx+1), incx, tbeta,
507  $ y(jdex*incy+1), incy, lcmq, lcmp )
508 *
509 * The source node sends blocks to a destination node
510 *
511  ELSE
512  CALL pbztr2b1( icontxt, trans, nq-idex, nb, kz,
513  $ x(idex*incx+1), incx, zero, work, 1,
514  $ lcmq, 1 )
515  CALL zgesd2d( icontxt, 1, np0-kz, work, 1,
516  $ mcrow, jycol )
517  END IF
518 *
519 * A destination node receives the copied blocks
520 *
521  ELSE IF( myrow.EQ.mcrow .AND. mycol.EQ.jycol ) THEN
522  IF( lcmp.EQ.1 .AND. tbeta.EQ.zero ) THEN
523  CALL zgerv2d( icontxt, 1, np0-kz, y, incy,
524  $ ixrow, mccol )
525  ELSE
526  CALL zgerv2d( icontxt, 1, np0-kz, work, 1,
527  $ ixrow, mccol )
528  CALL pbztr2a1( icontxt, np-jdex, nb, kz, work, 1, tbeta,
529  $ y(jdex*incy+1), incy, lcmp*nb )
530  END IF
531  END IF
532  kz = 0
533  40 CONTINUE
534 *
535 * Broadcast a column vector Y in each row of template
536 *
537  IF( iycol.EQ.-1 ) THEN
538  IF( mycol.EQ.jycol ) THEN
539  CALL zgebs2d( icontxt, 'Row', '1-tree', 1, np, y, incy )
540  ELSE
541  CALL zgebr2d( icontxt, 'Row', '1-tree', 1, np, y, incy,
542  $ myrow, jycol )
543  END IF
544  END IF
545 *
546 * When all row procesors have a copy of the row block A,
547 *
548  ELSE
549  IF( lcmp.EQ.1 ) np0 = np
550 *
551 * Processors, which have diagonal blocks of A, copy them to
552 * WORK array in transposed form
553 *
554  kz = 0
555  IF( mrcol.EQ.0 ) kz = nz
556  jz = 0
557  IF( mrcol.EQ.0 .AND. myrow.EQ.iyrow ) jz = nz
558 *
559  DO 50 i = 0, lcmq-1
560  IF( mrrow.EQ.mod(npcol*i+mrcol, nprow) ) THEN
561  idex = max( 0, i*nb-kz )
562  IF( lcmp.EQ.1 .AND. (iycol.EQ.-1.OR.iycol.EQ.mycol) ) THEN
563  CALL pbztr2b1( icontxt, trans, nq-idex, nb, jz,
564  $ x(idex*incx+1), incx, beta, y, incy,
565  $ lcmq, 1 )
566  ELSE
567  CALL pbztr2b1( icontxt, trans, nq-idex, nb, jz,
568  $ x(idex*incx+1), incx, zero, work, 1,
569  $ lcmq, 1 )
570  END IF
571  END IF
572  50 CONTINUE
573 *
574 * Get diagonal blocks of A for each row of the template
575 *
576  mccol = mod( mod(mrrow, npcol) + ixcol, npcol )
577  IF( lcmp.GT.1 ) THEN
578  mcrow = mod( nprow+myrow-iyrow, nprow )
579  CALL pbztrget( icontxt, 'Col', 1, np0, iceil( nn, nb ),
580  $ work, 1, mcrow, mccol, igd, myrow, mycol,
581  $ nprow, npcol )
582  END IF
583 *
584 * Broadcast a column block of WORK in every column of template
585 *
586  IF( iycol.EQ.-1 ) THEN
587  IF( mycol.EQ.mccol ) THEN
588  IF( lcmp.GT.1 ) THEN
589  kz = 0
590  IF( myrow.EQ.iyrow ) kz = nz
591  CALL pbztrst1( icontxt, 'Col', np, nb, kz, work, 1,
592  $ beta, y, incy, lcmp, lcmq, np0 )
593  END IF
594  CALL zgebs2d( icontxt, 'Row', '1-tree', 1, np, y, incy )
595  ELSE
596  CALL zgebr2d( icontxt, 'Row', '1-tree', 1, np, y, incy,
597  $ myrow, mccol )
598  END IF
599 *
600 * Send a column block of WORK to the destination column
601 *
602  ELSE
603  IF( lcmp.EQ.1 ) THEN
604  IF( mycol.EQ.mccol ) THEN
605  IF( mycol.NE.iycol )
606  $ CALL zgesd2d( icontxt, 1, np, work, 1, myrow, iycol )
607  ELSE IF( mycol.EQ.iycol ) THEN
608  IF( beta.EQ.zero ) THEN
609  CALL zgerv2d( icontxt, 1, np, y, incy, myrow, mccol )
610  ELSE
611  CALL zgerv2d( icontxt, 1, np, work, 1, myrow, mccol )
612  CALL pbzvecadd( icontxt, 'G', np, one, work, 1, beta,
613  $ y, incy )
614  END IF
615  END IF
616 *
617  ELSE
618  np1 = np0 * min( lcmp, max( 0, iceil(nn,nb)-mcrow ) )
619  IF( mycol.EQ.mccol ) THEN
620  IF( mycol.NE.iycol )
621  $ CALL zgesd2d( icontxt, 1, np1, work, 1, myrow, iycol )
622  ELSE IF( mycol.EQ.iycol ) THEN
623  CALL zgerv2d( icontxt, 1, np1, work, 1, myrow, mccol )
624  END IF
625 *
626  IF( mycol.EQ.iycol ) THEN
627  kz = 0
628  IF( myrow.EQ.iyrow ) kz = nz
629  CALL pbztrst1( icontxt, 'Col', np, nb, kz, work, 1,
630  $ beta, y, incy, lcmp, lcmq, np0 )
631  END IF
632  END IF
633  END IF
634  END IF
635  END IF
636 *
637  RETURN
638 *
639 * End of PBZTRNV
640 *
641  END
642 *
643 *=======================================================================
644 * SUBROUTINE PBZTR2A1
645 *=======================================================================
646 *
647  SUBROUTINE pbztr2a1( ICONTXT, N, NB, NZ, X, INCX, BETA, Y, INCY,
648  $ INTV )
649 *
650 * -- PB-BLAS routine (version 2.1) --
651 * University of Tennessee, Knoxville, Oak Ridge National Laboratory.
652 * April 28, 1996
653 *
654 * .. Scalar Arguments ..
655  INTEGER ICONTXT, N, NB, NZ, INCX, INCY, INTV
656  COMPLEX*16 BETA
657 * ..
658 * .. Array Arguments ..
659  COMPLEX*16 X( * ), Y( * )
660 * ..
661 *
662 * Purpose
663 * =======
664 *
665 * y <== x
666 * y is a scattered vector, copied from a condensed vector x.
667 *
668 * ..
669 * .. Intrinsic Functions ..
670  INTRINSIC min
671 * ..
672 * .. External Functions ..
673  INTEGER ICEIL
674  EXTERNAL ICEIL
675 * ..
676 * .. External Subroutines ..
677  EXTERNAL pbzvecadd
678 * ..
679 * .. Parameters ..
680  COMPLEX*16 ONE
681  PARAMETER ( ONE = ( 1.0d+0, 0.0d+0 ) )
682 * ..
683 * .. Local Variables ..
684  INTEGER IX, IY, JZ, K, ITER
685 *
686  IX = 0
687  iy = 0
688  jz = nz
689  iter = iceil( n+nz, intv )
690 *
691  IF( iter.GT.1 ) THEN
692  CALL pbzvecadd( icontxt, 'G', nb-jz, one, x(ix*incx+1), incx,
693  $ beta, y(iy*incy+1), incy )
694  ix = ix + nb - jz
695  iy = iy + intv - jz
696  jz = 0
697 *
698  DO 10 k = 2, iter-1
699  CALL pbzvecadd( icontxt, 'G', nb, one, x(ix*incx+1), incx,
700  $ beta, y(iy*incy+1), incy )
701  ix = ix + nb
702  iy = iy + intv
703  10 CONTINUE
704  END IF
705 *
706  CALL pbzvecadd( icontxt, 'G', min( n-iy, nb-jz ), one,
707  $ x(ix*incx+1), incx, beta, y(iy*incy+1), incy )
708 *
709  RETURN
710 *
711 * End of PBZTR2A1
712 *
713  END
714 *
715 *=======================================================================
716 * SUBROUTINE PBZTR2B1
717 *=======================================================================
718 *
719  SUBROUTINE pbztr2b1( ICONTXT, TRANS, N, NB, NZ, X, INCX, BETA, Y,
720  $ INCY, JINX, JINY )
721 *
722 * -- PB-BLAS routine (version 2.1) --
723 * University of Tennessee, Knoxville, Oak Ridge National Laboratory.
724 * April 28, 1996
725 *
726 * .. Scalar Arguments ..
727  CHARACTER*1 TRANS
728  INTEGER ICONTXT, N, NB, NZ, INCX, INCY, JINX, JINY
729  COMPLEX*16 BETA
730 * ..
731 * .. Array Arguments ..
732  COMPLEX*16 X( * ), Y( * )
733 * ..
734 *
735 * Purpose
736 * =======
737 *
738 * y <== x + beta * y
739 * y is a condensed vector, copied from a scattered vector x
740 *
741 * ..
742 * .. Intrinsic Functions ..
743  INTRINSIC min
744 * ..
745 * .. External Functions ..
746  INTEGER ICEIL
747  EXTERNAL iceil
748 * ..
749 * .. External Subroutines ..
750  EXTERNAL pbzvecadd
751 * ..
752 * .. Parameters ..
753  COMPLEX*16 ONE
754  parameter( one = ( 1.0d+0, 0.0d+0 ) )
755 * ..
756 * .. Local Variables ..
757  INTEGER IX, IY, JZ, K, ITER, LENX, LENY
758 *
759  IF( jinx.EQ.1 .AND. jiny.EQ.1 ) THEN
760  CALL pbzvecadd( icontxt, trans, n, one, x, incx, beta,
761  $ y, incy )
762 *
763  ELSE
764  ix = 0
765  iy = 0
766  jz = nz
767  lenx = nb * jinx
768  leny = nb * jiny
769  iter = iceil( n+nz, lenx )
770 *
771  IF( iter.GT.1 ) THEN
772  CALL pbzvecadd( icontxt, trans, nb-jz, one, x(ix*incx+1),
773  $ incx, beta, y(iy*incy+1), incy )
774  ix = ix + lenx - jz
775  iy = iy + leny - jz
776  jz = 0
777 *
778  DO 10 k = 2, iter-1
779  CALL pbzvecadd( icontxt, trans, nb, one, x(ix*incx+1),
780  $ incx, beta, y(iy*incy+1), incy )
781  ix = ix + lenx
782  iy = iy + leny
783  10 CONTINUE
784  END IF
785 *
786  CALL pbzvecadd( icontxt, trans, min( n-ix, nb-jz ), one,
787  $ x(ix*incx+1), incx, beta, y(iy*incy+1), incy )
788  END IF
789 *
790  RETURN
791 *
792 * End of PBZTR2B1
793 *
794  END
pbztrst1
subroutine pbztrst1(ICONTXT, XDIST, N, NB, NZ, X, INCX, BETA, Y, INCY, LCMP, LCMQ, NINT)
Definition: pbztrst1.f:3
max
#define max(A, B)
Definition: pcgemr.c:180
pbztrnv
subroutine pbztrnv(ICONTXT, XDIST, TRANS, N, NB, NZ, X, INCX, BETA, Y, INCY, IXROW, IXCOL, IYROW, IYCOL, WORK)
Definition: pbztrnv.f:4
pbztr2b1
subroutine pbztr2b1(ICONTXT, TRANS, N, NB, NZ, X, INCX, BETA, Y, INCY, JINX, JINY)
Definition: pbztrnv.f:721
pbzvecadd
subroutine pbzvecadd(ICONTXT, MODE, N, ALPHA, X, INCX, BETA, Y, INCY)
Definition: pbzvecadd.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
pbztrget
subroutine pbztrget(ICONTXT, ADIST, M, N, MNB, A, LDA, MCROW, MCCOL, IGD, MYROW, MYCOL, NPROW, NPCOL)
Definition: pbztrget.f:3
pbztr2a1
subroutine pbztr2a1(ICONTXT, N, NB, NZ, X, INCX, BETA, Y, INCY, INTV)
Definition: pbztrnv.f:649
min
#define min(A, B)
Definition: pcgemr.c:181