ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pbdtran.f
Go to the documentation of this file.
1  SUBROUTINE pbdtran( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, BETA,
2  $ C, LDC, IAROW, IACOL, ICROW, ICCOL, WORK )
3 *
4 * -- PB-BLAS routine (version 2.1) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory.
6 * April 28, 1996
7 *
8 * Jaeyoung Choi, Oak Ridge National Laboratory
9 * Jack Dongarra, University of Tennessee and Oak Ridge National Lab.
10 * David Walker, Oak Ridge National Laboratory
11 *
12 * .. Scalar Arguments ..
13  CHARACTER*1 ADIST, TRANS
14  INTEGER IACOL, IAROW, ICCOL, ICONTXT, ICROW, LDA, LDC,
15  $ m, n, nb
16  DOUBLE PRECISION BETA
17 * ..
18 * .. Array Arguments ..
19  DOUBLE PRECISION A( LDA, * ), C( LDC, * ), WORK( * )
20 * ..
21 *
22 * Purpose
23 * =======
24 *
25 * PBDTRAN transposes a column block to row block, or a row block to
26 * column block by reallocating data distribution.
27 *
28 * C := A^T + beta*C, or C := A^C + beta*C
29 *
30 * where A is an M-by-N matrix and C is an N-by-M matrix, and the size
31 * of M or N is limited to its block size NB.
32 *
33 * The first elements of the matrices A, and C should be located at
34 * the beginnings of their first blocks. (not the middle of the blocks.)
35 *
36 * Parameters
37 * ==========
38 *
39 * ICONTXT (input) INTEGER
40 * ICONTXT is the BLACS mechanism for partitioning communication
41 * space. A defining property of a context is that a message in
42 * a context cannot be sent or received in another context. The
43 * BLACS context includes the definition of a grid, and each
44 * process' coordinates in it.
45 *
46 * ADIST - (input) CHARACTER*1
47 * ADIST specifies whether A is a column block or a row block.
48 *
49 * ADIST = 'C', A is a column block
50 * ADIST = 'R', A is a row block
51 *
52 * TRANS - (input) CHARACTER*1
53 * TRANS specifies whether the transposed format is transpose
54 * or conjugate transpose. If the matrices A and C are real,
55 * the argument is ignored.
56 *
57 * TRANS = 'T', transpose
58 * TRANS = 'C', conjugate transpose
59 *
60 * M - (input) INTEGER
61 * M specifies the (global) number of rows of the matrix (block
62 * column or block row) A and of columns of the matrix C.
63 * M >= 0.
64 *
65 * N - (input) INTEGER
66 * N specifies the (global) number of columns of the matrix
67 * (block column or block row) A and of columns of the matrix
68 * C. N >= 0.
69 *
70 * NB - (input) INTEGER
71 * NB specifies the column block size of the matrix A and the
72 * row block size of the matrix C when ADIST = 'C'. Otherwise,
73 * it specifies the row block size of the matrix A and the
74 * column block size of the matrix C. NB >= 1.
75 *
76 * A (input) DOUBLE PRECISION array of DIMENSION ( LDA, Lx ),
77 * where Lx is N when ADIST = 'C', or Nq when ADIST = 'R'.
78 * Before entry with ADIST = 'C', the leading Mp by N part of
79 * the array A must contain the matrix A, otherwise the leading
80 * M by Nq part of the array A must contain the matrix A. See
81 * parameter details for the values of Mp and Nq.
82 *
83 * LDA (input) INTEGER
84 * LDA specifies the leading dimension of (local) A as declared
85 * in the calling (sub) program. LDA >= MAX(1,Mp) when
86 * ADIST = 'C', or LDA >= MAX(1,M) otherwise.
87 *
88 * BETA (input) DOUBLE PRECISION
89 * BETA specifies scaler beta.
90 *
91 * C (input/output) DOUBLE PRECISION array of DIMENSION
92 * ( LDC, Lx ),
93 * where Lx is Mq when ADIST = 'C', or N when ADIST = 'R'.
94 * If ADIST = 'C', the leading N-by-Mq part of the array C
95 * contains the (local) matrix C, otherwise the leading
96 * Np-by-M part of the array C must contain the (local) matrix
97 * C. C will not be referenced if beta is zero.
98 *
99 * LDC (input) INTEGER
100 * LDC specifies the leading dimension of (local) C as declared
101 * in the calling (sub) program. LDC >= MAX(1,N) when ADIST='C',
102 * or LDC >= MAX(1,Np) otherwise.
103 *
104 * IAROW (input) INTEGER
105 * IAROW specifies a row of the process template,
106 * which holds the first block of the matrix A. If A is a row
107 * of blocks (ADIST = 'R') and all rows of processes have a copy
108 * of A, then set IAROW = -1.
109 *
110 * IACOL (input) INTEGER
111 * IACOL specifies a column of the process template,
112 * which holds the first block of the matrix A. If A is a
113 * column of blocks (ADIST = 'C') and all columns of processes
114 * have a copy of A, then set IACOL = -1.
115 *
116 * ICROW (input) INTEGER
117 * ICROW specifies the current row process which holds
118 * the first block of the matrix C, which is transposed of A.
119 * If C is a row of blocks (ADIST = 'C') and the transposed
120 * row block C is distributed all rows of processes, set
121 * ICROW = -1.
122 *
123 * ICCOL (input) INTEGER
124 * ICCOL specifies the current column process which holds
125 * the first block of the matrix C, which is transposed of A.
126 * If C is a column of blocks (ADIST = 'R') and the transposed
127 * column block C is distributed all columns of processes,
128 * set ICCOL = -1.
129 *
130 * WORK (workspace) DOUBLE PRECISION array of dimension Size(WORK).
131 * It needs extra working space of A'.
132 *
133 * Parameters Details
134 * ==================
135 *
136 * Lx It is a local portion of L owned by a process, (L is
137 * replaced by M, or N, and x is replaced by either p (=NPROW)
138 * or q (=NPCOL)). The value is determined by L, LB, x, and
139 * MI, where LB is a block size and MI is a row or column
140 * position in a process template. Lx is equal to or less
141 * than Lx0 = CEIL( L, LB*x ) * LB.
142 *
143 * Communication Scheme
144 * ====================
145 *
146 * The communication scheme of the routine is set to '1-tree', which is
147 * fan-out. (For details, see BLACS user's guide.)
148 *
149 * Memory Requirement of WORK
150 * ==========================
151 *
152 * Mqb = CEIL( M, NB*NPCOL )
153 * Npb = CEIL( N, NB*NPROW )
154 * LCMQ = LCM / NPCOL
155 * LCMP = LCM / NPROW
156 *
157 * (1) ADIST = 'C'
158 * (a) IACOL != -1
159 * Size(WORK) = N * CEIL(Mqb,LCMQ)*NB
160 * (b) IACOL = -1
161 * Size(WORK) = N * CEIL(Mqb,LCMQ)*NB * MIN(LCMQ,CEIL(M,NB))
162 *
163 * (2) ADIST = 'R'
164 * (a) IAROW != -1
165 * Size(WORK) = M * CEIL(Npb,LCMP)*NB
166 * (b) IAROW = -1
167 * Size(WORK) = M * CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(N,NB))
168 *
169 * Notes
170 * -----
171 * More precise space can be computed as
172 *
173 * CEIL(Mqb,LCMQ)*NB => NUMROC( NUMROC(M,NB,0,0,NPCOL), NB, 0, 0, LCMQ )
174 * CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(N,NB,0,0,NPROW), NB, 0, 0, LCMP )
175 *
176 * =====================================================================
177 *
178 * ..
179 * .. Parameters ..
180  DOUBLE PRECISION ONE, ZERO
181  parameter( one = 1.0d+0, zero = 0.0d+0 )
182 * ..
183 * .. Local Scalars ..
184  LOGICAL COLFORM, ROWFORM
185  INTEGER I, IDEX, IGD, INFO, JCCOL, JCROW, JDEX, LCM,
186  $ lcmp, lcmq, mccol, mcrow, ml, mp, mq, mq0,
187  $ mrcol, mrrow, mycol, myrow, np, np0, npcol,
188  $ nprow, nq
189  DOUBLE PRECISION TBETA
190 * ..
191 * .. External Functions ..
192  LOGICAL LSAME
193  INTEGER ILCM, ICEIL, NUMROC
194  EXTERNAL ilcm, iceil, lsame, numroc
195 * ..
196 * .. External Subroutines ..
197  EXTERNAL blacs_gridinfo, dgebr2d, dgebs2d, dgerv2d,
198  $ dgesd2d, pbdmatadd, pbdtr2af, pbdtr2at,
200 * ..
201 * .. Intrinsic Functions ..
202  INTRINSIC max, min, mod
203 * ..
204 * .. Executable Statements ..
205 *
206 * Quick return if possible.
207 *
208  IF( m.EQ.0 .OR. n.EQ.0 ) RETURN
209 *
210  CALL blacs_gridinfo( icontxt, nprow, npcol, myrow, mycol )
211 *
212  colform = lsame( adist, 'C' )
213  rowform = lsame( adist, 'R' )
214 *
215 * Test the input parameters.
216 *
217  info = 0
218  IF( ( .NOT.colform ) .AND. ( .NOT.rowform ) ) THEN
219  info = 2
220  ELSE IF( m .LT.0 ) THEN
221  info = 4
222  ELSE IF( n .LT.0 ) THEN
223  info = 5
224  ELSE IF( nb.LT.1 ) THEN
225  info = 6
226  ELSE IF( iarow.LT.-1 .OR. iarow.GE.nprow .OR.
227  $ ( iarow.EQ.-1 .AND. colform ) ) THEN
228  info = 12
229  ELSE IF( iacol.LT.-1 .OR. iacol.GE.npcol .OR.
230  $ ( iacol.EQ.-1 .AND. rowform ) ) THEN
231  info = 13
232  ELSE IF( icrow.LT.-1 .OR. icrow.GE.nprow .OR.
233  $ ( icrow.EQ.-1 .AND. rowform ) ) THEN
234  info = 14
235  ELSE IF( iccol.LT.-1 .OR. iccol.GE.npcol .OR.
236  $ ( iccol.EQ.-1 .AND. colform ) ) THEN
237  info = 15
238  END IF
239 *
240  10 CONTINUE
241  IF( info .NE. 0 ) THEN
242  CALL pxerbla( icontxt, 'PBDTRAN ', info )
243  RETURN
244  END IF
245 *
246 * Start the operations.
247 *
248 * LCM : the least common multiple of NPROW and NPCOL
249 *
250  lcm = ilcm( nprow, npcol )
251  lcmp = lcm / nprow
252  lcmq = lcm / npcol
253  igd = npcol / lcmp
254 *
255 * When A is a column block
256 *
257  IF( colform ) THEN
258 *
259 * Form C <== A' ( A is a column block )
260 * _
261 * | |
262 * | |
263 * _____________ | |
264 * |______C______| <== |A|
265 * | |
266 * | |
267 * |_|
268 *
269 * MRROW : row relative position in template from IAROW
270 * MRCOL : column relative position in template from ICCOL
271 *
272  mrrow = mod( nprow+myrow-iarow, nprow )
273  mrcol = mod( npcol+mycol-iccol, npcol )
274  jcrow = icrow
275  IF( icrow.EQ.-1 ) jcrow = iarow
276 *
277  mp = numroc( m, nb, myrow, iarow, nprow )
278  mq = numroc( m, nb, mycol, iccol, npcol )
279  mq0 = numroc( numroc(m, nb, 0, 0, npcol), nb, 0, 0, lcmq )
280 *
281  IF( lda.LT.mp .AND.
282  $ ( iacol.EQ.mycol .OR. iacol.EQ.-1 ) ) THEN
283  info = 8
284  ELSE IF( ldc.LT.n .AND.
285  $ ( icrow.EQ.myrow .OR. icrow.EQ.-1 ) ) THEN
286  info = 11
287  END IF
288  IF( info.NE.0 ) GO TO 10
289 *
290 * When a column process of IACOL has a column block A,
291 *
292  IF( iacol.GE.0 ) THEN
293  tbeta = zero
294  IF( myrow.EQ.jcrow ) tbeta = beta
295 *
296  DO 20 i = 0, min( lcm, iceil(m,nb) ) - 1
297  mcrow = mod( mod(i, nprow) + iarow, nprow )
298  mccol = mod( mod(i, npcol) + iccol, npcol )
299  IF( lcmq.EQ.1 ) mq0 = numroc( m, nb, i, 0, npcol )
300  jdex = (i/npcol) * nb
301 *
302 * A source node copies the blocks to WORK, and send it
303 *
304  IF( myrow.EQ.mcrow .AND. mycol.EQ.iacol ) THEN
305 *
306 * The source node is a destination node
307 *
308  idex = (i/nprow) * nb
309  IF( myrow.EQ.jcrow .AND. mycol.EQ.mccol ) THEN
310  CALL pbdtr2at( icontxt, 'Col', trans, mp-idex, n, nb,
311  $ a(idex+1,1), lda, tbeta, c(1,jdex+1),
312  $ ldc, lcmp, lcmq )
313 *
314 * The source node sends blocks to a destination node
315 *
316  ELSE
317  CALL pbdtr2bt( icontxt, 'Col', trans, mp-idex, n, nb,
318  $ a(idex+1,1), lda, zero, work, n,
319  $ lcmp*nb )
320  CALL dgesd2d( icontxt, n, mq0, work, n, jcrow, mccol )
321  END IF
322 *
323 * A destination node receives the copied blocks
324 *
325  ELSE IF( myrow.EQ.jcrow .AND. mycol.EQ.mccol ) THEN
326  IF( lcmq.EQ.1 .AND. tbeta.EQ.zero ) THEN
327  CALL dgerv2d( icontxt, n, mq0, c, ldc, mcrow, iacol )
328  ELSE
329  CALL dgerv2d( icontxt, n, mq0, work, n, mcrow, iacol )
330  CALL pbdtr2af( icontxt, 'Row', n, mq-jdex, nb, work, n,
331  $ tbeta, c(1,jdex+1), ldc, lcmp, lcmq,
332  $ mq0 )
333  END IF
334  END IF
335  20 CONTINUE
336 *
337 * Broadcast a row block of C in each column of template
338 *
339  IF( icrow.EQ.-1 ) THEN
340  IF( myrow.EQ.jcrow ) THEN
341  CALL dgebs2d( icontxt, 'Col', '1-tree', n, mq, c, ldc )
342  ELSE
343  CALL dgebr2d( icontxt, 'Col', '1-tree', n, mq, c, ldc,
344  $ jcrow, mycol )
345  END IF
346  END IF
347 *
348 * When all column procesors have a copy of the column block A,
349 *
350  ELSE
351  IF( lcmq.EQ.1 ) mq0 = mq
352 *
353 * Processors, which have diagonal blocks of A, copy them to
354 * WORK array in transposed form
355 *
356  DO 30 i = 0, lcmp-1
357  IF( mrcol.EQ.mod( nprow*i+mrrow, npcol ) ) THEN
358  IF( lcmq.EQ.1.AND.(icrow.EQ.-1.OR.icrow.EQ.myrow) ) THEN
359  CALL pbdtr2bt( icontxt, 'Col', trans, mp-i*nb, n, nb,
360  $ a(i*nb+1,1), lda, beta, c, ldc,
361  $ lcmp*nb )
362  ELSE
363  CALL pbdtr2bt( icontxt, 'Col', trans, mp-i*nb, n, nb,
364  $ a(i*nb+1,1), lda, zero, work, n,
365  $ lcmp*nb )
366  END IF
367  END IF
368  30 CONTINUE
369 *
370 * Get diagonal blocks of A for each column of the template
371 *
372  mcrow = mod( mod(mrcol,nprow)+iarow, nprow )
373  IF( lcmq.GT.1 ) THEN
374  mccol = mod( npcol+mycol-iccol, npcol )
375  CALL pbdtrget( icontxt, 'Row', n, mq0, iceil(m,nb), work, n,
376  $ mcrow, mccol, igd, myrow, mycol, nprow,
377  $ npcol )
378  END IF
379 *
380 * Broadcast a row block of WORK in every row of template
381 *
382  IF( icrow.EQ.-1 ) THEN
383  IF( myrow.EQ.mcrow ) THEN
384  IF( lcmq.GT.1 )
385  $ CALL pbdtrsrt( icontxt, 'Row', n, mq, nb, work, n, beta,
386  $ c, ldc, lcmp, lcmq, mq0 )
387  CALL dgebs2d( icontxt, 'Col', '1-tree', n, mq, c, ldc )
388  ELSE
389  CALL dgebr2d( icontxt, 'Col', '1-tree', n, mq, c, ldc,
390  $ mcrow, mycol )
391  END IF
392 *
393 * Send a row block of WORK to the destination row
394 *
395  ELSE
396  IF( lcmq.EQ.1 ) THEN
397  IF( myrow.EQ.mcrow ) THEN
398  IF( myrow.NE.icrow )
399  $ CALL dgesd2d( icontxt, n, mq, work, n, icrow, mycol )
400  ELSE IF( myrow.EQ.icrow ) THEN
401  IF( beta.EQ.zero ) THEN
402  CALL dgerv2d( icontxt, n, mq, c, ldc, mcrow, mycol )
403  ELSE
404  CALL dgerv2d( icontxt, n, mq, work, n, mcrow, mycol )
405  CALL pbdmatadd( icontxt, 'G', n, mq, one, work, n,
406  $ beta, c, ldc )
407  END IF
408  END IF
409 *
410  ELSE
411  ml = mq0 * min( lcmq, max(0,iceil(m,nb)-mccol) )
412  IF( myrow.EQ.mcrow ) THEN
413  IF( myrow.NE.icrow )
414  $ CALL dgesd2d( icontxt, n, ml, work, n, icrow, mycol )
415  ELSE IF( myrow.EQ.icrow ) THEN
416  CALL dgerv2d( icontxt, n, ml, work, n, mcrow, mycol )
417  END IF
418 *
419  IF( myrow.EQ.icrow )
420  $ CALL pbdtrsrt( icontxt, 'Row', n, mq, nb, work, n, beta,
421  $ c, ldc, lcmp, lcmq, mq0 )
422  END IF
423  END IF
424 *
425  END IF
426 *
427 * When A is a row block
428 *
429  ELSE
430 *
431 * Form C <== A' ( A is a row block )
432 * _
433 * | |
434 * | |
435 * | | _____________
436 * |C| <== |______A______|
437 * | |
438 * | |
439 * |_|
440 *
441 * MRROW : row relative position in template from ICROW
442 * MRCOL : column relative position in template from IACOL
443 *
444  mrrow = mod( nprow+myrow-icrow, nprow )
445  mrcol = mod( npcol+mycol-iacol, npcol )
446  jccol = iccol
447  IF( iccol.EQ.-1 ) jccol = iacol
448 *
449  np = numroc( n, nb, myrow, icrow, nprow )
450  nq = numroc( n, nb, mycol, iacol, npcol )
451  np0 = numroc( numroc(n, nb, 0, 0, nprow), nb, 0, 0, lcmp )
452 *
453  IF( lda.LT.m .AND.
454  $ ( iarow.EQ.myrow .OR. iarow.EQ.-1 ) ) THEN
455  info = 8
456  ELSE IF( ldc.LT.np .AND.
457  $ ( iccol.EQ.mycol .OR. iccol.EQ.-1 ) ) THEN
458  info = 11
459  END IF
460  IF( info.NE.0 ) GO TO 10
461 *
462 * When a row process of IAROW has a row block A,
463 *
464  IF( iarow.GE.0 ) THEN
465  tbeta = zero
466  IF( mycol.EQ.jccol ) tbeta = beta
467 *
468  DO 40 i = 0, min( lcm, iceil(n,nb) ) - 1
469  mcrow = mod( mod(i, nprow) + icrow, nprow )
470  mccol = mod( mod(i, npcol) + iacol, npcol )
471  IF( lcmp.EQ.1 ) np0 = numroc( n, nb, i, 0, nprow )
472  idex = (i/nprow) * nb
473 *
474 * A source node copies the blocks to WORK, and send it
475 *
476  IF( myrow.EQ.iarow .AND. mycol.EQ.mccol ) THEN
477 *
478 * The source node is a destination node
479 *
480  jdex = (i/npcol) * nb
481  IF( myrow.EQ.mcrow .AND. mycol.EQ.jccol ) THEN
482  CALL pbdtr2at( icontxt, 'Row', trans, m, nq-jdex, nb,
483  $ a(1,jdex+1), lda, tbeta, c(idex+1,1),
484  $ ldc, lcmp, lcmq )
485 *
486 * The source node sends blocks to a destination node
487 *
488  ELSE
489  CALL pbdtr2bt( icontxt, 'Row', trans, m, nq-jdex, nb,
490  $ a(1,jdex+1), lda, zero, work, np0,
491  $ lcmq*nb )
492  CALL dgesd2d( icontxt, np0, m, work, np0,
493  $ mcrow, jccol )
494  END IF
495 *
496 * A destination node receives the copied blocks
497 *
498  ELSE IF( myrow.EQ.mcrow .AND. mycol.EQ.jccol ) THEN
499  IF( lcmp.EQ.1 .AND. tbeta.EQ.zero ) THEN
500  CALL dgerv2d( icontxt, np0, m, c, ldc, iarow, mccol )
501  ELSE
502  CALL dgerv2d( icontxt, np0, m, work, np0, iarow, mccol )
503  CALL pbdtr2af( icontxt, 'Col', np-idex, m, nb, work,
504  $ np0, tbeta, c(idex+1,1), ldc, lcmp, lcmq,
505  $ np0 )
506  END IF
507  END IF
508  40 CONTINUE
509 *
510 * Broadcast a column block of WORK in each row of template
511 *
512  IF( iccol.EQ.-1 ) THEN
513  IF( mycol.EQ.jccol ) THEN
514  CALL dgebs2d( icontxt, 'Row', '1-tree', np, m, c, ldc )
515  ELSE
516  CALL dgebr2d( icontxt, 'Row', '1-tree', np, m, c, ldc,
517  $ myrow, jccol )
518  END IF
519  END IF
520 *
521 * When all row procesors have a copy of the row block A,
522 *
523  ELSE
524  IF( lcmp.EQ.1 ) np0 = np
525 *
526 * Processors, which have diagonal blocks of A, copy them to
527 * WORK array in transposed form
528 *
529  DO 50 i = 0, lcmq-1
530  IF( mrrow.EQ.mod(npcol*i+mrcol, nprow) ) THEN
531  IF( lcmp.EQ.1.AND.(iccol.EQ.-1.OR.iccol.EQ.mycol) ) THEN
532  CALL pbdtr2bt( icontxt, 'Row', trans, m, nq-i*nb, nb,
533  $ a(1,i*nb+1), lda, beta, c, ldc,
534  $ lcmq*nb )
535  ELSE
536  CALL pbdtr2bt( icontxt, 'Row', trans, m, nq-i*nb, nb,
537  $ a(1,i*nb+1), lda, zero, work, np0,
538  $ lcmq*nb )
539  END IF
540  END IF
541  50 CONTINUE
542 *
543 * Get diagonal blocks of A for each row of the template
544 *
545  mccol = mod( mod(mrrow, npcol)+iacol, npcol )
546  IF( lcmp.GT.1 ) THEN
547  mcrow = mod( nprow+myrow-icrow, nprow )
548  CALL pbdtrget( icontxt, 'Col', np0, m, iceil(n,nb), work,
549  $ np0, mcrow, mccol, igd, myrow, mycol, nprow,
550  $ npcol )
551  END IF
552 *
553 * Broadcast a column block of WORK in every column of template
554 *
555  IF( iccol.EQ.-1 ) THEN
556  IF( mycol.EQ.mccol ) THEN
557  IF( lcmp.GT.1 )
558  $ CALL pbdtrsrt( icontxt, 'Col', np, m, nb, work, np0,
559  $ beta, c, ldc, lcmp, lcmq, np0 )
560  CALL dgebs2d( icontxt, 'Row', '1-tree', np, m, c, ldc )
561  ELSE
562  CALL dgebr2d( icontxt, 'Row', '1-tree', np, m, c, ldc,
563  $ myrow, mccol )
564  END IF
565 *
566 * Send a column block of WORK to the destination column
567 *
568  ELSE
569  IF( lcmp.EQ.1 ) THEN
570  IF( mycol.EQ.mccol ) THEN
571  IF( mycol.NE.iccol )
572  $ CALL dgesd2d( icontxt, np, m, work, np, myrow, iccol )
573  ELSE IF( mycol.EQ.iccol ) THEN
574  IF( beta.EQ.zero ) THEN
575  CALL dgerv2d( icontxt, np, m, c, ldc, myrow, mccol )
576  ELSE
577  CALL dgerv2d( icontxt, np, m, work, np, myrow, mccol )
578  CALL pbdmatadd( icontxt, 'G', np, m, one, work, np,
579  $ beta, c, ldc )
580  END IF
581  END IF
582 *
583  ELSE
584  ml = m * min( lcmp, max( 0, iceil(n,nb) - mcrow ) )
585  IF( mycol.EQ.mccol ) THEN
586  IF( mycol.NE.iccol )
587  $ CALL dgesd2d( icontxt, np0, ml, work, np0,
588  $ myrow, iccol )
589  ELSE IF( mycol.EQ.iccol ) THEN
590  CALL dgerv2d( icontxt, np0, ml, work, np0,
591  $ myrow, mccol )
592  END IF
593 *
594  IF( mycol.EQ.iccol )
595  $ CALL pbdtrsrt( icontxt, 'Col', np, m, nb, work, np0,
596  $ beta, c, ldc, lcmp, lcmq, np0 )
597  END IF
598  END IF
599 *
600  END IF
601  END IF
602 *
603  RETURN
604 *
605 * End of PBDTRAN
606 *
607  END
608 *
609 *=======================================================================
610 * SUBROUTINE PBDTR2AT
611 *=======================================================================
612 *
613  SUBROUTINE pbdtr2at( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA,
614  $ BETA, B, LDB, LCMP, LCMQ )
615 *
616 * -- PB-BLAS routine (version 2.1) --
617 * University of Tennessee, Knoxville, Oak Ridge National Laboratory.
618 * April 28, 1996
619 *
620 * .. Scalar Arguments ..
621  CHARACTER*1 ADIST, TRANS
622  INTEGER ICONTXT, LCMP, LCMQ, LDA, LDB, M, N, NB
623  DOUBLE PRECISION BETA
624 * ..
625 * .. Array Arguments ..
626  DOUBLE PRECISION A( LDA, * ), B( LDB, * )
627 * ..
628 *
629 * Purpose
630 * =======
631 *
632 * PBDTR2AT forms B <== A^T + beta*B, or A^C + beta*B
633 * B is a ((conjugate) transposed) scattered block row (or column),
634 * copied from a scattered block column (or row) of A
635 *
636 * =====================================================================
637 *
638 * .. Parameters ..
639  DOUBLE PRECISION ONE
640  PARAMETER ( ONE = 1.0d+0 )
641 * ..
642 * .. Local Scalars ..
643  INTEGER IA, IB, K, INTV, JNTV
644 * ..
645 * .. External Subroutines ..
646  EXTERNAL pbdmatadd
647 * ..
648 * .. External Functions ..
649  LOGICAL LSAME
650  INTEGER ICEIL
651  EXTERNAL lsame, iceil
652 * ..
653 * .. Intrinsic Functions ..
654  INTRINSIC min
655 * ..
656 * .. Excutable Statements ..
657 *
658  IF( lcmp.EQ.lcmq ) THEN
659  CALL pbdmatadd( icontxt, trans, n, m, one, a, lda, beta, b,
660  $ ldb )
661 *
662  ELSE
663 *
664 * If A is a column block ( ADIST = 'C' ),
665 *
666  IF( lsame( adist, 'C' ) ) THEN
667  intv = lcmp * nb
668  jntv = lcmq * nb
669  ia = 1
670  ib = 1
671  DO 10 k = 1, iceil( m, intv )
672  CALL pbdmatadd( icontxt, trans, n, min( m-ia+1, nb ),
673  $ one, a(ia,1), lda, beta, b(1,ib), ldb )
674  ia = ia + intv
675  ib = ib + jntv
676  10 CONTINUE
677 *
678 * If A is a row block ( ADIST = 'R' ),
679 *
680  ELSE
681  intv = lcmp * nb
682  jntv = lcmq * nb
683  ia = 1
684  ib = 1
685  DO 20 k = 1, iceil( n, jntv )
686  CALL pbdmatadd( icontxt, trans, min( n-ia+1, nb ), m,
687  $ one, a(1,ia), lda, beta, b(ib,1), ldb )
688  ia = ia + jntv
689  ib = ib + intv
690  20 CONTINUE
691  END IF
692  END IF
693 *
694  RETURN
695 *
696 * End of PBDTR2AT
697 *
698  END
699 *
700 *=======================================================================
701 * SUBROUTINE PBDTR2BT
702 *=======================================================================
703 *
704  SUBROUTINE pbdtr2bt( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA,
705  $ BETA, B, LDB, INTV )
706 *
707 * -- PB-BLAS routine (version 2.1) --
708 * University of Tennessee, Knoxville, Oak Ridge National Laboratory.
709 * April 28, 1996
710 *
711 * .. Scalar Arguments ..
712  CHARACTER*1 ADIST, TRANS
713  INTEGER ICONTXT, INTV, LDA, LDB, M, N, NB
714  DOUBLE PRECISION BETA
715 * ..
716 * .. Array Arguments ..
717  DOUBLE PRECISION A( LDA, * ), B( LDB, * )
718 * ..
719 *
720 * Purpose
721 * =======
722 *
723 * PBDTR2BT forms T <== A^T + beta*T or A^C + beta*T, where T is a
724 * ((conjugate) transposed) condensed block row (or column), copied from
725 * a scattered block column (or row) of A
726 *
727 * =====================================================================
728 *
729 * .. Parameters ..
730  DOUBLE PRECISION ONE
731  PARAMETER ( ONE = 1.0d+0 )
732 * ..
733 * .. Local Scalars ..
734  INTEGER IA, IB, K
735 * ..
736 * .. External Functions ..
737  LOGICAL LSAME
738  INTEGER ICEIL
739  EXTERNAL LSAME, ICEIL
740 * ..
741 * .. External Subroutines ..
742  EXTERNAL pbdmatadd
743 * ..
744 * .. Intrinsic Functions ..
745  INTRINSIC min
746 * ..
747 * .. Excutable Statements ..
748 *
749  IF( intv.EQ.nb ) THEN
750  CALL pbdmatadd( icontxt, trans, n, m, one, a, lda, beta, b,
751  $ ldb )
752 *
753  ELSE
754 *
755 * If A is a column block ( ADIST = 'C' ),
756 *
757  IF( lsame( adist, 'C' ) ) THEN
758  ia = 1
759  ib = 1
760  DO 10 k = 1, iceil( m, intv )
761  CALL pbdmatadd( icontxt, trans, n, min( m-ia+1, nb ),
762  $ one, a(ia,1), lda, beta, b(1,ib), ldb )
763  ia = ia + intv
764  ib = ib + nb
765  10 CONTINUE
766 *
767 * If A is a row block (ADIST = 'R'),
768 *
769  ELSE
770  ia = 1
771  ib = 1
772  DO 20 k = 1, iceil( n, intv )
773  CALL pbdmatadd( icontxt, trans, min( n-ia+1, nb ), m,
774  $ one, a(1,ia), lda, beta, b(ib,1), ldb )
775  ia = ia + intv
776  ib = ib + nb
777  20 CONTINUE
778  END IF
779  END IF
780 *
781  RETURN
782 *
783 * End of PBDTR2BT
784 *
785  END
786 *
787 *=======================================================================
788 * SUBROUTINE PBDTR2AF
789 *=======================================================================
790 *
791  SUBROUTINE pbdtr2af( ICONTXT, ADIST, M, N, NB, A, LDA, BETA, B,
792  $ LDB, LCMP, LCMQ, NINT )
793 *
794 * -- PB-BLAS routine (version 2.1) --
795 * University of Tennessee, Knoxville, Oak Ridge National Laboratory.
796 * April 28, 1996
797 *
798 * .. Scalar Arguments ..
799  CHARACTER*1 ADIST
800  INTEGER ICONTXT, M, N, NB, LDA, LDB, LCMP, LCMQ, NINT
801  DOUBLE PRECISION BETA
802 * ..
803 * .. Array Arguments ..
804  DOUBLE PRECISION A( LDA, * ), B( LDB, * )
805 * ..
806 *
807 * Purpose
808 * =======
809 *
810 * PBDTR2AF forms T <== A + BETA*T, where T is a scattered block
811 * row (or column) copied from a (condensed) block column (or row) of A
812 *
813 * =====================================================================
814 *
815 * .. Parameters ..
816  DOUBLE PRECISION ONE
817  PARAMETER ( ONE = 1.0d+0 )
818 * ..
819 * .. Local Scalars ..
820  INTEGER JA, JB, K, INTV
821 * ..
822 * .. External Functions ..
823  LOGICAL LSAME
824  INTEGER ICEIL
825  EXTERNAL LSAME, ICEIL
826 * ..
827 * .. Intrinsic Functions ..
828  INTRINSIC min
829 * ..
830 * .. Executable Statements ..
831 *
832  IF( lsame( adist, 'R' ) ) THEN
833  intv = nb * lcmq
834  ja = 1
835  jb = 1
836  DO 10 k = 1, iceil( nint, nb )
837  CALL pbdmatadd( icontxt, 'G', m, min( n-jb+1, nb ), one,
838  $ a(1,ja), lda, beta, b(1,jb), ldb )
839  ja = ja + nb
840  jb = jb + intv
841  10 CONTINUE
842 *
843 * if( LSAME( ADIST, 'C' ) ) then
844 *
845  ELSE
846  intv = nb * lcmp
847  ja = 1
848  jb = 1
849  DO 20 k = 1, iceil( nint, nb )
850  CALL pbdmatadd( icontxt, 'G', min( m-jb+1, nb ), n, one,
851  $ a(ja,1), lda, beta, b(jb,1), ldb )
852  ja = ja + nb
853  jb = jb + intv
854  20 CONTINUE
855  END IF
856 *
857  RETURN
858 *
859 * End of PBDTR2AF
860 *
861  END
pbdtrget
subroutine pbdtrget(ICONTXT, ADIST, M, N, MNB, A, LDA, MCROW, MCCOL, IGD, MYROW, MYCOL, NPROW, NPCOL)
Definition: pbdtrget.f:3
max
#define max(A, B)
Definition: pcgemr.c:180
pbdtrsrt
subroutine pbdtrsrt(ICONTXT, ADIST, M, N, NB, A, LDA, BETA, B, LDB, LCMP, LCMQ, NINT)
Definition: pbdtrsrt.f:3
pbdtr2bt
subroutine pbdtr2bt(ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, BETA, B, LDB, INTV)
Definition: pbdtran.f:706
pbdtr2af
subroutine pbdtr2af(ICONTXT, ADIST, M, N, NB, A, LDA, BETA, B, LDB, LCMP, LCMQ, NINT)
Definition: pbdtran.f:793
pbdtran
subroutine pbdtran(ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, BETA, C, LDC, IAROW, IACOL, ICROW, ICCOL, WORK)
Definition: pbdtran.f:3
pbdmatadd
subroutine pbdmatadd(ICONTXT, MODE, M, N, ALPHA, A, LDA, BETA, B, LDB)
Definition: pbdmatadd.f:3
pbdtr2at
subroutine pbdtr2at(ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, BETA, B, LDB, LCMP, LCMQ)
Definition: pbdtran.f:615
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
min
#define min(A, B)
Definition: pcgemr.c:181