ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pbstran.f
Go to the documentation of this file.
1  SUBROUTINE pbstran( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, BETA,
2  $ C, LDC, IAROW, IACOL, ICROW, ICCOL, WORK )
3 *
4 * -- PB-BLAS routine (version 2.1) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory.
6 * April 28, 1996
7 *
8 * Jaeyoung Choi, Oak Ridge National Laboratory
9 * Jack Dongarra, University of Tennessee and Oak Ridge National Lab.
10 * David Walker, Oak Ridge National Laboratory
11 *
12 * .. Scalar Arguments ..
13  CHARACTER*1 ADIST, TRANS
14  INTEGER IACOL, IAROW, ICCOL, ICONTXT, ICROW, LDA, LDC,
15  $ m, n, nb
16  REAL BETA
17 * ..
18 * .. Array Arguments ..
19  REAL A( LDA, * ), C( LDC, * ), WORK( * )
20 * ..
21 *
22 * Purpose
23 * =======
24 *
25 * PBSTRAN transposes a column block to row block, or a row block to
26 * column block by reallocating data distribution.
27 *
28 * C := A^T + beta*C, or C := A^C + beta*C
29 *
30 * where A is an M-by-N matrix and C is an N-by-M matrix, and the size
31 * of M or N is limited to its block size NB.
32 *
33 * The first elements of the matrices A, and C should be located at
34 * the beginnings of their first blocks. (not the middle of the blocks.)
35 *
36 * Parameters
37 * ==========
38 *
39 * ICONTXT (input) INTEGER
40 * ICONTXT is the BLACS mechanism for partitioning communication
41 * space. A defining property of a context is that a message in
42 * a context cannot be sent or received in another context. The
43 * BLACS context includes the definition of a grid, and each
44 * process' coordinates in it.
45 *
46 * ADIST - (input) CHARACTER*1
47 * ADIST specifies whether A is a column block or a row block.
48 *
49 * ADIST = 'C', A is a column block
50 * ADIST = 'R', A is a row block
51 *
52 * TRANS - (input) CHARACTER*1
53 * TRANS specifies whether the transposed format is transpose
54 * or conjugate transpose. If the matrices A and C are real,
55 * the argument is ignored.
56 *
57 * TRANS = 'T', transpose
58 * TRANS = 'C', conjugate transpose
59 *
60 * M - (input) INTEGER
61 * M specifies the (global) number of rows of the matrix (block
62 * column or block row) A and of columns of the matrix C.
63 * M >= 0.
64 *
65 * N - (input) INTEGER
66 * N specifies the (global) number of columns of the matrix
67 * (block column or block row) A and of columns of the matrix
68 * C. N >= 0.
69 *
70 * NB - (input) INTEGER
71 * NB specifies the column block size of the matrix A and the
72 * row block size of the matrix C when ADIST = 'C'. Otherwise,
73 * it specifies the row block size of the matrix A and the
74 * column block size of the matrix C. NB >= 1.
75 *
76 * A (input) REAL array of DIMENSION ( LDA, Lx ),
77 * where Lx is N when ADIST = 'C', or Nq when ADIST = 'R'.
78 * Before entry with ADIST = 'C', the leading Mp by N part of
79 * the array A must contain the matrix A, otherwise the leading
80 * M by Nq part of the array A must contain the matrix A. See
81 * parameter details for the values of Mp and Nq.
82 *
83 * LDA (input) INTEGER
84 * LDA specifies the leading dimension of (local) A as declared
85 * in the calling (sub) program. LDA >= MAX(1,Mp) when
86 * ADIST = 'C', or LDA >= MAX(1,M) otherwise.
87 *
88 * BETA (input) REAL
89 * BETA specifies scaler beta.
90 *
91 * C (input/output) REAL array of DIMENSION ( LDC, Lx ),
92 * where Lx is Mq when ADIST = 'C', or N when ADIST = 'R'.
93 * If ADIST = 'C', the leading N-by-Mq part of the array C
94 * contains the (local) matrix C, otherwise the leading
95 * Np-by-M part of the array C must contain the (local) matrix
96 * C. C will not be referenced if beta is zero.
97 *
98 * LDC (input) INTEGER
99 * LDC specifies the leading dimension of (local) C as declared
100 * in the calling (sub) program. LDC >= MAX(1,N) when ADIST='C',
101 * or LDC >= MAX(1,Np) otherwise.
102 *
103 * IAROW (input) INTEGER
104 * IAROW specifies a row of the process template,
105 * which holds the first block of the matrix A. If A is a row
106 * of blocks (ADIST = 'R') and all rows of processes have a copy
107 * of A, then set IAROW = -1.
108 *
109 * IACOL (input) INTEGER
110 * IACOL specifies a column of the process template,
111 * which holds the first block of the matrix A. If A is a
112 * column of blocks (ADIST = 'C') and all columns of processes
113 * have a copy of A, then set IACOL = -1.
114 *
115 * ICROW (input) INTEGER
116 * ICROW specifies the current row process which holds
117 * the first block of the matrix C, which is transposed of A.
118 * If C is a row of blocks (ADIST = 'C') and the transposed
119 * row block C is distributed all rows of processes, set
120 * ICROW = -1.
121 *
122 * ICCOL (input) INTEGER
123 * ICCOL specifies the current column process which holds
124 * the first block of the matrix C, which is transposed of A.
125 * If C is a column of blocks (ADIST = 'R') and the transposed
126 * column block C is distributed all columns of processes,
127 * set ICCOL = -1.
128 *
129 * WORK (workspace) REAL array of dimension Size(WORK).
130 * It needs extra working space of A'.
131 *
132 * Parameters Details
133 * ==================
134 *
135 * Lx It is a local portion of L owned by a process, (L is
136 * replaced by M, or N, and x is replaced by either p (=NPROW)
137 * or q (=NPCOL)). The value is determined by L, LB, x, and
138 * MI, where LB is a block size and MI is a row or column
139 * position in a process template. Lx is equal to or less
140 * than Lx0 = CEIL( L, LB*x ) * LB.
141 *
142 * Communication Scheme
143 * ====================
144 *
145 * The communication scheme of the routine is set to '1-tree', which is
146 * fan-out. (For details, see BLACS user's guide.)
147 *
148 * Memory Requirement of WORK
149 * ==========================
150 *
151 * Mqb = CEIL( M, NB*NPCOL )
152 * Npb = CEIL( N, NB*NPROW )
153 * LCMQ = LCM / NPCOL
154 * LCMP = LCM / NPROW
155 *
156 * (1) ADIST = 'C'
157 * (a) IACOL != -1
158 * Size(WORK) = N * CEIL(Mqb,LCMQ)*NB
159 * (b) IACOL = -1
160 * Size(WORK) = N * CEIL(Mqb,LCMQ)*NB * MIN(LCMQ,CEIL(M,NB))
161 *
162 * (2) ADIST = 'R'
163 * (a) IAROW != -1
164 * Size(WORK) = M * CEIL(Npb,LCMP)*NB
165 * (b) IAROW = -1
166 * Size(WORK) = M * CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(N,NB))
167 *
168 * Notes
169 * -----
170 * More precise space can be computed as
171 *
172 * CEIL(Mqb,LCMQ)*NB => NUMROC( NUMROC(M,NB,0,0,NPCOL), NB, 0, 0, LCMQ )
173 * CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(N,NB,0,0,NPROW), NB, 0, 0, LCMP )
174 *
175 * =====================================================================
176 *
177 * ..
178 * .. Parameters ..
179  REAL ONE, ZERO
180  parameter( one = 1.0e+0, zero = 0.0e+0 )
181 * ..
182 * .. Local Scalars ..
183  LOGICAL COLFORM, ROWFORM
184  INTEGER I, IDEX, IGD, INFO, JCCOL, JCROW, JDEX, LCM,
185  $ lcmp, lcmq, mccol, mcrow, ml, mp, mq, mq0,
186  $ mrcol, mrrow, mycol, myrow, np, np0, npcol,
187  $ nprow, nq
188  REAL TBETA
189 * ..
190 * .. External Functions ..
191  LOGICAL LSAME
192  INTEGER ILCM, ICEIL, NUMROC
193  EXTERNAL ilcm, iceil, lsame, numroc
194 * ..
195 * .. External Subroutines ..
196  EXTERNAL blacs_gridinfo, pbsmatadd, pbstr2af, pbstr2at,
197  $ pbstr2bt, pbstrget, pbstrsrt, pxerbla, sgebr2d,
198  $ sgebs2d, sgerv2d, sgesd2d
199 * ..
200 * .. Intrinsic Functions ..
201  INTRINSIC max, min, mod
202 * ..
203 * .. Executable Statements ..
204 *
205 * Quick return if possible.
206 *
207  IF( m.EQ.0 .OR. n.EQ.0 ) RETURN
208 *
209  CALL blacs_gridinfo( icontxt, nprow, npcol, myrow, mycol )
210 *
211  colform = lsame( adist, 'C' )
212  rowform = lsame( adist, 'R' )
213 *
214 * Test the input parameters.
215 *
216  info = 0
217  IF( ( .NOT.colform ) .AND. ( .NOT.rowform ) ) THEN
218  info = 2
219  ELSE IF( m .LT.0 ) THEN
220  info = 4
221  ELSE IF( n .LT.0 ) THEN
222  info = 5
223  ELSE IF( nb.LT.1 ) THEN
224  info = 6
225  ELSE IF( iarow.LT.-1 .OR. iarow.GE.nprow .OR.
226  $ ( iarow.EQ.-1 .AND. colform ) ) THEN
227  info = 12
228  ELSE IF( iacol.LT.-1 .OR. iacol.GE.npcol .OR.
229  $ ( iacol.EQ.-1 .AND. rowform ) ) THEN
230  info = 13
231  ELSE IF( icrow.LT.-1 .OR. icrow.GE.nprow .OR.
232  $ ( icrow.EQ.-1 .AND. rowform ) ) THEN
233  info = 14
234  ELSE IF( iccol.LT.-1 .OR. iccol.GE.npcol .OR.
235  $ ( iccol.EQ.-1 .AND. colform ) ) THEN
236  info = 15
237  END IF
238 *
239  10 CONTINUE
240  IF( info .NE. 0 ) THEN
241  CALL pxerbla( icontxt, 'PBSTRAN ', info )
242  RETURN
243  END IF
244 *
245 * Start the operations.
246 *
247 * LCM : the least common multiple of NPROW and NPCOL
248 *
249  lcm = ilcm( nprow, npcol )
250  lcmp = lcm / nprow
251  lcmq = lcm / npcol
252  igd = npcol / lcmp
253 *
254 * When A is a column block
255 *
256  IF( colform ) THEN
257 *
258 * Form C <== A' ( A is a column block )
259 * _
260 * | |
261 * | |
262 * _____________ | |
263 * |______C______| <== |A|
264 * | |
265 * | |
266 * |_|
267 *
268 * MRROW : row relative position in template from IAROW
269 * MRCOL : column relative position in template from ICCOL
270 *
271  mrrow = mod( nprow+myrow-iarow, nprow )
272  mrcol = mod( npcol+mycol-iccol, npcol )
273  jcrow = icrow
274  IF( icrow.EQ.-1 ) jcrow = iarow
275 *
276  mp = numroc( m, nb, myrow, iarow, nprow )
277  mq = numroc( m, nb, mycol, iccol, npcol )
278  mq0 = numroc( numroc(m, nb, 0, 0, npcol), nb, 0, 0, lcmq )
279 *
280  IF( lda.LT.mp .AND.
281  $ ( iacol.EQ.mycol .OR. iacol.EQ.-1 ) ) THEN
282  info = 8
283  ELSE IF( ldc.LT.n .AND.
284  $ ( icrow.EQ.myrow .OR. icrow.EQ.-1 ) ) THEN
285  info = 11
286  END IF
287  IF( info.NE.0 ) GO TO 10
288 *
289 * When a column process of IACOL has a column block A,
290 *
291  IF( iacol.GE.0 ) THEN
292  tbeta = zero
293  IF( myrow.EQ.jcrow ) tbeta = beta
294 *
295  DO 20 i = 0, min( lcm, iceil(m,nb) ) - 1
296  mcrow = mod( mod(i, nprow) + iarow, nprow )
297  mccol = mod( mod(i, npcol) + iccol, npcol )
298  IF( lcmq.EQ.1 ) mq0 = numroc( m, nb, i, 0, npcol )
299  jdex = (i/npcol) * nb
300 *
301 * A source node copies the blocks to WORK, and send it
302 *
303  IF( myrow.EQ.mcrow .AND. mycol.EQ.iacol ) THEN
304 *
305 * The source node is a destination node
306 *
307  idex = (i/nprow) * nb
308  IF( myrow.EQ.jcrow .AND. mycol.EQ.mccol ) THEN
309  CALL pbstr2at( icontxt, 'Col', trans, mp-idex, n, nb,
310  $ a(idex+1,1), lda, tbeta, c(1,jdex+1),
311  $ ldc, lcmp, lcmq )
312 *
313 * The source node sends blocks to a destination node
314 *
315  ELSE
316  CALL pbstr2bt( icontxt, 'Col', trans, mp-idex, n, nb,
317  $ a(idex+1,1), lda, zero, work, n,
318  $ lcmp*nb )
319  CALL sgesd2d( icontxt, n, mq0, work, n, jcrow, mccol )
320  END IF
321 *
322 * A destination node receives the copied blocks
323 *
324  ELSE IF( myrow.EQ.jcrow .AND. mycol.EQ.mccol ) THEN
325  IF( lcmq.EQ.1 .AND. tbeta.EQ.zero ) THEN
326  CALL sgerv2d( icontxt, n, mq0, c, ldc, mcrow, iacol )
327  ELSE
328  CALL sgerv2d( icontxt, n, mq0, work, n, mcrow, iacol )
329  CALL pbstr2af( icontxt, 'Row', n, mq-jdex, nb, work, n,
330  $ tbeta, c(1,jdex+1), ldc, lcmp, lcmq,
331  $ mq0 )
332  END IF
333  END IF
334  20 CONTINUE
335 *
336 * Broadcast a row block of C in each column of template
337 *
338  IF( icrow.EQ.-1 ) THEN
339  IF( myrow.EQ.jcrow ) THEN
340  CALL sgebs2d( icontxt, 'Col', '1-tree', n, mq, c, ldc )
341  ELSE
342  CALL sgebr2d( icontxt, 'Col', '1-tree', n, mq, c, ldc,
343  $ jcrow, mycol )
344  END IF
345  END IF
346 *
347 * When all column procesors have a copy of the column block A,
348 *
349  ELSE
350  IF( lcmq.EQ.1 ) mq0 = mq
351 *
352 * Processors, which have diagonal blocks of A, copy them to
353 * WORK array in transposed form
354 *
355  DO 30 i = 0, lcmp-1
356  IF( mrcol.EQ.mod( nprow*i+mrrow, npcol ) ) THEN
357  IF( lcmq.EQ.1.AND.(icrow.EQ.-1.OR.icrow.EQ.myrow) ) THEN
358  CALL pbstr2bt( icontxt, 'Col', trans, mp-i*nb, n, nb,
359  $ a(i*nb+1,1), lda, beta, c, ldc,
360  $ lcmp*nb )
361  ELSE
362  CALL pbstr2bt( icontxt, 'Col', trans, mp-i*nb, n, nb,
363  $ a(i*nb+1,1), lda, zero, work, n,
364  $ lcmp*nb )
365  END IF
366  END IF
367  30 CONTINUE
368 *
369 * Get diagonal blocks of A for each column of the template
370 *
371  mcrow = mod( mod(mrcol,nprow)+iarow, nprow )
372  IF( lcmq.GT.1 ) THEN
373  mccol = mod( npcol+mycol-iccol, npcol )
374  CALL pbstrget( icontxt, 'Row', n, mq0, iceil(m,nb), work, n,
375  $ mcrow, mccol, igd, myrow, mycol, nprow,
376  $ npcol )
377  END IF
378 *
379 * Broadcast a row block of WORK in every row of template
380 *
381  IF( icrow.EQ.-1 ) THEN
382  IF( myrow.EQ.mcrow ) THEN
383  IF( lcmq.GT.1 )
384  $ CALL pbstrsrt( icontxt, 'Row', n, mq, nb, work, n, beta,
385  $ c, ldc, lcmp, lcmq, mq0 )
386  CALL sgebs2d( icontxt, 'Col', '1-tree', n, mq, c, ldc )
387  ELSE
388  CALL sgebr2d( icontxt, 'Col', '1-tree', n, mq, c, ldc,
389  $ mcrow, mycol )
390  END IF
391 *
392 * Send a row block of WORK to the destination row
393 *
394  ELSE
395  IF( lcmq.EQ.1 ) THEN
396  IF( myrow.EQ.mcrow ) THEN
397  IF( myrow.NE.icrow )
398  $ CALL sgesd2d( icontxt, n, mq, work, n, icrow, mycol )
399  ELSE IF( myrow.EQ.icrow ) THEN
400  IF( beta.EQ.zero ) THEN
401  CALL sgerv2d( icontxt, n, mq, c, ldc, mcrow, mycol )
402  ELSE
403  CALL sgerv2d( icontxt, n, mq, work, n, mcrow, mycol )
404  CALL pbsmatadd( icontxt, 'G', n, mq, one, work, n,
405  $ beta, c, ldc )
406  END IF
407  END IF
408 *
409  ELSE
410  ml = mq0 * min( lcmq, max(0,iceil(m,nb)-mccol) )
411  IF( myrow.EQ.mcrow ) THEN
412  IF( myrow.NE.icrow )
413  $ CALL sgesd2d( icontxt, n, ml, work, n, icrow, mycol )
414  ELSE IF( myrow.EQ.icrow ) THEN
415  CALL sgerv2d( icontxt, n, ml, work, n, mcrow, mycol )
416  END IF
417 *
418  IF( myrow.EQ.icrow )
419  $ CALL pbstrsrt( icontxt, 'Row', n, mq, nb, work, n, beta,
420  $ c, ldc, lcmp, lcmq, mq0 )
421  END IF
422  END IF
423 *
424  END IF
425 *
426 * When A is a row block
427 *
428  ELSE
429 *
430 * Form C <== A' ( A is a row block )
431 * _
432 * | |
433 * | |
434 * | | _____________
435 * |C| <== |______A______|
436 * | |
437 * | |
438 * |_|
439 *
440 * MRROW : row relative position in template from ICROW
441 * MRCOL : column relative position in template from IACOL
442 *
443  mrrow = mod( nprow+myrow-icrow, nprow )
444  mrcol = mod( npcol+mycol-iacol, npcol )
445  jccol = iccol
446  IF( iccol.EQ.-1 ) jccol = iacol
447 *
448  np = numroc( n, nb, myrow, icrow, nprow )
449  nq = numroc( n, nb, mycol, iacol, npcol )
450  np0 = numroc( numroc(n, nb, 0, 0, nprow), nb, 0, 0, lcmp )
451 *
452  IF( lda.LT.m .AND.
453  $ ( iarow.EQ.myrow .OR. iarow.EQ.-1 ) ) THEN
454  info = 8
455  ELSE IF( ldc.LT.np .AND.
456  $ ( iccol.EQ.mycol .OR. iccol.EQ.-1 ) ) THEN
457  info = 11
458  END IF
459  IF( info.NE.0 ) GO TO 10
460 *
461 * When a row process of IAROW has a row block A,
462 *
463  IF( iarow.GE.0 ) THEN
464  tbeta = zero
465  IF( mycol.EQ.jccol ) tbeta = beta
466 *
467  DO 40 i = 0, min( lcm, iceil(n,nb) ) - 1
468  mcrow = mod( mod(i, nprow) + icrow, nprow )
469  mccol = mod( mod(i, npcol) + iacol, npcol )
470  IF( lcmp.EQ.1 ) np0 = numroc( n, nb, i, 0, nprow )
471  idex = (i/nprow) * nb
472 *
473 * A source node copies the blocks to WORK, and send it
474 *
475  IF( myrow.EQ.iarow .AND. mycol.EQ.mccol ) THEN
476 *
477 * The source node is a destination node
478 *
479  jdex = (i/npcol) * nb
480  IF( myrow.EQ.mcrow .AND. mycol.EQ.jccol ) THEN
481  CALL pbstr2at( icontxt, 'Row', trans, m, nq-jdex, nb,
482  $ a(1,jdex+1), lda, tbeta, c(idex+1,1),
483  $ ldc, lcmp, lcmq )
484 *
485 * The source node sends blocks to a destination node
486 *
487  ELSE
488  CALL pbstr2bt( icontxt, 'Row', trans, m, nq-jdex, nb,
489  $ a(1,jdex+1), lda, zero, work, np0,
490  $ lcmq*nb )
491  CALL sgesd2d( icontxt, np0, m, work, np0,
492  $ mcrow, jccol )
493  END IF
494 *
495 * A destination node receives the copied blocks
496 *
497  ELSE IF( myrow.EQ.mcrow .AND. mycol.EQ.jccol ) THEN
498  IF( lcmp.EQ.1 .AND. tbeta.EQ.zero ) THEN
499  CALL sgerv2d( icontxt, np0, m, c, ldc, iarow, mccol )
500  ELSE
501  CALL sgerv2d( icontxt, np0, m, work, np0, iarow, mccol )
502  CALL pbstr2af( icontxt, 'Col', np-idex, m, nb, work,
503  $ np0, tbeta, c(idex+1,1), ldc, lcmp, lcmq,
504  $ np0 )
505  END IF
506  END IF
507  40 CONTINUE
508 *
509 * Broadcast a column block of WORK in each row of template
510 *
511  IF( iccol.EQ.-1 ) THEN
512  IF( mycol.EQ.jccol ) THEN
513  CALL sgebs2d( icontxt, 'Row', '1-tree', np, m, c, ldc )
514  ELSE
515  CALL sgebr2d( icontxt, 'Row', '1-tree', np, m, c, ldc,
516  $ myrow, jccol )
517  END IF
518  END IF
519 *
520 * When all row procesors have a copy of the row block A,
521 *
522  ELSE
523  IF( lcmp.EQ.1 ) np0 = np
524 *
525 * Processors, which have diagonal blocks of A, copy them to
526 * WORK array in transposed form
527 *
528  DO 50 i = 0, lcmq-1
529  IF( mrrow.EQ.mod(npcol*i+mrcol, nprow) ) THEN
530  IF( lcmp.EQ.1.AND.(iccol.EQ.-1.OR.iccol.EQ.mycol) ) THEN
531  CALL pbstr2bt( icontxt, 'Row', trans, m, nq-i*nb, nb,
532  $ a(1,i*nb+1), lda, beta, c, ldc,
533  $ lcmq*nb )
534  ELSE
535  CALL pbstr2bt( icontxt, 'Row', trans, m, nq-i*nb, nb,
536  $ a(1,i*nb+1), lda, zero, work, np0,
537  $ lcmq*nb )
538  END IF
539  END IF
540  50 CONTINUE
541 *
542 * Get diagonal blocks of A for each row of the template
543 *
544  mccol = mod( mod(mrrow, npcol)+iacol, npcol )
545  IF( lcmp.GT.1 ) THEN
546  mcrow = mod( nprow+myrow-icrow, nprow )
547  CALL pbstrget( icontxt, 'Col', np0, m, iceil(n,nb), work,
548  $ np0, mcrow, mccol, igd, myrow, mycol, nprow,
549  $ npcol )
550  END IF
551 *
552 * Broadcast a column block of WORK in every column of template
553 *
554  IF( iccol.EQ.-1 ) THEN
555  IF( mycol.EQ.mccol ) THEN
556  IF( lcmp.GT.1 )
557  $ CALL pbstrsrt( icontxt, 'Col', np, m, nb, work, np0,
558  $ beta, c, ldc, lcmp, lcmq, np0 )
559  CALL sgebs2d( icontxt, 'Row', '1-tree', np, m, c, ldc )
560  ELSE
561  CALL sgebr2d( icontxt, 'Row', '1-tree', np, m, c, ldc,
562  $ myrow, mccol )
563  END IF
564 *
565 * Send a column block of WORK to the destination column
566 *
567  ELSE
568  IF( lcmp.EQ.1 ) THEN
569  IF( mycol.EQ.mccol ) THEN
570  IF( mycol.NE.iccol )
571  $ CALL sgesd2d( icontxt, np, m, work, np, myrow, iccol )
572  ELSE IF( mycol.EQ.iccol ) THEN
573  IF( beta.EQ.zero ) THEN
574  CALL sgerv2d( icontxt, np, m, c, ldc, myrow, mccol )
575  ELSE
576  CALL sgerv2d( icontxt, np, m, work, np, myrow, mccol )
577  CALL pbsmatadd( icontxt, 'G', np, m, one, work, np,
578  $ beta, c, ldc )
579  END IF
580  END IF
581 *
582  ELSE
583  ml = m * min( lcmp, max( 0, iceil(n,nb) - mcrow ) )
584  IF( mycol.EQ.mccol ) THEN
585  IF( mycol.NE.iccol )
586  $ CALL sgesd2d( icontxt, np0, ml, work, np0,
587  $ myrow, iccol )
588  ELSE IF( mycol.EQ.iccol ) THEN
589  CALL sgerv2d( icontxt, np0, ml, work, np0,
590  $ myrow, mccol )
591  END IF
592 *
593  IF( mycol.EQ.iccol )
594  $ CALL pbstrsrt( icontxt, 'Col', np, m, nb, work, np0,
595  $ beta, c, ldc, lcmp, lcmq, np0 )
596  END IF
597  END IF
598 *
599  END IF
600  END IF
601 *
602  RETURN
603 *
604 * End of PBSTRAN
605 *
606  END
607 *
608 *=======================================================================
609 * SUBROUTINE PBSTR2AT
610 *=======================================================================
611 *
612  SUBROUTINE pbstr2at( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA,
613  $ BETA, B, LDB, LCMP, LCMQ )
614 *
615 * -- PB-BLAS routine (version 2.1) --
616 * University of Tennessee, Knoxville, Oak Ridge National Laboratory.
617 * April 28, 1996
618 *
619 * .. Scalar Arguments ..
620  CHARACTER*1 ADIST, TRANS
621  INTEGER ICONTXT, LCMP, LCMQ, LDA, LDB, M, N, NB
622  REAL BETA
623 * ..
624 * .. Array Arguments ..
625  REAL A( LDA, * ), B( LDB, * )
626 * ..
627 *
628 * Purpose
629 * =======
630 *
631 * PBSTR2AT forms B <== A^T + beta*B, or A^C + beta*B
632 * B is a ((conjugate) transposed) scattered block row (or column),
633 * copied from a scattered block column (or row) of A
634 *
635 * =====================================================================
636 *
637 * .. Parameters ..
638  REAL ONE
639  PARAMETER ( ONE = 1.0e+0 )
640 * ..
641 * .. Local Scalars ..
642  INTEGER IA, IB, K, INTV, JNTV
643 * ..
644 * .. External Subroutines ..
645  EXTERNAL pbsmatadd
646 * ..
647 * .. External Functions ..
648  LOGICAL LSAME
649  INTEGER ICEIL
650  EXTERNAL lsame, iceil
651 * ..
652 * .. Intrinsic Functions ..
653  INTRINSIC min
654 * ..
655 * .. Excutable Statements ..
656 *
657  IF( lcmp.EQ.lcmq ) THEN
658  CALL pbsmatadd( icontxt, trans, n, m, one, a, lda, beta, b,
659  $ ldb )
660 *
661  ELSE
662 *
663 * If A is a column block ( ADIST = 'C' ),
664 *
665  IF( lsame( adist, 'C' ) ) THEN
666  intv = lcmp * nb
667  jntv = lcmq * nb
668  ia = 1
669  ib = 1
670  DO 10 k = 1, iceil( m, intv )
671  CALL pbsmatadd( icontxt, trans, n, min( m-ia+1, nb ),
672  $ one, a(ia,1), lda, beta, b(1,ib), ldb )
673  ia = ia + intv
674  ib = ib + jntv
675  10 CONTINUE
676 *
677 * If A is a row block ( ADIST = 'R' ),
678 *
679  ELSE
680  intv = lcmp * nb
681  jntv = lcmq * nb
682  ia = 1
683  ib = 1
684  DO 20 k = 1, iceil( n, jntv )
685  CALL pbsmatadd( icontxt, trans, min( n-ia+1, nb ), m,
686  $ one, a(1,ia), lda, beta, b(ib,1), ldb )
687  ia = ia + jntv
688  ib = ib + intv
689  20 CONTINUE
690  END IF
691  END IF
692 *
693  RETURN
694 *
695 * End of PBSTR2AT
696 *
697  END
698 *
699 *=======================================================================
700 * SUBROUTINE PBSTR2BT
701 *=======================================================================
702 *
703  SUBROUTINE pbstr2bt( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA,
704  $ BETA, B, LDB, INTV )
705 *
706 * -- PB-BLAS routine (version 2.1) --
707 * University of Tennessee, Knoxville, Oak Ridge National Laboratory.
708 * April 28, 1996
709 *
710 * .. Scalar Arguments ..
711  CHARACTER*1 ADIST, TRANS
712  INTEGER ICONTXT, INTV, LDA, LDB, M, N, NB
713  REAL BETA
714 * ..
715 * .. Array Arguments ..
716  REAL A( LDA, * ), B( LDB, * )
717 * ..
718 *
719 * Purpose
720 * =======
721 *
722 * PBSTR2BT forms T <== A^T + beta*T or A^C + beta*T, where T is a
723 * ((conjugate) transposed) condensed block row (or column), copied from
724 * a scattered block column (or row) of A
725 *
726 * =====================================================================
727 *
728 * .. Parameters ..
729  REAL ONE
730  PARAMETER ( ONE = 1.0e+0 )
731 * ..
732 * .. Local Scalars ..
733  INTEGER IA, IB, K
734 * ..
735 * .. External Functions ..
736  LOGICAL LSAME
737  INTEGER ICEIL
738  EXTERNAL LSAME, ICEIL
739 * ..
740 * .. External Subroutines ..
741  EXTERNAL pbsmatadd
742 * ..
743 * .. Intrinsic Functions ..
744  INTRINSIC min
745 * ..
746 * .. Excutable Statements ..
747 *
748  IF( intv.EQ.nb ) THEN
749  CALL pbsmatadd( icontxt, trans, n, m, one, a, lda, beta, b,
750  $ ldb )
751 *
752  ELSE
753 *
754 * If A is a column block ( ADIST = 'C' ),
755 *
756  IF( lsame( adist, 'C' ) ) THEN
757  ia = 1
758  ib = 1
759  DO 10 k = 1, iceil( m, intv )
760  CALL pbsmatadd( icontxt, trans, n, min( m-ia+1, nb ),
761  $ one, a(ia,1), lda, beta, b(1,ib), ldb )
762  ia = ia + intv
763  ib = ib + nb
764  10 CONTINUE
765 *
766 * If A is a row block (ADIST = 'R'),
767 *
768  ELSE
769  ia = 1
770  ib = 1
771  DO 20 k = 1, iceil( n, intv )
772  CALL pbsmatadd( icontxt, trans, min( n-ia+1, nb ), m,
773  $ one, a(1,ia), lda, beta, b(ib,1), ldb )
774  ia = ia + intv
775  ib = ib + nb
776  20 CONTINUE
777  END IF
778  END IF
779 *
780  RETURN
781 *
782 * End of PBSTR2BT
783 *
784  END
785 *
786 *=======================================================================
787 * SUBROUTINE PBSTR2AF
788 *=======================================================================
789 *
790  SUBROUTINE pbstr2af( ICONTXT, ADIST, M, N, NB, A, LDA, BETA, B,
791  $ LDB, LCMP, LCMQ, NINT )
792 *
793 * -- PB-BLAS routine (version 2.1) --
794 * University of Tennessee, Knoxville, Oak Ridge National Laboratory.
795 * April 28, 1996
796 *
797 * .. Scalar Arguments ..
798  CHARACTER*1 ADIST
799  INTEGER ICONTXT, M, N, NB, LDA, LDB, LCMP, LCMQ, NINT
800  REAL BETA
801 * ..
802 * .. Array Arguments ..
803  REAL A( LDA, * ), B( LDB, * )
804 * ..
805 *
806 * Purpose
807 * =======
808 *
809 * PBSTR2AF forms T <== A + BETA*T, where T is a scattered block
810 * row (or column) copied from a (condensed) block column (or row) of A
811 *
812 * =====================================================================
813 *
814 * .. Parameters ..
815  REAL ONE
816  PARAMETER ( ONE = 1.0e+0 )
817 * ..
818 * .. Local Scalars ..
819  INTEGER JA, JB, K, INTV
820 * ..
821 * .. External Functions ..
822  LOGICAL LSAME
823  INTEGER ICEIL
824  EXTERNAL LSAME, ICEIL
825 * ..
826 * .. Intrinsic Functions ..
827  INTRINSIC min
828 * ..
829 * .. Executable Statements ..
830 *
831  IF( lsame( adist, 'R' ) ) THEN
832  intv = nb * lcmq
833  ja = 1
834  jb = 1
835  DO 10 k = 1, iceil( nint, nb )
836  CALL pbsmatadd( icontxt, 'G', m, min( n-jb+1, nb ), one,
837  $ a(1,ja), lda, beta, b(1,jb), ldb )
838  ja = ja + nb
839  jb = jb + intv
840  10 CONTINUE
841 *
842 * if( LSAME( ADIST, 'C' ) ) then
843 *
844  ELSE
845  intv = nb * lcmp
846  ja = 1
847  jb = 1
848  DO 20 k = 1, iceil( nint, nb )
849  CALL pbsmatadd( icontxt, 'G', min( m-jb+1, nb ), n, one,
850  $ a(ja,1), lda, beta, b(jb,1), ldb )
851  ja = ja + nb
852  jb = jb + intv
853  20 CONTINUE
854  END IF
855 *
856  RETURN
857 *
858 * End of PBSTR2AF
859 *
860  END
max
#define max(A, B)
Definition: pcgemr.c:180
pbstr2bt
subroutine pbstr2bt(ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, BETA, B, LDB, INTV)
Definition: pbstran.f:705
pbstr2at
subroutine pbstr2at(ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, BETA, B, LDB, LCMP, LCMQ)
Definition: pbstran.f:614
pbstran
subroutine pbstran(ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, BETA, C, LDC, IAROW, IACOL, ICROW, ICCOL, WORK)
Definition: pbstran.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
pbstrsrt
subroutine pbstrsrt(ICONTXT, ADIST, M, N, NB, A, LDA, BETA, B, LDB, LCMP, LCMQ, NINT)
Definition: pbstrsrt.f:3
pbstrget
subroutine pbstrget(ICONTXT, ADIST, M, N, MNB, A, LDA, MCROW, MCCOL, IGD, MYROW, MYCOL, NPROW, NPCOL)
Definition: pbstrget.f:3
pbsmatadd
subroutine pbsmatadd(ICONTXT, MODE, M, N, ALPHA, A, LDA, BETA, B, LDB)
Definition: pbsmatadd.f:3
pbstr2af
subroutine pbstr2af(ICONTXT, ADIST, M, N, NB, A, LDA, BETA, B, LDB, LCMP, LCMQ, NINT)
Definition: pbstran.f:792
min
#define min(A, B)
Definition: pcgemr.c:181