SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pbdtran.f
Go to the documentation of this file.
1 SUBROUTINE pbdtran( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, BETA,
2 $ C, LDC, IAROW, IACOL, ICROW, ICCOL, WORK )
3*
4* -- PB-BLAS routine (version 2.1) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
6* April 28, 1996
7*
8* Jaeyoung Choi, Oak Ridge National Laboratory
9* Jack Dongarra, University of Tennessee and Oak Ridge National Lab.
10* David Walker, Oak Ridge National Laboratory
11*
12* .. Scalar Arguments ..
13 CHARACTER*1 ADIST, TRANS
14 INTEGER IACOL, IAROW, ICCOL, ICONTXT, ICROW, LDA, LDC,
15 $ m, n, nb
16 DOUBLE PRECISION BETA
17* ..
18* .. Array Arguments ..
19 DOUBLE PRECISION A( LDA, * ), C( LDC, * ), WORK( * )
20* ..
21*
22* Purpose
23* =======
24*
25* PBDTRAN transposes a column block to row block, or a row block to
26* column block by reallocating data distribution.
27*
28* C := A^T + beta*C, or C := A^C + beta*C
29*
30* where A is an M-by-N matrix and C is an N-by-M matrix, and the size
31* of M or N is limited to its block size NB.
32*
33* The first elements of the matrices A, and C should be located at
34* the beginnings of their first blocks. (not the middle of the blocks.)
35*
36* Parameters
37* ==========
38*
39* ICONTXT (input) INTEGER
40* ICONTXT is the BLACS mechanism for partitioning communication
41* space. A defining property of a context is that a message in
42* a context cannot be sent or received in another context. The
43* BLACS context includes the definition of a grid, and each
44* process' coordinates in it.
45*
46* ADIST - (input) CHARACTER*1
47* ADIST specifies whether A is a column block or a row block.
48*
49* ADIST = 'C', A is a column block
50* ADIST = 'R', A is a row block
51*
52* TRANS - (input) CHARACTER*1
53* TRANS specifies whether the transposed format is transpose
54* or conjugate transpose. If the matrices A and C are real,
55* the argument is ignored.
56*
57* TRANS = 'T', transpose
58* TRANS = 'C', conjugate transpose
59*
60* M - (input) INTEGER
61* M specifies the (global) number of rows of the matrix (block
62* column or block row) A and of columns of the matrix C.
63* M >= 0.
64*
65* N - (input) INTEGER
66* N specifies the (global) number of columns of the matrix
67* (block column or block row) A and of columns of the matrix
68* C. N >= 0.
69*
70* NB - (input) INTEGER
71* NB specifies the column block size of the matrix A and the
72* row block size of the matrix C when ADIST = 'C'. Otherwise,
73* it specifies the row block size of the matrix A and the
74* column block size of the matrix C. NB >= 1.
75*
76* A (input) DOUBLE PRECISION array of DIMENSION ( LDA, Lx ),
77* where Lx is N when ADIST = 'C', or Nq when ADIST = 'R'.
78* Before entry with ADIST = 'C', the leading Mp by N part of
79* the array A must contain the matrix A, otherwise the leading
80* M by Nq part of the array A must contain the matrix A. See
81* parameter details for the values of Mp and Nq.
82*
83* LDA (input) INTEGER
84* LDA specifies the leading dimension of (local) A as declared
85* in the calling (sub) program. LDA >= MAX(1,Mp) when
86* ADIST = 'C', or LDA >= MAX(1,M) otherwise.
87*
88* BETA (input) DOUBLE PRECISION
89* BETA specifies scaler beta.
90*
91* C (input/output) DOUBLE PRECISION array of DIMENSION
92* ( LDC, Lx ),
93* where Lx is Mq when ADIST = 'C', or N when ADIST = 'R'.
94* If ADIST = 'C', the leading N-by-Mq part of the array C
95* contains the (local) matrix C, otherwise the leading
96* Np-by-M part of the array C must contain the (local) matrix
97* C. C will not be referenced if beta is zero.
98*
99* LDC (input) INTEGER
100* LDC specifies the leading dimension of (local) C as declared
101* in the calling (sub) program. LDC >= MAX(1,N) when ADIST='C',
102* or LDC >= MAX(1,Np) otherwise.
103*
104* IAROW (input) INTEGER
105* IAROW specifies a row of the process template,
106* which holds the first block of the matrix A. If A is a row
107* of blocks (ADIST = 'R') and all rows of processes have a copy
108* of A, then set IAROW = -1.
109*
110* IACOL (input) INTEGER
111* IACOL specifies a column of the process template,
112* which holds the first block of the matrix A. If A is a
113* column of blocks (ADIST = 'C') and all columns of processes
114* have a copy of A, then set IACOL = -1.
115*
116* ICROW (input) INTEGER
117* ICROW specifies the current row process which holds
118* the first block of the matrix C, which is transposed of A.
119* If C is a row of blocks (ADIST = 'C') and the transposed
120* row block C is distributed all rows of processes, set
121* ICROW = -1.
122*
123* ICCOL (input) INTEGER
124* ICCOL specifies the current column process which holds
125* the first block of the matrix C, which is transposed of A.
126* If C is a column of blocks (ADIST = 'R') and the transposed
127* column block C is distributed all columns of processes,
128* set ICCOL = -1.
129*
130* WORK (workspace) DOUBLE PRECISION array of dimension Size(WORK).
131* It needs extra working space of A'.
132*
133* Parameters Details
134* ==================
135*
136* Lx It is a local portion of L owned by a process, (L is
137* replaced by M, or N, and x is replaced by either p (=NPROW)
138* or q (=NPCOL)). The value is determined by L, LB, x, and
139* MI, where LB is a block size and MI is a row or column
140* position in a process template. Lx is equal to or less
141* than Lx0 = CEIL( L, LB*x ) * LB.
142*
143* Communication Scheme
144* ====================
145*
146* The communication scheme of the routine is set to '1-tree', which is
147* fan-out. (For details, see BLACS user's guide.)
148*
149* Memory Requirement of WORK
150* ==========================
151*
152* Mqb = CEIL( M, NB*NPCOL )
153* Npb = CEIL( N, NB*NPROW )
154* LCMQ = LCM / NPCOL
155* LCMP = LCM / NPROW
156*
157* (1) ADIST = 'C'
158* (a) IACOL != -1
159* Size(WORK) = N * CEIL(Mqb,LCMQ)*NB
160* (b) IACOL = -1
161* Size(WORK) = N * CEIL(Mqb,LCMQ)*NB * MIN(LCMQ,CEIL(M,NB))
162*
163* (2) ADIST = 'R'
164* (a) IAROW != -1
165* Size(WORK) = M * CEIL(Npb,LCMP)*NB
166* (b) IAROW = -1
167* Size(WORK) = M * CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(N,NB))
168*
169* Notes
170* -----
171* More precise space can be computed as
172*
173* CEIL(Mqb,LCMQ)*NB => NUMROC( NUMROC(M,NB,0,0,NPCOL), NB, 0, 0, LCMQ )
174* CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(N,NB,0,0,NPROW), NB, 0, 0, LCMP )
175*
176* =====================================================================
177*
178* ..
179* .. Parameters ..
180 DOUBLE PRECISION ONE, ZERO
181 parameter( one = 1.0d+0, zero = 0.0d+0 )
182* ..
183* .. Local Scalars ..
184 LOGICAL COLFORM, ROWFORM
185 INTEGER I, IDEX, IGD, INFO, JCCOL, JCROW, JDEX, LCM,
186 $ lcmp, lcmq, mccol, mcrow, ml, mp, mq, mq0,
187 $ mrcol, mrrow, mycol, myrow, np, np0, npcol,
188 $ nprow, nq
189 DOUBLE PRECISION TBETA
190* ..
191* .. External Functions ..
192 LOGICAL LSAME
193 INTEGER ILCM, ICEIL, NUMROC
194 EXTERNAL ilcm, iceil, lsame, numroc
195* ..
196* .. External Subroutines ..
197 EXTERNAL blacs_gridinfo, dgebr2d, dgebs2d, dgerv2d,
198 $ dgesd2d, pbdmatadd, pbdtr2af, pbdtr2at,
200* ..
201* .. Intrinsic Functions ..
202 INTRINSIC max, min, mod
203* ..
204* .. Executable Statements ..
205*
206* Quick return if possible.
207*
208 IF( m.EQ.0 .OR. n.EQ.0 ) RETURN
209*
210 CALL blacs_gridinfo( icontxt, nprow, npcol, myrow, mycol )
211*
212 colform = lsame( adist, 'C' )
213 rowform = lsame( adist, 'R' )
214*
215* Test the input parameters.
216*
217 info = 0
218 IF( ( .NOT.colform ) .AND. ( .NOT.rowform ) ) THEN
219 info = 2
220 ELSE IF( m .LT.0 ) THEN
221 info = 4
222 ELSE IF( n .LT.0 ) THEN
223 info = 5
224 ELSE IF( nb.LT.1 ) THEN
225 info = 6
226 ELSE IF( iarow.LT.-1 .OR. iarow.GE.nprow .OR.
227 $ ( iarow.EQ.-1 .AND. colform ) ) THEN
228 info = 12
229 ELSE IF( iacol.LT.-1 .OR. iacol.GE.npcol .OR.
230 $ ( iacol.EQ.-1 .AND. rowform ) ) THEN
231 info = 13
232 ELSE IF( icrow.LT.-1 .OR. icrow.GE.nprow .OR.
233 $ ( icrow.EQ.-1 .AND. rowform ) ) THEN
234 info = 14
235 ELSE IF( iccol.LT.-1 .OR. iccol.GE.npcol .OR.
236 $ ( iccol.EQ.-1 .AND. colform ) ) THEN
237 info = 15
238 END IF
239*
240 10 CONTINUE
241 IF( info .NE. 0 ) THEN
242 CALL pxerbla( icontxt, 'PBDTRAN ', info )
243 RETURN
244 END IF
245*
246* Start the operations.
247*
248* LCM : the least common multiple of NPROW and NPCOL
249*
250 lcm = ilcm( nprow, npcol )
251 lcmp = lcm / nprow
252 lcmq = lcm / npcol
253 igd = npcol / lcmp
254*
255* When A is a column block
256*
257 IF( colform ) THEN
258*
259* Form C <== A' ( A is a column block )
260* _
261* | |
262* | |
263* _____________ | |
264* |______C______| <== |A|
265* | |
266* | |
267* |_|
268*
269* MRROW : row relative position in template from IAROW
270* MRCOL : column relative position in template from ICCOL
271*
272 mrrow = mod( nprow+myrow-iarow, nprow )
273 mrcol = mod( npcol+mycol-iccol, npcol )
274 jcrow = icrow
275 IF( icrow.EQ.-1 ) jcrow = iarow
276*
277 mp = numroc( m, nb, myrow, iarow, nprow )
278 mq = numroc( m, nb, mycol, iccol, npcol )
279 mq0 = numroc( numroc(m, nb, 0, 0, npcol), nb, 0, 0, lcmq )
280*
281 IF( lda.LT.mp .AND.
282 $ ( iacol.EQ.mycol .OR. iacol.EQ.-1 ) ) THEN
283 info = 8
284 ELSE IF( ldc.LT.n .AND.
285 $ ( icrow.EQ.myrow .OR. icrow.EQ.-1 ) ) THEN
286 info = 11
287 END IF
288 IF( info.NE.0 ) GO TO 10
289*
290* When a column process of IACOL has a column block A,
291*
292 IF( iacol.GE.0 ) THEN
293 tbeta = zero
294 IF( myrow.EQ.jcrow ) tbeta = beta
295*
296 DO 20 i = 0, min( lcm, iceil(m,nb) ) - 1
297 mcrow = mod( mod(i, nprow) + iarow, nprow )
298 mccol = mod( mod(i, npcol) + iccol, npcol )
299 IF( lcmq.EQ.1 ) mq0 = numroc( m, nb, i, 0, npcol )
300 jdex = (i/npcol) * nb
301*
302* A source node copies the blocks to WORK, and send it
303*
304 IF( myrow.EQ.mcrow .AND. mycol.EQ.iacol ) THEN
305*
306* The source node is a destination node
307*
308 idex = (i/nprow) * nb
309 IF( myrow.EQ.jcrow .AND. mycol.EQ.mccol ) THEN
310 CALL pbdtr2at( icontxt, 'Col', trans, mp-idex, n, nb,
311 $ a(idex+1,1), lda, tbeta, c(1,jdex+1),
312 $ ldc, lcmp, lcmq )
313*
314* The source node sends blocks to a destination node
315*
316 ELSE
317 CALL pbdtr2bt( icontxt, 'Col', trans, mp-idex, n, nb,
318 $ a(idex+1,1), lda, zero, work, n,
319 $ lcmp*nb )
320 CALL dgesd2d( icontxt, n, mq0, work, n, jcrow, mccol )
321 END IF
322*
323* A destination node receives the copied blocks
324*
325 ELSE IF( myrow.EQ.jcrow .AND. mycol.EQ.mccol ) THEN
326 IF( lcmq.EQ.1 .AND. tbeta.EQ.zero ) THEN
327 CALL dgerv2d( icontxt, n, mq0, c, ldc, mcrow, iacol )
328 ELSE
329 CALL dgerv2d( icontxt, n, mq0, work, n, mcrow, iacol )
330 CALL pbdtr2af( icontxt, 'Row', n, mq-jdex, nb, work, n,
331 $ tbeta, c(1,jdex+1), ldc, lcmp, lcmq,
332 $ mq0 )
333 END IF
334 END IF
335 20 CONTINUE
336*
337* Broadcast a row block of C in each column of template
338*
339 IF( icrow.EQ.-1 ) THEN
340 IF( myrow.EQ.jcrow ) THEN
341 CALL dgebs2d( icontxt, 'Col', '1-tree', n, mq, c, ldc )
342 ELSE
343 CALL dgebr2d( icontxt, 'Col', '1-tree', n, mq, c, ldc,
344 $ jcrow, mycol )
345 END IF
346 END IF
347*
348* When all column procesors have a copy of the column block A,
349*
350 ELSE
351 IF( lcmq.EQ.1 ) mq0 = mq
352*
353* Processors, which have diagonal blocks of A, copy them to
354* WORK array in transposed form
355*
356 DO 30 i = 0, lcmp-1
357 IF( mrcol.EQ.mod( nprow*i+mrrow, npcol ) ) THEN
358 IF( lcmq.EQ.1.AND.(icrow.EQ.-1.OR.icrow.EQ.myrow) ) THEN
359 CALL pbdtr2bt( icontxt, 'Col', trans, mp-i*nb, n, nb,
360 $ a(i*nb+1,1), lda, beta, c, ldc,
361 $ lcmp*nb )
362 ELSE
363 CALL pbdtr2bt( icontxt, 'Col', trans, mp-i*nb, n, nb,
364 $ a(i*nb+1,1), lda, zero, work, n,
365 $ lcmp*nb )
366 END IF
367 END IF
368 30 CONTINUE
369*
370* Get diagonal blocks of A for each column of the template
371*
372 mcrow = mod( mod(mrcol,nprow)+iarow, nprow )
373 IF( lcmq.GT.1 ) THEN
374 mccol = mod( npcol+mycol-iccol, npcol )
375 CALL pbdtrget( icontxt, 'Row', n, mq0, iceil(m,nb), work, n,
376 $ mcrow, mccol, igd, myrow, mycol, nprow,
377 $ npcol )
378 END IF
379*
380* Broadcast a row block of WORK in every row of template
381*
382 IF( icrow.EQ.-1 ) THEN
383 IF( myrow.EQ.mcrow ) THEN
384 IF( lcmq.GT.1 )
385 $ CALL pbdtrsrt( icontxt, 'Row', n, mq, nb, work, n, beta,
386 $ c, ldc, lcmp, lcmq, mq0 )
387 CALL dgebs2d( icontxt, 'Col', '1-tree', n, mq, c, ldc )
388 ELSE
389 CALL dgebr2d( icontxt, 'Col', '1-tree', n, mq, c, ldc,
390 $ mcrow, mycol )
391 END IF
392*
393* Send a row block of WORK to the destination row
394*
395 ELSE
396 IF( lcmq.EQ.1 ) THEN
397 IF( myrow.EQ.mcrow ) THEN
398 IF( myrow.NE.icrow )
399 $ CALL dgesd2d( icontxt, n, mq, work, n, icrow, mycol )
400 ELSE IF( myrow.EQ.icrow ) THEN
401 IF( beta.EQ.zero ) THEN
402 CALL dgerv2d( icontxt, n, mq, c, ldc, mcrow, mycol )
403 ELSE
404 CALL dgerv2d( icontxt, n, mq, work, n, mcrow, mycol )
405 CALL pbdmatadd( icontxt, 'G', n, mq, one, work, n,
406 $ beta, c, ldc )
407 END IF
408 END IF
409*
410 ELSE
411 ml = mq0 * min( lcmq, max(0,iceil(m,nb)-mccol) )
412 IF( myrow.EQ.mcrow ) THEN
413 IF( myrow.NE.icrow )
414 $ CALL dgesd2d( icontxt, n, ml, work, n, icrow, mycol )
415 ELSE IF( myrow.EQ.icrow ) THEN
416 CALL dgerv2d( icontxt, n, ml, work, n, mcrow, mycol )
417 END IF
418*
419 IF( myrow.EQ.icrow )
420 $ CALL pbdtrsrt( icontxt, 'Row', n, mq, nb, work, n, beta,
421 $ c, ldc, lcmp, lcmq, mq0 )
422 END IF
423 END IF
424*
425 END IF
426*
427* When A is a row block
428*
429 ELSE
430*
431* Form C <== A' ( A is a row block )
432* _
433* | |
434* | |
435* | | _____________
436* |C| <== |______A______|
437* | |
438* | |
439* |_|
440*
441* MRROW : row relative position in template from ICROW
442* MRCOL : column relative position in template from IACOL
443*
444 mrrow = mod( nprow+myrow-icrow, nprow )
445 mrcol = mod( npcol+mycol-iacol, npcol )
446 jccol = iccol
447 IF( iccol.EQ.-1 ) jccol = iacol
448*
449 np = numroc( n, nb, myrow, icrow, nprow )
450 nq = numroc( n, nb, mycol, iacol, npcol )
451 np0 = numroc( numroc(n, nb, 0, 0, nprow), nb, 0, 0, lcmp )
452*
453 IF( lda.LT.m .AND.
454 $ ( iarow.EQ.myrow .OR. iarow.EQ.-1 ) ) THEN
455 info = 8
456 ELSE IF( ldc.LT.np .AND.
457 $ ( iccol.EQ.mycol .OR. iccol.EQ.-1 ) ) THEN
458 info = 11
459 END IF
460 IF( info.NE.0 ) GO TO 10
461*
462* When a row process of IAROW has a row block A,
463*
464 IF( iarow.GE.0 ) THEN
465 tbeta = zero
466 IF( mycol.EQ.jccol ) tbeta = beta
467*
468 DO 40 i = 0, min( lcm, iceil(n,nb) ) - 1
469 mcrow = mod( mod(i, nprow) + icrow, nprow )
470 mccol = mod( mod(i, npcol) + iacol, npcol )
471 IF( lcmp.EQ.1 ) np0 = numroc( n, nb, i, 0, nprow )
472 idex = (i/nprow) * nb
473*
474* A source node copies the blocks to WORK, and send it
475*
476 IF( myrow.EQ.iarow .AND. mycol.EQ.mccol ) THEN
477*
478* The source node is a destination node
479*
480 jdex = (i/npcol) * nb
481 IF( myrow.EQ.mcrow .AND. mycol.EQ.jccol ) THEN
482 CALL pbdtr2at( icontxt, 'Row', trans, m, nq-jdex, nb,
483 $ a(1,jdex+1), lda, tbeta, c(idex+1,1),
484 $ ldc, lcmp, lcmq )
485*
486* The source node sends blocks to a destination node
487*
488 ELSE
489 CALL pbdtr2bt( icontxt, 'Row', trans, m, nq-jdex, nb,
490 $ a(1,jdex+1), lda, zero, work, np0,
491 $ lcmq*nb )
492 CALL dgesd2d( icontxt, np0, m, work, np0,
493 $ mcrow, jccol )
494 END IF
495*
496* A destination node receives the copied blocks
497*
498 ELSE IF( myrow.EQ.mcrow .AND. mycol.EQ.jccol ) THEN
499 IF( lcmp.EQ.1 .AND. tbeta.EQ.zero ) THEN
500 CALL dgerv2d( icontxt, np0, m, c, ldc, iarow, mccol )
501 ELSE
502 CALL dgerv2d( icontxt, np0, m, work, np0, iarow, mccol )
503 CALL pbdtr2af( icontxt, 'Col', np-idex, m, nb, work,
504 $ np0, tbeta, c(idex+1,1), ldc, lcmp, lcmq,
505 $ np0 )
506 END IF
507 END IF
508 40 CONTINUE
509*
510* Broadcast a column block of WORK in each row of template
511*
512 IF( iccol.EQ.-1 ) THEN
513 IF( mycol.EQ.jccol ) THEN
514 CALL dgebs2d( icontxt, 'Row', '1-tree', np, m, c, ldc )
515 ELSE
516 CALL dgebr2d( icontxt, 'Row', '1-tree', np, m, c, ldc,
517 $ myrow, jccol )
518 END IF
519 END IF
520*
521* When all row procesors have a copy of the row block A,
522*
523 ELSE
524 IF( lcmp.EQ.1 ) np0 = np
525*
526* Processors, which have diagonal blocks of A, copy them to
527* WORK array in transposed form
528*
529 DO 50 i = 0, lcmq-1
530 IF( mrrow.EQ.mod(npcol*i+mrcol, nprow) ) THEN
531 IF( lcmp.EQ.1.AND.(iccol.EQ.-1.OR.iccol.EQ.mycol) ) THEN
532 CALL pbdtr2bt( icontxt, 'Row', trans, m, nq-i*nb, nb,
533 $ a(1,i*nb+1), lda, beta, c, ldc,
534 $ lcmq*nb )
535 ELSE
536 CALL pbdtr2bt( icontxt, 'Row', trans, m, nq-i*nb, nb,
537 $ a(1,i*nb+1), lda, zero, work, np0,
538 $ lcmq*nb )
539 END IF
540 END IF
541 50 CONTINUE
542*
543* Get diagonal blocks of A for each row of the template
544*
545 mccol = mod( mod(mrrow, npcol)+iacol, npcol )
546 IF( lcmp.GT.1 ) THEN
547 mcrow = mod( nprow+myrow-icrow, nprow )
548 CALL pbdtrget( icontxt, 'Col', np0, m, iceil(n,nb), work,
549 $ np0, mcrow, mccol, igd, myrow, mycol, nprow,
550 $ npcol )
551 END IF
552*
553* Broadcast a column block of WORK in every column of template
554*
555 IF( iccol.EQ.-1 ) THEN
556 IF( mycol.EQ.mccol ) THEN
557 IF( lcmp.GT.1 )
558 $ CALL pbdtrsrt( icontxt, 'Col', np, m, nb, work, np0,
559 $ beta, c, ldc, lcmp, lcmq, np0 )
560 CALL dgebs2d( icontxt, 'Row', '1-tree', np, m, c, ldc )
561 ELSE
562 CALL dgebr2d( icontxt, 'Row', '1-tree', np, m, c, ldc,
563 $ myrow, mccol )
564 END IF
565*
566* Send a column block of WORK to the destination column
567*
568 ELSE
569 IF( lcmp.EQ.1 ) THEN
570 IF( mycol.EQ.mccol ) THEN
571 IF( mycol.NE.iccol )
572 $ CALL dgesd2d( icontxt, np, m, work, np, myrow, iccol )
573 ELSE IF( mycol.EQ.iccol ) THEN
574 IF( beta.EQ.zero ) THEN
575 CALL dgerv2d( icontxt, np, m, c, ldc, myrow, mccol )
576 ELSE
577 CALL dgerv2d( icontxt, np, m, work, np, myrow, mccol )
578 CALL pbdmatadd( icontxt, 'G', np, m, one, work, np,
579 $ beta, c, ldc )
580 END IF
581 END IF
582*
583 ELSE
584 ml = m * min( lcmp, max( 0, iceil(n,nb) - mcrow ) )
585 IF( mycol.EQ.mccol ) THEN
586 IF( mycol.NE.iccol )
587 $ CALL dgesd2d( icontxt, np0, ml, work, np0,
588 $ myrow, iccol )
589 ELSE IF( mycol.EQ.iccol ) THEN
590 CALL dgerv2d( icontxt, np0, ml, work, np0,
591 $ myrow, mccol )
592 END IF
593*
594 IF( mycol.EQ.iccol )
595 $ CALL pbdtrsrt( icontxt, 'Col', np, m, nb, work, np0,
596 $ beta, c, ldc, lcmp, lcmq, np0 )
597 END IF
598 END IF
599*
600 END IF
601 END IF
602*
603 RETURN
604*
605* End of PBDTRAN
606*
607 END
608*
609*=======================================================================
610* SUBROUTINE PBDTR2AT
611*=======================================================================
612*
613 SUBROUTINE pbdtr2at( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA,
614 $ BETA, B, LDB, LCMP, LCMQ )
615*
616* -- PB-BLAS routine (version 2.1) --
617* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
618* April 28, 1996
619*
620* .. Scalar Arguments ..
621 CHARACTER*1 ADIST, TRANS
622 INTEGER ICONTXT, LCMP, LCMQ, LDA, LDB, M, N, NB
623 DOUBLE PRECISION BETA
624* ..
625* .. Array Arguments ..
626 DOUBLE PRECISION A( LDA, * ), B( LDB, * )
627* ..
628*
629* Purpose
630* =======
631*
632* PBDTR2AT forms B <== A^T + beta*B, or A^C + beta*B
633* B is a ((conjugate) transposed) scattered block row (or column),
634* copied from a scattered block column (or row) of A
635*
636* =====================================================================
637*
638* .. Parameters ..
639 DOUBLE PRECISION ONE
640 PARAMETER ( ONE = 1.0d+0 )
641* ..
642* .. Local Scalars ..
643 INTEGER IA, IB, K, INTV, JNTV
644* ..
645* .. External Subroutines ..
646 EXTERNAL pbdmatadd
647* ..
648* .. External Functions ..
649 LOGICAL LSAME
650 INTEGER ICEIL
651 EXTERNAL lsame, iceil
652* ..
653* .. Intrinsic Functions ..
654 INTRINSIC min
655* ..
656* .. Excutable Statements ..
657*
658 IF( lcmp.EQ.lcmq ) THEN
659 CALL pbdmatadd( icontxt, trans, n, m, one, a, lda, beta, b,
660 $ ldb )
661*
662 ELSE
663*
664* If A is a column block ( ADIST = 'C' ),
665*
666 IF( lsame( adist, 'C' ) ) THEN
667 intv = lcmp * nb
668 jntv = lcmq * nb
669 ia = 1
670 ib = 1
671 DO 10 k = 1, iceil( m, intv )
672 CALL pbdmatadd( icontxt, trans, n, min( m-ia+1, nb ),
673 $ one, a(ia,1), lda, beta, b(1,ib), ldb )
674 ia = ia + intv
675 ib = ib + jntv
676 10 CONTINUE
677*
678* If A is a row block ( ADIST = 'R' ),
679*
680 ELSE
681 intv = lcmp * nb
682 jntv = lcmq * nb
683 ia = 1
684 ib = 1
685 DO 20 k = 1, iceil( n, jntv )
686 CALL pbdmatadd( icontxt, trans, min( n-ia+1, nb ), m,
687 $ one, a(1,ia), lda, beta, b(ib,1), ldb )
688 ia = ia + jntv
689 ib = ib + intv
690 20 CONTINUE
691 END IF
692 END IF
693*
694 RETURN
695*
696* End of PBDTR2AT
697*
698 END
699*
700*=======================================================================
701* SUBROUTINE PBDTR2BT
702*=======================================================================
703*
704 SUBROUTINE pbdtr2bt( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA,
705 $ BETA, B, LDB, INTV )
706*
707* -- PB-BLAS routine (version 2.1) --
708* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
709* April 28, 1996
710*
711* .. Scalar Arguments ..
712 CHARACTER*1 ADIST, TRANS
713 INTEGER ICONTXT, INTV, LDA, LDB, M, N, NB
714 DOUBLE PRECISION BETA
715* ..
716* .. Array Arguments ..
717 DOUBLE PRECISION A( LDA, * ), B( LDB, * )
718* ..
719*
720* Purpose
721* =======
722*
723* PBDTR2BT forms T <== A^T + beta*T or A^C + beta*T, where T is a
724* ((conjugate) transposed) condensed block row (or column), copied from
725* a scattered block column (or row) of A
726*
727* =====================================================================
728*
729* .. Parameters ..
730 DOUBLE PRECISION ONE
731 PARAMETER ( ONE = 1.0d+0 )
732* ..
733* .. Local Scalars ..
734 INTEGER IA, IB, K
735* ..
736* .. External Functions ..
737 LOGICAL LSAME
738 INTEGER ICEIL
739 EXTERNAL LSAME, ICEIL
740* ..
741* .. External Subroutines ..
742 EXTERNAL pbdmatadd
743* ..
744* .. Intrinsic Functions ..
745 INTRINSIC min
746* ..
747* .. Excutable Statements ..
748*
749 IF( intv.EQ.nb ) THEN
750 CALL pbdmatadd( icontxt, trans, n, m, one, a, lda, beta, b,
751 $ ldb )
752*
753 ELSE
754*
755* If A is a column block ( ADIST = 'C' ),
756*
757 IF( lsame( adist, 'C' ) ) THEN
758 ia = 1
759 ib = 1
760 DO 10 k = 1, iceil( m, intv )
761 CALL pbdmatadd( icontxt, trans, n, min( m-ia+1, nb ),
762 $ one, a(ia,1), lda, beta, b(1,ib), ldb )
763 ia = ia + intv
764 ib = ib + nb
765 10 CONTINUE
766*
767* If A is a row block (ADIST = 'R'),
768*
769 ELSE
770 ia = 1
771 ib = 1
772 DO 20 k = 1, iceil( n, intv )
773 CALL pbdmatadd( icontxt, trans, min( n-ia+1, nb ), m,
774 $ one, a(1,ia), lda, beta, b(ib,1), ldb )
775 ia = ia + intv
776 ib = ib + nb
777 20 CONTINUE
778 END IF
779 END IF
780*
781 RETURN
782*
783* End of PBDTR2BT
784*
785 END
786*
787*=======================================================================
788* SUBROUTINE PBDTR2AF
789*=======================================================================
790*
791 SUBROUTINE pbdtr2af( ICONTXT, ADIST, M, N, NB, A, LDA, BETA, B,
792 $ LDB, LCMP, LCMQ, NINT )
793*
794* -- PB-BLAS routine (version 2.1) --
795* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
796* April 28, 1996
797*
798* .. Scalar Arguments ..
799 CHARACTER*1 ADIST
800 INTEGER ICONTXT, M, N, NB, LDA, LDB, LCMP, LCMQ, NINT
801 DOUBLE PRECISION BETA
802* ..
803* .. Array Arguments ..
804 DOUBLE PRECISION A( LDA, * ), B( LDB, * )
805* ..
806*
807* Purpose
808* =======
809*
810* PBDTR2AF forms T <== A + BETA*T, where T is a scattered block
811* row (or column) copied from a (condensed) block column (or row) of A
812*
813* =====================================================================
814*
815* .. Parameters ..
816 DOUBLE PRECISION ONE
817 PARAMETER ( ONE = 1.0d+0 )
818* ..
819* .. Local Scalars ..
820 INTEGER JA, JB, K, INTV
821* ..
822* .. External Functions ..
823 LOGICAL LSAME
824 INTEGER ICEIL
825 EXTERNAL LSAME, ICEIL
826* ..
827* .. Intrinsic Functions ..
828 INTRINSIC min
829* ..
830* .. Executable Statements ..
831*
832 IF( lsame( adist, 'R' ) ) THEN
833 intv = nb * lcmq
834 ja = 1
835 jb = 1
836 DO 10 k = 1, iceil( nint, nb )
837 CALL pbdmatadd( icontxt, 'G', m, min( n-jb+1, nb ), one,
838 $ a(1,ja), lda, beta, b(1,jb), ldb )
839 ja = ja + nb
840 jb = jb + intv
841 10 CONTINUE
842*
843* if( LSAME( ADIST, 'C' ) ) then
844*
845 ELSE
846 intv = nb * lcmp
847 ja = 1
848 jb = 1
849 DO 20 k = 1, iceil( nint, nb )
850 CALL pbdmatadd( icontxt, 'G', min( m-jb+1, nb ), n, one,
851 $ a(ja,1), lda, beta, b(jb,1), ldb )
852 ja = ja + nb
853 jb = jb + intv
854 20 CONTINUE
855 END IF
856*
857 RETURN
858*
859* End of PBDTR2AF
860*
861 END
subroutine pbdmatadd(icontxt, mode, m, n, alpha, a, lda, beta, b, ldb)
Definition pbdmatadd.f:3
subroutine pbdtr2bt(icontxt, adist, trans, m, n, nb, a, lda, beta, b, ldb, intv)
Definition pbdtran.f:706
subroutine pbdtr2af(icontxt, adist, m, n, nb, a, lda, beta, b, ldb, lcmp, lcmq, nint)
Definition pbdtran.f:793
subroutine pbdtran(icontxt, adist, trans, m, n, nb, a, lda, beta, c, ldc, iarow, iacol, icrow, iccol, work)
Definition pbdtran.f:3
subroutine pbdtr2at(icontxt, adist, trans, m, n, nb, a, lda, beta, b, ldb, lcmp, lcmq)
Definition pbdtran.f:615
subroutine pbdtrget(icontxt, adist, m, n, mnb, a, lda, mcrow, mccol, igd, myrow, mycol, nprow, npcol)
Definition pbdtrget.f:3
subroutine pbdtrsrt(icontxt, adist, m, n, nb, a, lda, beta, b, ldb, lcmp, lcmq, nint)
Definition pbdtrsrt.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2