ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdmatadd.f
Go to the documentation of this file.
1  SUBROUTINE pdmatadd( M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC,
2  $ JC, DESCC )
3 *
4 * -- ScaLAPACK tools routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 1, 1997
8 *
9 * .. Scalar Arguments ..
10  INTEGER IA, IC, JA, JC, M, N
11  DOUBLE PRECISION ALPHA, BETA
12 * ..
13 * .. Array Arguments ..
14  INTEGER DESCA( * ), DESCC( * )
15  DOUBLE PRECISION A( * ), C( * )
16 * ..
17 *
18 * Purpose
19 * =======
20 *
21 * PDMATADD performs a distributed matrix-matrix addition
22 *
23 * sub( C ) := alpha * sub( A ) + beta * sub( C ),
24 *
25 * where sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1) and sub( A ) denotes
26 * A(IA:IA+M-1,JA:JA+N-1). No communications are performed in this
27 * routine, the arrays are supposed to be aligned.
28 *
29 * Notes
30 * =====
31 *
32 * Each global data object is described by an associated description
33 * vector. This vector stores the information required to establish
34 * the mapping between an object element and its corresponding process
35 * and memory location.
36 *
37 * Let A be a generic term for any 2D block cyclicly distributed array.
38 * Such a global array has an associated description vector DESCA.
39 * In the following comments, the character _ should be read as
40 * "of the global array".
41 *
42 * NOTATION STORED IN EXPLANATION
43 * --------------- -------------- --------------------------------------
44 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
45 * DTYPE_A = 1.
46 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
47 * the BLACS process grid A is distribu-
48 * ted over. The context itself is glo-
49 * bal, but the handle (the integer
50 * value) may vary.
51 * M_A (global) DESCA( M_ ) The number of rows in the global
52 * array A.
53 * N_A (global) DESCA( N_ ) The number of columns in the global
54 * array A.
55 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
56 * the rows of the array.
57 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
58 * the columns of the array.
59 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
60 * row of the array A is distributed.
61 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
62 * first column of the array A is
63 * distributed.
64 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
65 * array. LLD_A >= MAX(1,LOCr(M_A)).
66 *
67 * Let K be the number of rows or columns of a distributed matrix,
68 * and assume that its process grid has dimension p x q.
69 * LOCr( K ) denotes the number of elements of K that a process
70 * would receive if K were distributed over the p processes of its
71 * process column.
72 * Similarly, LOCc( K ) denotes the number of elements of K that a
73 * process would receive if K were distributed over the q processes of
74 * its process row.
75 * The values of LOCr() and LOCc() may be determined via a call to the
76 * ScaLAPACK tool function, NUMROC:
77 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
78 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
79 * An upper bound for these quantities may be computed by:
80 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
81 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
82 *
83 * Arguments
84 * =========
85 *
86 * M (global input) INTEGER
87 * The number of rows to be operated on i.e the number of rows
88 * of the distributed submatrices sub( A ) and sub( C ). M >= 0.
89 *
90 * N (global input) INTEGER
91 * The number of columns to be operated on i.e the number of
92 * columns of the distributed submatrices sub( A ) and
93 * sub( C ). N >= 0.
94 *
95 * ALPHA (global input) DOUBLE PRECISION
96 * The scalar ALPHA.
97 *
98 * A (local input) DOUBLE PRECISION pointer into the local memory
99 * to a local array of dimension (LLD_A, LOCc(JA+N-1) ). This
100 * array contains the local pieces of the distributed matrix
101 * sub( A ).
102 *
103 * IA (global input) INTEGER
104 * The row index in the global array A indicating the first
105 * row of sub( A ).
106 *
107 * JA (global input) INTEGER
108 * The column index in the global array A indicating the
109 * first column of sub( A ).
110 *
111 * DESCA (global and local input) INTEGER array of dimension DLEN_.
112 * The array descriptor for the distributed matrix A.
113 *
114 * BETA (global input) DOUBLE PRECISION
115 * The scalar BETA.
116 *
117 * C (local input/local output) DOUBLE PRECISION pointer into the
118 * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)).
119 * This array contains the local pieces of the distributed
120 * matrix sub( C ). On exit, this array contains the local
121 * pieces of the resulting distributed matrix.
122 *
123 * IC (global input) INTEGER
124 * The row index in the global array C indicating the first
125 * row of sub( C ).
126 *
127 * JC (global input) INTEGER
128 * The column index in the global array C indicating the
129 * first column of sub( C ).
130 *
131 * DESCC (global and local input) INTEGER array of dimension DLEN_.
132 * The array descriptor for the distributed matrix C.
133 *
134 * =====================================================================
135 *
136 * .. Parameters ..
137  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
138  $ lld_, mb_, m_, nb_, n_, rsrc_
139  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
140  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
141  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
142  DOUBLE PRECISION ZERO, ONE
143  parameter( zero = 0.0d+0, one = 1.0d+0 )
144 * ..
145 * .. Local Scalars ..
146  INTEGER I, IACOL, IAROW, ICCOL, ICOFF, ICROW, IIA,
147  $ iic, ioffa, ioffc, iroff, j, jja, jjc, lda,
148  $ ldc, mp, mycol, myrow, npcol, nprow, nq
149 * ..
150 * .. External Subroutines ..
151  EXTERNAL blacs_gridinfo, infog2l
152 * ..
153 * .. External Functions ..
154  INTEGER NUMROC
155  EXTERNAL numroc
156 * ..
157 * .. Executable Statements ..
158 *
159 * Get grid parameters.
160 *
161  CALL blacs_gridinfo( desca(ctxt_), nprow, npcol, myrow, mycol )
162 *
163 * Quick return if possible.
164 *
165  IF( (m.EQ.0).OR.(n.EQ.0).OR.
166  $ ((alpha.EQ.zero).AND.(beta.EQ.one)) )
167  $ RETURN
168 *
169  CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol,
170  $ iia, jja, iarow, iacol )
171  CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol,
172  $ iic, jjc, icrow, iccol )
173 *
174  iroff = mod( ia-1, desca(mb_) )
175  icoff = mod( ja-1, desca(nb_) )
176  mp = numroc( m+iroff, desca(mb_), myrow, iarow, nprow )
177  nq = numroc( n+icoff, desca(nb_), mycol, iacol, npcol )
178  IF( myrow.EQ.iarow )
179  $ mp = mp-iroff
180  IF( mycol.EQ.iacol )
181  $ nq = nq-icoff
182  lda = desca(lld_)
183  ldc = descc(lld_)
184 *
185  IF( nq.EQ.1 ) THEN
186  IF( beta.EQ.zero ) THEN
187  IF( alpha.EQ.zero ) THEN
188  ioffc = iic + (jjc-1)*ldc
189  DO 10 i = ioffc, ioffc+mp-1
190  c( i ) = zero
191  10 CONTINUE
192  ELSE
193  ioffa = iia + (jja-1)*lda
194  ioffc = iic + (jjc-1)*ldc
195  DO 20 i = ioffc, ioffc+mp-1
196  c( i ) = alpha * a( ioffa )
197  ioffa = ioffa + 1
198  20 CONTINUE
199  END IF
200  ELSE
201  IF( alpha.EQ.one ) THEN
202  IF( beta.EQ.one ) THEN
203  ioffa = iia + (jja-1)*lda
204  ioffc = iic + (jjc-1)*ldc
205  DO 30 i = ioffc, ioffc+mp-1
206  c( i ) = c( i ) + a( ioffa )
207  ioffa = ioffa + 1
208  30 CONTINUE
209  ELSE
210  ioffa = iia + (jja-1)*lda
211  ioffc = iic + (jjc-1)*ldc
212  DO 40 i = ioffc, ioffc+mp-1
213  c( i ) = beta * c( i ) + a( ioffa )
214  ioffa = ioffa + 1
215  40 CONTINUE
216  END IF
217  ELSE IF( beta.EQ.one ) THEN
218  ioffa = iia + (jja-1)*lda
219  ioffc = iic + (jjc-1)*ldc
220  DO 50 i = ioffc, ioffc+mp-1
221  c( i ) = c( i ) + alpha * a( ioffa )
222  ioffa = ioffa + 1
223  50 CONTINUE
224  ELSE
225  ioffa = iia + (jja-1)*lda
226  ioffc = iic + (jjc-1)*ldc
227  DO 60 i = ioffc, ioffc+mp-1
228  c( i ) = beta * c( i ) + alpha * a( ioffa )
229  ioffa = ioffa + 1
230  60 CONTINUE
231  END IF
232  END IF
233  ELSE
234  IF( beta.EQ.zero ) THEN
235  IF( alpha.EQ.zero ) THEN
236  ioffc = iic+(jjc-1)*ldc
237  DO 80 j = 1, nq
238  DO 70 i = ioffc, ioffc+mp-1
239  c( i ) = zero
240  70 CONTINUE
241  ioffc = ioffc + ldc
242  80 CONTINUE
243  ELSE
244  ioffa = iia+(jja-1)*lda
245  ioffc = iic+(jjc-1)*ldc
246  DO 100 j = 1, nq
247  DO 90 i = ioffc, ioffc+mp-1
248  c( i ) = alpha * a( ioffa )
249  ioffa = ioffa + 1
250  90 CONTINUE
251  ioffa = ioffa + lda - mp
252  ioffc = ioffc + ldc
253  100 CONTINUE
254  END IF
255  ELSE
256  IF( alpha.EQ.one ) THEN
257  IF( beta.EQ.one ) THEN
258  ioffa = iia+(jja-1)*lda
259  ioffc = iic+(jjc-1)*ldc
260  DO 120 j = 1, nq
261  DO 110 i = ioffc, ioffc+mp-1
262  c( i ) = c( i ) + a( ioffa )
263  ioffa = ioffa + 1
264  110 CONTINUE
265  ioffa = ioffa + lda - mp
266  ioffc = ioffc + ldc
267  120 CONTINUE
268  ELSE
269  ioffa = iia+(jja-1)*lda
270  ioffc = iic+(jjc-1)*ldc
271  DO 140 j = 1, nq
272  DO 130 i = ioffc, ioffc+mp-1
273  c( i ) = beta * c( i ) + a( ioffa )
274  ioffa = ioffa + 1
275  130 CONTINUE
276  ioffa = ioffa + lda - mp
277  ioffc = ioffc + ldc
278  140 CONTINUE
279  END IF
280  ELSE IF( beta.EQ.one ) THEN
281  ioffa = iia+(jja-1)*lda
282  ioffc = iic+(jjc-1)*ldc
283  DO 160 j = 1, nq
284  DO 150 i = ioffc, ioffc+mp-1
285  c( i ) = c( i ) + alpha * a( ioffa )
286  ioffa = ioffa + 1
287  150 CONTINUE
288  ioffa = ioffa + lda - mp
289  ioffc = ioffc + ldc
290  160 CONTINUE
291  ELSE
292  ioffa = iia+(jja-1)*lda
293  ioffc = iic+(jjc-1)*ldc
294  DO 180 j = 1, nq
295  DO 170 i = ioffc, ioffc+mp-1
296  c( i ) = beta * c( i ) + alpha * a( ioffa )
297  ioffa = ioffa + 1
298  170 CONTINUE
299  ioffa = ioffa + lda - mp
300  ioffc = ioffc + ldc
301  180 CONTINUE
302  END IF
303  END IF
304  END IF
305 *
306  RETURN
307 *
308 * End of PDMATADD
309 *
310  END
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pdmatadd
subroutine pdmatadd(M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC)
Definition: pdmatadd.f:3