SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pzmatadd.f
Go to the documentation of this file.
1 SUBROUTINE pzmatadd( M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC,
2 $ JC, DESCC )
3*
4* -- ScaLAPACK tools routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* May 1, 1997
8*
9* .. Scalar Arguments ..
10 INTEGER IA, IC, JA, JC, M, N
11 COMPLEX*16 ALPHA, BETA
12* ..
13* .. Array Arguments ..
14 INTEGER DESCA( * ), DESCC( * )
15 COMPLEX*16 A( * ), C( * )
16* ..
17*
18* Purpose
19* =======
20*
21* PZMATADD performs a distributed matrix-matrix addition
22*
23* sub( C ) := alpha * sub( A ) + beta * sub( C ),
24*
25* where sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1) and sub( A ) denotes
26* A(IA:IA+M-1,JA:JA+N-1). No communications are performed in this
27* routine, the arrays are supposed to be aligned.
28*
29* Notes
30* =====
31*
32* Each global data object is described by an associated description
33* vector. This vector stores the information required to establish
34* the mapping between an object element and its corresponding process
35* and memory location.
36*
37* Let A be a generic term for any 2D block cyclicly distributed array.
38* Such a global array has an associated description vector DESCA.
39* In the following comments, the character _ should be read as
40* "of the global array".
41*
42* NOTATION STORED IN EXPLANATION
43* --------------- -------------- --------------------------------------
44* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
45* DTYPE_A = 1.
46* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
47* the BLACS process grid A is distribu-
48* ted over. The context itself is glo-
49* bal, but the handle (the integer
50* value) may vary.
51* M_A (global) DESCA( M_ ) The number of rows in the global
52* array A.
53* N_A (global) DESCA( N_ ) The number of columns in the global
54* array A.
55* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
56* the rows of the array.
57* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
58* the columns of the array.
59* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
60* row of the array A is distributed.
61* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
62* first column of the array A is
63* distributed.
64* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
65* array. LLD_A >= MAX(1,LOCr(M_A)).
66*
67* Let K be the number of rows or columns of a distributed matrix,
68* and assume that its process grid has dimension p x q.
69* LOCr( K ) denotes the number of elements of K that a process
70* would receive if K were distributed over the p processes of its
71* process column.
72* Similarly, LOCc( K ) denotes the number of elements of K that a
73* process would receive if K were distributed over the q processes of
74* its process row.
75* The values of LOCr() and LOCc() may be determined via a call to the
76* ScaLAPACK tool function, NUMROC:
77* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
78* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
79* An upper bound for these quantities may be computed by:
80* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
81* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
82*
83* Arguments
84* =========
85*
86* M (global input) INTEGER
87* The number of rows to be operated on i.e the number of rows
88* of the distributed submatrices sub( A ) and sub( C ). M >= 0.
89*
90* N (global input) INTEGER
91* The number of columns to be operated on i.e the number of
92* columns of the distributed submatrices sub( A ) and
93* sub( C ). N >= 0.
94*
95* ALPHA (global input) COMPLEX*16
96* The scalar ALPHA.
97*
98* A (local input) COMPLEX*16 pointer into the local memory
99* to a local array of dimension (LLD_A, LOCc(JA+N-1) ). This
100* array contains the local pieces of the distributed matrix
101* sub( A ).
102*
103* IA (global input) INTEGER
104* The row index in the global array A indicating the first
105* row of sub( A ).
106*
107* JA (global input) INTEGER
108* The column index in the global array A indicating the
109* first column of sub( A ).
110*
111* DESCA (global and local input) INTEGER array of dimension DLEN_.
112* The array descriptor for the distributed matrix A.
113*
114* BETA (global input) COMPLEX*16
115* The scalar BETA.
116*
117* C (local input/local output) COMPLEX*16 pointer into the
118* local memory to an array of dimension (LLD_C,LOCc(JC+N-1)).
119* This array contains the local pieces of the distributed
120* matrix sub( C ). On exit, this array contains the local
121* pieces of the resulting distributed matrix.
122*
123* IC (global input) INTEGER
124* The row index in the global array C indicating the first
125* row of sub( C ).
126*
127* JC (global input) INTEGER
128* The column index in the global array C indicating the
129* first column of sub( C ).
130*
131* DESCC (global and local input) INTEGER array of dimension DLEN_.
132* The array descriptor for the distributed matrix C.
133*
134* =====================================================================
135*
136* .. Parameters ..
137 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
138 $ lld_, mb_, m_, nb_, n_, rsrc_
139 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
140 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
141 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
142 COMPLEX*16 ZERO, ONE
143 parameter( zero = ( 0.0d+0, 0.0d+0 ),
144 $ one = ( 1.0d+0, 0.0d+0 ) )
145* ..
146* .. Local Scalars ..
147 INTEGER I, IACOL, IAROW, ICCOL, ICOFF, ICROW, IIA,
148 $ iic, ioffa, ioffc, iroff, j, jja, jjc, lda,
149 $ ldc, mp, mycol, myrow, npcol, nprow, nq
150* ..
151* .. External Subroutines ..
152 EXTERNAL blacs_gridinfo, infog2l
153* ..
154* .. External Functions ..
155 INTEGER NUMROC
156 EXTERNAL numroc
157* ..
158* .. Executable Statements ..
159*
160* Get grid parameters.
161*
162 CALL blacs_gridinfo( desca(ctxt_), nprow, npcol, myrow, mycol )
163*
164* Quick return if possible.
165*
166 IF( (m.EQ.0).OR.(n.EQ.0).OR.
167 $ ((alpha.EQ.zero).AND.(beta.EQ.one)) )
168 $ RETURN
169*
170 CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol,
171 $ iia, jja, iarow, iacol )
172 CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol,
173 $ iic, jjc, icrow, iccol )
174*
175 iroff = mod( ia-1, desca(mb_) )
176 icoff = mod( ja-1, desca(nb_) )
177 mp = numroc( m+iroff, desca(mb_), myrow, iarow, nprow )
178 nq = numroc( n+icoff, desca(nb_), mycol, iacol, npcol )
179 IF( myrow.EQ.iarow )
180 $ mp = mp-iroff
181 IF( mycol.EQ.iacol )
182 $ nq = nq-icoff
183 lda = desca(lld_)
184 ldc = descc(lld_)
185*
186 IF( nq.EQ.1 ) THEN
187 IF( beta.EQ.zero ) THEN
188 IF( alpha.EQ.zero ) THEN
189 ioffc = iic + (jjc-1)*ldc
190 DO 10 i = ioffc, ioffc+mp-1
191 c( i ) = zero
192 10 CONTINUE
193 ELSE
194 ioffa = iia + (jja-1)*lda
195 ioffc = iic + (jjc-1)*ldc
196 DO 20 i = ioffc, ioffc+mp-1
197 c( i ) = alpha * a( ioffa )
198 ioffa = ioffa + 1
199 20 CONTINUE
200 END IF
201 ELSE
202 IF( alpha.EQ.one ) THEN
203 IF( beta.EQ.one ) THEN
204 ioffa = iia + (jja-1)*lda
205 ioffc = iic + (jjc-1)*ldc
206 DO 30 i = ioffc, ioffc+mp-1
207 c( i ) = c( i ) + a( ioffa )
208 ioffa = ioffa + 1
209 30 CONTINUE
210 ELSE
211 ioffa = iia + (jja-1)*lda
212 ioffc = iic + (jjc-1)*ldc
213 DO 40 i = ioffc, ioffc+mp-1
214 c( i ) = beta * c( i ) + a( ioffa )
215 ioffa = ioffa + 1
216 40 CONTINUE
217 END IF
218 ELSE IF( beta.EQ.one ) THEN
219 ioffa = iia + (jja-1)*lda
220 ioffc = iic + (jjc-1)*ldc
221 DO 50 i = ioffc, ioffc+mp-1
222 c( i ) = c( i ) + alpha * a( ioffa )
223 ioffa = ioffa + 1
224 50 CONTINUE
225 ELSE
226 ioffa = iia + (jja-1)*lda
227 ioffc = iic + (jjc-1)*ldc
228 DO 60 i = ioffc, ioffc+mp-1
229 c( i ) = beta * c( i ) + alpha * a( ioffa )
230 ioffa = ioffa + 1
231 60 CONTINUE
232 END IF
233 END IF
234 ELSE
235 IF( beta.EQ.zero ) THEN
236 IF( alpha.EQ.zero ) THEN
237 ioffc = iic+(jjc-1)*ldc
238 DO 80 j = 1, nq
239 DO 70 i = ioffc, ioffc+mp-1
240 c( i ) = zero
241 70 CONTINUE
242 ioffc = ioffc + ldc
243 80 CONTINUE
244 ELSE
245 ioffa = iia+(jja-1)*lda
246 ioffc = iic+(jjc-1)*ldc
247 DO 100 j = 1, nq
248 DO 90 i = ioffc, ioffc+mp-1
249 c( i ) = alpha * a( ioffa )
250 ioffa = ioffa + 1
251 90 CONTINUE
252 ioffa = ioffa + lda - mp
253 ioffc = ioffc + ldc
254 100 CONTINUE
255 END IF
256 ELSE
257 IF( alpha.EQ.one ) THEN
258 IF( beta.EQ.one ) THEN
259 ioffa = iia+(jja-1)*lda
260 ioffc = iic+(jjc-1)*ldc
261 DO 120 j = 1, nq
262 DO 110 i = ioffc, ioffc+mp-1
263 c( i ) = c( i ) + a( ioffa )
264 ioffa = ioffa + 1
265 110 CONTINUE
266 ioffa = ioffa + lda - mp
267 ioffc = ioffc + ldc
268 120 CONTINUE
269 ELSE
270 ioffa = iia+(jja-1)*lda
271 ioffc = iic+(jjc-1)*ldc
272 DO 140 j = 1, nq
273 DO 130 i = ioffc, ioffc+mp-1
274 c( i ) = beta * c( i ) + a( ioffa )
275 ioffa = ioffa + 1
276 130 CONTINUE
277 ioffa = ioffa + lda - mp
278 ioffc = ioffc + ldc
279 140 CONTINUE
280 END IF
281 ELSE IF( beta.EQ.one ) THEN
282 ioffa = iia+(jja-1)*lda
283 ioffc = iic+(jjc-1)*ldc
284 DO 160 j = 1, nq
285 DO 150 i = ioffc, ioffc+mp-1
286 c( i ) = c( i ) + alpha * a( ioffa )
287 ioffa = ioffa + 1
288 150 CONTINUE
289 ioffa = ioffa + lda - mp
290 ioffc = ioffc + ldc
291 160 CONTINUE
292 ELSE
293 ioffa = iia+(jja-1)*lda
294 ioffc = iic+(jjc-1)*ldc
295 DO 180 j = 1, nq
296 DO 170 i = ioffc, ioffc+mp-1
297 c( i ) = beta * c( i ) + alpha * a( ioffa )
298 ioffa = ioffa + 1
299 170 CONTINUE
300 ioffa = ioffa + lda - mp
301 ioffc = ioffc + ldc
302 180 CONTINUE
303 END IF
304 END IF
305 END IF
306*
307 RETURN
308*
309* End of PZMATADD
310*
311 END
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
subroutine pzmatadd(m, n, alpha, a, ia, ja, desca, beta, c, ic, jc, descc)
Definition pzmatadd.f:3