SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ psmatadd()

subroutine psmatadd ( integer  m,
integer  n,
real  alpha,
real, dimension( * )  a,
integer  ia,
integer  ja,
integer, dimension( * )  desca,
real  beta,
real, dimension( * )  c,
integer  ic,
integer  jc,
integer, dimension( * )  descc 
)

Definition at line 1 of file psmatadd.f.

3*
4* -- ScaLAPACK tools routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* May 1, 1997
8*
9* .. Scalar Arguments ..
10 INTEGER IA, IC, JA, JC, M, N
11 REAL ALPHA, BETA
12* ..
13* .. Array Arguments ..
14 INTEGER DESCA( * ), DESCC( * )
15 REAL A( * ), C( * )
16* ..
17*
18* Purpose
19* =======
20*
21* PSMATADD performs a distributed matrix-matrix addition
22*
23* sub( C ) := alpha * sub( A ) + beta * sub( C ),
24*
25* where sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1) and sub( A ) denotes
26* A(IA:IA+M-1,JA:JA+N-1). No communications are performed in this
27* routine, the arrays are supposed to be aligned.
28*
29* Notes
30* =====
31*
32* Each global data object is described by an associated description
33* vector. This vector stores the information required to establish
34* the mapping between an object element and its corresponding process
35* and memory location.
36*
37* Let A be a generic term for any 2D block cyclicly distributed array.
38* Such a global array has an associated description vector DESCA.
39* In the following comments, the character _ should be read as
40* "of the global array".
41*
42* NOTATION STORED IN EXPLANATION
43* --------------- -------------- --------------------------------------
44* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
45* DTYPE_A = 1.
46* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
47* the BLACS process grid A is distribu-
48* ted over. The context itself is glo-
49* bal, but the handle (the integer
50* value) may vary.
51* M_A (global) DESCA( M_ ) The number of rows in the global
52* array A.
53* N_A (global) DESCA( N_ ) The number of columns in the global
54* array A.
55* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
56* the rows of the array.
57* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
58* the columns of the array.
59* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
60* row of the array A is distributed.
61* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
62* first column of the array A is
63* distributed.
64* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
65* array. LLD_A >= MAX(1,LOCr(M_A)).
66*
67* Let K be the number of rows or columns of a distributed matrix,
68* and assume that its process grid has dimension p x q.
69* LOCr( K ) denotes the number of elements of K that a process
70* would receive if K were distributed over the p processes of its
71* process column.
72* Similarly, LOCc( K ) denotes the number of elements of K that a
73* process would receive if K were distributed over the q processes of
74* its process row.
75* The values of LOCr() and LOCc() may be determined via a call to the
76* ScaLAPACK tool function, NUMROC:
77* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
78* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
79* An upper bound for these quantities may be computed by:
80* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
81* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
82*
83* Arguments
84* =========
85*
86* M (global input) INTEGER
87* The number of rows to be operated on i.e the number of rows
88* of the distributed submatrices sub( A ) and sub( C ). M >= 0.
89*
90* N (global input) INTEGER
91* The number of columns to be operated on i.e the number of
92* columns of the distributed submatrices sub( A ) and
93* sub( C ). N >= 0.
94*
95* ALPHA (global input) REAL
96* The scalar ALPHA.
97*
98* A (local input) REAL pointer into the local memory
99* to a local array of dimension (LLD_A, LOCc(JA+N-1) ). This
100* array contains the local pieces of the distributed matrix
101* sub( A ).
102*
103* IA (global input) INTEGER
104* The row index in the global array A indicating the first
105* row of sub( A ).
106*
107* JA (global input) INTEGER
108* The column index in the global array A indicating the
109* first column of sub( A ).
110*
111* DESCA (global and local input) INTEGER array of dimension DLEN_.
112* The array descriptor for the distributed matrix A.
113*
114* BETA (global input) REAL
115* The scalar BETA.
116*
117* C (local input/local output) REAL pointer into the
118* local memory to an array of dimension (LLD_C,LOCc(JC+N-1)).
119* This array contains the local pieces of the distributed
120* matrix sub( C ). On exit, this array contains the local
121* pieces of the resulting distributed matrix.
122*
123* IC (global input) INTEGER
124* The row index in the global array C indicating the first
125* row of sub( C ).
126*
127* JC (global input) INTEGER
128* The column index in the global array C indicating the
129* first column of sub( C ).
130*
131* DESCC (global and local input) INTEGER array of dimension DLEN_.
132* The array descriptor for the distributed matrix C.
133*
134* =====================================================================
135*
136* .. Parameters ..
137 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
138 $ LLD_, MB_, M_, NB_, N_, RSRC_
139 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
140 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
141 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
142 REAL ZERO, ONE
143 parameter( zero = 0.0e+0, one = 1.0e+0 )
144* ..
145* .. Local Scalars ..
146 INTEGER I, IACOL, IAROW, ICCOL, ICOFF, ICROW, IIA,
147 $ IIC, IOFFA, IOFFC, IROFF, J, JJA, JJC, LDA,
148 $ LDC, MP, MYCOL, MYROW, NPCOL, NPROW, NQ
149* ..
150* .. External Subroutines ..
151 EXTERNAL blacs_gridinfo, infog2l
152* ..
153* .. External Functions ..
154 INTEGER NUMROC
155 EXTERNAL numroc
156* ..
157* .. Executable Statements ..
158*
159* Get grid parameters.
160*
161 CALL blacs_gridinfo( desca(ctxt_), nprow, npcol, myrow, mycol )
162*
163* Quick return if possible.
164*
165 IF( (m.EQ.0).OR.(n.EQ.0).OR.
166 $ ((alpha.EQ.zero).AND.(beta.EQ.one)) )
167 $ RETURN
168*
169 CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol,
170 $ iia, jja, iarow, iacol )
171 CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol,
172 $ iic, jjc, icrow, iccol )
173*
174 iroff = mod( ia-1, desca(mb_) )
175 icoff = mod( ja-1, desca(nb_) )
176 mp = numroc( m+iroff, desca(mb_), myrow, iarow, nprow )
177 nq = numroc( n+icoff, desca(nb_), mycol, iacol, npcol )
178 IF( myrow.EQ.iarow )
179 $ mp = mp-iroff
180 IF( mycol.EQ.iacol )
181 $ nq = nq-icoff
182 lda = desca(lld_)
183 ldc = descc(lld_)
184*
185 IF( nq.EQ.1 ) THEN
186 IF( beta.EQ.zero ) THEN
187 IF( alpha.EQ.zero ) THEN
188 ioffc = iic + (jjc-1)*ldc
189 DO 10 i = ioffc, ioffc+mp-1
190 c( i ) = zero
191 10 CONTINUE
192 ELSE
193 ioffa = iia + (jja-1)*lda
194 ioffc = iic + (jjc-1)*ldc
195 DO 20 i = ioffc, ioffc+mp-1
196 c( i ) = alpha * a( ioffa )
197 ioffa = ioffa + 1
198 20 CONTINUE
199 END IF
200 ELSE
201 IF( alpha.EQ.one ) THEN
202 IF( beta.EQ.one ) THEN
203 ioffa = iia + (jja-1)*lda
204 ioffc = iic + (jjc-1)*ldc
205 DO 30 i = ioffc, ioffc+mp-1
206 c( i ) = c( i ) + a( ioffa )
207 ioffa = ioffa + 1
208 30 CONTINUE
209 ELSE
210 ioffa = iia + (jja-1)*lda
211 ioffc = iic + (jjc-1)*ldc
212 DO 40 i = ioffc, ioffc+mp-1
213 c( i ) = beta * c( i ) + a( ioffa )
214 ioffa = ioffa + 1
215 40 CONTINUE
216 END IF
217 ELSE IF( beta.EQ.one ) THEN
218 ioffa = iia + (jja-1)*lda
219 ioffc = iic + (jjc-1)*ldc
220 DO 50 i = ioffc, ioffc+mp-1
221 c( i ) = c( i ) + alpha * a( ioffa )
222 ioffa = ioffa + 1
223 50 CONTINUE
224 ELSE
225 ioffa = iia + (jja-1)*lda
226 ioffc = iic + (jjc-1)*ldc
227 DO 60 i = ioffc, ioffc+mp-1
228 c( i ) = beta * c( i ) + alpha * a( ioffa )
229 ioffa = ioffa + 1
230 60 CONTINUE
231 END IF
232 END IF
233 ELSE
234 IF( beta.EQ.zero ) THEN
235 IF( alpha.EQ.zero ) THEN
236 ioffc = iic+(jjc-1)*ldc
237 DO 80 j = 1, nq
238 DO 70 i = ioffc, ioffc+mp-1
239 c( i ) = zero
240 70 CONTINUE
241 ioffc = ioffc + ldc
242 80 CONTINUE
243 ELSE
244 ioffa = iia+(jja-1)*lda
245 ioffc = iic+(jjc-1)*ldc
246 DO 100 j = 1, nq
247 DO 90 i = ioffc, ioffc+mp-1
248 c( i ) = alpha * a( ioffa )
249 ioffa = ioffa + 1
250 90 CONTINUE
251 ioffa = ioffa + lda - mp
252 ioffc = ioffc + ldc
253 100 CONTINUE
254 END IF
255 ELSE
256 IF( alpha.EQ.one ) THEN
257 IF( beta.EQ.one ) THEN
258 ioffa = iia+(jja-1)*lda
259 ioffc = iic+(jjc-1)*ldc
260 DO 120 j = 1, nq
261 DO 110 i = ioffc, ioffc+mp-1
262 c( i ) = c( i ) + a( ioffa )
263 ioffa = ioffa + 1
264 110 CONTINUE
265 ioffa = ioffa + lda - mp
266 ioffc = ioffc + ldc
267 120 CONTINUE
268 ELSE
269 ioffa = iia+(jja-1)*lda
270 ioffc = iic+(jjc-1)*ldc
271 DO 140 j = 1, nq
272 DO 130 i = ioffc, ioffc+mp-1
273 c( i ) = beta * c( i ) + a( ioffa )
274 ioffa = ioffa + 1
275 130 CONTINUE
276 ioffa = ioffa + lda - mp
277 ioffc = ioffc + ldc
278 140 CONTINUE
279 END IF
280 ELSE IF( beta.EQ.one ) THEN
281 ioffa = iia+(jja-1)*lda
282 ioffc = iic+(jjc-1)*ldc
283 DO 160 j = 1, nq
284 DO 150 i = ioffc, ioffc+mp-1
285 c( i ) = c( i ) + alpha * a( ioffa )
286 ioffa = ioffa + 1
287 150 CONTINUE
288 ioffa = ioffa + lda - mp
289 ioffc = ioffc + ldc
290 160 CONTINUE
291 ELSE
292 ioffa = iia+(jja-1)*lda
293 ioffc = iic+(jjc-1)*ldc
294 DO 180 j = 1, nq
295 DO 170 i = ioffc, ioffc+mp-1
296 c( i ) = beta * c( i ) + alpha * a( ioffa )
297 ioffa = ioffa + 1
298 170 CONTINUE
299 ioffa = ioffa + lda - mp
300 ioffc = ioffc + ldc
301 180 CONTINUE
302 END IF
303 END IF
304 END IF
305*
306 RETURN
307*
308* End of PSMATADD
309*
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
Here is the call graph for this function:
Here is the caller graph for this function: