ScaLAPACK 2.1  2.1 ScaLAPACK: Scalable Linear Algebra PACKage
Go to the documentation of this file.
1 /* ---------------------------------------------------------------------
2 *
3 * -- PBLAS routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * ---------------------------------------------------------------------
9 */
10 /*
11 * Include files
12 */
13 #include "pblas.h"
14 #include "PBpblas.h"
15 #include "PBtools.h"
16 #include "PBblacs.h"
17 #include "PBblas.h"
18
19 #ifdef __STDC__
20 void pctradd_( F_CHAR_T UPLO, F_CHAR_T TRANS, int * M, int * N,
21  float * ALPHA,
22  float * A, int * IA, int * JA, int * DESCA,
23  float * BETA,
24  float * C, int * IC, int * JC, int * DESCC )
25 #else
26 void pctradd_( UPLO, TRANS, M, N, ALPHA, A, IA, JA, DESCA, BETA,
27  C, IC, JC, DESCC )
28 /*
29 * .. Scalar Arguments ..
30 */
31  F_CHAR_T TRANS, UPLO;
32  int * IA, * IC, * JA, * JC, * M, * N;
33  float * ALPHA, * BETA;
34 /*
35 * .. Array Arguments ..
36 */
37  int * DESCA, * DESCC;
38  float * A, * C;
39 #endif
40 {
41 /*
42 * Purpose
43 * =======
44 *
46 *
47 * sub( C ) := beta*sub( C ) + alpha*op( sub( A ) )
48 *
49 * where
50 *
51 * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), and, op( X ) is one of
52 *
53 * op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ).
54 *
55 * Thus, op( sub( A ) ) denotes A(IA:IA+M-1,JA:JA+N-1) if TRANS = 'N',
56 * A(IA:IA+N-1,JA:JA+M-1)' if TRANS = 'T',
57 * conjg(A(IA:IA+N-1,JA:JA+M-1)') if TRANS = 'C',
58 *
59 * Alpha and beta are scalars, sub( C ) and op( sub( A ) ) are m by n
60 * upper or lower trapezoidal submatrices.
61 *
62 * Notes
63 * =====
64 *
65 * A description vector is associated with each 2D block-cyclicly dis-
66 * tributed matrix. This vector stores the information required to
67 * establish the mapping between a matrix entry and its corresponding
68 * process and memory location.
69 *
70 * In the following comments, the character _ should be read as
71 * "of the distributed matrix". Let A be a generic term for any 2D
72 * block cyclicly distributed matrix. Its description vector is DESC_A:
73 *
74 * NOTATION STORED IN EXPLANATION
75 * ---------------- --------------- ------------------------------------
76 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
77 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
78 * the NPROW x NPCOL BLACS process grid
79 * A is distributed over. The context
80 * itself is global, but the handle
81 * (the integer value) may vary.
82 * M_A (global) DESCA[ M_ ] The number of rows in the distribu-
83 * ted matrix A, M_A >= 0.
84 * N_A (global) DESCA[ N_ ] The number of columns in the distri-
85 * buted matrix A, N_A >= 0.
86 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
87 * block of the matrix A, IMB_A > 0.
88 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper
89 * left block of the matrix A,
90 * INB_A > 0.
91 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
92 * bute the last M_A-IMB_A rows of A,
93 * MB_A > 0.
94 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
95 * bute the last N_A-INB_A columns of
96 * A, NB_A > 0.
97 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
98 * row of the matrix A is distributed,
99 * NPROW > RSRC_A >= 0.
100 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
101 * first column of A is distributed.
102 * NPCOL > CSRC_A >= 0.
103 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
104 * array storing the local blocks of
105 * the distributed matrix A,
106 * IF( Lc( 1, N_A ) > 0 )
107 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
108 * ELSE
109 * LLD_A >= 1.
110 *
111 * Let K be the number of rows of a matrix A starting at the global in-
112 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
113 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
114 * receive if these K rows were distributed over NPROW processes. If K
115 * is the number of columns of a matrix A starting at the global index
116 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
117 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
118 * these K columns were distributed over NPCOL processes.
119 *
120 * The values of Lr() and Lc() may be determined via a call to the func-
121 * tion PB_Cnumroc:
122 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
123 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
124 *
125 * Arguments
126 * =========
127 *
128 * UPLO (global input) CHARACTER*1
129 * On entry, UPLO specifies whether the local pieces of the
130 * array C containing the upper or lower triangular part of the
131 * triangular submatrix sub( C ) is to be referenced as follows:
132 *
133 * UPLO = 'U' or 'u' Only the local pieces corresponding to
134 * the upper triangular part of the
135 * triangular submatrix sub( C ) is to be
136 * referenced,
137 *
138 * UPLO = 'L' or 'l' Only the local pieces corresponding to
139 * the lower triangular part of the
140 * triangular submatrix sub( C ) is to be
141 * referenced.
142 *
143 * TRANS (global input) CHARACTER*1
144 * On entry, TRANS specifies the form of op( sub( A ) ) to be
145 * used in the matrix addition as follows:
146 *
147 * TRANS = 'N' or 'n' op( sub( A ) ) = sub( A ),
148 *
149 * TRANS = 'T' or 't' op( sub( A ) ) = sub( A )',
150 *
151 * TRANS = 'C' or 'c' op( sub( A ) ) = conjg( sub( A )' ).
152 *
153 * M (global input) INTEGER
154 * On entry, M specifies the number of rows of the submatrix
155 * sub( C ) and the number of columns of the submatrix sub( A ).
156 * M must be at least zero.
157 *
158 * N (global input) INTEGER
159 * On entry, N specifies the number of columns of the submatrix
160 * sub( C ) and the number of rows of the submatrix sub( A ). N
161 * must be at least zero.
162 *
163 * ALPHA (global input) COMPLEX
164 * On entry, ALPHA specifies the scalar alpha. When ALPHA is
165 * supplied as zero then the local entries of the array A
166 * corresponding to the entries of the submatrix sub( A ) need
167 * not be set on input.
168 *
169 * A (local input) COMPLEX array
170 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is
171 * at least Lc( 1, JA+N-1 ) when TRANS = 'N' or 'n' and is at
172 * least Lc( 1, JA+M-1 ) otherwise. Before entry, this array
173 * contains the local entries of the matrix A.
174 * Before entry with UPLO = 'U' or 'u' and TRANS = 'N' or 'n' or
175 * UPLO = 'L' or 'l' and TRANS = 'T', 'C', 't' or 'c', this ar-
176 * ray contains the local entries corresponding to the entries
177 * of the upper triangular submatrix sub( A ), and the local en-
178 * tries corresponding to the entries of the strictly lower tri-
179 * angular part of the submatrix sub( A ) are not referenced.
180 * Before entry with UPLO = 'L' or 'l' and TRANS = 'N' or 'n' or
181 * UPLO = 'U' or 'u' and TRANS = 'T', 'C', 't' or 'c', this ar-
182 * ray contains the local entries corresponding to the entries
183 * of the lower triangular submatrix sub( A ), and the local en-
184 * tries corresponding to the entries of the strictly upper tri-
185 * angular part of the submatrix sub( A ) are not referenced.
186 *
187 * IA (global input) INTEGER
188 * On entry, IA specifies A's global row index, which points to
189 * the beginning of the submatrix sub( A ).
190 *
191 * JA (global input) INTEGER
192 * On entry, JA specifies A's global column index, which points
193 * to the beginning of the submatrix sub( A ).
194 *
195 * DESCA (global and local input) INTEGER array
196 * On entry, DESCA is an integer array of dimension DLEN_. This
197 * is the array descriptor for the matrix A.
198 *
199 * BETA (global input) COMPLEX
200 * On entry, BETA specifies the scalar beta. When BETA is
201 * supplied as zero then the local entries of the array C
202 * corresponding to the entries of the submatrix sub( C ) need
203 * not be set on input.
204 *
205 * C (local input/local output) COMPLEX array
206 * On entry, C is an array of dimension (LLD_C, Kc), where Kc is
207 * at least Lc( 1, JC+N-1 ). Before entry, this array contains
208 * the local entries of the matrix C.
209 * Before entry with UPLO = 'U' or 'u', this array contains
210 * the local entries corresponding to the upper triangular part
211 * of the triangular submatrix sub( C ), and the local entries
212 * corresponding to the strictly lower triangular of sub( C )
213 * are not referenced. On exit, the upper triangular part of
214 * sub( C ) is overwritten by the upper triangular part of the
215 * updated submatrix.
216 * Before entry with UPLO = 'L' or 'l', this array contains
217 * the local entries corresponding to the lower triangular part
218 * of the triangular submatrix sub( C ), and the local entries
219 * corresponding to the strictly upper triangular of sub( C )
220 * are not referenced. On exit, the lower triangular part of
221 * sub( C ) is overwritten by the lower triangular part of the
222 * updated submatrix.
223 *
224 * IC (global input) INTEGER
225 * On entry, IC specifies C's global row index, which points to
226 * the beginning of the submatrix sub( C ).
227 *
228 * JC (global input) INTEGER
229 * On entry, JC specifies C's global column index, which points
230 * to the beginning of the submatrix sub( C ).
231 *
232 * DESCC (global and local input) INTEGER array
233 * On entry, DESCC is an integer array of dimension DLEN_. This
234 * is the array descriptor for the matrix C.
235 *
236 * -- Written on April 1, 1998 by
237 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
238 *
239 * ---------------------------------------------------------------------
240 */
241 /*
242 * .. Local Scalars ..
243 */
244  char DirAC, TranOp, UploC, ctop, rtop;
245  int Ai, Aj, Ci, Cj, ctxt, info, mycol, myrow, notran, npcol,
246  nprow, upper;
247 /*
248 * .. Local Arrays ..
249 */
251 /* ..
252 * .. Executable Statements ..
253 *
254 */
255  upper = ( ( UploC = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER );
256  notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN );
257  PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad );
258  PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd );
259 #ifndef NO_ARGCHK
260 /*
261 * Test the input parameters
262 */
263  Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
264  if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) )
265  {
266  if( ( !upper ) && ( UploC != CLOWER ) )
267  {
268  PB_Cwarn( ctxt, __LINE__, "PCTRADD", "Illegal UPLO = %c\n", UploC );
269  info = -1;
270  }
271  else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) )
272  {
273  PB_Cwarn( ctxt, __LINE__, "PCTRADD", "Illegal TRANS = %c\n", TranOp );
274  info = -2;
275  }
276  if( notran )
277  PB_Cchkmat( ctxt, "PCTRADD", "A", *M, 3, *N, 4, Ai, Aj, Ad, 9,
278  &info );
279  else
280  PB_Cchkmat( ctxt, "PCTRADD", "A", *N, 4, *M, 3, Ai, Aj, Ad, 9,
281  &info );
282  PB_Cchkmat( ctxt, "PCTRADD", "C", *M, 3, *N, 4, Ci, Cj, Cd, 14,
283  &info );
284  }
285  if( info ) { PB_Cabort( ctxt, "PCTRADD", info ); return; }
286 #endif
287 /*
288 * Quick return if possible
289 */
290  if( ( *M == 0 ) || ( *N == 0 ) ||
291  ( ( ALPHA[REAL_PART] == ZERO && ALPHA[IMAG_PART] == ZERO ) &&
292  ( BETA [REAL_PART] == ONE && BETA [IMAG_PART] == ZERO ) ) )
293  return;
294 /*
295 * And when alpha is zero
296 */
297  if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) )
298  {
299  if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) )
300  {
301  PB_Cplapad( PB_Cctypeset(), &UploC, NOCONJG, *M, *N,
302  ((char *)BETA), ((char *)BETA), ((char *) C), Ci, Cj, Cd );
303  }
304  else
305  {
306  PB_Cplascal( PB_Cctypeset(), &UploC, NOCONJG, *M, *N,
307  ((char *)BETA), ((char * )C), Ci, Cj, Cd );
308  }
309  return;
310  }
311 /*
312 * Start the operations
313 */
314 /*
315 * This operation mainly involves point-to-point send and receive communication.
316 * There is therefore no particular BLACS topology to recommend. Still, one can
317 * choose the main loop direction in which the operands will be added, but not
318 * transposed. This selection is based on the current setting for the BLACS
320 */
321  rtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET );
322  ctop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
323
324  if( *M <= *N )
325  DirAC = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD );
326  else
327  DirAC = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD );
328  PB_Cptradd( PB_Cctypeset(), &DirAC, &UploC, ( notran ? NOTRAN :
329  ( ( TranOp == CCOTRAN ) ? COTRAN : TRAN ) ), *M, *N,
330  ((char *) ALPHA), ((char *) A), Ai, Aj, Ad, ((char *) BETA),
331  ((char *) C), Ci, Cj, Cd );
332 /*
334 */
335 }
ROW
#define ROW
Definition: PBblacs.h:46
PB_Cwarn
void PB_Cwarn()
COLUMN
#define COLUMN
Definition: PBblacs.h:45
PBblacs.h
PBtools.h
PBblas.h
CCOTRAN
#define CCOTRAN
Definition: PBblas.h:22
NOCONJG
#define NOCONJG
Definition: PBblas.h:45
REAL_PART
#define REAL_PART
Definition: pblas.h:135
PBpblas.h
DLEN_
#define DLEN_
Definition: PBtools.h:48
TRAN
#define TRAN
Definition: PBblas.h:46
NOTRAN
#define NOTRAN
Definition: PBblas.h:44
F_CHAR_T
char * F_CHAR_T
Definition: pblas.h:118
ZERO
#define ZERO
Definition: PBtools.h:66
CTOP_DRING
#define CTOP_DRING
Definition: PBblacs.h:28
void pctradd_(F_CHAR_T UPLO, F_CHAR_T TRANS, int *M, int *N, float *ALPHA, float *A, int *IA, int *JA, int *DESCA, float *BETA, float *C, int *IC, int *JC, int *DESCC)
PB_Cplascal
void PB_Cplascal()
PB_Cabort
void PB_Cabort()
CLOWER
#define CLOWER
Definition: PBblas.h:25
F2C_CHAR
#define F2C_CHAR(a)
Definition: pblas.h:120
TOP_GET
#define TOP_GET
Definition: PBblacs.h:50
PB_Ctop
char * PB_Ctop()
ONE
#define ONE
Definition: PBtools.h:64
CNOTRAN
#define CNOTRAN
Definition: PBblas.h:18
PB_CargFtoC
void PB_CargFtoC()
BCAST
#define BCAST
Definition: PBblacs.h:48
COTRAN
#define COTRAN
Definition: PBblas.h:48
PB_Cchkmat
void PB_Cchkmat()
CFORWARD
#define CFORWARD
Definition: PBblas.h:38
Cblacs_gridinfo
void Cblacs_gridinfo()
Mupcase
#define Mupcase(C)
Definition: PBtools.h:83
pblas.h
CUPPER
#define CUPPER
Definition: PBblas.h:26
CTRAN
#define CTRAN
Definition: PBblas.h:20