SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
PB_Cplapad.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS auxiliary routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "../pblas.h"
14#include "../PBpblas.h"
15#include "../PBtools.h"
16#include "../PBblacs.h"
17#include "../PBblas.h"
18
19#ifdef __STDC__
20void PB_Cplapad( PBTYP_T * TYPE, char * UPLO, char * CONJUG, Int M,
21 Int N, char * ALPHA, char * BETA, char * A, Int IA,
22 Int JA, Int * DESCA )
23#else
24void PB_Cplapad( TYPE, UPLO, CONJUG, M, N, ALPHA, BETA, A, IA, JA,
25 DESCA )
26/*
27* .. Scalar Arguments ..
28*/
29 char * CONJUG, * UPLO;
30 Int IA, JA, M, N;
31 char * ALPHA, * BETA;
32 PBTYP_T * TYPE;
33/*
34* .. Array Arguments ..
35*/
36 Int * DESCA;
37 char * A;
38#endif
39{
40/*
41* Purpose
42* =======
43*
44* PB_Cplapad initializes an m by n submatrix A(IA:IA+M-1,JA:JA+N-1)
45* denoted by sub( A ) to beta on the diagonal or zeros the imaginary
46* part of those diagonals and set the offdiagonals to alpha.
47*
48* Notes
49* =====
50*
51* A description vector is associated with each 2D block-cyclicly dis-
52* tributed matrix. This vector stores the information required to
53* establish the mapping between a matrix entry and its corresponding
54* process and memory location.
55*
56* In the following comments, the character _ should be read as
57* "of the distributed matrix". Let A be a generic term for any 2D
58* block cyclicly distributed matrix. Its description vector is DESC_A:
59*
60* NOTATION STORED IN EXPLANATION
61* ---------------- --------------- ------------------------------------
62* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
63* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
64* the NPROW x NPCOL BLACS process grid
65* A is distributed over. The context
66* itself is global, but the handle
67* (the integer value) may vary.
68* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
69* ted matrix A, M_A >= 0.
70* N_A (global) DESCA[ N_ ] The number of columns in the distri-
71* buted matrix A, N_A >= 0.
72* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
73* block of the matrix A, IMB_A > 0.
74* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
75* left block of the matrix A,
76* INB_A > 0.
77* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
78* bute the last M_A-IMB_A rows of A,
79* MB_A > 0.
80* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
81* bute the last N_A-INB_A columns of
82* A, NB_A > 0.
83* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
84* row of the matrix A is distributed,
85* NPROW > RSRC_A >= 0.
86* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
87* first column of A is distributed.
88* NPCOL > CSRC_A >= 0.
89* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
90* array storing the local blocks of
91* the distributed matrix A,
92* IF( Lc( 1, N_A ) > 0 )
93* LLD_A >= MAX( 1, Lr( 1, M_A ) )
94* ELSE
95* LLD_A >= 1.
96*
97* Let K be the number of rows of a matrix A starting at the global in-
98* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
99* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
100* receive if these K rows were distributed over NPROW processes. If K
101* is the number of columns of a matrix A starting at the global index
102* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
103* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
104* these K columns were distributed over NPCOL processes.
105*
106* The values of Lr() and Lc() may be determined via a call to the func-
107* tion PB_Cnumroc:
108* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
109* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
110*
111* Arguments
112* =========
113*
114* TYPE (local input) pointer to a PBTYP_T structure
115* On entry, TYPE is a pointer to a structure of type PBTYP_T,
116* that contains type information (See pblas.h).
117*
118* UPLO (global input) pointer to CHAR
119* On entry, UPLO specifies the part of the submatrix sub( A )
120* to be set:
121* = 'L' or 'l': Lower triangular part is set; the strictly
122* upper triangular part of sub( A ) is not changed;
123* = 'U' or 'u': Upper triangular part is set; the strictly
124* lower triangular part of sub( A ) is not changed;
125* Otherwise: All of the matrix sub( A ) is set.
126*
127* CONJUG (global input) pointer to CHAR
128* On entry, CONJUG specifies what should be done to the diago-
129* nals as follows. When UPLO is 'L', 'l', 'U' or 'u' and CONJUG
130* is 'Z' or 'z', the imaginary part of the diagonals is set to
131* zero. Otherwise, the diagonals are set to beta.
132*
133* M (global input) INTEGER
134* On entry, M specifies the number of rows of the submatrix
135* sub( A ). M must be at least zero.
136*
137* N (global input) INTEGER
138* On entry, N specifies the number of columns of the submatrix
139* sub( A ). N must be at least zero.
140*
141* ALPHA (global input) pointer to CHAR
142* On entry, ALPHA specifies the scalar alpha, i.e., the cons-
143* tant to which the offdiagonal elements are to be set.
144*
145* BETA (global input) pointer to CHAR
146* On entry, BETA specifies the scalar beta, i.e., the constant
147* to which the diagonal elements are to be set. BETA is not re-
148* ferenced when UPLO is 'L', 'l', 'U' or 'u' and CONJUG is 'Z'.
149*
150* A (local input/local output) pointer to CHAR
151* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
152* at least Lc( 1, JA+N-1 ). Before entry, this array contains
153* the local entries of the matrix A to be set. On exit, the
154* leading m by n submatrix sub( A ) is set as follows:
155*
156* UPLO = 'L' or 'l', A(IA+i-1,JA+j-1)=ALPHA, j+1<=i<=M, 1<=j<=N
157* UPLO = 'U' or 'u', A(IA+i-1,JA+j-1)=ALPHA, 1<=i<=j-1, 1<=j<=N
158* otherwise, A(IA+i-1,JA+j-1)=ALPHA, 1<=i<=M, 1<=j<=N
159* and IA+i.NE.JA+j,
160* and, for all UPLO, A(IA+i-1,JA+i-1) = BETA, 1<=i<=min(M,N).
161*
162* IA (global input) INTEGER
163* On entry, IA specifies A's global row index, which points to
164* the beginning of the submatrix sub( A ).
165*
166* JA (global input) INTEGER
167* On entry, JA specifies A's global column index, which points
168* to the beginning of the submatrix sub( A ).
169*
170* DESCA (global and local input) INTEGER array
171* On entry, DESCA is an integer array of dimension DLEN_. This
172* is the array descriptor for the matrix A.
173*
174* -- Written on April 1, 1998 by
175* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
176*
177* ---------------------------------------------------------------------
178*/
179/*
180* .. Local Scalars ..
181*/
182 char type;
183 Int Acol, Aii, Aimb1, Ainb1, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0,
184 Anb, Anq, Anq0, Arow, ctxt, izero=0, k, kb, ktmp, mn, mycol,
185 myrow, nb, npcol, nprow, size;
186 TZPAD_T pad;
187/*
188* .. Local Arrays ..
189*/
190 Int Ad0[DLEN_];
191 char * Aptr = NULL;
192/* ..
193* .. Executable Statements ..
194*
195*/
196/*
197* Quick return if possible
198*/
199 if( ( M <= 0 ) || ( N <= 0 ) ) return;
200/*
201* Retrieve process grid information
202*/
203 Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
204/*
205* Compute descriptor Ad0 for sub( A )
206*/
207 PB_Cdescribe( M, N, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj,
208 &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
209/*
210* Quick return if I don't own any of sub( A ).
211*/
212 Amp = PB_Cnumroc( M, 0, Aimb1, Amb, myrow, Arow, nprow );
213 Anq = PB_Cnumroc( N, 0, Ainb1, Anb, mycol, Acol, npcol );
214 if( ( Amp <= 0 ) || ( Anq <= 0 ) ) return;
215
216 size = TYPE->size; type = TYPE->type; pad = TYPE->Ftzpad;
217 Aptr = Mptr( A, Aii, Ajj, Ald, size );
218/*
219* When the entire sub( A ) needs to be padded and alpha is equal to beta, or
220* sub( A ) is replicated in all processes, just call the local routine.
221*/
222 if( type == SREAL )
223 {
224 if( ( ( Mupcase( UPLO[0] ) == CALL ) &&
225 ( ((float*)(ALPHA))[REAL_PART] ==
226 ((float*)(BETA ))[REAL_PART] ) ) ||
227 ( ( ( Arow < 0 ) || ( nprow == 1 ) ) &&
228 ( ( Acol < 0 ) || ( npcol == 1 ) ) ) )
229 {
230 pad( C2F_CHAR( UPLO ), C2F_CHAR( CONJUG ), &Amp, &Anq, &izero, ALPHA,
231 BETA, Aptr, &Ald );
232 return;
233 }
234 }
235 else if( type == DREAL )
236 {
237 if( ( ( Mupcase( UPLO[0] ) == CALL ) &&
238 ( ((double*)(ALPHA))[REAL_PART] ==
239 ((double*)(BETA ))[REAL_PART] ) ) ||
240 ( ( ( Arow < 0 ) || ( nprow == 1 ) ) &&
241 ( ( Acol < 0 ) || ( npcol == 1 ) ) ) )
242 {
243 pad( C2F_CHAR( UPLO ), C2F_CHAR( CONJUG ), &Amp, &Anq, &izero, ALPHA,
244 BETA, Aptr, &Ald );
245 return;
246 }
247 }
248 else if( type == SCPLX )
249 {
250 if( ( ( Mupcase( UPLO[0] ) == CALL ) &&
251 ( ((float*)(ALPHA))[REAL_PART] ==
252 ((float*)(BETA ))[REAL_PART] ) &&
253 ( ((float*)(ALPHA))[IMAG_PART] ==
254 ((float*)(BETA ))[IMAG_PART] ) ) ||
255 ( ( ( Arow < 0 ) || ( nprow == 1 ) ) &&
256 ( ( Acol < 0 ) || ( npcol == 1 ) ) ) )
257 {
258 pad( C2F_CHAR( UPLO ), C2F_CHAR( CONJUG ), &Amp, &Anq, &izero, ALPHA,
259 BETA, Aptr, &Ald );
260 return;
261 }
262 }
263 else if( type == DCPLX )
264 {
265 if( ( ( Mupcase( UPLO[0] ) == CALL ) &&
266 ( ((double*)(ALPHA))[REAL_PART] ==
267 ((double*)(BETA ))[REAL_PART] ) &&
268 ( ((double*)(ALPHA))[IMAG_PART] ==
269 ((double*)(BETA ))[IMAG_PART] ) ) ||
270 ( ( ( Arow < 0 ) || ( nprow == 1 ) ) &&
271 ( ( Acol < 0 ) || ( npcol == 1 ) ) ) )
272 {
273 pad( C2F_CHAR( UPLO ), C2F_CHAR( CONJUG ), &Amp, &Anq, &izero, ALPHA,
274 BETA, Aptr, &Ald );
275 return;
276 }
277 }
278/*
279* Computational partitioning size is computed as the product of the logical
280* value returned by pilaenv_ and two times the least common multiple of nprow
281* and npcol.
282*/
283 nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type ) ) *
284 PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) );
285
286 mn = MIN( M, N );
287
288 if( Mupcase( UPLO[0] ) == CLOWER )
289 {
290/*
291* Lower triangle of sub( A ): proceed by block of columns. For each block of
292* columns, operate on the logical diagonal block first and then the remaining
293* rows of that block of columns.
294*/
295 for( k = 0; k < mn; k += nb )
296 {
297 kb = mn - k; ktmp = k + ( kb = MIN( kb, nb ) );
298 PB_Cplapd2( TYPE, UPLO, CONJUG, kb, kb, ALPHA, BETA, Aptr, k, k, Ad0 );
299 Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
300 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
301 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
302 if( ( Amp0 = Amp - Akp ) > 0 )
303 pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp0, &Anq0, &izero,
304 ALPHA, ALPHA, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald );
305 }
306 }
307 else if( Mupcase( UPLO[0] ) == CUPPER )
308 {
309/*
310* Upper triangle of sub( A ): proceed by block of columns. For each block of
311* columns, operate on the trailing rows and then the logical diagonal block
312* of that block of columns. When M < N, the last columns of sub( A ) are
313* handled together.
314*/
315 for( k = 0; k < mn; k += nb )
316 {
317 kb = mn - k; kb = MIN( kb, nb );
318 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
319 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
320 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
321 if( Akp > 0 )
322 pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Akp, &Anq0, &izero,
323 ALPHA, ALPHA, Mptr( Aptr, 0, Akq, Ald, size ), &Ald );
324 PB_Cplapd2( TYPE, UPLO, CONJUG, kb, kb, ALPHA, BETA, Aptr, k, k, Ad0 );
325 }
326 if( ( Anq -= ( Akq += Anq0 ) ) > 0 )
327 pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp, &Anq, &izero, ALPHA,
328 ALPHA, Mptr( Aptr, 0, Akq, Ald, size ), &Ald );
329 }
330 else
331 {
332/*
333* All of sub( A ): proceed by block of columns. For each block of columns,
334* operate on the trailing rows, then the logical diagonal block, and finally
335* the remaining rows of that block of columns. When M < N, the last columns
336* of sub( A ) are handled together.
337*/
338 for( k = 0; k < mn; k += nb )
339 {
340 kb = mn - k; kb = MIN( kb, nb );
341 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
342 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
343 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
344 if( Akp > 0 )
345 pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Akp, &Anq0, &izero,
346 ALPHA, ALPHA, Mptr( Aptr, 0, Akq, Ald, size ), &Ald );
347 PB_Cplapd2( TYPE, UPLO, NOCONJG, kb, kb, ALPHA, BETA, Aptr, k, k,
348 Ad0 );
349 Akp = PB_Cnumroc( k+kb, 0, Aimb1, Amb, myrow, Arow, nprow );
350 if( ( Amp0 = Amp - Akp ) > 0 )
351 pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp0, &Anq0, &izero,
352 ALPHA, ALPHA, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald );
353 }
354 if( ( Anq -= ( Akq += Anq0 ) ) > 0 )
355 pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp, &Anq, &izero, ALPHA,
356 ALPHA, Mptr( Aptr, 0, Akq, Ald, size ), &Ald );
357 }
358/*
359* End of PB_Cplapad
360*/
361}
#define Int
Definition Bconfig.h:22
#define SREAL
Definition pblas.h:473
#define REAL_PART
Definition pblas.h:139
#define SCPLX
Definition pblas.h:475
#define C2F_CHAR(a)
Definition pblas.h:125
#define DREAL
Definition pblas.h:474
#define DCPLX
Definition pblas.h:476
#define IMAG_PART
Definition pblas.h:140
F_VOID_FCT(* TZPAD_T)()
Definition pblas.h:292
void Cblacs_gridinfo()
#define ALL
Definition PBblas.h:50
#define NOCONJG
Definition PBblas.h:45
#define CUPPER
Definition PBblas.h:26
#define CALL
Definition PBblas.h:24
#define CLOWER
Definition PBblas.h:25
#define pilaenv_
Definition PBpblas.h:44
#define CTXT_
Definition PBtools.h:38
void PB_Cplapd2()
#define MIN(a_, b_)
Definition PBtools.h:76
#define Mptr(a_, i_, j_, lda_, siz_)
Definition PBtools.h:132
Int PB_Cnumroc()
void PB_Cplapad()
Int PB_Clcm()
#define Mupcase(C)
Definition PBtools.h:83
#define DLEN_
Definition PBtools.h:48
void PB_Cdescribe()
#define TYPE
Definition clamov.c:7