SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
PB_Cplascal.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS auxiliary routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "../pblas.h"
14#include "../PBpblas.h"
15#include "../PBtools.h"
16#include "../PBblacs.h"
17#include "../PBblas.h"
18
19#ifdef __STDC__
20void PB_Cplascal( PBTYP_T * TYPE, char * UPLO, char * CONJUG, Int M,
21 Int N, char * ALPHA, char * A, Int IA, Int JA,
22 Int * DESCA )
23#else
24void PB_Cplascal( TYPE, UPLO, CONJUG, M, N, ALPHA, A, IA, JA, DESCA )
25/*
26* .. Scalar Arguments ..
27*/
28 char * CONJUG, * UPLO;
29 Int IA, JA, M, N;
30 char * ALPHA;
31 PBTYP_T * TYPE;
32/*
33* .. Array Arguments ..
34*/
35 Int * DESCA;
36 char * A;
37#endif
38{
39/*
40* Purpose
41* =======
42*
43* PB_Cplascal scales by alpha an m by n submatrix sub( A ) denoting
44* A(IA:IA+M-1,JA:JA+N-1).
45*
46* Notes
47* =====
48*
49* A description vector is associated with each 2D block-cyclicly dis-
50* tributed matrix. This vector stores the information required to
51* establish the mapping between a matrix entry and its corresponding
52* process and memory location.
53*
54* In the following comments, the character _ should be read as
55* "of the distributed matrix". Let A be a generic term for any 2D
56* block cyclicly distributed matrix. Its description vector is DESC_A:
57*
58* NOTATION STORED IN EXPLANATION
59* ---------------- --------------- ------------------------------------
60* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
61* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
62* the NPROW x NPCOL BLACS process grid
63* A is distributed over. The context
64* itself is global, but the handle
65* (the integer value) may vary.
66* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
67* ted matrix A, M_A >= 0.
68* N_A (global) DESCA[ N_ ] The number of columns in the distri-
69* buted matrix A, N_A >= 0.
70* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
71* block of the matrix A, IMB_A > 0.
72* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
73* left block of the matrix A,
74* INB_A > 0.
75* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
76* bute the last M_A-IMB_A rows of A,
77* MB_A > 0.
78* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
79* bute the last N_A-INB_A columns of
80* A, NB_A > 0.
81* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
82* row of the matrix A is distributed,
83* NPROW > RSRC_A >= 0.
84* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
85* first column of A is distributed.
86* NPCOL > CSRC_A >= 0.
87* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
88* array storing the local blocks of
89* the distributed matrix A,
90* IF( Lc( 1, N_A ) > 0 )
91* LLD_A >= MAX( 1, Lr( 1, M_A ) )
92* ELSE
93* LLD_A >= 1.
94*
95* Let K be the number of rows of a matrix A starting at the global in-
96* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
97* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
98* receive if these K rows were distributed over NPROW processes. If K
99* is the number of columns of a matrix A starting at the global index
100* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
101* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
102* these K columns were distributed over NPCOL processes.
103*
104* The values of Lr() and Lc() may be determined via a call to the func-
105* tion PB_Cnumroc:
106* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
107* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
108*
109* Arguments
110* =========
111*
112* TYPE (local input) pointer to a PBTYP_T structure
113* On entry, TYPE is a pointer to a structure of type PBTYP_T,
114* that contains type information (See pblas.h).
115*
116* UPLO (global input) pointer to CHAR
117* On entry, UPLO specifies the part of the submatrix sub( A )
118* to be scaled as follows:
119* = 'L' or 'l': Lower triangular part is scaled; the
120* strictly upper triangular part of sub( A ) is not changed;
121* = 'U' or 'u': Upper triangular part is scaled; the
122* strictly lower triangular part of sub( A ) is not changed;
123* Otherwise: All of the submatrix sub( A ) is scaled.
124*
125* CONJUG (global input) pointer to CHAR
126* On entry, CONJUG specifies what kind of scaling should be
127* done as follows: when UPLO is 'L', 'l', 'U' or 'u' and CONJUG
128* is 'Z' or 'z', alpha is assumed to be real and the imaginary
129* part of the diagonals are set to zero. Otherwise, alpha is of
130* the same type as the entries of sub( A ) and nothing particu-
131* lar is done to the diagonals of sub( A ).
132*
133* M (global input) INTEGER
134* On entry, M specifies the number of rows of the submatrix
135* sub( A ). M must be at least zero.
136*
137* N (global input) INTEGER
138* On entry, N specifies the number of columns of the submatrix
139* sub( A ). N must be at least zero.
140*
141* ALPHA (global input) pointer to CHAR
142* On entry, ALPHA specifies the scalar alpha, i.e., the cons-
143* tant with which the matrix elements are to be scaled.
144*
145* A (local input/local output) pointer to CHAR
146* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
147* at least Lc( 1, JA+N-1 ). Before entry, this array contains
148* the local entries of the matrix A to be scaled. On exit, the
149* local entries of this array corresponding to the to the en-
150* tries of the submatrix sub( A ) are overwritten by the local
151* entries of the m by n scaled submatrix.
152*
153* IA (global input) INTEGER
154* On entry, IA specifies A's global row index, which points to
155* the beginning of the submatrix sub( A ).
156*
157* JA (global input) INTEGER
158* On entry, JA specifies A's global column index, which points
159* to the beginning of the submatrix sub( A ).
160*
161* DESCA (global and local input) INTEGER array
162* On entry, DESCA is an integer array of dimension DLEN_. This
163* is the array descriptor for the matrix A.
164*
165* -- Written on April 1, 1998 by
166* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
167*
168* ---------------------------------------------------------------------
169*/
170/*
171* .. Local Scalars ..
172*/
173 char UploA, herm, type;
174 Int Acol, Arow, Aii, Aimb1, Ainb1, Ajj, Akp, Akq, Ald, Amb, Amp,
175 Amp0, Anb, Anq, Anq0, ctxt, izero=0, k, kb, ktmp, mn, mycol,
176 myrow, nb, npcol, nprow, size;
177 TZSCAL_T scal;
178/*
179* .. Local Arrays ..
180*/
181 Int Ad0[DLEN_];
182 char * Aptr = NULL;
183/* ..
184* .. Executable Statements ..
185*
186*/
187/*
188* Quick return if possible
189*/
190 if( ( M <= 0 ) || ( N <= 0 ) ) return;
191/*
192* If alpha is zero, then call PB_Cplapad instead.
193*/
194 type = TYPE->type;
195 UploA = Mupcase( UPLO[0] );
196 herm = ( UploA == CALL ? CNOCONJG : Mupcase( CONJUG[0] ) );
197
198 if( type == SREAL )
199 {
200 if( ((float*)(ALPHA))[REAL_PART] == ZERO )
201 {
202 PB_Cplapad( TYPE, UPLO, NOCONJG, M, N, TYPE->zero, TYPE->zero, A, IA,
203 JA, DESCA );
204 return;
205 }
206 else if( ((float*)(ALPHA))[REAL_PART] == ONE ) return;
207 }
208 else if( type == DREAL )
209 {
210 if( ((double*)(ALPHA))[REAL_PART] == ZERO )
211 {
212 PB_Cplapad( TYPE, UPLO, NOCONJG, M, N, TYPE->zero, TYPE->zero, A, IA,
213 JA, DESCA );
214 return;
215 }
216 else if( ((double*)(ALPHA))[REAL_PART] == ONE ) return;
217 }
218 else if( type == SCPLX )
219 {
220 if( herm == CCONJG )
221 {
222 if( ((float*)(ALPHA))[REAL_PART] == ZERO )
223 {
224 PB_Cplapad( TYPE, UPLO, NOCONJG, M, N, TYPE->zero, TYPE->zero, A,
225 IA, JA, DESCA );
226 return;
227 }
228 }
229 else
230 {
231 if( ((float*)(ALPHA))[IMAG_PART] == ZERO )
232 {
233 if( ((float*)(ALPHA))[REAL_PART] == ZERO )
234 {
235 PB_Cplapad( TYPE, UPLO, NOCONJG, M, N, TYPE->zero, TYPE->zero, A,
236 IA, JA, DESCA );
237 return;
238 }
239 else if( ((float*)(ALPHA))[REAL_PART] == ONE ) return;
240 }
241 }
242 }
243 else if( type == DCPLX )
244 {
245 if( herm == CCONJG )
246 {
247 if( ((double*)(ALPHA))[REAL_PART] == ZERO )
248 {
249 PB_Cplapad( TYPE, UPLO, NOCONJG, M, N, TYPE->zero, TYPE->zero, A,
250 IA, JA, DESCA );
251 return;
252 }
253 }
254 else
255 {
256 if( ((double*)(ALPHA))[IMAG_PART] == ZERO )
257 {
258 if( ((double*)(ALPHA))[REAL_PART] == ZERO )
259 {
260 PB_Cplapad( TYPE, UPLO, NOCONJG, M, N, TYPE->zero, TYPE->zero, A,
261 IA, JA, DESCA );
262 return;
263 }
264 else if( ((double*)(ALPHA))[REAL_PART] == ONE ) return;
265 }
266 }
267 }
268/*
269* Retrieve process grid information
270*/
271 Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
272/*
273* Compute descriptor Ad0 for sub( A )
274*/
275 PB_Cdescribe( M, N, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj,
276 &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
277/*
278* Quick return if I don't own any of sub( A ).
279*/
280 Amp = PB_Cnumroc( M, 0, Aimb1, Amb, myrow, Arow, nprow );
281 Anq = PB_Cnumroc( N, 0, Ainb1, Anb, mycol, Acol, npcol );
282 if( ( Amp <= 0 ) || ( Anq <= 0 ) ) return;
283
284 size = TYPE->size;
285 scal = ( herm == CCONJG ? TYPE->Fhescal : TYPE->Ftzscal );
286 Aptr = Mptr( A, Aii, Ajj, Ald, size );
287/*
288* When the entire sub( A ) needs to be scaled or when sub( A ) is replicated in
289* all processes, just call the local routine.
290*/
291 if( ( Mupcase( UPLO[0] ) == CALL ) ||
292 ( ( ( Arow < 0 ) || ( nprow == 1 ) ) &&
293 ( ( Acol < 0 ) || ( npcol == 1 ) ) ) )
294 {
295 scal( C2F_CHAR( UPLO ), &Amp, &Anq, &izero, ALPHA, Aptr, &Ald );
296 return;
297 }
298/*
299* Computational partitioning size is computed as the product of the logical
300* value returned by pilaenv_ and two times the least common multiple of nprow
301* and npcol.
302*/
303 nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type ) ) *
304 PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) );
305
306 mn = MIN( M, N );
307
308 if( Mupcase( UPLO[0] ) == CLOWER )
309 {
310/*
311* Lower triangle of sub( A ): proceed by block of columns. For each block of
312* columns, operate on the logical diagonal block first and then the remaining
313* rows of that block of columns.
314*/
315 for( k = 0; k < mn; k += nb )
316 {
317 kb = mn - k; ktmp = k + ( kb = MIN( kb, nb ) );
318 PB_Cplasca2( TYPE, UPLO, CONJUG, kb, kb, ALPHA, Aptr, k, k, Ad0 );
319 Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
320 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
321 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
322 if( ( Amp0 = Amp - Akp ) > 0 )
323 scal( C2F_CHAR( ALL ), &Amp0, &Anq0, &izero, ALPHA, Mptr( Aptr,
324 Akp, Akq, Ald, size ), &Ald );
325 }
326 }
327 else if( Mupcase( UPLO[0] ) == CUPPER )
328 {
329/*
330* Upper triangle of sub( A ): proceed by block of columns. For each block of
331* columns, operate on the trailing rows and then the logical diagonal block
332* of that block of columns. When M < N, the last columns of sub( A ) are
333* handled together.
334*/
335 for( k = 0; k < mn; k += nb )
336 {
337 kb = mn - k; kb = MIN( kb, nb );
338 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
339 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
340 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
341 if( Akp > 0 )
342 scal( C2F_CHAR( ALL ), &Akp, &Anq0, &izero, ALPHA, Mptr( Aptr,
343 0, Akq, Ald, size ), &Ald );
344 PB_Cplasca2( TYPE, UPLO, CONJUG, kb, kb, ALPHA, Aptr, k, k, Ad0 );
345 }
346 if( ( Anq -= ( Akq += Anq0 ) ) > 0 )
347 scal( C2F_CHAR( ALL ), &Amp, &Anq, &izero, ALPHA, Mptr( Aptr, 0,
348 Akq, Ald, size ), &Ald );
349 }
350 else
351 {
352/*
353* All of sub( A ): proceed by block of columns. For each block of columns,
354* operate on the trailing rows, then the logical diagonal block, and finally
355* the remaining rows of that block of columns. When M < N, the last columns
356* of sub( A ) are handled together.
357*/
358 for( k = 0; k < mn; k += nb )
359 {
360 kb = mn - k; kb = MIN( kb, nb );
361 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
362 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
363 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
364 if( Akp > 0 )
365 scal( C2F_CHAR( ALL ), &Akp, &Anq0, &izero, ALPHA, Mptr( Aptr,
366 0, Akq, Ald, size ), &Ald );
367 PB_Cplasca2( TYPE, UPLO, NOCONJG, kb, kb, ALPHA, Aptr, k, k, Ad0 );
368 Akp = PB_Cnumroc( k+kb, 0, Aimb1, Amb, myrow, Arow, nprow );
369 if( ( Amp0 = Amp - Akp ) > 0 )
370 scal( C2F_CHAR( ALL ), &Amp0, &Anq0, &izero, ALPHA, Mptr( Aptr,
371 Akp, Akq, Ald, size ), &Ald );
372 }
373 if( ( Anq -= ( Akq += Anq0 ) ) > 0 )
374 scal( C2F_CHAR( ALL ), &Amp, &Anq, &izero, ALPHA, Mptr( Aptr, 0,
375 Akq, Ald, size ), &Ald );
376 }
377/*
378* End of PB_Cplascal
379*/
380}
#define Int
Definition Bconfig.h:22
#define SREAL
Definition pblas.h:473
#define REAL_PART
Definition pblas.h:139
#define SCPLX
Definition pblas.h:475
#define C2F_CHAR(a)
Definition pblas.h:125
#define DREAL
Definition pblas.h:474
#define DCPLX
Definition pblas.h:476
#define IMAG_PART
Definition pblas.h:140
F_VOID_FCT(* TZSCAL_T)()
Definition pblas.h:295
void Cblacs_gridinfo()
#define CCONJG
Definition PBblas.h:21
#define ALL
Definition PBblas.h:50
#define CNOCONJG
Definition PBblas.h:19
#define NOCONJG
Definition PBblas.h:45
#define CUPPER
Definition PBblas.h:26
#define CALL
Definition PBblas.h:24
#define CLOWER
Definition PBblas.h:25
#define pilaenv_
Definition PBpblas.h:44
#define CTXT_
Definition PBtools.h:38
#define ONE
Definition PBtools.h:64
#define MIN(a_, b_)
Definition PBtools.h:76
#define Mptr(a_, i_, j_, lda_, siz_)
Definition PBtools.h:132
Int PB_Cnumroc()
void PB_Cplapad()
void PB_Cplascal()
void PB_Cplasca2()
Int PB_Clcm()
#define ZERO
Definition PBtools.h:66
#define Mupcase(C)
Definition PBtools.h:83
#define DLEN_
Definition PBtools.h:48
void PB_Cdescribe()
#define TYPE
Definition clamov.c:7
char type
Definition pblas.h:331