ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pssyr2_.c
Go to the documentation of this file.
1 /* ---------------------------------------------------------------------
2 *
3 * -- PBLAS routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * ---------------------------------------------------------------------
9 */
10 /*
11 * Include files
12 */
13 #include "pblas.h"
14 #include "PBpblas.h"
15 #include "PBtools.h"
16 #include "PBblacs.h"
17 #include "PBblas.h"
18 
19 #ifdef __STDC__
20 void pssyr2_( F_CHAR_T UPLO, int * N, float * ALPHA,
21  float * X, int * IX, int * JX, int * DESCX, int * INCX,
22  float * Y, int * IY, int * JY, int * DESCY, int * INCY,
23  float * A, int * IA, int * JA, int * DESCA )
24 #else
25 void pssyr2_( UPLO, N, ALPHA, X, IX, JX, DESCX, INCX, Y, IY, JY,
26  DESCY, INCY, A, IA, JA, DESCA )
27 /*
28 * .. Scalar Arguments ..
29 */
30  F_CHAR_T UPLO;
31  int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY,
32  * N;
33  float * ALPHA;
34 /*
35 * .. Array Arguments ..
36 */
37  int * DESCA, * DESCX, * DESCY;
38  float * A, * X, * Y;
39 #endif
40 {
41 /*
42 * Purpose
43 * =======
44 *
45 * PSSYR2 performs the symmetric rank 2 operation
46 *
47 * sub( A ) := alpha*sub( X )*sub( Y )' +
48 * alpha*sub( Y )*sub( X )' + sub( A ) ,
49 *
50 * where
51 *
52 * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1),
53 *
54 * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
55 * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and,
56 *
57 * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y,
58 * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y.
59 *
60 * Alpha is a scalar, sub( X ) and sub( Y ) are n element subvectors and
61 * sub( A ) is an n by n symmetric submatrix.
62 *
63 * Notes
64 * =====
65 *
66 * A description vector is associated with each 2D block-cyclicly dis-
67 * tributed matrix. This vector stores the information required to
68 * establish the mapping between a matrix entry and its corresponding
69 * process and memory location.
70 *
71 * In the following comments, the character _ should be read as
72 * "of the distributed matrix". Let A be a generic term for any 2D
73 * block cyclicly distributed matrix. Its description vector is DESC_A:
74 *
75 * NOTATION STORED IN EXPLANATION
76 * ---------------- --------------- ------------------------------------
77 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
78 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
79 * the NPROW x NPCOL BLACS process grid
80 * A is distributed over. The context
81 * itself is global, but the handle
82 * (the integer value) may vary.
83 * M_A (global) DESCA[ M_ ] The number of rows in the distribu-
84 * ted matrix A, M_A >= 0.
85 * N_A (global) DESCA[ N_ ] The number of columns in the distri-
86 * buted matrix A, N_A >= 0.
87 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
88 * block of the matrix A, IMB_A > 0.
89 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper
90 * left block of the matrix A,
91 * INB_A > 0.
92 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
93 * bute the last M_A-IMB_A rows of A,
94 * MB_A > 0.
95 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
96 * bute the last N_A-INB_A columns of
97 * A, NB_A > 0.
98 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
99 * row of the matrix A is distributed,
100 * NPROW > RSRC_A >= 0.
101 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
102 * first column of A is distributed.
103 * NPCOL > CSRC_A >= 0.
104 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
105 * array storing the local blocks of
106 * the distributed matrix A,
107 * IF( Lc( 1, N_A ) > 0 )
108 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
109 * ELSE
110 * LLD_A >= 1.
111 *
112 * Let K be the number of rows of a matrix A starting at the global in-
113 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
114 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
115 * receive if these K rows were distributed over NPROW processes. If K
116 * is the number of columns of a matrix A starting at the global index
117 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
118 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
119 * these K columns were distributed over NPCOL processes.
120 *
121 * The values of Lr() and Lc() may be determined via a call to the func-
122 * tion PB_Cnumroc:
123 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
124 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
125 *
126 * Arguments
127 * =========
128 *
129 * UPLO (global input) CHARACTER*1
130 * On entry, UPLO specifies whether the local pieces of
131 * the array A containing the upper or lower triangular part
132 * of the symmetric submatrix sub( A ) are to be referenced as
133 * follows:
134 *
135 * UPLO = 'U' or 'u' Only the local pieces corresponding to
136 * the upper triangular part of the
137 * symmetric submatrix sub( A ) are to be
138 * referenced,
139 *
140 * UPLO = 'L' or 'l' Only the local pieces corresponding to
141 * the lower triangular part of the
142 * symmetric submatrix sub( A ) are to be
143 * referenced.
144 *
145 * N (global input) INTEGER
146 * On entry, N specifies the order of the submatrix sub( A ).
147 * N must be at least zero.
148 *
149 * ALPHA (global input) REAL
150 * On entry, ALPHA specifies the scalar alpha. When ALPHA is
151 * supplied as zero then the local entries of the arrays X
152 * and Y corresponding to the entries of the subvectors sub( X )
153 * and sub( Y ) respectively need not be set on input.
154 *
155 * X (local input) REAL array
156 * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X
157 * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and
158 * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least
159 * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise.
160 * Before entry, this array contains the local entries of the
161 * matrix X.
162 *
163 * IX (global input) INTEGER
164 * On entry, IX specifies X's global row index, which points to
165 * the beginning of the submatrix sub( X ).
166 *
167 * JX (global input) INTEGER
168 * On entry, JX specifies X's global column index, which points
169 * to the beginning of the submatrix sub( X ).
170 *
171 * DESCX (global and local input) INTEGER array
172 * On entry, DESCX is an integer array of dimension DLEN_. This
173 * is the array descriptor for the matrix X.
174 *
175 * INCX (global input) INTEGER
176 * On entry, INCX specifies the global increment for the
177 * elements of X. Only two values of INCX are supported in
178 * this version, namely 1 and M_X. INCX must not be zero.
179 *
180 * Y (local input) REAL array
181 * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y
182 * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and
183 * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least
184 * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise.
185 * Before entry, this array contains the local entries of the
186 * matrix Y.
187 *
188 * IY (global input) INTEGER
189 * On entry, IY specifies Y's global row index, which points to
190 * the beginning of the submatrix sub( Y ).
191 *
192 * JY (global input) INTEGER
193 * On entry, JY specifies Y's global column index, which points
194 * to the beginning of the submatrix sub( Y ).
195 *
196 * DESCY (global and local input) INTEGER array
197 * On entry, DESCY is an integer array of dimension DLEN_. This
198 * is the array descriptor for the matrix Y.
199 *
200 * INCY (global input) INTEGER
201 * On entry, INCY specifies the global increment for the
202 * elements of Y. Only two values of INCY are supported in
203 * this version, namely 1 and M_Y. INCY must not be zero.
204 *
205 * A (local input/local output) REAL array
206 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is
207 * at least Lc( 1, JA+N-1 ). Before entry, this array contains
208 * the local entries of the matrix A.
209 * Before entry with UPLO = 'U' or 'u', this array contains
210 * the local entries corresponding to the upper triangular part
211 * of the symmetric submatrix sub( A ), and the local entries
212 * corresponding to the strictly lower triangular of sub( A )
213 * are not referenced. On exit, the upper triangular part of
214 * sub( A ) is overwritten by the upper triangular part of the
215 * updated submatrix.
216 * Before entry with UPLO = 'L' or 'l', this array contains
217 * the local entries corresponding to the lower triangular part
218 * of the symmetric submatrix sub( A ), and the local entries
219 * corresponding to the strictly upper triangular of sub( A )
220 * are not referenced. On exit, the lower triangular part of
221 * sub( A ) is overwritten by the lower triangular part of the
222 * updated submatrix.
223 *
224 * IA (global input) INTEGER
225 * On entry, IA specifies A's global row index, which points to
226 * the beginning of the submatrix sub( A ).
227 *
228 * JA (global input) INTEGER
229 * On entry, JA specifies A's global column index, which points
230 * to the beginning of the submatrix sub( A ).
231 *
232 * DESCA (global and local input) INTEGER array
233 * On entry, DESCA is an integer array of dimension DLEN_. This
234 * is the array descriptor for the matrix A.
235 *
236 * -- Written on April 1, 1998 by
237 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
238 *
239 * ---------------------------------------------------------------------
240 */
241 /*
242 * .. Local Scalars ..
243 */
244  char UploA;
245  int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb,
246  Amp, Amp0, Anb, Anq, Anq0, Arow, XCfr, XCld, XRfr, XRld,
247  Xi, Xj, YCfr, YCld, YRfr, YRld, Yi, Yj, ctxt, info, ione=1,
248  k, kb, ktmp, mycol, myrow, nb, npcol, nprow, size, upper;
249  PBTYP_T * type;
250 /*
251 * .. Local Arrays ..
252 */
253  int Ad [DLEN_], Ad0 [DLEN_], XCd0[DLEN_], XRd0[DLEN_], Xd[DLEN_],
254  YCd0[DLEN_], YRd0[DLEN_], Yd [DLEN_];
255  char * Aptr = NULL, * XC = NULL, * XR = NULL, * YC = NULL,
256  * YR = NULL;
257 /* ..
258 * .. Executable Statements ..
259 *
260 */
261  upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER );
262  PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad );
263  PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd );
264  PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd );
265 #ifndef NO_ARGCHK
266 /*
267 * Test the input parameters
268 */
269  Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
270  if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) )
271  {
272  if( ( !upper ) && ( UploA != CLOWER ) )
273  {
274  PB_Cwarn( ctxt, __LINE__, "PSSYR2", "Illegal UPLO = %c\n", UploA );
275  info = -1;
276  }
277  PB_Cchkvec( ctxt, "PSSYR2", "X", *N, 2, Xi, Xj, Xd, *INCX, 7, &info );
278  PB_Cchkvec( ctxt, "PSSYR2", "Y", *N, 2, Yi, Yj, Yd, *INCY, 12, &info );
279  PB_Cchkmat( ctxt, "PSSYR2", "A", *N, 2, *N, 2, Ai, Aj, Ad, 17, &info );
280  }
281  if( info ) { PB_Cabort( ctxt, "PSSYR2", info ); return; }
282 #endif
283 /*
284 * Quick return if possible
285 */
286  if( (*N == 0) || ( ALPHA[REAL_PART] == ZERO ) ) return;
287 /*
288 * Retrieve process grid information
289 */
290 #ifdef NO_ARGCHK
291  Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
292 #endif
293 /*
294 * Get type structure
295 */
296  type = PB_Cstypeset();
297 /*
298 * Compute descriptor Ad0 for sub( A )
299 */
300  PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj,
301  &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
302 /*
303 * Replicate sub( X ) in process rows (XR) and process columns (XC) spanned by
304 * sub( A )
305 */
306  if( *INCX == Xd[M_] )
307  {
308  PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj,
309  Xd, ROW, &XR, XRd0, &XRfr );
310  PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, XR, 0, 0,
311  XRd0, ROW, &XC, XCd0, &XCfr );
312  }
313  else
314  {
315  PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj,
316  Xd, COLUMN, &XC, XCd0, &XCfr );
317  PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, XC, 0, 0,
318  XCd0, COLUMN, &XR, XRd0, &XRfr );
319  }
320 /*
321 * Replicate sub( Y ) in process rows (YR) and process columns (YC) spanned by
322 * sub( A )
323 */
324  if( *INCY == Yd[M_] )
325  {
326  PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) Y), Yi, Yj,
327  Yd, ROW, &YR, YRd0, &YRfr );
328  PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, YR, 0, 0,
329  YRd0, ROW, &YC, YCd0, &YCfr );
330  }
331  else
332  {
333  PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) Y), Yi, Yj,
334  Yd, COLUMN, &YC, YCd0, &YCfr );
335  PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, YC, 0, 0,
336  YCd0, COLUMN, &YR, YRd0, &YRfr );
337  }
338 /*
339 * Local rank-2 update if I own some data
340 */
341  Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow );
342  Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol );
343 
344  if( ( Amp > 0 ) && ( Anq > 0 ) )
345  {
346  size = type->size;
347  Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size );
348 
349  XCld = XCd0[LLD_]; YCld = YCd0[LLD_];
350  XRld = XRd0[LLD_]; YRld = YRd0[LLD_];
351 /*
352 * Computational partitioning size is computed as the product of the logical
353 * value returned by pilaenv_ and 2 * lcm( nprow, npcol ).
354 */
355  nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) *
356  PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) );
357  if( upper )
358  {
359  for( k = 0; k < *N; k += nb )
360  {
361  kb = *N - k; kb = MIN( kb, nb );
362  Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
363  Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
364  Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
365  if( Akp > 0 && Anq0 > 0 )
366  {
367  sger_( &Akp, &Anq0, ((char *) ALPHA), XC, &ione,
368  Mptr( YR, 0, Akq, YRld, size ), &YRld,
369  Mptr( Aptr, 0, Akq, Ald, size ), &Ald );
370  sger_( &Akp, &Anq0, ((char *) ALPHA), YC, &ione,
371  Mptr( XR, 0, Akq, XRld, size ), &XRld,
372  Mptr( Aptr, 0, Akq, Ald, size ), &Ald );
373  }
374  PB_Cpsyr2( type, UPPER, kb, 1, ((char *) ALPHA),
375  Mptr( XC, Akp, 0, XCld, size ), XCld,
376  Mptr( XR, 0, Akq, XRld, size ), XRld,
377  Mptr( YC, Akp, 0, YCld, size ), YCld,
378  Mptr( YR, 0, Akq, YRld, size ), YRld,
379  Aptr, k, k, Ad0, PB_Ctzsyr2 );
380  }
381  }
382  else
383  {
384  for( k = 0; k < *N; k += nb )
385  {
386  kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) );
387  Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
388  Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
389  PB_Cpsyr2( type, LOWER, kb, 1, ((char *) ALPHA),
390  Mptr( XC, Akp, 0, XCld, size ), XCld,
391  Mptr( XR, 0, Akq, XRld, size ), XRld,
392  Mptr( YC, Akp, 0, YCld, size ), YCld,
393  Mptr( YR, 0, Akq, YRld, size ), YRld,
394  Aptr, k, k, Ad0, PB_Ctzsyr2 );
395  Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
396  Amp0 = Amp - Akp;
397  Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
398  if( Amp0 > 0 && Anq0 > 0 )
399  {
400  sger_( &Amp0, &Anq0, ((char *) ALPHA),
401  Mptr( XC, Akp, 0, XCld, size ), &ione,
402  Mptr( YR, 0, Akq, YRld, size ), &YRld,
403  Mptr( Aptr, Akp, Akq, Ald, size ), &Ald );
404  sger_( &Amp0, &Anq0, ((char *) ALPHA),
405  Mptr( YC, Akp, 0, YCld, size ), &ione,
406  Mptr( XR, 0, Akq, XRld, size ), &XRld,
407  Mptr( Aptr, Akp, Akq, Ald, size ), &Ald );
408  }
409  }
410  }
411  }
412  if( XRfr ) free( XR );
413  if( XCfr ) free( XC );
414  if( YRfr ) free( YR );
415  if( YCfr ) free( YC );
416 /*
417 * End of PSSYR2
418 */
419 }
M_
#define M_
Definition: PBtools.h:39
ROW
#define ROW
Definition: PBblacs.h:46
PB_Cwarn
void PB_Cwarn()
COLUMN
#define COLUMN
Definition: PBblacs.h:45
PBblacs.h
PBtools.h
PBblas.h
NOCONJG
#define NOCONJG
Definition: PBblas.h:45
PB_Ctzsyr2
void PB_Ctzsyr2()
REAL_PART
#define REAL_PART
Definition: pblas.h:135
PBTYP_T::type
char type
Definition: pblas.h:327
PBpblas.h
DLEN_
#define DLEN_
Definition: PBtools.h:48
LLD_
#define LLD_
Definition: PBtools.h:47
PB_Cpsyr2
void PB_Cpsyr2()
PB_Cdescribe
void PB_Cdescribe()
F_CHAR_T
char * F_CHAR_T
Definition: pblas.h:118
ZERO
#define ZERO
Definition: PBtools.h:66
pssyr2_
void pssyr2_(F_CHAR_T UPLO, int *N, float *ALPHA, float *X, int *IX, int *JX, int *DESCX, int *INCX, float *Y, int *IY, int *JY, int *DESCY, int *INCY, float *A, int *IA, int *JA, int *DESCA)
Definition: pssyr2_.c:25
PB_Cchkvec
void PB_Cchkvec()
UPPER
#define UPPER
Definition: PBblas.h:52
pilaenv_
int pilaenv_()
PB_Cabort
void PB_Cabort()
CLOWER
#define CLOWER
Definition: PBblas.h:25
F2C_CHAR
#define F2C_CHAR(a)
Definition: pblas.h:120
PB_CargFtoC
void PB_CargFtoC()
PBTYP_T::size
int size
Definition: pblas.h:329
PB_Cchkmat
void PB_Cchkmat()
PB_Cnumroc
int PB_Cnumroc()
PB_Cstypeset
PBTYP_T * PB_Cstypeset()
Definition: PB_Cstypeset.c:19
PB_CInV
void PB_CInV()
MIN
#define MIN(a_, b_)
Definition: PBtools.h:76
LOWER
#define LOWER
Definition: PBblas.h:51
C2F_CHAR
#define C2F_CHAR(a)
Definition: pblas.h:121
Cblacs_gridinfo
void Cblacs_gridinfo()
PBTYP_T
Definition: pblas.h:325
Mupcase
#define Mupcase(C)
Definition: PBtools.h:83
pblas.h
CUPPER
#define CUPPER
Definition: PBblas.h:26
Mptr
#define Mptr(a_, i_, j_, lda_, siz_)
Definition: PBtools.h:132
CTXT_
#define CTXT_
Definition: PBtools.h:38
sger_
F_VOID_FCT sger_()
PB_Clcm
int PB_Clcm()