SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pssyr2_.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "pblas.h"
14#include "PBpblas.h"
15#include "PBtools.h"
16#include "PBblacs.h"
17#include "PBblas.h"
18
19#ifdef __STDC__
20void pssyr2_( F_CHAR_T UPLO, Int * N, float * ALPHA,
21 float * X, Int * IX, Int * JX, Int * DESCX, Int * INCX,
22 float * Y, Int * IY, Int * JY, Int * DESCY, Int * INCY,
23 float * A, Int * IA, Int * JA, Int * DESCA )
24#else
25void pssyr2_( UPLO, N, ALPHA, X, IX, JX, DESCX, INCX, Y, IY, JY,
26 DESCY, INCY, A, IA, JA, DESCA )
27/*
28* .. Scalar Arguments ..
29*/
30 F_CHAR_T UPLO;
31 Int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY,
32 * N;
33 float * ALPHA;
34/*
35* .. Array Arguments ..
36*/
37 Int * DESCA, * DESCX, * DESCY;
38 float * A, * X, * Y;
39#endif
40{
41/*
42* Purpose
43* =======
44*
45* PSSYR2 performs the symmetric rank 2 operation
46*
47* sub( A ) := alpha*sub( X )*sub( Y )' +
48* alpha*sub( Y )*sub( X )' + sub( A ) ,
49*
50* where
51*
52* sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1),
53*
54* sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
55* X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and,
56*
57* sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y,
58* Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y.
59*
60* Alpha is a scalar, sub( X ) and sub( Y ) are n element subvectors and
61* sub( A ) is an n by n symmetric submatrix.
62*
63* Notes
64* =====
65*
66* A description vector is associated with each 2D block-cyclicly dis-
67* tributed matrix. This vector stores the information required to
68* establish the mapping between a matrix entry and its corresponding
69* process and memory location.
70*
71* In the following comments, the character _ should be read as
72* "of the distributed matrix". Let A be a generic term for any 2D
73* block cyclicly distributed matrix. Its description vector is DESC_A:
74*
75* NOTATION STORED IN EXPLANATION
76* ---------------- --------------- ------------------------------------
77* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
78* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
79* the NPROW x NPCOL BLACS process grid
80* A is distributed over. The context
81* itself is global, but the handle
82* (the integer value) may vary.
83* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
84* ted matrix A, M_A >= 0.
85* N_A (global) DESCA[ N_ ] The number of columns in the distri-
86* buted matrix A, N_A >= 0.
87* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
88* block of the matrix A, IMB_A > 0.
89* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
90* left block of the matrix A,
91* INB_A > 0.
92* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
93* bute the last M_A-IMB_A rows of A,
94* MB_A > 0.
95* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
96* bute the last N_A-INB_A columns of
97* A, NB_A > 0.
98* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
99* row of the matrix A is distributed,
100* NPROW > RSRC_A >= 0.
101* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
102* first column of A is distributed.
103* NPCOL > CSRC_A >= 0.
104* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
105* array storing the local blocks of
106* the distributed matrix A,
107* IF( Lc( 1, N_A ) > 0 )
108* LLD_A >= MAX( 1, Lr( 1, M_A ) )
109* ELSE
110* LLD_A >= 1.
111*
112* Let K be the number of rows of a matrix A starting at the global in-
113* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
114* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
115* receive if these K rows were distributed over NPROW processes. If K
116* is the number of columns of a matrix A starting at the global index
117* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
118* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
119* these K columns were distributed over NPCOL processes.
120*
121* The values of Lr() and Lc() may be determined via a call to the func-
122* tion PB_Cnumroc:
123* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
124* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
125*
126* Arguments
127* =========
128*
129* UPLO (global input) CHARACTER*1
130* On entry, UPLO specifies whether the local pieces of
131* the array A containing the upper or lower triangular part
132* of the symmetric submatrix sub( A ) are to be referenced as
133* follows:
134*
135* UPLO = 'U' or 'u' Only the local pieces corresponding to
136* the upper triangular part of the
137* symmetric submatrix sub( A ) are to be
138* referenced,
139*
140* UPLO = 'L' or 'l' Only the local pieces corresponding to
141* the lower triangular part of the
142* symmetric submatrix sub( A ) are to be
143* referenced.
144*
145* N (global input) INTEGER
146* On entry, N specifies the order of the submatrix sub( A ).
147* N must be at least zero.
148*
149* ALPHA (global input) REAL
150* On entry, ALPHA specifies the scalar alpha. When ALPHA is
151* supplied as zero then the local entries of the arrays X
152* and Y corresponding to the entries of the subvectors sub( X )
153* and sub( Y ) respectively need not be set on input.
154*
155* X (local input) REAL array
156* On entry, X is an array of dimension (LLD_X, Kx), where LLD_X
157* is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and
158* MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least
159* Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise.
160* Before entry, this array contains the local entries of the
161* matrix X.
162*
163* IX (global input) INTEGER
164* On entry, IX specifies X's global row index, which points to
165* the beginning of the submatrix sub( X ).
166*
167* JX (global input) INTEGER
168* On entry, JX specifies X's global column index, which points
169* to the beginning of the submatrix sub( X ).
170*
171* DESCX (global and local input) INTEGER array
172* On entry, DESCX is an integer array of dimension DLEN_. This
173* is the array descriptor for the matrix X.
174*
175* INCX (global input) INTEGER
176* On entry, INCX specifies the global increment for the
177* elements of X. Only two values of INCX are supported in
178* this version, namely 1 and M_X. INCX must not be zero.
179*
180* Y (local input) REAL array
181* On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y
182* is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and
183* MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least
184* Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise.
185* Before entry, this array contains the local entries of the
186* matrix Y.
187*
188* IY (global input) INTEGER
189* On entry, IY specifies Y's global row index, which points to
190* the beginning of the submatrix sub( Y ).
191*
192* JY (global input) INTEGER
193* On entry, JY specifies Y's global column index, which points
194* to the beginning of the submatrix sub( Y ).
195*
196* DESCY (global and local input) INTEGER array
197* On entry, DESCY is an integer array of dimension DLEN_. This
198* is the array descriptor for the matrix Y.
199*
200* INCY (global input) INTEGER
201* On entry, INCY specifies the global increment for the
202* elements of Y. Only two values of INCY are supported in
203* this version, namely 1 and M_Y. INCY must not be zero.
204*
205* A (local input/local output) REAL array
206* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
207* at least Lc( 1, JA+N-1 ). Before entry, this array contains
208* the local entries of the matrix A.
209* Before entry with UPLO = 'U' or 'u', this array contains
210* the local entries corresponding to the upper triangular part
211* of the symmetric submatrix sub( A ), and the local entries
212* corresponding to the strictly lower triangular of sub( A )
213* are not referenced. On exit, the upper triangular part of
214* sub( A ) is overwritten by the upper triangular part of the
215* updated submatrix.
216* Before entry with UPLO = 'L' or 'l', this array contains
217* the local entries corresponding to the lower triangular part
218* of the symmetric submatrix sub( A ), and the local entries
219* corresponding to the strictly upper triangular of sub( A )
220* are not referenced. On exit, the lower triangular part of
221* sub( A ) is overwritten by the lower triangular part of the
222* updated submatrix.
223*
224* IA (global input) INTEGER
225* On entry, IA specifies A's global row index, which points to
226* the beginning of the submatrix sub( A ).
227*
228* JA (global input) INTEGER
229* On entry, JA specifies A's global column index, which points
230* to the beginning of the submatrix sub( A ).
231*
232* DESCA (global and local input) INTEGER array
233* On entry, DESCA is an integer array of dimension DLEN_. This
234* is the array descriptor for the matrix A.
235*
236* -- Written on April 1, 1998 by
237* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
238*
239* ---------------------------------------------------------------------
240*/
241/*
242* .. Local Scalars ..
243*/
244 char UploA;
245 Int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb,
246 Amp, Amp0, Anb, Anq, Anq0, Arow, XCfr, XCld, XRfr, XRld,
247 Xi, Xj, YCfr, YCld, YRfr, YRld, Yi, Yj, ctxt, info, ione=1,
248 k, kb, ktmp, mycol, myrow, nb, npcol, nprow, size, upper;
249 PBTYP_T * type;
250/*
251* .. Local Arrays ..
252*/
253 Int Ad [DLEN_], Ad0 [DLEN_], XCd0[DLEN_], XRd0[DLEN_], Xd[DLEN_],
254 YCd0[DLEN_], YRd0[DLEN_], Yd [DLEN_];
255 char * Aptr = NULL, * XC = NULL, * XR = NULL, * YC = NULL,
256 * YR = NULL;
257/* ..
258* .. Executable Statements ..
259*
260*/
261 upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER );
262 PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad );
263 PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd );
264 PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd );
265#ifndef NO_ARGCHK
266/*
267* Test the input parameters
268*/
269 Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
270 if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) )
271 {
272 if( ( !upper ) && ( UploA != CLOWER ) )
273 {
274 PB_Cwarn( ctxt, __LINE__, "PSSYR2", "Illegal UPLO = %c\n", UploA );
275 info = -1;
276 }
277 PB_Cchkvec( ctxt, "PSSYR2", "X", *N, 2, Xi, Xj, Xd, *INCX, 7, &info );
278 PB_Cchkvec( ctxt, "PSSYR2", "Y", *N, 2, Yi, Yj, Yd, *INCY, 12, &info );
279 PB_Cchkmat( ctxt, "PSSYR2", "A", *N, 2, *N, 2, Ai, Aj, Ad, 17, &info );
280 }
281 if( info ) { PB_Cabort( ctxt, "PSSYR2", info ); return; }
282#endif
283/*
284* Quick return if possible
285*/
286 if( (*N == 0) || ( ALPHA[REAL_PART] == ZERO ) ) return;
287/*
288* Retrieve process grid information
289*/
290#ifdef NO_ARGCHK
291 Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
292#endif
293/*
294* Get type structure
295*/
296 type = PB_Cstypeset();
297/*
298* Compute descriptor Ad0 for sub( A )
299*/
300 PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj,
301 &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
302/*
303* Replicate sub( X ) in process rows (XR) and process columns (XC) spanned by
304* sub( A )
305*/
306 if( *INCX == Xd[M_] )
307 {
308 PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj,
309 Xd, ROW, &XR, XRd0, &XRfr );
310 PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, XR, 0, 0,
311 XRd0, ROW, &XC, XCd0, &XCfr );
312 }
313 else
314 {
315 PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj,
316 Xd, COLUMN, &XC, XCd0, &XCfr );
317 PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, XC, 0, 0,
318 XCd0, COLUMN, &XR, XRd0, &XRfr );
319 }
320/*
321* Replicate sub( Y ) in process rows (YR) and process columns (YC) spanned by
322* sub( A )
323*/
324 if( *INCY == Yd[M_] )
325 {
326 PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) Y), Yi, Yj,
327 Yd, ROW, &YR, YRd0, &YRfr );
328 PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, YR, 0, 0,
329 YRd0, ROW, &YC, YCd0, &YCfr );
330 }
331 else
332 {
333 PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) Y), Yi, Yj,
334 Yd, COLUMN, &YC, YCd0, &YCfr );
335 PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, YC, 0, 0,
336 YCd0, COLUMN, &YR, YRd0, &YRfr );
337 }
338/*
339* Local rank-2 update if I own some data
340*/
341 Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow );
342 Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol );
343
344 if( ( Amp > 0 ) && ( Anq > 0 ) )
345 {
346 size = type->size;
347 Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size );
348
349 XCld = XCd0[LLD_]; YCld = YCd0[LLD_];
350 XRld = XRd0[LLD_]; YRld = YRd0[LLD_];
351/*
352* Computational partitioning size is computed as the product of the logical
353* value returned by pilaenv_ and 2 * lcm( nprow, npcol ).
354*/
355 nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) *
356 PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) );
357 if( upper )
358 {
359 for( k = 0; k < *N; k += nb )
360 {
361 kb = *N - k; kb = MIN( kb, nb );
362 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
363 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
364 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
365 if( Akp > 0 && Anq0 > 0 )
366 {
367 sger_( &Akp, &Anq0, ((char *) ALPHA), XC, &ione,
368 Mptr( YR, 0, Akq, YRld, size ), &YRld,
369 Mptr( Aptr, 0, Akq, Ald, size ), &Ald );
370 sger_( &Akp, &Anq0, ((char *) ALPHA), YC, &ione,
371 Mptr( XR, 0, Akq, XRld, size ), &XRld,
372 Mptr( Aptr, 0, Akq, Ald, size ), &Ald );
373 }
374 PB_Cpsyr2( type, UPPER, kb, 1, ((char *) ALPHA),
375 Mptr( XC, Akp, 0, XCld, size ), XCld,
376 Mptr( XR, 0, Akq, XRld, size ), XRld,
377 Mptr( YC, Akp, 0, YCld, size ), YCld,
378 Mptr( YR, 0, Akq, YRld, size ), YRld,
379 Aptr, k, k, Ad0, PB_Ctzsyr2 );
380 }
381 }
382 else
383 {
384 for( k = 0; k < *N; k += nb )
385 {
386 kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) );
387 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
388 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
389 PB_Cpsyr2( type, LOWER, kb, 1, ((char *) ALPHA),
390 Mptr( XC, Akp, 0, XCld, size ), XCld,
391 Mptr( XR, 0, Akq, XRld, size ), XRld,
392 Mptr( YC, Akp, 0, YCld, size ), YCld,
393 Mptr( YR, 0, Akq, YRld, size ), YRld,
394 Aptr, k, k, Ad0, PB_Ctzsyr2 );
395 Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
396 Amp0 = Amp - Akp;
397 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
398 if( Amp0 > 0 && Anq0 > 0 )
399 {
400 sger_( &Amp0, &Anq0, ((char *) ALPHA),
401 Mptr( XC, Akp, 0, XCld, size ), &ione,
402 Mptr( YR, 0, Akq, YRld, size ), &YRld,
403 Mptr( Aptr, Akp, Akq, Ald, size ), &Ald );
404 sger_( &Amp0, &Anq0, ((char *) ALPHA),
405 Mptr( YC, Akp, 0, YCld, size ), &ione,
406 Mptr( XR, 0, Akq, XRld, size ), &XRld,
407 Mptr( Aptr, Akp, Akq, Ald, size ), &Ald );
408 }
409 }
410 }
411 }
412 if( XRfr ) free( XR );
413 if( XCfr ) free( XC );
414 if( YRfr ) free( YR );
415 if( YCfr ) free( YC );
416/*
417* End of PSSYR2
418*/
419}
#define Int
Definition Bconfig.h:22
#define REAL_PART
Definition pblas.h:139
#define F2C_CHAR(a)
Definition pblas.h:124
#define C2F_CHAR(a)
Definition pblas.h:125
char * F_CHAR_T
Definition pblas.h:122
#define COLUMN
Definition PBblacs.h:45
#define ROW
Definition PBblacs.h:46
void Cblacs_gridinfo()
#define sger_
Definition PBblas.h:159
#define NOCONJG
Definition PBblas.h:45
#define CUPPER
Definition PBblas.h:26
#define LOWER
Definition PBblas.h:51
#define UPPER
Definition PBblas.h:52
#define CLOWER
Definition PBblas.h:25
#define pssyr2_
Definition PBpblas.h:141
#define pilaenv_
Definition PBpblas.h:44
#define CTXT_
Definition PBtools.h:38
void PB_Cabort()
void PB_Cchkvec()
void PB_Cchkmat()
#define MIN(a_, b_)
Definition PBtools.h:76
#define Mptr(a_, i_, j_, lda_, siz_)
Definition PBtools.h:132
void PB_Cwarn()
#define LLD_
Definition PBtools.h:47
Int PB_Cnumroc()
void PB_CInV()
PBTYP_T * PB_Cstypeset()
#define M_
Definition PBtools.h:39
void PB_CargFtoC()
void PB_Ctzsyr2()
Int PB_Clcm()
#define ZERO
Definition PBtools.h:66
void PB_Cpsyr2()
#define Mupcase(C)
Definition PBtools.h:83
#define DLEN_
Definition PBtools.h:48
void PB_Cdescribe()
char type
Definition pblas.h:331
Int size
Definition pblas.h:333