ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
PB_CpsyrkA.c
Go to the documentation of this file.
1 /* ---------------------------------------------------------------------
2 *
3 * -- PBLAS auxiliary routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * ---------------------------------------------------------------------
9 */
10 /*
11 * Include files
12 */
13 #include "../pblas.h"
14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
18 
19 #ifdef __STDC__
20 void PB_CpsyrkA( PBTYP_T * TYPE, char * DIRECA, char * CONJUG,
21  char * UPLO, char * TRANS, int N, int K, char * ALPHA,
22  char * A, int IA, int JA, int * DESCA, char * BETA,
23  char * C, int IC, int JC, int * DESCC )
24 #else
25 void PB_CpsyrkA( TYPE, DIRECA, CONJUG, UPLO, TRANS, N, K, ALPHA, A, IA,
26  JA, DESCA, BETA, C, IC, JC, DESCC )
27 /*
28 * .. Scalar Arguments ..
29 */
30  char * CONJUG, * DIRECA, * TRANS, * UPLO;
31  int IA, IC, JA, JC, K, N;
32  char * ALPHA, * BETA;
33  PBTYP_T * TYPE;
34 /*
35 * .. Array Arguments ..
36 */
37  int * DESCA, * DESCC;
38  char * A, * C;
39 #endif
40 {
41 /*
42 * Purpose
43 * =======
44 *
45 * PB_CpsyrkA performs one of the following symmetric or Hermitian rank
46 * k operations
47 *
48 * sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ),
49 * or
50 * sub( C ) := alpha*sub( A )*conjg( sub( A )' ) + beta*sub( C ),
51 * or
52 * sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ),
53 * or
54 * sub( C ) := alpha*conjg( sub( A )' )*sub( A ) + beta*sub( C ),
55 *
56 * where
57 *
58 * sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1), and,
59 *
60 * sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1) if TRANS = 'N',
61 * A(IA:IA+K-1,JA:JA+N-1) otherwise.
62 *
63 * Alpha and beta are scalars, sub( C ) is an n by n symmetric
64 * or Hermitian submatrix and sub( A ) is an n by k submatrix in the
65 * first case and a k by n submatrix in the second case.
66 *
67 * This is the outer-product algorithm using the logical LCM hybrid
68 * and static blocking techniques. The submatrix operand sub( C ) stays
69 * in place.
70 *
71 * Notes
72 * =====
73 *
74 * A description vector is associated with each 2D block-cyclicly dis-
75 * tributed matrix. This vector stores the information required to
76 * establish the mapping between a matrix entry and its corresponding
77 * process and memory location.
78 *
79 * In the following comments, the character _ should be read as
80 * "of the distributed matrix". Let A be a generic term for any 2D
81 * block cyclicly distributed matrix. Its description vector is DESC_A:
82 *
83 * NOTATION STORED IN EXPLANATION
84 * ---------------- --------------- ------------------------------------
85 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
86 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
87 * the NPROW x NPCOL BLACS process grid
88 * A is distributed over. The context
89 * itself is global, but the handle
90 * (the integer value) may vary.
91 * M_A (global) DESCA[ M_ ] The number of rows in the distribu-
92 * ted matrix A, M_A >= 0.
93 * N_A (global) DESCA[ N_ ] The number of columns in the distri-
94 * buted matrix A, N_A >= 0.
95 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
96 * block of the matrix A, IMB_A > 0.
97 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper
98 * left block of the matrix A,
99 * INB_A > 0.
100 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
101 * bute the last M_A-IMB_A rows of A,
102 * MB_A > 0.
103 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
104 * bute the last N_A-INB_A columns of
105 * A, NB_A > 0.
106 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
107 * row of the matrix A is distributed,
108 * NPROW > RSRC_A >= 0.
109 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
110 * first column of A is distributed.
111 * NPCOL > CSRC_A >= 0.
112 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
113 * array storing the local blocks of
114 * the distributed matrix A,
115 * IF( Lc( 1, N_A ) > 0 )
116 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
117 * ELSE
118 * LLD_A >= 1.
119 *
120 * Let K be the number of rows of a matrix A starting at the global in-
121 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
122 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
123 * receive if these K rows were distributed over NPROW processes. If K
124 * is the number of columns of a matrix A starting at the global index
125 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
126 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
127 * these K columns were distributed over NPCOL processes.
128 *
129 * The values of Lr() and Lc() may be determined via a call to the func-
130 * tion PB_Cnumroc:
131 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
132 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
133 *
134 * Arguments
135 * =========
136 *
137 * TYPE (local input) pointer to a PBTYP_T structure
138 * On entry, TYPE is a pointer to a structure of type PBTYP_T,
139 * that contains type information (See pblas.h).
140 *
141 * DIRECA (global input) pointer to CHAR
142 * On entry, DIRECA specifies the direction in which the rows
143 * or columns of sub( A ) should be looped over as follows:
144 * DIRECA = 'F' or 'f' forward or increasing,
145 * DIRECA = 'B' or 'b' backward or decreasing.
146 *
147 * CONJUG (global input) pointer to CHAR
148 * On entry, CONJUG specifies whether sub( C ) is a symmetric or
149 * Hermitian submatrix operand as follows:
150 * CONJUG = 'N' or 'n' sub( C ) is symmetric,
151 * CONJUG = 'Z' or 'z' sub( C ) is Hermitian.
152 *
153 * UPLO (global input) pointer to CHAR
154 * On entry, UPLO specifies whether the local pieces of
155 * the array C containing the upper or lower triangular part
156 * of the submatrix sub( C ) are to be referenced as follows:
157 * UPLO = 'U' or 'u' Only the local pieces corresponding to
158 * the upper triangular part of the
159 * submatrix sub( C ) are referenced,
160 * UPLO = 'L' or 'l' Only the local pieces corresponding to
161 * the lower triangular part of the
162 * submatrix sub( C ) are referenced.
163 *
164 * TRANS (global input) pointer to CHAR
165 * On entry, TRANS specifies the operation to be performed as
166 * follows:
167 *
168 * TRANS = 'N' or 'n'
169 * sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ),
170 * or
171 * sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ),
172 * or
173 * sub( C ) := alpha*sub( A )*conjg( sub( A )' ) +
174 * beta*sub( C ),
175 *
176 * TRANS = 'T' or 't'
177 * sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ),
178 * or
179 * sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ),
180 *
181 * TRANS = 'C' or 'c'
182 * sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ),
183 * or
184 * sub( C ) := alpha*conjg( sub( A )' )*sub( A ) +
185 * beta*sub( C ).
186 *
187 * N (global input) INTEGER
188 * On entry, N specifies the order of the submatrix sub( C ).
189 * N must be at least zero.
190 *
191 * K (global input) INTEGER
192 * On entry, with TRANS = 'N' or 'n', K specifies the number of
193 * columns of the submatrix sub( A ), and with TRANS = 'T' or
194 * 't' or 'C' or 'c', K specifies the number of rows of the sub-
195 * matrix sub( A ). K must be at least zero.
196 *
197 * ALPHA (global input) pointer to CHAR
198 * On entry, ALPHA specifies the scalar alpha. When ALPHA is
199 * supplied as zero then the local entries of the array A
200 * corresponding to the entries of the submatrix sub( A ) need
201 * not be set on input.
202 *
203 * A (local input) pointer to CHAR
204 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is
205 * at least Lc( 1, JA+K-1 ) when TRANS = 'N' or 'n', and is at
206 * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array
207 * contains the local entries of the matrix A.
208 * Before entry with TRANS = 'N' or 'n', this array contains the
209 * local entries corresponding to the entries of the n by k sub-
210 * matrix sub( A ), otherwise the local entries corresponding to
211 * the entries of the k by n submatrix sub( A ).
212 *
213 * IA (global input) INTEGER
214 * On entry, IA specifies A's global row index, which points to
215 * the beginning of the submatrix sub( A ).
216 *
217 * JA (global input) INTEGER
218 * On entry, JA specifies A's global column index, which points
219 * to the beginning of the submatrix sub( A ).
220 *
221 * DESCA (global and local input) INTEGER array
222 * On entry, DESCA is an integer array of dimension DLEN_. This
223 * is the array descriptor for the matrix A.
224 *
225 * BETA (global input) pointer to CHAR
226 * On entry, BETA specifies the scalar beta. When BETA is
227 * supplied as zero then the local entries of the array C
228 * corresponding to the entries of the submatrix sub( C ) need
229 * not be set on input.
230 *
231 * C (local input/local output) pointer to CHAR
232 * On entry, C is an array of dimension (LLD_C, Kc), where Kc is
233 * at least Lc( 1, JC+N-1 ). Before entry, this array contains
234 * the local entries of the matrix C.
235 * Before entry with UPLO = 'U' or 'u', this array contains
236 * the local entries corresponding to the upper triangular part
237 * of the symmetric or Hermitian submatrix sub( C ), and the
238 * local entries corresponding to the strictly lower triangular
239 * of sub( C ) are not referenced. On exit, the upper triangular
240 * part of sub( C ) is overwritten by the upper triangular part
241 * of the updated submatrix.
242 * Before entry with UPLO = 'L' or 'l', this array contains
243 * the local entries corresponding to the lower triangular part
244 * of the symmetric or Hermitian submatrix sub( C ), and the
245 * local entries corresponding to the strictly upper triangular
246 * of sub( C ) are not referenced. On exit, the lower triangular
247 * part of sub( C ) is overwritten by the lower triangular part
248 * of the updated submatrix.
249 * Note that the imaginary parts of the local entries corres-
250 * ponding to the diagonal elements of sub( C ) need not be
251 * set, they are assumed to be zero, and on exit they are set
252 * to zero.
253 *
254 * IC (global input) INTEGER
255 * On entry, IC specifies C's global row index, which points to
256 * the beginning of the submatrix sub( C ).
257 *
258 * JC (global input) INTEGER
259 * On entry, JC specifies C's global column index, which points
260 * to the beginning of the submatrix sub( C ).
261 *
262 * DESCC (global and local input) INTEGER array
263 * On entry, DESCC is an integer array of dimension DLEN_. This
264 * is the array descriptor for the matrix C.
265 *
266 * -- Written on April 1, 1998 by
267 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
268 *
269 * ---------------------------------------------------------------------
270 */
271 /*
272 * .. Local Scalars ..
273 */
274  char * one;
275  int AcurrocR, Afwd, AiD, AiR, AiiD, AiiR, AinbD, AinbR, Ainb1D,
276  Ainb1R, AisR, Ald, AmyprocD, AmyprocR, AnbD, AnbR, AnpR,
277  AnprocsD, AnprocsR, ArocD, ArocR, Arocs, AsrcR, Ccol, Cii,
278  Cimb1, Cinb1, Cjj, Clcmb, Cld, Clp, Clq, Cnq0, Cmb, Cmp,
279  Cmp0, Cnb, Cnq, Crow, WACfr, WACld, WACsum, WARfr, WARld,
280  WARsum, Wkbb=0, ctxt, k, kb, kbb, l, lb, ltmp, maxp, mycol,
281  myrow, notran, npcol, nprow, p=0, size, tmp, upper;
282  GEMM_T gemm;
283  TZSYR_T tzsyrk;
284 /*
285 * .. Local Arrays ..
286 */
287  int Cd0[DLEN_], DBUFA[DLEN_], WACd0[DLEN_], WARd0[DLEN_];
288  char * Aptr = NULL, * Cptr = NULL, * WAC = NULL, * WAR = NULL;
289 /* ..
290 * .. Executable Statements ..
291 *
292 */
293 /*
294 * sub( C ) := beta * sub( C )
295 */
296  PB_Cplascal( TYPE, UPLO, CONJUG, N, N, BETA, C, IC, JC, DESCC );
297 /*
298 * Retrieve process grid information
299 */
300  Cblacs_gridinfo( ( ctxt = DESCC[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
301 
302  size = TYPE->size; one = TYPE->one; gemm = TYPE->Fgemm;
303  kb = pilaenv_( &ctxt, C2F_CHAR( &TYPE->type ) );
304 /*
305 * Compute descriptor Cd0 for sub( C )
306 */
307  PB_Cdescribe( N, N, IC, JC, DESCC, nprow, npcol, myrow, mycol, &Cii, &Cjj,
308  &Cld, &Cimb1, &Cinb1, &Cmb, &Cnb, &Crow, &Ccol, Cd0 );
309 
310  Cmp = PB_Cnumroc( N, 0, Cimb1, Cmb, myrow, Crow, nprow );
311  Cnq = PB_Cnumroc( N, 0, Cinb1, Cnb, mycol, Ccol, npcol );
312 
313  if( ( Cmp > 0 ) && ( Cnq > 0 ) )
314  {
315  Cptr = Mptr( C, Cii, Cjj, Cld, size );
316  tzsyrk = ( ( Mupcase( CONJUG[0] ) == CNOCONJG ) ? PB_Ctzsyrk :
317  PB_Ctzherk );
318 /*
319 * Computational partitioning size is computed as the product of the logical
320 * value returned by pilaenv_ and 2 * lcm( nprow, npcol ).
321 */
322  Clcmb = 2 * kb * PB_Clcm( ( Crow >= 0 ? nprow : 1 ),
323  ( Ccol >= 0 ? npcol : 1 ) );
324  }
325 /*
326 * Retrieve local information for sub( A )
327 */
328  if( ( notran = ( Mupcase( TRANS[0] ) == CNOTRAN ) ) != 0 )
329  {
330  AiR = JA; AnprocsR = npcol; AinbR = DESCA[INB_]; AnbR = DESCA[NB_];
331  AsrcR = DESCA[CSRC_];
332  }
333  else
334  {
335  AiR = IA; AnprocsR = nprow; AinbR = DESCA[IMB_]; AnbR = DESCA[MB_];
336  AsrcR = DESCA[RSRC_];
337  }
338 /*
339 * If sub( A ) only spans one process row or column, then there is no need to
340 * pack the data.
341 */
342  if( !( PB_Cspan( K, AiR, AinbR, AnbR, AsrcR, AnprocsR ) ) )
343  {
344 /*
345 * Replicate sub( A ) in process rows and columns spanned by sub( C ): WAC, WAR
346 */
347  if( notran )
348  {
349  PB_CInV( TYPE, NOCONJG, COLUMN, N, N, Cd0, K, A, IA, JA, DESCA,
350  COLUMN, &WAC, WACd0, &WACfr );
351  PB_CInV( TYPE, CONJUG, ROW, N, N, Cd0, K, WAC, 0, 0, WACd0,
352  COLUMN, &WAR, WARd0, &WARfr );
353  }
354  else
355  {
356  PB_CInV( TYPE, NOCONJG, ROW, N, N, Cd0, K, A, IA, JA, DESCA,
357  ROW, &WAR, WARd0, &WARfr );
358  PB_CInV( TYPE, CONJUG, COLUMN, N, N, Cd0, K, WAR, 0, 0, WARd0,
359  ROW, &WAC, WACd0, &WACfr );
360  }
361 /*
362 * Perform the local update if I own some data
363 */
364  if( ( Cmp > 0 ) && ( Cnq > 0 ) )
365  {
366  WACld = WACd0[LLD_]; WARld = WARd0[LLD_];
367 
368  if( Mupcase( UPLO[0] ) == CUPPER )
369  {
370  for( l = 0; l < N; l += Clcmb )
371  {
372  lb = N - l; lb = MIN( lb, Clcmb );
373  Clp = PB_Cnumroc( l, 0, Cimb1, Cmb, myrow, Crow, nprow );
374  Clq = PB_Cnumroc( l, 0, Cinb1, Cnb, mycol, Ccol, npcol );
375  Cnq0 = PB_Cnumroc( lb, l, Cinb1, Cnb, mycol, Ccol, npcol );
376  if( Clp > 0 && Cnq0 > 0 )
377  gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Clp, &Cnq0, &K,
378  ALPHA, WAC, &WACld, Mptr( WAR, 0, Clq, WARld, size ),
379  &WARld, one, Mptr( Cptr, 0, Clq, Cld, size ), &Cld );
380  PB_Cpsyr( TYPE, UPPER, lb, K, ALPHA, Mptr( WAC, Clp, 0, WACld,
381  size ), WACld, Mptr( WAR, 0, Clq, WARld, size ), WARld,
382  Cptr, l, l, Cd0, tzsyrk );
383  }
384  }
385  else
386  {
387  for( l = 0; l < N; l += Clcmb )
388  {
389  lb = N - l; ltmp = l + ( lb = MIN( lb, Clcmb ) );
390  Clp = PB_Cnumroc( l, 0, Cimb1, Cmb, myrow, Crow, nprow );
391  Clq = PB_Cnumroc( l, 0, Cinb1, Cnb, mycol, Ccol, npcol );
392  PB_Cpsyr( TYPE, LOWER, lb, K, ALPHA, Mptr( WAC, Clp, 0, WACld,
393  size ), WACld, Mptr( WAR, 0, Clq, WARld, size ), WARld,
394  Cptr, l, l, Cd0, tzsyrk );
395  Clp = PB_Cnumroc( ltmp, 0, Cimb1, Cmb, myrow, Crow, nprow );
396  Cmp0 = Cmp - Clp;
397  Cnq0 = PB_Cnumroc( lb, l, Cinb1, Cnb, mycol, Ccol, npcol );
398  if( Cmp0 > 0 && Cnq0 > 0 )
399  gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Cmp0, &Cnq0,
400  &K, ALPHA, Mptr( WAC, Clp, 0, WACld, size ), &WACld,
401  Mptr( WAR, 0, Clq, WARld, size ), &WARld, one,
402  Mptr( Cptr, Clp, Clq, Cld, size ), &Cld );
403  }
404  }
405  }
406 
407  if( WACfr ) free( WAC );
408  if( WARfr ) free( WAR );
409 
410  return;
411  }
412 /*
413 * Otherwise sub( A ) spans more than one process row or columns -> LCM hybrid
414 */
415  Afwd = ( Mupcase( DIRECA[0] ) == CFORWARD );
416  upper = ( Mupcase( UPLO [0] ) == CUPPER );
417 
418  if( notran )
419  {
420  AiD = IA; AinbD = DESCA[IMB_]; AnbD = DESCA[MB_]; Ald = DESCA[LLD_];
421  AmyprocD = myrow; AmyprocR = mycol; AnprocsD = nprow;
422  PB_Cinfog2l( IA, JA, DESCA, AnprocsD, AnprocsR, AmyprocD, AmyprocR,
423  &AiiD, &AiiR, &ArocD, &ArocR );
424  }
425  else
426  {
427  AiD = JA; AinbD = DESCA[INB_]; AnbD = DESCA[NB_]; Ald = DESCA[LLD_];
428  AmyprocD = mycol; AmyprocR = myrow; AnprocsD = npcol;
429  PB_Cinfog2l( IA, JA, DESCA, AnprocsR, AnprocsD, AmyprocR, AmyprocD,
430  &AiiR, &AiiD, &ArocR, &ArocD );
431  }
432  Ainb1D = PB_Cfirstnb( N, AiD, AinbD, AnbD );
433  Ainb1R = PB_Cfirstnb( K, AiR, AinbR, AnbR );
434  AisR = ( ( AsrcR < 0 ) || ( AnprocsR == 1 ) );
435 /*
436 * When sub( A ) is not replicated and backward pass on sub( A ), find the
437 * virtual process (p,p) owning the last row or column of sub( A ).
438 */
439  if( !( AisR ) && !( Afwd ) )
440  {
441  tmp = PB_Cindxg2p( K - 1, Ainb1R, AnbR, ArocR, ArocR, AnprocsR );
442  p = MModSub( tmp, ArocR, AnprocsR );
443  }
444 /*
445 * Allocate work space in process rows and columns spanned by sub( C )
446 */
447  PB_COutV( TYPE, COLUMN, NOINIT, N, N, Cd0, kb, &WAC, WACd0, &WACfr,
448  &WACsum );
449  PB_COutV( TYPE, ROW, NOINIT, N, N, Cd0, kb, &WAR, WARd0, &WARfr,
450  &WARsum );
451 /*
452 * Loop over the virtual process grid induced by the rows or columns of sub( A )
453 */
454  maxp = ( AisR ? 1 : AnprocsR );
455  AcurrocR = ( AisR ? -1 : MModAdd( ArocR, p, AnprocsR ) );
456  AnpR = PB_Cnumroc( K, 0, Ainb1R, AnbR, AcurrocR, ArocR, AnprocsR );
457 
458  for( k = 0; k < K; k += kb )
459  {
460  kbb = K - k; kbb = MIN( kbb, kb );
461 
462  while( Wkbb != kbb )
463  {
464 /*
465 * Ensure that the current virtual process (p,p) has something to contribute to
466 * the replicated buffers WAC and WAR.
467 */
468  while( AnpR == 0 )
469  {
470  p = ( Afwd ? MModAdd1( p, maxp ) : MModSub1( p, maxp ) );
471  AcurrocR = ( AisR ? -1 : MModAdd( ArocR, p, AnprocsR ) );
472  AnpR = PB_Cnumroc( K, 0, Ainb1R, AnbR, AcurrocR, ArocR,
473  AnprocsR );
474  }
475 /*
476 * Current virtual process (p,p) has something, find out how many rows or
477 * columns could be used: Arocs.
478 */
479  if( Wkbb == 0 ) { Arocs = ( AnpR < kbb ? AnpR : kbb ); }
480  else { Arocs = kbb - Wkbb; Arocs = MIN( Arocs, AnpR ); }
481 /*
482 * The current virtual process (p,p) has Arocs rows or columns of sub( A )
483 * to contribute, replicate the data over sub( C ).
484 */
485  if( notran )
486  {
487  if( AisR || ( AmyprocR == AcurrocR ) )
488  { Aptr = Mptr( A, AiiD, AiiR, Ald, size ); AiiR += Arocs; }
489  PB_Cdescset( DBUFA, N, Arocs, Ainb1D, Arocs, AnbD, Arocs,
490  ArocD, AcurrocR, ctxt, Ald );
491 /*
492 * Replicate Arocs columns of sub( A ) in process columns spanned by sub( C )
493 */
494  PB_CInV2( TYPE, NOCONJG, COLUMN, N, N, Cd0, Arocs, Aptr, 0, 0,
495  DBUFA, COLUMN, WAC, Wkbb, WACd0 );
496  }
497  else
498  {
499  if( AisR || ( AmyprocR == AcurrocR ) )
500  { Aptr = Mptr( A, AiiR, AiiD, Ald, size ); AiiR += Arocs; }
501  PB_Cdescset( DBUFA, Arocs, N, Arocs, Ainb1D, Arocs, AnbD,
502  AcurrocR, ArocD, ctxt, Ald );
503 /*
504 * Replicate Arocs rows of sub( A ) in process rows spanned by sub( C )
505 */
506  PB_CInV2( TYPE, NOCONJG, ROW, N, N, Cd0, Arocs, Aptr, 0, 0,
507  DBUFA, ROW, WAR, Wkbb, WARd0 );
508  }
509 /*
510 * Arocs rows or columns of sub( A ) have been replicated over sub( C ),
511 * update the number of diagonals in this virtual process as well as the
512 * number of rows or columns of sub( A ) that are in WAR or WAC.
513 */
514  AnpR -= Arocs;
515  Wkbb += Arocs;
516  }
517 
518  if( notran )
519  {
520 /*
521 * WAR := WAC'
522 */
523  PB_CInV2( TYPE, CONJUG, ROW, N, N, Cd0, kbb, WAC, 0, 0, WACd0,
524  COLUMN, WAR, 0, WARd0 );
525  }
526  else
527  {
528 /*
529 * WAC := WAR'
530 */
531  PB_CInV2( TYPE, CONJUG, COLUMN, N, N, Cd0, kbb, WAR, 0, 0, WARd0,
532  ROW, WAC, 0, WACd0 );
533  }
534 /*
535 * Perform the local update if I own some data
536 */
537  if( ( Cmp > 0 ) && ( Cnq > 0 ) )
538  {
539  WACld = WACd0[LLD_]; WARld = WARd0[LLD_];
540 
541  if( upper )
542  {
543  for( l = 0; l < N; l += Clcmb )
544  {
545  lb = N - l; lb = MIN( lb, Clcmb );
546  Clp = PB_Cnumroc( l, 0, Cimb1, Cmb, myrow, Crow, nprow );
547  Clq = PB_Cnumroc( l, 0, Cinb1, Cnb, mycol, Ccol, npcol );
548  Cnq0 = PB_Cnumroc( lb, l, Cinb1, Cnb, mycol, Ccol, npcol );
549  if( Clp > 0 && Cnq0 > 0 )
550  gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Clp, &Cnq0,
551  &kbb, ALPHA, WAC, &WACld, Mptr( WAR, 0, Clq, WARld,
552  size ), &WARld, one, Mptr( Cptr, 0, Clq, Cld, size ),
553  &Cld );
554  PB_Cpsyr( TYPE, UPPER, lb, kbb, ALPHA, Mptr( WAC, Clp, 0, WACld,
555  size ), WACld, Mptr( WAR, 0, Clq, WARld, size ), WARld,
556  Cptr, l, l, Cd0, tzsyrk );
557  }
558  }
559  else
560  {
561  for( l = 0; l < N; l += Clcmb )
562  {
563  lb = N - l; ltmp = l + ( lb = MIN( lb, Clcmb ) );
564  Clp = PB_Cnumroc( l, 0, Cimb1, Cmb, myrow, Crow, nprow );
565  Clq = PB_Cnumroc( l, 0, Cinb1, Cnb, mycol, Ccol, npcol );
566  PB_Cpsyr( TYPE, LOWER, lb, kbb, ALPHA, Mptr( WAC, Clp, 0, WACld,
567  size ), WACld, Mptr( WAR, 0, Clq, WARld, size ), WARld,
568  Cptr, l, l, Cd0, tzsyrk );
569  Clp = PB_Cnumroc( ltmp, 0, Cimb1, Cmb, myrow, Crow, nprow );
570  Cmp0 = Cmp - Clp;
571  Cnq0 = PB_Cnumroc( lb, l, Cinb1, Cnb, mycol, Ccol, npcol );
572  if( Cmp0 > 0 && Cnq0 > 0 )
573  gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Cmp0, &Cnq0,
574  &kbb, ALPHA, Mptr( WAC, Clp, 0, WACld, size ), &WACld,
575  Mptr( WAR, 0, Clq, WARld, size ), &WARld, one,
576  Mptr( Cptr, Clp, Clq, Cld, size ), &Cld );
577  }
578  }
579  }
580 
581  Wkbb = 0;
582  }
583 
584  if( WACfr ) free( WAC );
585  if( WARfr ) free( WAR );
586 /*
587 * End of PB_CpsyrkA
588 */
589 }
NOINIT
#define NOINIT
Definition: PBblas.h:62
TYPE
#define TYPE
Definition: clamov.c:7
ROW
#define ROW
Definition: PBblacs.h:46
MB_
#define MB_
Definition: PBtools.h:43
PB_Ctzsyrk
void PB_Ctzsyrk()
NB_
#define NB_
Definition: PBtools.h:44
COLUMN
#define COLUMN
Definition: PBblacs.h:45
CSRC_
#define CSRC_
Definition: PBtools.h:46
PB_Cpsyr
void PB_Cpsyr()
PB_Ctzherk
void PB_Ctzherk()
NOCONJG
#define NOCONJG
Definition: PBblas.h:45
PB_Cfirstnb
int PB_Cfirstnb()
DLEN_
#define DLEN_
Definition: PBtools.h:48
GEMM_T
F_VOID_FCT(* GEMM_T)()
Definition: pblas.h:313
NOTRAN
#define NOTRAN
Definition: PBblas.h:44
LLD_
#define LLD_
Definition: PBtools.h:47
PB_Cdescribe
void PB_Cdescribe()
CNOCONJG
#define CNOCONJG
Definition: PBblas.h:19
MModAdd
#define MModAdd(I1, I2, d)
Definition: PBtools.h:97
UPPER
#define UPPER
Definition: PBblas.h:52
IMB_
#define IMB_
Definition: PBtools.h:41
MModSub
#define MModSub(I1, I2, d)
Definition: PBtools.h:102
pilaenv_
int pilaenv_()
PB_Cplascal
void PB_Cplascal()
PB_Cdescset
void PB_Cdescset()
MModAdd1
#define MModAdd1(I, d)
Definition: PBtools.h:100
PB_Cindxg2p
int PB_Cindxg2p()
RSRC_
#define RSRC_
Definition: PBtools.h:45
CNOTRAN
#define CNOTRAN
Definition: PBblas.h:18
PB_Cinfog2l
void PB_Cinfog2l()
PB_Cnumroc
int PB_Cnumroc()
CFORWARD
#define CFORWARD
Definition: PBblas.h:38
PB_CInV
void PB_CInV()
MIN
#define MIN(a_, b_)
Definition: PBtools.h:76
INB_
#define INB_
Definition: PBtools.h:42
LOWER
#define LOWER
Definition: PBblas.h:51
PB_COutV
void PB_COutV()
C2F_CHAR
#define C2F_CHAR(a)
Definition: pblas.h:121
PB_Cspan
int PB_Cspan()
MModSub1
#define MModSub1(I, d)
Definition: PBtools.h:105
PB_CpsyrkA
void PB_CpsyrkA(PBTYP_T *TYPE, char *DIRECA, char *CONJUG, char *UPLO, char *TRANS, int N, int K, char *ALPHA, char *A, int IA, int JA, int *DESCA, char *BETA, char *C, int IC, int JC, int *DESCC)
Definition: PB_CpsyrkA.c:25
Cblacs_gridinfo
void Cblacs_gridinfo()
PBTYP_T
Definition: pblas.h:325
Mupcase
#define Mupcase(C)
Definition: PBtools.h:83
CUPPER
#define CUPPER
Definition: PBblas.h:26
PB_CInV2
void PB_CInV2()
Mptr
#define Mptr(a_, i_, j_, lda_, siz_)
Definition: PBtools.h:132
CTXT_
#define CTXT_
Definition: PBtools.h:38
TZSYR_T
void(* TZSYR_T)()
Definition: pblas.h:425
PB_Clcm
int PB_Clcm()