ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pzatrmv_.c
Go to the documentation of this file.
1 /* ---------------------------------------------------------------------
2 *
3 * -- PBLAS routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * ---------------------------------------------------------------------
9 */
10 /*
11 * Include files
12 */
13 #include "pblas.h"
14 #include "PBpblas.h"
15 #include "PBtools.h"
16 #include "PBblacs.h"
17 #include "PBblas.h"
18 
19 #ifdef __STDC__
20 void pzatrmv_( F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * N,
21  double * ALPHA,
22  double * A, int * IA, int * JA, int * DESCA,
23  double * X, int * IX, int * JX, int * DESCX,
24  int * INCX, double * BETA,
25  double * Y, int * IY, int * JY, int * DESCY,
26  int * INCY )
27 #else
28 void pzatrmv_( UPLO, TRANS, DIAG, N, ALPHA, A, IA, JA, DESCA, X, IX,
29  JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY )
30 /*
31 * .. Scalar Arguments ..
32 */
33  F_CHAR_T DIAG, TRANS, UPLO;
34  int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY,
35  * N;
36  double * ALPHA, * BETA;
37 /*
38 * .. Array Arguments ..
39 */
40  int * DESCA, * DESCX, * DESCY;
41  double * A, * X, * Y;
42 #endif
43 {
44 /*
45 * Purpose
46 * =======
47 *
48 * PZATRMV performs one of the matrix-vector operations
49 *
50 * sub( Y ) := abs( alpha )*abs( sub( A ) )*abs( sub( X ) ) +
51 * abs( beta*sub( Y ) ),
52 * or
53 *
54 * sub( Y ) := abs( alpha )*abs( sub( A )' )*abs( sub( X ) ) +
55 * abs( beta*sub( Y ) ),
56 * or
57 *
58 * sub( Y ) := abs( alpha )*abs( conjg( sub( A )' ) )*abs( sub( X ) )
59 * + abs( beta*sub( Y ) ),
60 *
61 * where
62 *
63 * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1),
64 *
65 * sub( X ) denotes X(IX:IX,JX:JX+N-1), if INCX = M_X,
66 * X(IX:IX+N-1,JX:JX), if INCX = 1 and INCX <> M_X,
67 * and,
68 *
69 * sub( Y ) denotes Y(IY:IY,JY:JY+N-1), if INCY = M_Y,
70 * Y(IY:IY+N-1,JY:JY), if INCY = 1 and INCY <> M_Y.
71 *
72 * Alpha and beta are real scalars, sub( Y ) is a real subvector,
73 * sub( X ) is a subvector and sub( A ) is an n by n triangular subma-
74 * trix.
75 *
76 * Notes
77 * =====
78 *
79 * A description vector is associated with each 2D block-cyclicly dis-
80 * tributed matrix. This vector stores the information required to
81 * establish the mapping between a matrix entry and its corresponding
82 * process and memory location.
83 *
84 * In the following comments, the character _ should be read as
85 * "of the distributed matrix". Let A be a generic term for any 2D
86 * block cyclicly distributed matrix. Its description vector is DESC_A:
87 *
88 * NOTATION STORED IN EXPLANATION
89 * ---------------- --------------- ------------------------------------
90 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
91 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
92 * the NPROW x NPCOL BLACS process grid
93 * A is distributed over. The context
94 * itself is global, but the handle
95 * (the integer value) may vary.
96 * M_A (global) DESCA[ M_ ] The number of rows in the distribu-
97 * ted matrix A, M_A >= 0.
98 * N_A (global) DESCA[ N_ ] The number of columns in the distri-
99 * buted matrix A, N_A >= 0.
100 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
101 * block of the matrix A, IMB_A > 0.
102 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper
103 * left block of the matrix A,
104 * INB_A > 0.
105 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
106 * bute the last M_A-IMB_A rows of A,
107 * MB_A > 0.
108 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
109 * bute the last N_A-INB_A columns of
110 * A, NB_A > 0.
111 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
112 * row of the matrix A is distributed,
113 * NPROW > RSRC_A >= 0.
114 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
115 * first column of A is distributed.
116 * NPCOL > CSRC_A >= 0.
117 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
118 * array storing the local blocks of
119 * the distributed matrix A,
120 * IF( Lc( 1, N_A ) > 0 )
121 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
122 * ELSE
123 * LLD_A >= 1.
124 *
125 * Let K be the number of rows of a matrix A starting at the global in-
126 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
127 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
128 * receive if these K rows were distributed over NPROW processes. If K
129 * is the number of columns of a matrix A starting at the global index
130 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
131 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
132 * these K columns were distributed over NPCOL processes.
133 *
134 * The values of Lr() and Lc() may be determined via a call to the func-
135 * tion PB_Cnumroc:
136 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
137 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
138 *
139 * Arguments
140 * =========
141 *
142 * UPLO (global input) CHARACTER*1
143 * On entry, UPLO specifies whether the submatrix sub( A ) is
144 * an upper or lower triangular submatrix as follows:
145 *
146 * UPLO = 'U' or 'u' sub( A ) is an upper triangular
147 * submatrix,
148 *
149 * UPLO = 'L' or 'l' sub( A ) is a lower triangular
150 * submatrix.
151 *
152 * TRANS (global input) CHARACTER*1
153 * On entry, TRANS specifies the operation to be performed as
154 * follows:
155 *
156 * TRANS = 'N' or 'n'
157 * sub( Y ) := |alpha|*|sub( A )|*|sub( X )| +
158 * |beta*sub( Y )|.
159 *
160 * TRANS = 'T' or 't'
161 * sub( Y ) := |alpha|*|sub( A )'|*|sub( X )| +
162 * |beta*sub( Y )|.
163 *
164 * TRANS = 'C' or 'c'
165 * sub( Y ) := |alpha|*|conjg( sub( A )' )|*|sub( X )| +
166 * |beta*sub( Y )|.
167 *
168 * DIAG (global input) CHARACTER*1
169 * On entry, DIAG specifies whether or not sub( A ) is unit
170 * triangular as follows:
171 *
172 * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian-
173 * gular,
174 *
175 * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri-
176 * angular.
177 *
178 * N (global input) INTEGER
179 * On entry, N specifies the order of the submatrix sub( A ).
180 * N must be at least zero.
181 *
182 * ALPHA (global input) DOUBLE PRECISION
183 * On entry, ALPHA specifies the scalar alpha. When ALPHA is
184 * supplied as zero then the local entries of the arrays A
185 * and X corresponding to the entries of the submatrix sub( A )
186 * and the subvector sub( X ) need not be set on input.
187 *
188 * A (local input) COMPLEX*16 array
189 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is
190 * at least Lc( 1, JA+N-1 ). Before entry, this array contains
191 * the local entries of the matrix A.
192 * Before entry with UPLO = 'U' or 'u', this array contains the
193 * local entries corresponding to the entries of the upper tri-
194 * angular submatrix sub( A ), and the local entries correspon-
195 * ding to the entries of the strictly lower triangular part of
196 * the submatrix sub( A ) are not referenced.
197 * Before entry with UPLO = 'L' or 'l', this array contains the
198 * local entries corresponding to the entries of the lower tri-
199 * angular submatrix sub( A ), and the local entries correspon-
200 * ding to the entries of the strictly upper triangular part of
201 * the submatrix sub( A ) are not referenced.
202 * Note that when DIAG = 'U' or 'u', the local entries corres-
203 * ponding to the diagonal elements of the submatrix sub( A )
204 * are not referenced either, but are assumed to be unity.
205 *
206 * IA (global input) INTEGER
207 * On entry, IA specifies A's global row index, which points to
208 * the beginning of the submatrix sub( A ).
209 *
210 * JA (global input) INTEGER
211 * On entry, JA specifies A's global column index, which points
212 * to the beginning of the submatrix sub( A ).
213 *
214 * DESCA (global and local input) INTEGER array
215 * On entry, DESCA is an integer array of dimension DLEN_. This
216 * is the array descriptor for the matrix A.
217 *
218 * X (local input) COMPLEX*16 array
219 * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X
220 * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and
221 * MAX( 1, Lr( 1, IX+Lx-1 ) ) otherwise, and, Kx is at least
222 * Lc( 1, JX+Lx-1 ) when INCX = M_X and Lc( 1, JX ) otherwise.
223 * Lx is N when TRANS = 'N' or 'n' and M otherwise. Before en-
224 * try, this array contains the local entries of the matrix X.
225 *
226 * IX (global input) INTEGER
227 * On entry, IX specifies X's global row index, which points to
228 * the beginning of the submatrix sub( X ).
229 *
230 * JX (global input) INTEGER
231 * On entry, JX specifies X's global column index, which points
232 * to the beginning of the submatrix sub( X ).
233 *
234 * DESCX (global and local input) INTEGER array
235 * On entry, DESCX is an integer array of dimension DLEN_. This
236 * is the array descriptor for the matrix X.
237 *
238 * INCX (global input) INTEGER
239 * On entry, INCX specifies the global increment for the
240 * elements of X. Only two values of INCX are supported in
241 * this version, namely 1 and M_X. INCX must not be zero.
242 *
243 * BETA (global input) DOUBLE PRECISION
244 * On entry, BETA specifies the scalar beta. When BETA is
245 * supplied as zero then the local entries of the array Y
246 * corresponding to the entries of the subvector sub( Y ) need
247 * not be set on input.
248 *
249 * Y (local input/local output) DOUBLE PRECISION array
250 * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y
251 * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and
252 * MAX( 1, Lr( 1, IY+Ly-1 ) ) otherwise, and, Ky is at least
253 * Lc( 1, JY+Ly-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise.
254 * Ly is M when TRANS = 'N' or 'n' and N otherwise. Before en-
255 * try, this array contains the local entries of the matrix Y.
256 * On exit, sub( Y ) is overwritten by the updated subvector.
257 *
258 * IY (global input) INTEGER
259 * On entry, IY specifies Y's global row index, which points to
260 * the beginning of the submatrix sub( Y ).
261 *
262 * JY (global input) INTEGER
263 * On entry, JY specifies Y's global column index, which points
264 * to the beginning of the submatrix sub( Y ).
265 *
266 * DESCY (global and local input) INTEGER array
267 * On entry, DESCY is an integer array of dimension DLEN_. This
268 * is the array descriptor for the matrix Y.
269 *
270 * INCY (global input) INTEGER
271 * On entry, INCY specifies the global increment for the
272 * elements of Y. Only two values of INCY are supported in
273 * this version, namely 1 and M_Y. INCY must not be zero.
274 *
275 * -- Written on April 1, 1998 by
276 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
277 *
278 * ---------------------------------------------------------------------
279 */
280 /*
281 * .. Local Scalars ..
282 */
283  char DiagA, TranOp, UploA, Yroc, * one, top;
284  int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb,
285  Amp, Amp0, Anb, Anq, Anq0, Arow, XAfr, XAld, Xi, Xj, YAfr,
286  YAld, YApbY, YAsum, Ycol, Yi, Yii, Yj, Yjj, Yld, Ynp, Ynq,
287  Yrow, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb,
288  notran, nounit, npcol, nprow, size, upper, usiz;
289  double * tbeta;
290  PBTYP_T * type, * utyp;
291 /*
292 * .. Local Arrays ..
293 */
294  int Ad [DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_],
295  Yd [DLEN_];
296  char * Aptr = NULL, * XA = NULL, * YA = NULL;
297 /* ..
298 * .. Executable Statements ..
299 *
300 */
301  upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER );
302  notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN );
303  nounit = ( ( DiagA = Mupcase( F2C_CHAR( DIAG )[0] ) ) == CNOUNIT );
304  PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad );
305  PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd );
306  PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd );
307 #ifndef NO_ARGCHK
308 /*
309 * Test the input parameters
310 */
311  Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
312  if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) )
313  {
314  if( ( !upper ) && ( UploA != CLOWER ) )
315  {
316  PB_Cwarn( ctxt, __LINE__, "PZATRMV", "Illegal UPLO = %c\n", UploA );
317  info = -1;
318  }
319  else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) )
320  {
321  PB_Cwarn( ctxt, __LINE__, "PZATRMV", "Illegal TRANS = %c\n", TranOp );
322  info = -2;
323  }
324  else if( ( !nounit ) && ( DiagA != CUNIT ) )
325  {
326  PB_Cwarn( ctxt, __LINE__, "PZATRMV", "Illegal DIAG = %c\n", DiagA );
327  info = -3;
328  }
329  PB_Cchkmat( ctxt, "PZATRMV", "A", *N, 4, *N, 4, Ai, Aj, Ad, 9, &info );
330  PB_Cchkvec( ctxt, "PZATRMV", "X", *N, 4, Xi, Xj, Xd, *INCX, 13, &info );
331  PB_Cchkvec( ctxt, "PZATRMV", "Y", *N, 4, Yi, Yj, Yd, *INCY, 19, &info );
332  }
333  if( info ) { PB_Cabort( ctxt, "PZATRMV", info ); return; }
334 #endif
335 /*
336 * Quick return if possible
337 */
338  if( ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) &&
339  ( BETA [REAL_PART] == ONE ) ) ) return;
340 /*
341 * Retrieve process grid information
342 */
343 #ifdef NO_ARGCHK
344  Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
345 #endif
346 /*
347 * Get type structure
348 */
349  type = PB_Cztypeset(); size = type->size;
350  utyp = PB_Cdtypeset(); usiz = type->usiz;
351 /*
352 * and when alpha is zero
353 */
354  if( ALPHA[REAL_PART] == ZERO )
355  {
356 /*
357 * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol
358 */
359  PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj,
360  &Yrow, &Ycol );
361 
362  if( *INCY == Yd[M_] )
363  {
364 /*
365 * sub( Y ) resides in (a) process row(s)
366 */
367  if( ( myrow == Yrow ) || ( Yrow < 0 ) )
368  {
369 /*
370 * Make sure I own some data and scale sub( Y )
371 */
372  Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_],
373  npcol );
374  if( Ynq > 0 )
375  {
376  Yld = Yd[LLD_];
377  dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii,
378  Yjj, Yld, usiz ), &Yld );
379  }
380  }
381  }
382  else
383  {
384 /*
385 * sub( Y ) resides in (a) process column(s)
386 */
387  if( ( mycol == Ycol ) || ( Ycol < 0 ) )
388  {
389 /*
390 * Make sure I own some data and scale sub( Y )
391 */
392  Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_],
393  nprow );
394  if( Ynp > 0 )
395  {
396  dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii,
397  Yjj, Yd[LLD_], usiz ), INCY );
398  }
399  }
400  }
401  return;
402  }
403 /*
404 * Compute descriptor Ad0 for sub( A )
405 */
406  PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj,
407  &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
408 
409  Yroc = ( *INCY == Yd[M_] ? CROW : CCOLUMN );
410 
411  if( notran )
412  {
413 /*
414 * Reuse sub( Y ) and/or create vector YA in process columns spanned by sub( A )
415 */
416  PB_CInOutV( utyp, COLUMN, *N, *N, Ad0, 1, ((char *) BETA), ((char *) Y),
417  Yi, Yj, Yd, &Yroc, ((char**)(&tbeta)), &YA, YAd, &YAfr,
418  &YAsum, &YApbY );
419 /*
420 * Replicate sub( X ) in process rows spanned by sub( A ) -> XA
421 */
422  PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd,
423  ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr );
424  }
425  else
426  {
427 /*
428 * Reuse sub( Y ) and/or create vector YA in process rows spanned by sub( A )
429 */
430  PB_CInOutV( utyp, ROW, *N, *N, Ad0, 1, ((char *) BETA), ((char *) Y),
431  Yi, Yj, Yd, &Yroc, ((char**)(&tbeta)), &YA, YAd, &YAfr,
432  &YAsum, &YApbY );
433 /*
434 * Replicate sub( X ) in process columns spanned by sub( A ) -> XA
435 */
436  PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd,
437  ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr );
438  }
439 
440  one = type->one;
441 /*
442 * Local matrix-vector multiply iff I own some data
443 */
444  Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_];
445  Acol = Ad0[CSRC_]; Arow = Ad0[RSRC_];
446  Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow );
447  Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol );
448 
449  if( ( Amp > 0 ) && ( Anq > 0 ) )
450  {
451  Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size );
452 
453  XAld = XAd[LLD_]; YAld = YAd[LLD_];
454 /*
455 * Scale YA in the case sub( Y ) has been reused
456 */
457  if( notran && !( YApbY ) )
458  {
459 /*
460 * YA resides in (a) process column(s)
461 */
462  if( ( mycol == YAd[CSRC_] ) || ( YAd[CSRC_] < 0 ) )
463  {
464 /*
465 * Make sure I own some data and scale YA
466 */
467  if( Amp > 0 )
468  dascal_( &Amp, ((char *) tbeta), YA, &ione );
469  }
470  }
471  else if( !( notran ) && !( YApbY ) )
472  {
473 /*
474 * YA resides in (a) process row(s)
475 */
476  if( ( myrow == YAd[RSRC_] ) || ( YAd[RSRC_] < 0 ) )
477  {
478 /*
479 * Make sure I own some data and scale YA
480 */
481  if( Anq > 0 )
482  dascal_( &Anq, ((char *) tbeta), YA, &YAld );
483  }
484  }
485 /*
486 * Computational partitioning size is computed as the product of the logical
487 * value returned by pilaenv_ and 2 * lcm( nprow, npcol )
488 */
489  nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &utyp->type ) ) *
490  PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) );
491 
492  if( upper )
493  {
494  if( notran )
495  {
496  for( k = 0; k < *N; k += nb )
497  {
498  kb = *N - k; kb = MIN( kb, nb );
499  Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
500  Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
501  Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
502  if( Akp > 0 && Anq0 > 0 )
503  {
504  zagemv_( TRANS, &Akp, &Anq0, ((char *) ALPHA),
505  Mptr( Aptr, 0, Akq, Ald, size ), &Ald,
506  Mptr( XA, 0, Akq, XAld, size ), &XAld, one,
507  YA, &ione );
508  }
509  PB_Cptrm( type, utyp, LEFT, UPPER, &TranOp, &DiagA, kb, 1,
510  ((char *) ALPHA), Aptr, k, k, Ad0,
511  Mptr( XA, 0, Akq, XAld, size ), XAld,
512  Mptr( YA, Akp, 0, YAld, usiz ), YAld, PB_Ctzatrmv );
513  }
514  }
515  else
516  {
517  for( k = 0; k < *N; k += nb )
518  {
519  kb = *N - k; kb = MIN( kb, nb );
520  Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
521  Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
522  Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
523  if( Akp > 0 && Anq0 > 0 )
524  {
525  zagemv_( TRANS, &Akp, &Anq0, ((char *) ALPHA),
526  Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XA, &ione,
527  one, Mptr( YA, 0, Akq, YAld, usiz ), &YAld );
528  }
529  PB_Cptrm( type, utyp, LEFT, UPPER, &TranOp, &DiagA, kb, 1,
530  ((char *) ALPHA), Aptr, k, k, Ad0,
531  Mptr( XA, Akp, 0, XAld, size ), XAld,
532  Mptr( YA, 0, Akq, YAld, usiz ), YAld, PB_Ctzatrmv );
533  }
534  }
535  }
536  else
537  {
538  if( notran )
539  {
540  for( k = 0; k < *N; k += nb )
541  {
542  kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) );
543  Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
544  Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
545  PB_Cptrm( type, utyp, LEFT, LOWER, &TranOp, &DiagA, kb, 1,
546  ((char *) ALPHA), Aptr, k, k, Ad0,
547  Mptr( XA, 0, Akq, XAld, size ), XAld,
548  Mptr( YA, Akp, 0, YAld, usiz ), YAld, PB_Ctzatrmv );
549  Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
550  Amp0 = Amp - Akp;
551  Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
552  if( Amp0 > 0 && Anq0 > 0 )
553  {
554  zagemv_( TRANS, &Amp0, &Anq0, ((char *) ALPHA),
555  Mptr( Aptr, Akp, Akq, Ald, size ), &Ald,
556  Mptr( XA, 0, Akq, XAld, size ), &XAld, one,
557  Mptr( YA, Akp, 0, YAld, usiz ), &ione );
558  }
559  }
560  }
561  else
562  {
563  for( k = 0; k < *N; k += nb )
564  {
565  kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) );
566  Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
567  Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
568  PB_Cptrm( type, utyp, LEFT, LOWER, &TranOp, &DiagA, kb, 1,
569  ((char *) ALPHA), Aptr, k, k, Ad0,
570  Mptr( XA, Akp, 0, XAld, size ), XAld,
571  Mptr( YA, 0, Akq, YAld, usiz ), YAld, PB_Ctzatrmv );
572  Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
573  Amp0 = Amp - Akp;
574  Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
575  if( Amp0 > 0 && Anq0 > 0 )
576  {
577  zagemv_( TRANS, &Amp0, &Anq0, one,
578  Mptr( Aptr, Akp, Akq, Ald, size ), &Ald,
579  Mptr( XA, Akp, 0, XAld, size ), &ione, one,
580  Mptr( YA, 0, Akq, YAld, usiz ), &YAld );
581  }
582  }
583  }
584  }
585  }
586  if( XAfr ) free( XA );
587 
588  if( notran )
589  {
590 /*
591 * Combine the partial column results into YA
592 */
593  if( YAsum && ( Amp > 0 ) )
594  {
595  top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET );
596  Cdgsum2d( ctxt, ROW, &top, Amp, 1, YA, YAd[LLD_], myrow,
597  YAd[CSRC_] );
598  }
599  }
600  else
601  {
602 /*
603 * Combine the partial row results into YA
604 */
605  if( YAsum && ( Anq > 0 ) )
606  {
607  top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET );
608  Cdgsum2d( ctxt, COLUMN, &top, 1, Anq, YA, YAd[LLD_], YAd[RSRC_],
609  mycol );
610  }
611  }
612 /*
613 * sub( Y ) := beta * sub( Y ) + YA (if necessary)
614 */
615  if( YApbY )
616  {
617 /*
618 * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol
619 */
620  PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow,
621  &Ycol );
622 
623  if( *INCY == Yd[M_] )
624  {
625 /*
626 * sub( Y ) resides in (a) process row(s)
627 */
628  if( ( myrow == Yrow ) || ( Yrow < 0 ) )
629  {
630 /*
631 * Make sure I own some data and scale sub( Y )
632 */
633  Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_],
634  npcol );
635  if( Ynq > 0 )
636  {
637  Yld = Yd[LLD_];
638  dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii,
639  Yjj, Yld, usiz ), &Yld );
640  }
641  }
642  }
643  else
644  {
645 /*
646 * sub( Y ) resides in (a) process column(s)
647 */
648  if( ( mycol == Ycol ) || ( Ycol < 0 ) )
649  {
650 /*
651 * Make sure I own some data and scale sub( Y )
652 */
653  Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_],
654  nprow );
655  if( Ynp > 0 )
656  {
657  dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii,
658  Yjj, Yd[LLD_], usiz ), INCY );
659  }
660  }
661  }
662 
663  if( notran )
664  {
665  PB_Cpaxpby( utyp, NOCONJG, *N, 1, one, YA, 0, 0, YAd, COLUMN, one,
666  ((char *) Y), Yi, Yj, Yd, &Yroc );
667  }
668  else
669  {
670  PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YA, 0, 0, YAd, ROW, one,
671  ((char *) Y), Yi, Yj, Yd, &Yroc );
672  }
673  }
674  if( YAfr ) free( YA );
675 /*
676 * End of PZATRMV
677 */
678 }
M_
#define M_
Definition: PBtools.h:39
ROW
#define ROW
Definition: PBblacs.h:46
MB_
#define MB_
Definition: PBtools.h:43
PB_Cpaxpby
void PB_Cpaxpby()
PB_Cwarn
void PB_Cwarn()
NB_
#define NB_
Definition: PBtools.h:44
COLUMN
#define COLUMN
Definition: PBblacs.h:45
CSRC_
#define CSRC_
Definition: PBtools.h:46
PBblacs.h
PBtools.h
PBblas.h
CCOTRAN
#define CCOTRAN
Definition: PBblas.h:22
NOCONJG
#define NOCONJG
Definition: PBblas.h:45
REAL_PART
#define REAL_PART
Definition: pblas.h:135
PBTYP_T::type
char type
Definition: pblas.h:327
PBpblas.h
PB_Cztypeset
PBTYP_T * PB_Cztypeset()
Definition: PB_Cztypeset.c:19
DLEN_
#define DLEN_
Definition: PBtools.h:48
CUNIT
#define CUNIT
Definition: PBblas.h:32
PBTYP_T::usiz
int usiz
Definition: pblas.h:328
LLD_
#define LLD_
Definition: PBtools.h:47
PB_Cdescribe
void PB_Cdescribe()
PB_Cdtypeset
PBTYP_T * PB_Cdtypeset()
Definition: PB_Cdtypeset.c:19
F_CHAR_T
char * F_CHAR_T
Definition: pblas.h:118
CROW
#define CROW
Definition: PBblacs.h:21
ZERO
#define ZERO
Definition: PBtools.h:66
PB_Cchkvec
void PB_Cchkvec()
UPPER
#define UPPER
Definition: PBblas.h:52
dascal_
F_VOID_FCT dascal_()
IMB_
#define IMB_
Definition: PBtools.h:41
pilaenv_
int pilaenv_()
PB_Cabort
void PB_Cabort()
CLOWER
#define CLOWER
Definition: PBblas.h:25
LEFT
#define LEFT
Definition: PBblas.h:55
F2C_CHAR
#define F2C_CHAR(a)
Definition: pblas.h:120
PB_Ctzatrmv
void PB_Ctzatrmv()
TOP_GET
#define TOP_GET
Definition: PBblacs.h:50
PB_Ctop
char * PB_Ctop()
ONE
#define ONE
Definition: PBtools.h:64
RSRC_
#define RSRC_
Definition: PBtools.h:45
CNOTRAN
#define CNOTRAN
Definition: PBblas.h:18
PBTYP_T::one
char * one
Definition: pblas.h:331
PB_CargFtoC
void PB_CargFtoC()
zagemv_
F_VOID_FCT zagemv_()
COMBINE
#define COMBINE
Definition: PBblacs.h:49
PBTYP_T::size
int size
Definition: pblas.h:329
PB_Cinfog2l
void PB_Cinfog2l()
PB_Cchkmat
void PB_Cchkmat()
PB_Cnumroc
int PB_Cnumroc()
Cdgsum2d
void Cdgsum2d()
PB_CInV
void PB_CInV()
PB_CInOutV
void PB_CInOutV()
PB_Cptrm
void PB_Cptrm()
MIN
#define MIN(a_, b_)
Definition: PBtools.h:76
CCOLUMN
#define CCOLUMN
Definition: PBblacs.h:20
INB_
#define INB_
Definition: PBtools.h:42
LOWER
#define LOWER
Definition: PBblas.h:51
C2F_CHAR
#define C2F_CHAR(a)
Definition: pblas.h:121
pzatrmv_
void pzatrmv_(F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int *N, double *ALPHA, double *A, int *IA, int *JA, int *DESCA, double *X, int *IX, int *JX, int *DESCX, int *INCX, double *BETA, double *Y, int *IY, int *JY, int *DESCY, int *INCY)
Definition: pzatrmv_.c:28
Cblacs_gridinfo
void Cblacs_gridinfo()
PBTYP_T
Definition: pblas.h:325
Mupcase
#define Mupcase(C)
Definition: PBtools.h:83
pblas.h
CUPPER
#define CUPPER
Definition: PBblas.h:26
CTRAN
#define CTRAN
Definition: PBblas.h:20
Mptr
#define Mptr(a_, i_, j_, lda_, siz_)
Definition: PBtools.h:132
CTXT_
#define CTXT_
Definition: PBtools.h:38
CNOUNIT
#define CNOUNIT
Definition: PBblas.h:33
PB_Clcm
int PB_Clcm()