SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pzatrmv_.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "pblas.h"
14#include "PBpblas.h"
15#include "PBtools.h"
16#include "PBblacs.h"
17#include "PBblas.h"
18
19#ifdef __STDC__
20void pzatrmv_( F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, Int * N,
21 double * ALPHA,
22 double * A, Int * IA, Int * JA, Int * DESCA,
23 double * X, Int * IX, Int * JX, Int * DESCX,
24 Int * INCX, double * BETA,
25 double * Y, Int * IY, Int * JY, Int * DESCY,
26 Int * INCY )
27#else
28void pzatrmv_( UPLO, TRANS, DIAG, N, ALPHA, A, IA, JA, DESCA, X, IX,
29 JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY )
30/*
31* .. Scalar Arguments ..
32*/
33 F_CHAR_T DIAG, TRANS, UPLO;
34 Int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY,
35 * N;
36 double * ALPHA, * BETA;
37/*
38* .. Array Arguments ..
39*/
40 Int * DESCA, * DESCX, * DESCY;
41 double * A, * X, * Y;
42#endif
43{
44/*
45* Purpose
46* =======
47*
48* PZATRMV performs one of the matrix-vector operations
49*
50* sub( Y ) := abs( alpha )*abs( sub( A ) )*abs( sub( X ) ) +
51* abs( beta*sub( Y ) ),
52* or
53*
54* sub( Y ) := abs( alpha )*abs( sub( A )' )*abs( sub( X ) ) +
55* abs( beta*sub( Y ) ),
56* or
57*
58* sub( Y ) := abs( alpha )*abs( conjg( sub( A )' ) )*abs( sub( X ) )
59* + abs( beta*sub( Y ) ),
60*
61* where
62*
63* sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1),
64*
65* sub( X ) denotes X(IX:IX,JX:JX+N-1), if INCX = M_X,
66* X(IX:IX+N-1,JX:JX), if INCX = 1 and INCX <> M_X,
67* and,
68*
69* sub( Y ) denotes Y(IY:IY,JY:JY+N-1), if INCY = M_Y,
70* Y(IY:IY+N-1,JY:JY), if INCY = 1 and INCY <> M_Y.
71*
72* Alpha and beta are real scalars, sub( Y ) is a real subvector,
73* sub( X ) is a subvector and sub( A ) is an n by n triangular subma-
74* trix.
75*
76* Notes
77* =====
78*
79* A description vector is associated with each 2D block-cyclicly dis-
80* tributed matrix. This vector stores the information required to
81* establish the mapping between a matrix entry and its corresponding
82* process and memory location.
83*
84* In the following comments, the character _ should be read as
85* "of the distributed matrix". Let A be a generic term for any 2D
86* block cyclicly distributed matrix. Its description vector is DESC_A:
87*
88* NOTATION STORED IN EXPLANATION
89* ---------------- --------------- ------------------------------------
90* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
91* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
92* the NPROW x NPCOL BLACS process grid
93* A is distributed over. The context
94* itself is global, but the handle
95* (the integer value) may vary.
96* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
97* ted matrix A, M_A >= 0.
98* N_A (global) DESCA[ N_ ] The number of columns in the distri-
99* buted matrix A, N_A >= 0.
100* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
101* block of the matrix A, IMB_A > 0.
102* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
103* left block of the matrix A,
104* INB_A > 0.
105* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
106* bute the last M_A-IMB_A rows of A,
107* MB_A > 0.
108* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
109* bute the last N_A-INB_A columns of
110* A, NB_A > 0.
111* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
112* row of the matrix A is distributed,
113* NPROW > RSRC_A >= 0.
114* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
115* first column of A is distributed.
116* NPCOL > CSRC_A >= 0.
117* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
118* array storing the local blocks of
119* the distributed matrix A,
120* IF( Lc( 1, N_A ) > 0 )
121* LLD_A >= MAX( 1, Lr( 1, M_A ) )
122* ELSE
123* LLD_A >= 1.
124*
125* Let K be the number of rows of a matrix A starting at the global in-
126* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
127* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
128* receive if these K rows were distributed over NPROW processes. If K
129* is the number of columns of a matrix A starting at the global index
130* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
131* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
132* these K columns were distributed over NPCOL processes.
133*
134* The values of Lr() and Lc() may be determined via a call to the func-
135* tion PB_Cnumroc:
136* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
137* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
138*
139* Arguments
140* =========
141*
142* UPLO (global input) CHARACTER*1
143* On entry, UPLO specifies whether the submatrix sub( A ) is
144* an upper or lower triangular submatrix as follows:
145*
146* UPLO = 'U' or 'u' sub( A ) is an upper triangular
147* submatrix,
148*
149* UPLO = 'L' or 'l' sub( A ) is a lower triangular
150* submatrix.
151*
152* TRANS (global input) CHARACTER*1
153* On entry, TRANS specifies the operation to be performed as
154* follows:
155*
156* TRANS = 'N' or 'n'
157* sub( Y ) := |alpha|*|sub( A )|*|sub( X )| +
158* |beta*sub( Y )|.
159*
160* TRANS = 'T' or 't'
161* sub( Y ) := |alpha|*|sub( A )'|*|sub( X )| +
162* |beta*sub( Y )|.
163*
164* TRANS = 'C' or 'c'
165* sub( Y ) := |alpha|*|conjg( sub( A )' )|*|sub( X )| +
166* |beta*sub( Y )|.
167*
168* DIAG (global input) CHARACTER*1
169* On entry, DIAG specifies whether or not sub( A ) is unit
170* triangular as follows:
171*
172* DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian-
173* gular,
174*
175* DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri-
176* angular.
177*
178* N (global input) INTEGER
179* On entry, N specifies the order of the submatrix sub( A ).
180* N must be at least zero.
181*
182* ALPHA (global input) DOUBLE PRECISION
183* On entry, ALPHA specifies the scalar alpha. When ALPHA is
184* supplied as zero then the local entries of the arrays A
185* and X corresponding to the entries of the submatrix sub( A )
186* and the subvector sub( X ) need not be set on input.
187*
188* A (local input) COMPLEX*16 array
189* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
190* at least Lc( 1, JA+N-1 ). Before entry, this array contains
191* the local entries of the matrix A.
192* Before entry with UPLO = 'U' or 'u', this array contains the
193* local entries corresponding to the entries of the upper tri-
194* angular submatrix sub( A ), and the local entries correspon-
195* ding to the entries of the strictly lower triangular part of
196* the submatrix sub( A ) are not referenced.
197* Before entry with UPLO = 'L' or 'l', this array contains the
198* local entries corresponding to the entries of the lower tri-
199* angular submatrix sub( A ), and the local entries correspon-
200* ding to the entries of the strictly upper triangular part of
201* the submatrix sub( A ) are not referenced.
202* Note that when DIAG = 'U' or 'u', the local entries corres-
203* ponding to the diagonal elements of the submatrix sub( A )
204* are not referenced either, but are assumed to be unity.
205*
206* IA (global input) INTEGER
207* On entry, IA specifies A's global row index, which points to
208* the beginning of the submatrix sub( A ).
209*
210* JA (global input) INTEGER
211* On entry, JA specifies A's global column index, which points
212* to the beginning of the submatrix sub( A ).
213*
214* DESCA (global and local input) INTEGER array
215* On entry, DESCA is an integer array of dimension DLEN_. This
216* is the array descriptor for the matrix A.
217*
218* X (local input) COMPLEX*16 array
219* On entry, X is an array of dimension (LLD_X, Kx), where LLD_X
220* is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and
221* MAX( 1, Lr( 1, IX+Lx-1 ) ) otherwise, and, Kx is at least
222* Lc( 1, JX+Lx-1 ) when INCX = M_X and Lc( 1, JX ) otherwise.
223* Lx is N when TRANS = 'N' or 'n' and M otherwise. Before en-
224* try, this array contains the local entries of the matrix X.
225*
226* IX (global input) INTEGER
227* On entry, IX specifies X's global row index, which points to
228* the beginning of the submatrix sub( X ).
229*
230* JX (global input) INTEGER
231* On entry, JX specifies X's global column index, which points
232* to the beginning of the submatrix sub( X ).
233*
234* DESCX (global and local input) INTEGER array
235* On entry, DESCX is an integer array of dimension DLEN_. This
236* is the array descriptor for the matrix X.
237*
238* INCX (global input) INTEGER
239* On entry, INCX specifies the global increment for the
240* elements of X. Only two values of INCX are supported in
241* this version, namely 1 and M_X. INCX must not be zero.
242*
243* BETA (global input) DOUBLE PRECISION
244* On entry, BETA specifies the scalar beta. When BETA is
245* supplied as zero then the local entries of the array Y
246* corresponding to the entries of the subvector sub( Y ) need
247* not be set on input.
248*
249* Y (local input/local output) DOUBLE PRECISION array
250* On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y
251* is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and
252* MAX( 1, Lr( 1, IY+Ly-1 ) ) otherwise, and, Ky is at least
253* Lc( 1, JY+Ly-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise.
254* Ly is M when TRANS = 'N' or 'n' and N otherwise. Before en-
255* try, this array contains the local entries of the matrix Y.
256* On exit, sub( Y ) is overwritten by the updated subvector.
257*
258* IY (global input) INTEGER
259* On entry, IY specifies Y's global row index, which points to
260* the beginning of the submatrix sub( Y ).
261*
262* JY (global input) INTEGER
263* On entry, JY specifies Y's global column index, which points
264* to the beginning of the submatrix sub( Y ).
265*
266* DESCY (global and local input) INTEGER array
267* On entry, DESCY is an integer array of dimension DLEN_. This
268* is the array descriptor for the matrix Y.
269*
270* INCY (global input) INTEGER
271* On entry, INCY specifies the global increment for the
272* elements of Y. Only two values of INCY are supported in
273* this version, namely 1 and M_Y. INCY must not be zero.
274*
275* -- Written on April 1, 1998 by
276* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
277*
278* ---------------------------------------------------------------------
279*/
280/*
281* .. Local Scalars ..
282*/
283 char DiagA, TranOp, UploA, Yroc, * one, top;
284 Int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb,
285 Amp, Amp0, Anb, Anq, Anq0, Arow, XAfr, XAld, Xi, Xj, YAfr,
286 YAld, YApbY, YAsum, Ycol, Yi, Yii, Yj, Yjj, Yld, Ynp, Ynq,
287 Yrow, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb,
288 notran, nounit, npcol, nprow, size, upper, usiz;
289 double * tbeta;
290 PBTYP_T * type, * utyp;
291/*
292* .. Local Arrays ..
293*/
294 Int Ad [DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_],
295 Yd [DLEN_];
296 char * Aptr = NULL, * XA = NULL, * YA = NULL;
297/* ..
298* .. Executable Statements ..
299*
300*/
301 upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER );
302 notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN );
303 nounit = ( ( DiagA = Mupcase( F2C_CHAR( DIAG )[0] ) ) == CNOUNIT );
304 PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad );
305 PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd );
306 PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd );
307#ifndef NO_ARGCHK
308/*
309* Test the input parameters
310*/
311 Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
312 if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) )
313 {
314 if( ( !upper ) && ( UploA != CLOWER ) )
315 {
316 PB_Cwarn( ctxt, __LINE__, "PZATRMV", "Illegal UPLO = %c\n", UploA );
317 info = -1;
318 }
319 else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) )
320 {
321 PB_Cwarn( ctxt, __LINE__, "PZATRMV", "Illegal TRANS = %c\n", TranOp );
322 info = -2;
323 }
324 else if( ( !nounit ) && ( DiagA != CUNIT ) )
325 {
326 PB_Cwarn( ctxt, __LINE__, "PZATRMV", "Illegal DIAG = %c\n", DiagA );
327 info = -3;
328 }
329 PB_Cchkmat( ctxt, "PZATRMV", "A", *N, 4, *N, 4, Ai, Aj, Ad, 9, &info );
330 PB_Cchkvec( ctxt, "PZATRMV", "X", *N, 4, Xi, Xj, Xd, *INCX, 13, &info );
331 PB_Cchkvec( ctxt, "PZATRMV", "Y", *N, 4, Yi, Yj, Yd, *INCY, 19, &info );
332 }
333 if( info ) { PB_Cabort( ctxt, "PZATRMV", info ); return; }
334#endif
335/*
336* Quick return if possible
337*/
338 if( ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) &&
339 ( BETA [REAL_PART] == ONE ) ) ) return;
340/*
341* Retrieve process grid information
342*/
343#ifdef NO_ARGCHK
344 Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
345#endif
346/*
347* Get type structure
348*/
349 type = PB_Cztypeset(); size = type->size;
350 utyp = PB_Cdtypeset(); usiz = type->usiz;
351/*
352* and when alpha is zero
353*/
354 if( ALPHA[REAL_PART] == ZERO )
355 {
356/*
357* Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol
358*/
359 PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj,
360 &Yrow, &Ycol );
361
362 if( *INCY == Yd[M_] )
363 {
364/*
365* sub( Y ) resides in (a) process row(s)
366*/
367 if( ( myrow == Yrow ) || ( Yrow < 0 ) )
368 {
369/*
370* Make sure I own some data and scale sub( Y )
371*/
372 Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_],
373 npcol );
374 if( Ynq > 0 )
375 {
376 Yld = Yd[LLD_];
377 dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii,
378 Yjj, Yld, usiz ), &Yld );
379 }
380 }
381 }
382 else
383 {
384/*
385* sub( Y ) resides in (a) process column(s)
386*/
387 if( ( mycol == Ycol ) || ( Ycol < 0 ) )
388 {
389/*
390* Make sure I own some data and scale sub( Y )
391*/
392 Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_],
393 nprow );
394 if( Ynp > 0 )
395 {
396 dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii,
397 Yjj, Yd[LLD_], usiz ), INCY );
398 }
399 }
400 }
401 return;
402 }
403/*
404* Compute descriptor Ad0 for sub( A )
405*/
406 PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj,
407 &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
408
409 Yroc = ( *INCY == Yd[M_] ? CROW : CCOLUMN );
410
411 if( notran )
412 {
413/*
414* Reuse sub( Y ) and/or create vector YA in process columns spanned by sub( A )
415*/
416 PB_CInOutV( utyp, COLUMN, *N, *N, Ad0, 1, ((char *) BETA), ((char *) Y),
417 Yi, Yj, Yd, &Yroc, ((char**)(&tbeta)), &YA, YAd, &YAfr,
418 &YAsum, &YApbY );
419/*
420* Replicate sub( X ) in process rows spanned by sub( A ) -> XA
421*/
422 PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd,
423 ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr );
424 }
425 else
426 {
427/*
428* Reuse sub( Y ) and/or create vector YA in process rows spanned by sub( A )
429*/
430 PB_CInOutV( utyp, ROW, *N, *N, Ad0, 1, ((char *) BETA), ((char *) Y),
431 Yi, Yj, Yd, &Yroc, ((char**)(&tbeta)), &YA, YAd, &YAfr,
432 &YAsum, &YApbY );
433/*
434* Replicate sub( X ) in process columns spanned by sub( A ) -> XA
435*/
436 PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd,
437 ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr );
438 }
439
440 one = type->one;
441/*
442* Local matrix-vector multiply iff I own some data
443*/
444 Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_];
445 Acol = Ad0[CSRC_]; Arow = Ad0[RSRC_];
446 Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow );
447 Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol );
448
449 if( ( Amp > 0 ) && ( Anq > 0 ) )
450 {
451 Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size );
452
453 XAld = XAd[LLD_]; YAld = YAd[LLD_];
454/*
455* Scale YA in the case sub( Y ) has been reused
456*/
457 if( notran && !( YApbY ) )
458 {
459/*
460* YA resides in (a) process column(s)
461*/
462 if( ( mycol == YAd[CSRC_] ) || ( YAd[CSRC_] < 0 ) )
463 {
464/*
465* Make sure I own some data and scale YA
466*/
467 if( Amp > 0 )
468 dascal_( &Amp, ((char *) tbeta), YA, &ione );
469 }
470 }
471 else if( !( notran ) && !( YApbY ) )
472 {
473/*
474* YA resides in (a) process row(s)
475*/
476 if( ( myrow == YAd[RSRC_] ) || ( YAd[RSRC_] < 0 ) )
477 {
478/*
479* Make sure I own some data and scale YA
480*/
481 if( Anq > 0 )
482 dascal_( &Anq, ((char *) tbeta), YA, &YAld );
483 }
484 }
485/*
486* Computational partitioning size is computed as the product of the logical
487* value returned by pilaenv_ and 2 * lcm( nprow, npcol )
488*/
489 nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &utyp->type ) ) *
490 PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) );
491
492 if( upper )
493 {
494 if( notran )
495 {
496 for( k = 0; k < *N; k += nb )
497 {
498 kb = *N - k; kb = MIN( kb, nb );
499 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
500 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
501 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
502 if( Akp > 0 && Anq0 > 0 )
503 {
504 zagemv_( TRANS, &Akp, &Anq0, ((char *) ALPHA),
505 Mptr( Aptr, 0, Akq, Ald, size ), &Ald,
506 Mptr( XA, 0, Akq, XAld, size ), &XAld, one,
507 YA, &ione );
508 }
509 PB_Cptrm( type, utyp, LEFT, UPPER, &TranOp, &DiagA, kb, 1,
510 ((char *) ALPHA), Aptr, k, k, Ad0,
511 Mptr( XA, 0, Akq, XAld, size ), XAld,
512 Mptr( YA, Akp, 0, YAld, usiz ), YAld, PB_Ctzatrmv );
513 }
514 }
515 else
516 {
517 for( k = 0; k < *N; k += nb )
518 {
519 kb = *N - k; kb = MIN( kb, nb );
520 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
521 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
522 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
523 if( Akp > 0 && Anq0 > 0 )
524 {
525 zagemv_( TRANS, &Akp, &Anq0, ((char *) ALPHA),
526 Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XA, &ione,
527 one, Mptr( YA, 0, Akq, YAld, usiz ), &YAld );
528 }
529 PB_Cptrm( type, utyp, LEFT, UPPER, &TranOp, &DiagA, kb, 1,
530 ((char *) ALPHA), Aptr, k, k, Ad0,
531 Mptr( XA, Akp, 0, XAld, size ), XAld,
532 Mptr( YA, 0, Akq, YAld, usiz ), YAld, PB_Ctzatrmv );
533 }
534 }
535 }
536 else
537 {
538 if( notran )
539 {
540 for( k = 0; k < *N; k += nb )
541 {
542 kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) );
543 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
544 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
545 PB_Cptrm( type, utyp, LEFT, LOWER, &TranOp, &DiagA, kb, 1,
546 ((char *) ALPHA), Aptr, k, k, Ad0,
547 Mptr( XA, 0, Akq, XAld, size ), XAld,
548 Mptr( YA, Akp, 0, YAld, usiz ), YAld, PB_Ctzatrmv );
549 Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
550 Amp0 = Amp - Akp;
551 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
552 if( Amp0 > 0 && Anq0 > 0 )
553 {
554 zagemv_( TRANS, &Amp0, &Anq0, ((char *) ALPHA),
555 Mptr( Aptr, Akp, Akq, Ald, size ), &Ald,
556 Mptr( XA, 0, Akq, XAld, size ), &XAld, one,
557 Mptr( YA, Akp, 0, YAld, usiz ), &ione );
558 }
559 }
560 }
561 else
562 {
563 for( k = 0; k < *N; k += nb )
564 {
565 kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) );
566 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
567 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
568 PB_Cptrm( type, utyp, LEFT, LOWER, &TranOp, &DiagA, kb, 1,
569 ((char *) ALPHA), Aptr, k, k, Ad0,
570 Mptr( XA, Akp, 0, XAld, size ), XAld,
571 Mptr( YA, 0, Akq, YAld, usiz ), YAld, PB_Ctzatrmv );
572 Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
573 Amp0 = Amp - Akp;
574 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
575 if( Amp0 > 0 && Anq0 > 0 )
576 {
577 zagemv_( TRANS, &Amp0, &Anq0, one,
578 Mptr( Aptr, Akp, Akq, Ald, size ), &Ald,
579 Mptr( XA, Akp, 0, XAld, size ), &ione, one,
580 Mptr( YA, 0, Akq, YAld, usiz ), &YAld );
581 }
582 }
583 }
584 }
585 }
586 if( XAfr ) free( XA );
587
588 if( notran )
589 {
590/*
591* Combine the partial column results into YA
592*/
593 if( YAsum && ( Amp > 0 ) )
594 {
595 top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET );
596 Cdgsum2d( ctxt, ROW, &top, Amp, 1, YA, YAd[LLD_], myrow,
597 YAd[CSRC_] );
598 }
599 }
600 else
601 {
602/*
603* Combine the partial row results into YA
604*/
605 if( YAsum && ( Anq > 0 ) )
606 {
607 top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET );
608 Cdgsum2d( ctxt, COLUMN, &top, 1, Anq, YA, YAd[LLD_], YAd[RSRC_],
609 mycol );
610 }
611 }
612/*
613* sub( Y ) := beta * sub( Y ) + YA (if necessary)
614*/
615 if( YApbY )
616 {
617/*
618* Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol
619*/
620 PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow,
621 &Ycol );
622
623 if( *INCY == Yd[M_] )
624 {
625/*
626* sub( Y ) resides in (a) process row(s)
627*/
628 if( ( myrow == Yrow ) || ( Yrow < 0 ) )
629 {
630/*
631* Make sure I own some data and scale sub( Y )
632*/
633 Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_],
634 npcol );
635 if( Ynq > 0 )
636 {
637 Yld = Yd[LLD_];
638 dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii,
639 Yjj, Yld, usiz ), &Yld );
640 }
641 }
642 }
643 else
644 {
645/*
646* sub( Y ) resides in (a) process column(s)
647*/
648 if( ( mycol == Ycol ) || ( Ycol < 0 ) )
649 {
650/*
651* Make sure I own some data and scale sub( Y )
652*/
653 Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_],
654 nprow );
655 if( Ynp > 0 )
656 {
657 dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii,
658 Yjj, Yd[LLD_], usiz ), INCY );
659 }
660 }
661 }
662
663 if( notran )
664 {
665 PB_Cpaxpby( utyp, NOCONJG, *N, 1, one, YA, 0, 0, YAd, COLUMN, one,
666 ((char *) Y), Yi, Yj, Yd, &Yroc );
667 }
668 else
669 {
670 PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YA, 0, 0, YAd, ROW, one,
671 ((char *) Y), Yi, Yj, Yd, &Yroc );
672 }
673 }
674 if( YAfr ) free( YA );
675/*
676* End of PZATRMV
677*/
678}
#define Int
Definition Bconfig.h:22
#define REAL_PART
Definition pblas.h:139
#define F2C_CHAR(a)
Definition pblas.h:124
#define C2F_CHAR(a)
Definition pblas.h:125
char * F_CHAR_T
Definition pblas.h:122
#define CCOLUMN
Definition PBblacs.h:20
#define TOP_GET
Definition PBblacs.h:50
#define COLUMN
Definition PBblacs.h:45
#define COMBINE
Definition PBblacs.h:49
#define CROW
Definition PBblacs.h:21
#define ROW
Definition PBblacs.h:46
void Cblacs_gridinfo()
void Cdgsum2d()
#define LEFT
Definition PBblas.h:55
#define CNOUNIT
Definition PBblas.h:33
#define NOCONJG
Definition PBblas.h:45
#define CUPPER
Definition PBblas.h:26
#define CNOTRAN
Definition PBblas.h:18
#define CTRAN
Definition PBblas.h:20
#define LOWER
Definition PBblas.h:51
#define CCOTRAN
Definition PBblas.h:22
#define UPPER
Definition PBblas.h:52
#define CLOWER
Definition PBblas.h:25
#define CUNIT
Definition PBblas.h:32
#define pilaenv_
Definition PBpblas.h:44
#define pzatrmv_
Definition PBpblas.h:122
#define CTXT_
Definition PBtools.h:38
#define MB_
Definition PBtools.h:43
void PB_Cabort()
#define ONE
Definition PBtools.h:64
void PB_Cchkvec()
void PB_Cinfog2l()
void PB_Cptrm()
void PB_Cchkmat()
#define MIN(a_, b_)
Definition PBtools.h:76
#define Mptr(a_, i_, j_, lda_, siz_)
Definition PBtools.h:132
void PB_Cwarn()
#define LLD_
Definition PBtools.h:47
Int PB_Cnumroc()
char * PB_Ctop()
void PB_CInV()
PBTYP_T * PB_Cztypeset()
void PB_CInOutV()
void PB_Ctzatrmv()
#define RSRC_
Definition PBtools.h:45
#define M_
Definition PBtools.h:39
#define INB_
Definition PBtools.h:42
void PB_CargFtoC()
#define CSRC_
Definition PBtools.h:46
Int PB_Clcm()
#define IMB_
Definition PBtools.h:41
#define ZERO
Definition PBtools.h:66
#define dascal_
Definition PBtools.h:674
PBTYP_T * PB_Cdtypeset()
#define Mupcase(C)
Definition PBtools.h:83
#define DLEN_
Definition PBtools.h:48
#define NB_
Definition PBtools.h:44
#define zagemv_
Definition PBtools.h:717
void PB_Cpaxpby()
void PB_Cdescribe()
char type
Definition pblas.h:331
Int usiz
Definition pblas.h:332
Int size
Definition pblas.h:333
char * one
Definition pblas.h:336