SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
All Classes Files Functions Variables Typedefs Macros
pdgemv_.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "pblas.h"
14#include "PBpblas.h"
15#include "PBtools.h"
16#include "PBblacs.h"
17#include "PBblas.h"
18
19#ifdef __STDC__
20void pdgemv_( F_CHAR_T TRANS, Int * M, Int * N, double * ALPHA,
21 double * A, Int * IA, Int * JA, Int * DESCA,
22 double * X, Int * IX, Int * JX, Int * DESCX, Int * INCX,
23 double * BETA,
24 double * Y, Int * IY, Int * JY, Int * DESCY, Int * INCY )
25#else
26void pdgemv_( TRANS, M, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX,
27 INCX, BETA, Y, IY, JY, DESCY, INCY )
28/*
29* .. Scalar Arguments ..
30*/
31 F_CHAR_T TRANS;
32 Int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY,
33 * M, * N;
34 double * ALPHA, * BETA;
35/*
36* .. Array Arguments ..
37*/
38 Int * DESCA, * DESCX, * DESCY;
39 double * A, * X, * Y;
40#endif
41{
42/*
43* Purpose
44* =======
45*
46* PDGEMV performs one of the matrix-vector operations
47*
48* sub( Y ) := alpha*sub( A ) *sub( X ) + beta*sub( Y ), or
49* sub( Y ) := alpha*sub( A )'*sub( X ) + beta*sub( Y ),
50*
51* where
52*
53* sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1).
54*
55* When TRANS = 'N',
56*
57* sub( X ) denotes X(IX:IX,JX:JX+N-1), if INCX = M_X,
58* X(IX:IX+N-1,JX:JX), if INCX = 1 and INCX <> M_X,
59* and,
60*
61* sub( Y ) denotes Y(IY:IY,JY:JY+M-1), if INCY = M_Y,
62* Y(IY:IY+M-1,JY:JY), if INCY = 1 and INCY <> M_Y,
63* and, otherwise
64*
65* sub( X ) denotes X(IX:IX,JX:JX+M-1), if INCX = M_X,
66* X(IX:IX+M-1,JX:JX), if INCX = 1 and INCX <> M_X,
67* and,
68*
69* sub( Y ) denotes Y(IY:IY,JY:JY+N-1), if INCY = M_Y,
70* Y(IY:IY+N-1,JY:JY), if INCY = 1 and INCY <> M_Y.
71*
72* Alpha and beta are scalars, and sub( X ) and sub( Y ) are subvectors
73* and sub( A ) is an m by n submatrix.
74*
75* Notes
76* =====
77*
78* A description vector is associated with each 2D block-cyclically dis-
79* tributed matrix. This vector stores the information required to
80* establish the mapping between a matrix entry and its corresponding
81* process and memory location.
82*
83* In the following comments, the character _ should be read as
84* "of the distributed matrix". Let A be a generic term for any 2D
85* block-cyclically distributed matrix. Its description vector is DESC_A:
86*
87* NOTATION STORED IN EXPLANATION
88* ---------------- --------------- ------------------------------------
89* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
90* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
91* the NPROW x NPCOL BLACS process grid
92* A is distributed over. The context
93* itself is global, but the handle
94* (the integer value) may vary.
95* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
96* ted matrix A, M_A >= 0.
97* N_A (global) DESCA[ N_ ] The number of columns in the distri-
98* buted matrix A, N_A >= 0.
99* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
100* block of the matrix A, IMB_A > 0.
101* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
102* left block of the matrix A,
103* INB_A > 0.
104* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
105* bute the last M_A-IMB_A rows of A,
106* MB_A > 0.
107* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
108* bute the last N_A-INB_A columns of
109* A, NB_A > 0.
110* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
111* row of the matrix A is distributed,
112* NPROW > RSRC_A >= 0.
113* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
114* first column of A is distributed.
115* NPCOL > CSRC_A >= 0.
116* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
117* array storing the local blocks of
118* the distributed matrix A,
119* IF( Lc( 1, N_A ) > 0 )
120* LLD_A >= MAX( 1, Lr( 1, M_A ) )
121* ELSE
122* LLD_A >= 1.
123*
124* Let K be the number of rows of a matrix A starting at the global in-
125* dex IA, i.e., A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
126* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
127* receive if these K rows were distributed over NPROW processes. If K
128* is the number of columns of a matrix A starting at the global index
129* JA, i.e., A( :, JA:JA+K-1 ), Lc( JA, K ) denotes the number of co-
130* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
131* these K columns were distributed over NPCOL processes.
132*
133* The values of Lr() and Lc() may be determined via a call to the func-
134* tion PB_Cnumroc:
135* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
136* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
137*
138* Arguments
139* =========
140*
141* TRANS (global input) CHARACTER*1
142* On entry, TRANS specifies the operation to be performed as
143* follows:
144*
145* TRANS = 'N' or 'n'
146* sub( Y ) := alpha*sub( A ) * sub( X ) + beta*sub( Y ),
147*
148* TRANS = 'T' or 't',
149* sub( Y ) := alpha*sub( A )' * sub( X ) + beta*sub( Y ),
150*
151* TRANS = 'C' or 'c',
152* sub( Y ) := alpha*sub( A )' * sub( X ) + beta*sub( Y ).
153*
154* M (global input) INTEGER
155* On entry, M specifies the number of rows of the submatrix
156* sub( A ). M must be at least zero.
157*
158* N (global input) INTEGER
159* On entry, N specifies the number of columns of the submatrix
160* sub( A ). N must be at least zero.
161*
162* ALPHA (global input) DOUBLE PRECISION
163* On entry, ALPHA specifies the scalar alpha. When ALPHA is
164* supplied as zero then the local entries of the arrays A
165* and X corresponding to the entries of the submatrix sub( A )
166* and the subvector sub( X ) need not be set on input.
167*
168* A (local input) DOUBLE PRECISION array
169* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
170* at least Lc( 1, JA+N-1 ). Before entry, this array contains
171* the local entries of the matrix A.
172*
173* IA (global input) INTEGER
174* On entry, IA specifies A's global row index, which points to
175* the beginning of the submatrix sub( A ).
176*
177* JA (global input) INTEGER
178* On entry, JA specifies A's global column index, which points
179* to the beginning of the submatrix sub( A ).
180*
181* DESCA (global and local input) INTEGER array
182* On entry, DESCA is an integer array of dimension DLEN_. This
183* is the array descriptor for the matrix A.
184*
185* X (local input) DOUBLE PRECISION array
186* On entry, X is an array of dimension (LLD_X, Kx), where LLD_X
187* is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and
188* MAX( 1, Lr( 1, IX+Lx-1 ) ) otherwise, and, Kx is at least
189* Lc( 1, JX+Lx-1 ) when INCX = M_X and Lc( 1, JX ) otherwise.
190* Lx is N when TRANS = 'N' or 'n' and M otherwise. Before en-
191* try, this array contains the local entries of the matrix X.
192*
193* IX (global input) INTEGER
194* On entry, IX specifies X's global row index, which points to
195* the beginning of the submatrix sub( X ).
196*
197* JX (global input) INTEGER
198* On entry, JX specifies X's global column index, which points
199* to the beginning of the submatrix sub( X ).
200*
201* DESCX (global and local input) INTEGER array
202* On entry, DESCX is an integer array of dimension DLEN_. This
203* is the array descriptor for the matrix X.
204*
205* INCX (global input) INTEGER
206* On entry, INCX specifies the global increment for the
207* elements of X. Only two values of INCX are supported in
208* this version, namely 1 and M_X. INCX must not be zero.
209*
210* BETA (global input) DOUBLE PRECISION
211* On entry, BETA specifies the scalar beta. When BETA is
212* supplied as zero then the local entries of the array Y
213* corresponding to the entries of the subvector sub( Y ) need
214* not be set on input.
215*
216* Y (local input/local output) DOUBLE PRECISION array
217* On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y
218* is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and
219* MAX( 1, Lr( 1, IY+Ly-1 ) ) otherwise, and, Ky is at least
220* Lc( 1, JY+Ly-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise.
221* Ly is M when TRANS = 'N' or 'n' and N otherwise. Before en-
222* try, this array contains the local entries of the matrix Y.
223* On exit, sub( Y ) is overwritten by the updated subvector.
224*
225* IY (global input) INTEGER
226* On entry, IY specifies Y's global row index, which points to
227* the beginning of the submatrix sub( Y ).
228*
229* JY (global input) INTEGER
230* On entry, JY specifies Y's global column index, which points
231* to the beginning of the submatrix sub( Y ).
232*
233* DESCY (global and local input) INTEGER array
234* On entry, DESCY is an integer array of dimension DLEN_. This
235* is the array descriptor for the matrix Y.
236*
237* INCY (global input) INTEGER
238* On entry, INCY specifies the global increment for the
239* elements of Y. Only two values of INCY are supported in
240* this version, namely 1 and M_Y. INCY must not be zero.
241*
* Implementation outline
* ======================
*
* 1. TRANS is upper-cased into TrA and the index/descriptor arguments
*    of A, X and Y are passed through PB_CargFtoC.
* 2. Unless NO_ARGCHK is defined, the arguments are checked; if info
*    is nonzero PB_Cabort is called and the routine returns.
* 3. Quick return when M or N is zero, or when alpha is zero and beta
*    is one.
* 4. When alpha is zero, the locally owned part of sub( Y ) is only
*    set (dset_, beta zero) or scaled (dscal_) by beta.
* 5. Otherwise work vectors YA and XA are obtained from PB_CInOutV and
*    PB_CInV with respect to the descriptor Ad0 of sub( A ), the local
*    dgemv_ is applied, the partial results are summed by Cdgsum2d
*    when YAsum is set, and sub( Y ) is updated through PB_Cpaxpby
*    when YApbY is set.
*
242* -- Written on April 1, 1998 by
243* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
244*
245* ---------------------------------------------------------------------
246*/
247/*
248* .. Local Scalars ..
249*/
250 char TrA, Yroc, * tbeta, top;
251 Int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Ald, Amb, Amp, Anb,
252 Anq, Arow, XAfr, Xi, Xj, YAfr, YApbY, YAsum, Ycol, Yi, Yii,
253 Yj, Yjj, Yld, Ynp, Ynq, Yrow, ctxt, info, ione=1, mycol,
254 myrow, nota, npcol, nprow;
255 PBTYP_T * type;
256/*
257* .. Local Arrays ..
258*/
259 Int Ad [DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_],
260 Yd [DLEN_];
261 char * XA = NULL, * YA = NULL;
262/* ..
263* .. Executable Statements ..
264*
* Upper-case the first character of TRANS into TrA (nota is nonzero
* only when it equals CNOTRAN), then hand the global indices and the
* descriptors of A, X and Y to PB_CargFtoC, which fills the local
* copies (Ai, Aj, Ad), (Xi, Xj, Xd) and (Yi, Yj, Yd) used below.
265*/
266 nota = ( ( TrA = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN );
267 PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad );
268 PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd );
269 PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd );
270#ifndef NO_ARGCHK
271/*
272* Test the input parameters
*
* nprow == -1 after Cblacs_gridinfo yields info = -( 801 + CTXT_ ),
* presumably pointing at the CTXT_ entry of argument 8 (DESCA); the
* remaining checks are skipped in that case. The integer literals
* passed to PB_Cchkmat and PB_Cchkvec match the 1-based positions of
* M (2), N (3), DESCA (8), DESCX (12) and DESCY (18) in the argument
* list. The expected vector length depends on TRANS: sub( X ) has N
* entries and sub( Y ) has M entries when nota is set, and the other
* way around otherwise.
273*/
274 Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
275 if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) )
276 {
277 if( ( !nota ) && ( TrA != CTRAN ) && ( TrA != CCOTRAN ) )
278 {
279 PB_Cwarn( ctxt, __LINE__, "PDGEMV", "Illegal TRANS=%c\n", TrA );
280 info = -1;
281 }
282 PB_Cchkmat( ctxt, "PDGEMV", "A", *M, 2, *N, 3, Ai, Aj, Ad, 8,
283 &info );
284 if( nota )
285 {
286 PB_Cchkvec( ctxt, "PDGEMV", "X", *N, 3, Xi, Xj, Xd, *INCX, 12,
287 &info );
288 PB_Cchkvec( ctxt, "PDGEMV", "Y", *M, 2, Yi, Yj, Yd, *INCY, 18,
289 &info );
290 }
291 else
292 {
293 PB_Cchkvec( ctxt, "PDGEMV", "X", *M, 2, Xi, Xj, Xd, *INCX, 12,
294 &info );
295 PB_Cchkvec( ctxt, "PDGEMV", "Y", *N, 3, Yi, Yj, Yd, *INCY, 18,
296 &info );
297 }
298 }
299 if( info ) { PB_Cabort( ctxt, "PDGEMV", info ); return; }
300#endif
301/*
302* Quick return if possible: sub( A ) is empty, or alpha is zero and
* beta is one, in which case sub( Y ) is left unchanged
303*/
304 if( ( *M == 0 ) || ( *N == 0 ) ||
305 ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA[REAL_PART] == ONE ) ) )
306 return;
307/*
308* Retrieve process grid information (already obtained above when the
* argument checks are compiled in)
309*/
310#ifdef NO_ARGCHK
311 Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
312#endif
313/*
314* Get type structure
315*/
316 type = PB_Cdtypeset();
317/*
318* When alpha is zero, only sub( Y ) := beta * sub( Y ) remains; it is
* applied to the locally owned entries of sub( Y ) and the routine
* returns without touching A or X
319*/
320 if( ALPHA[REAL_PART] == ZERO )
321 {
322/*
323* Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol
324*/
325 PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj,
326 &Yrow, &Ycol );
327
328 if( *INCY == Yd[M_] )
329 {
330/*
331* sub( Y ) resides in (a) process row(s)
* (Yrow < 0 is accepted as well, presumably meaning that sub( Y ) is
* replicated over every process row -- TODO confirm)
332*/
333 if( ( myrow == Yrow ) || ( Yrow < 0 ) )
334 {
335/*
336* Make sure I own some data and scale sub( Y )
* (sub( Y ) has M entries when nota is set and N otherwise; Ynq is the
* local column count and the stride passed is the local leading
* dimension Yld. dset_ is used instead of dscal_ when beta is zero,
* consistent with the documented guarantee that Y need not be set on
* input in that case)
337*/
338 Ynq = PB_Cnumroc( ( nota ? *M : *N ), Yj, Yd[INB_], Yd[NB_], mycol,
339 Yd[CSRC_], npcol );
340 if( Ynq > 0 )
341 {
342 Yld = Yd[LLD_];
343 if( BETA[REAL_PART] == ZERO )
344 {
345 dset_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii,
346 Yjj, Yld, type->size ), &Yld );
347 }
348 else
349 {
350 dscal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii,
351 Yjj, Yld, type->size ), &Yld );
352 }
353 }
354 }
355 }
356 else
357 {
358/*
359* sub( Y ) resides in (a) process column(s)
360*/
361 if( ( mycol == Ycol ) || ( Ycol < 0 ) )
362 {
363/*
364* Make sure I own some data and scale sub( Y )
* (Ynp is the local row count; the caller's INCY is passed as the
* stride in this branch)
365*/
366 Ynp = PB_Cnumroc( ( nota ? *M : *N ), Yi, Yd[IMB_], Yd[MB_], myrow,
367 Yd[RSRC_], nprow );
368 if( Ynp > 0 )
369 {
370 if( BETA[REAL_PART] == ZERO )
371 {
372 dset_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii,
373 Yjj, Yd[LLD_], type->size ), INCY );
374 }
375 else
376 {
377 dscal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii,
378 Yjj, Yd[LLD_], type->size ), INCY );
379 }
380 }
381 }
382 }
383 return;
384 }
385/*
386* Compute descriptor Ad0 for sub( A )
* (PB_Cdescribe also returns the local starting indices Aii, Ajj and
* the local leading dimension Ald used to address A below)
387*/
388 PB_Cdescribe( *M, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj,
389 &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
390
/* Yroc records whether sub( Y ) is a row (INCY == M_Y) or a column vector */
391 Yroc = ( *INCY == Yd[M_] ? CROW : CCOLUMN );
392
393 if( nota )
394 {
395/*
396* Reuse sub( Y ) and/or create vector YA in process columns spanned by sub( A )
* (PB_CInOutV also returns tbeta, the beta handed to the local dgemv_,
* and the flags YAfr, YAsum and YApbY that drive the steps below)
397*/
398 PB_CInOutV( type, COLUMN, *M, *N, Ad0, 1, ((char *) BETA), ((char *) Y),
399 Yi, Yj, Yd, &Yroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY );
400/*
401* Replicate sub( X ) in process rows spanned by sub( A ) -> XA
402*/
403 PB_CInV( type, NOCONJG, ROW, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd,
404 ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr );
405/*
406* Local matrix-vector multiply iff I own some data
* (Amp x Anq is the local part of sub( A ); XA is read with stride
* XAd[LLD_] and YA is written with unit stride)
407*/
408 Amp = PB_Cnumroc( *M, 0, Ad0[IMB_], Ad0[MB_], myrow, Ad0[RSRC_], nprow );
409 Anq = PB_Cnumroc( *N, 0, Ad0[INB_], Ad0[NB_], mycol, Ad0[CSRC_], npcol );
410 if( ( Amp > 0 ) && ( Anq > 0 ) )
411 {
412 dgemv_( TRANS, &Amp, &Anq, ((char *) ALPHA), Mptr( ((char *)A),
413 Aii, Ajj, Ald, type->size ), &Ald, XA, &XAd[LLD_], tbeta,
414 YA, &ione );
415 }
/* XA is released only when PB_CInV set XAfr */
416 if( XAfr ) free( XA );
417/*
418* Combine the partial column results into YA
* (sum of the Amp local entries over the ROW scope, with destination
* coordinates ( myrow, YAd[CSRC_] ))
419*/
420 if( YAsum && ( Amp > 0 ) )
421 {
422 top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET );
423 Cdgsum2d( ctxt, ROW, &top, Amp, 1, YA, YAd[LLD_], myrow,
424 YAd[CSRC_] );
425 }
426/*
427* sub( Y ) := beta * sub( Y ) + YA (if necessary)
428*/
429 if( YApbY )
430 {
431 PB_Cpaxpby( type, NOCONJG, *M, 1, type->one, YA, 0, 0, YAd, COLUMN,
432 ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc );
433 }
434 }
435 else
436 {
437/*
438* Reuse sub( Y ) and/or create vector YA in process rows spanned by sub( A )
* (PB_CInOutV also returns tbeta, the beta handed to the local dgemv_,
* and the flags YAfr, YAsum and YApbY that drive the steps below)
439*/
440 PB_CInOutV( type, ROW, *M, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi,
441 Yj, Yd, &Yroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY );
442/*
443* Replicate sub( X ) in process columns spanned by sub( A ) -> XA
444*/
445 PB_CInV( type, NOCONJG, COLUMN, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd,
446 ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr );
447/*
448* Local matrix-vector multiply iff I own some data
* (Amp x Anq is the local part of sub( A ); XA is read with unit stride
* and YA is written with stride YAd[LLD_])
449*/
450 Amp = PB_Cnumroc( *M, 0, Ad0[IMB_], Ad0[MB_], myrow, Ad0[RSRC_], nprow );
451 Anq = PB_Cnumroc( *N, 0, Ad0[INB_], Ad0[NB_], mycol, Ad0[CSRC_], npcol );
452 if( ( Amp > 0 ) && ( Anq > 0 ) )
453 {
454 dgemv_( TRANS, &Amp, &Anq, ((char *) ALPHA), Mptr( ((char *)A),
455 Aii, Ajj, Ald, type->size ), &Ald, XA, &ione, tbeta,
456 YA, &YAd[LLD_] );
457 }
/* XA is released only when PB_CInV set XAfr */
458 if( XAfr ) free( XA );
459/*
460* Combine the partial row results into YA
* (sum of the Anq local entries over the COLUMN scope, with destination
* coordinates ( YAd[RSRC_], mycol ))
461*/
462 if( YAsum && ( Anq > 0 ) )
463 {
464 top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET );
465 Cdgsum2d( ctxt, COLUMN, &top, 1, Anq, YA, YAd[LLD_], YAd[RSRC_],
466 mycol );
467 }
468/*
469* sub( Y ) := beta * sub( Y ) + YA (if necessary)
470*/
471 if( YApbY )
472 {
473 PB_Cpaxpby( type, NOCONJG, 1, *N, type->one, YA, 0, 0, YAd, ROW,
474 ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc );
475 }
476 }
/* YA is released only when PB_CInOutV set YAfr (presumably because it
 * was allocated rather than aliased to Y -- TODO confirm) */
477 if( YAfr ) free( YA );
478/*
479* End of PDGEMV
480*/
481}
#define Int
Definition Bconfig.h:22
#define REAL_PART
Definition pblas.h:139
#define F2C_CHAR(a)
Definition pblas.h:124
char * F_CHAR_T
Definition pblas.h:122
#define CCOLUMN
Definition PBblacs.h:20
#define TOP_GET
Definition PBblacs.h:50
#define COLUMN
Definition PBblacs.h:45
#define COMBINE
Definition PBblacs.h:49
#define CROW
Definition PBblacs.h:21
#define ROW
Definition PBblacs.h:46
void Cblacs_gridinfo()
void Cdgsum2d()
#define NOCONJG
Definition PBblas.h:45
#define dscal_
Definition PBblas.h:111
#define CNOTRAN
Definition PBblas.h:18
#define CTRAN
Definition PBblas.h:20
#define dgemv_
Definition PBblas.h:140
#define CCOTRAN
Definition PBblas.h:22
#define pdgemv_
Definition PBpblas.h:95
#define CTXT_
Definition PBtools.h:38
#define MB_
Definition PBtools.h:43
void PB_Cabort()
#define ONE
Definition PBtools.h:64
void PB_Cchkvec()
void PB_Cinfog2l()
void PB_Cchkmat()
#define Mptr(a_, i_, j_, lda_, siz_)
Definition PBtools.h:132
void PB_Cwarn()
#define LLD_
Definition PBtools.h:47
Int PB_Cnumroc()
char * PB_Ctop()
void PB_CInV()
void PB_CInOutV()
#define RSRC_
Definition PBtools.h:45
#define M_
Definition PBtools.h:39
#define INB_
Definition PBtools.h:42
void PB_CargFtoC()
#define CSRC_
Definition PBtools.h:46
#define IMB_
Definition PBtools.h:41
#define ZERO
Definition PBtools.h:66
PBTYP_T * PB_Cdtypeset()
#define Mupcase(C)
Definition PBtools.h:83
#define DLEN_
Definition PBtools.h:48
#define NB_
Definition PBtools.h:44
#define dset_
Definition PBtools.h:664
void PB_Cpaxpby()
void PB_Cdescribe()
Int size
Definition pblas.h:333
char * one
Definition pblas.h:336