SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pdatrmv_.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "pblas.h"
14#include "PBpblas.h"
15#include "PBtools.h"
16#include "PBblacs.h"
17#include "PBblas.h"
18
19#ifdef __STDC__
20void pdatrmv_( F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, Int * N,
21 double * ALPHA,
22 double * A, Int * IA, Int * JA, Int * DESCA,
23 double * X, Int * IX, Int * JX, Int * DESCX,
24 Int * INCX, double * BETA,
25 double * Y, Int * IY, Int * JY, Int * DESCY,
26 Int * INCY )
27#else
28void pdatrmv_( UPLO, TRANS, DIAG, N, ALPHA, A, IA, JA, DESCA, X, IX,
29 JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY )
30/*
31* .. Scalar Arguments ..
32*/
33 F_CHAR_T DIAG, TRANS, UPLO;
34 Int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY,
35 * N;
36 double * ALPHA, * BETA;
37/*
38* .. Array Arguments ..
39*/
40 Int * DESCA, * DESCX, * DESCY;
41 double * A, * X, * Y;
42#endif
43{
44/*
45* Purpose
46* =======
47*
48* PDATRMV performs one of the matrix-vector operations
49*
50* sub( Y ) := abs( alpha )*abs( sub( A ) )*abs( sub( X ) ) +
51* abs( beta*sub( Y ) ),
52* or
53*
54* sub( Y ) := abs( alpha )*abs( sub( A )' )*abs( sub( X ) ) +
55* abs( beta*sub( Y ) ),
56*
57* where
58*
59* sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1),
60*
61* sub( X ) denotes X(IX:IX,JX:JX+N-1), if INCX = M_X,
62* X(IX:IX+N-1,JX:JX), if INCX = 1 and INCX <> M_X,
63* and,
64*
65* sub( Y ) denotes Y(IY:IY,JY:JY+N-1), if INCY = M_Y,
66* Y(IY:IY+N-1,JY:JY), if INCY = 1 and INCY <> M_Y.
67*
68* Alpha and beta are real scalars, sub( Y ) is a real subvector,
69* sub( X ) is a subvector and sub( A ) is an n by n triangular subma-
70* trix.
71*
72* Notes
73* =====
74*
75* A description vector is associated with each 2D block-cyclicly dis-
76* tributed matrix. This vector stores the information required to
77* establish the mapping between a matrix entry and its corresponding
78* process and memory location.
79*
80* In the following comments, the character _ should be read as
81* "of the distributed matrix". Let A be a generic term for any 2D
82* block cyclicly distributed matrix. Its description vector is DESC_A:
83*
84* NOTATION STORED IN EXPLANATION
85* ---------------- --------------- ------------------------------------
86* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
87* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
88* the NPROW x NPCOL BLACS process grid
89* A is distributed over. The context
90* itself is global, but the handle
91* (the integer value) may vary.
92* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
93* ted matrix A, M_A >= 0.
94* N_A (global) DESCA[ N_ ] The number of columns in the distri-
95* buted matrix A, N_A >= 0.
96* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
97* block of the matrix A, IMB_A > 0.
98* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
99* left block of the matrix A,
100* INB_A > 0.
101* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
102* bute the last M_A-IMB_A rows of A,
103* MB_A > 0.
104* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
105* bute the last N_A-INB_A columns of
106* A, NB_A > 0.
107* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
108* row of the matrix A is distributed,
109* NPROW > RSRC_A >= 0.
110* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
111* first column of A is distributed.
112* NPCOL > CSRC_A >= 0.
113* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
114* array storing the local blocks of
115* the distributed matrix A,
116* IF( Lc( 1, N_A ) > 0 )
117* LLD_A >= MAX( 1, Lr( 1, M_A ) )
118* ELSE
119* LLD_A >= 1.
120*
121* Let K be the number of rows of a matrix A starting at the global in-
122* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
123* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
124* receive if these K rows were distributed over NPROW processes. If K
125* is the number of columns of a matrix A starting at the global index
126* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
127* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
128* these K columns were distributed over NPCOL processes.
129*
130* The values of Lr() and Lc() may be determined via a call to the func-
131* tion PB_Cnumroc:
132* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
133* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
134*
135* Arguments
136* =========
137*
138* UPLO (global input) CHARACTER*1
139* On entry, UPLO specifies whether the submatrix sub( A ) is
140* an upper or lower triangular submatrix as follows:
141*
142* UPLO = 'U' or 'u' sub( A ) is an upper triangular
143* submatrix,
144*
145* UPLO = 'L' or 'l' sub( A ) is a lower triangular
146* submatrix.
147*
148* TRANS (global input) CHARACTER*1
149* On entry, TRANS specifies the operation to be performed as
150* follows:
151*
152* TRANS = 'N' or 'n'
153* sub( Y ) := |alpha|*|sub( A )|*|sub( X )| +
154* |beta*sub( Y )|.
155*
156* TRANS = 'T' or 't'
157* sub( Y ) := |alpha|*|sub( A )'|*|sub( X )| +
158* |beta*sub( Y )|.
159*
160* TRANS = 'C' or 'c'
161* sub( Y ) := |alpha|*|sub( A )'|*|sub( X )| +
162* |beta*sub( Y )|.
163*
164* DIAG (global input) CHARACTER*1
165* On entry, DIAG specifies whether or not sub( A ) is unit
166* triangular as follows:
167*
168* DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian-
169* gular,
170*
171* DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri-
172* angular.
173*
174* N (global input) INTEGER
175* On entry, N specifies the order of the submatrix sub( A ).
176* N must be at least zero.
177*
178* ALPHA (global input) DOUBLE PRECISION
179* On entry, ALPHA specifies the scalar alpha. When ALPHA is
180* supplied as zero then the local entries of the arrays A
181* and X corresponding to the entries of the submatrix sub( A )
182* and the subvector sub( X ) need not be set on input.
183*
184* A (local input) DOUBLE PRECISION array
185* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
186* at least Lc( 1, JA+N-1 ). Before entry, this array contains
187* the local entries of the matrix A.
188* Before entry with UPLO = 'U' or 'u', this array contains the
189* local entries corresponding to the entries of the upper tri-
190* angular submatrix sub( A ), and the local entries correspon-
191* ding to the entries of the strictly lower triangular part of
192* the submatrix sub( A ) are not referenced.
193* Before entry with UPLO = 'L' or 'l', this array contains the
194* local entries corresponding to the entries of the lower tri-
195* angular submatrix sub( A ), and the local entries correspon-
196* ding to the entries of the strictly upper triangular part of
197* the submatrix sub( A ) are not referenced.
198* Note that when DIAG = 'U' or 'u', the local entries corres-
199* ponding to the diagonal elements of the submatrix sub( A )
200* are not referenced either, but are assumed to be unity.
201*
202* IA (global input) INTEGER
203* On entry, IA specifies A's global row index, which points to
204* the beginning of the submatrix sub( A ).
205*
206* JA (global input) INTEGER
207* On entry, JA specifies A's global column index, which points
208* to the beginning of the submatrix sub( A ).
209*
210* DESCA (global and local input) INTEGER array
211* On entry, DESCA is an integer array of dimension DLEN_. This
212* is the array descriptor for the matrix A.
213*
214* X (local input) DOUBLE PRECISION array
215* On entry, X is an array of dimension (LLD_X, Kx), where LLD_X
216* is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and
217* MAX( 1, Lr( 1, IX+Lx-1 ) ) otherwise, and, Kx is at least
218* Lc( 1, JX+Lx-1 ) when INCX = M_X and Lc( 1, JX ) otherwise.
219* Lx is N when TRANS = 'N' or 'n' and M otherwise. Before en-
220* try, this array contains the local entries of the matrix X.
221*
222* IX (global input) INTEGER
223* On entry, IX specifies X's global row index, which points to
224* the beginning of the submatrix sub( X ).
225*
226* JX (global input) INTEGER
227* On entry, JX specifies X's global column index, which points
228* to the beginning of the submatrix sub( X ).
229*
230* DESCX (global and local input) INTEGER array
231* On entry, DESCX is an integer array of dimension DLEN_. This
232* is the array descriptor for the matrix X.
233*
234* INCX (global input) INTEGER
235* On entry, INCX specifies the global increment for the
236* elements of X. Only two values of INCX are supported in
237* this version, namely 1 and M_X. INCX must not be zero.
238*
239* BETA (global input) DOUBLE PRECISION
240* On entry, BETA specifies the scalar beta. When BETA is
241* supplied as zero then the local entries of the array Y
242* corresponding to the entries of the subvector sub( Y ) need
243* not be set on input.
244*
245* Y (local input/local output) DOUBLE PRECISION array
246* On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y
247* is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and
248* MAX( 1, Lr( 1, IY+Ly-1 ) ) otherwise, and, Ky is at least
249* Lc( 1, JY+Ly-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise.
250* Ly is M when TRANS = 'N' or 'n' and N otherwise. Before en-
251* try, this array contains the local entries of the matrix Y.
252* On exit, sub( Y ) is overwritten by the updated subvector.
253*
254* IY (global input) INTEGER
255* On entry, IY specifies Y's global row index, which points to
256* the beginning of the submatrix sub( Y ).
257*
258* JY (global input) INTEGER
259* On entry, JY specifies Y's global column index, which points
260* to the beginning of the submatrix sub( Y ).
261*
262* DESCY (global and local input) INTEGER array
263* On entry, DESCY is an integer array of dimension DLEN_. This
264* is the array descriptor for the matrix Y.
265*
266* INCY (global input) INTEGER
267* On entry, INCY specifies the global increment for the
268* elements of Y. Only two values of INCY are supported in
269* this version, namely 1 and M_Y. INCY must not be zero.
270*
271* -- Written on April 1, 1998 by
272* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
273*
274* ---------------------------------------------------------------------
275*/
276/*
277* .. Local Scalars ..
278*/
279 char DiagA, TranOp, UploA, Yroc, * one, top;
280 Int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb,
281 Amp, Amp0, Anb, Anq, Anq0, Arow, XAfr, XAld, Xi, Xj, YAfr,
282 YAld, YApbY, YAsum, Ycol, Yi, Yii, Yj, Yjj, Yld, Ynp, Ynq,
283 Yrow, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb,
284 notran, nounit, npcol, nprow, size, upper, usiz;
285 double * tbeta;
286 PBTYP_T * type, * utyp;
287/*
288* .. Local Arrays ..
289*/
290 Int Ad [DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_],
291 Yd [DLEN_];
292 char * Aptr = NULL, * XA = NULL, * YA = NULL;
293/* ..
294* .. Executable Statements ..
295*
296*/
297 upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER );
298 notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN );
299 nounit = ( ( DiagA = Mupcase( F2C_CHAR( DIAG )[0] ) ) == CNOUNIT );
300 PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad );
301 PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd );
302 PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd );
303#ifndef NO_ARGCHK
304/*
305* Test the input parameters
306*/
307 Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
308 if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) )
309 {
310 if( ( !upper ) && ( UploA != CLOWER ) )
311 {
312 PB_Cwarn( ctxt, __LINE__, "PDATRMV", "Illegal UPLO = %c\n", UploA );
313 info = -1;
314 }
315 else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) )
316 {
317 PB_Cwarn( ctxt, __LINE__, "PDATRMV", "Illegal TRANS = %c\n", TranOp );
318 info = -2;
319 }
320 else if( ( !nounit ) && ( DiagA != CUNIT ) )
321 {
322 PB_Cwarn( ctxt, __LINE__, "PDATRMV", "Illegal DIAG = %c\n", DiagA );
323 info = -3;
324 }
325 PB_Cchkmat( ctxt, "PDATRMV", "A", *N, 4, *N, 4, Ai, Aj, Ad, 9, &info );
326 PB_Cchkvec( ctxt, "PDATRMV", "X", *N, 4, Xi, Xj, Xd, *INCX, 13, &info );
327 PB_Cchkvec( ctxt, "PDATRMV", "Y", *N, 4, Yi, Yj, Yd, *INCY, 19, &info );
328 }
329 if( info ) { PB_Cabort( ctxt, "PDATRMV", info ); return; }
330#endif
331/*
332* Quick return if possible
333*/
334 if( ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) &&
335 ( BETA [REAL_PART] == ONE ) ) ) return;
336/*
337* Retrieve process grid information
338*/
339#ifdef NO_ARGCHK
340 Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
341#endif
342/*
343* Get type structure
344*/
345 type = utyp = PB_Cdtypeset();
346 size = usiz = type->size;
347/*
348* and when alpha is zero
349*/
350 if( ALPHA[REAL_PART] == ZERO )
351 {
352/*
353* Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol
354*/
355 PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj,
356 &Yrow, &Ycol );
357
358 if( *INCY == Yd[M_] )
359 {
360/*
361* sub( Y ) resides in (a) process row(s)
362*/
363 if( ( myrow == Yrow ) || ( Yrow < 0 ) )
364 {
365/*
366* Make sure I own some data and scale sub( Y )
367*/
368 Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_],
369 npcol );
370 if( Ynq > 0 )
371 {
372 Yld = Yd[LLD_];
373 dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii,
374 Yjj, Yld, usiz ), &Yld );
375 }
376 }
377 }
378 else
379 {
380/*
381* sub( Y ) resides in (a) process column(s)
382*/
383 if( ( mycol == Ycol ) || ( Ycol < 0 ) )
384 {
385/*
386* Make sure I own some data and scale sub( Y )
387*/
388 Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_],
389 nprow );
390 if( Ynp > 0 )
391 {
392 dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii,
393 Yjj, Yd[LLD_], usiz ), INCY );
394 }
395 }
396 }
397 return;
398 }
399/*
400* Compute descriptor Ad0 for sub( A )
401*/
402 PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj,
403 &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
404
405 Yroc = ( *INCY == Yd[M_] ? CROW : CCOLUMN );
406
407 if( notran )
408 {
409/*
410* Reuse sub( Y ) and/or create vector YA in process columns spanned by sub( A )
411*/
412 PB_CInOutV( utyp, COLUMN, *N, *N, Ad0, 1, ((char *) BETA), ((char *) Y),
413 Yi, Yj, Yd, &Yroc, ((char**)(&tbeta)), &YA, YAd, &YAfr,
414 &YAsum, &YApbY );
415/*
416* Replicate sub( X ) in process rows spanned by sub( A ) -> XA
417*/
418 PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd,
419 ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr );
420 }
421 else
422 {
423/*
424* Reuse sub( Y ) and/or create vector YA in process rows spanned by sub( A )
425*/
426 PB_CInOutV( utyp, ROW, *N, *N, Ad0, 1, ((char *) BETA), ((char *) Y),
427 Yi, Yj, Yd, &Yroc, ((char**)(&tbeta)), &YA, YAd, &YAfr,
428 &YAsum, &YApbY );
429/*
430* Replicate sub( X ) in process columns spanned by sub( A ) -> XA
431*/
432 PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd,
433 ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr );
434 }
435
436 one = type->one;
437/*
438* Local matrix-vector multiply iff I own some data
439*/
440 Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_];
441 Acol = Ad0[CSRC_]; Arow = Ad0[RSRC_];
442 Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow );
443 Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol );
444
445 if( ( Amp > 0 ) && ( Anq > 0 ) )
446 {
447 Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size );
448
449 XAld = XAd[LLD_]; YAld = YAd[LLD_];
450/*
451* Scale YA in the case sub( Y ) has been reused
452*/
453 if( notran && !( YApbY ) )
454 {
455/*
456* YA resides in (a) process column(s)
457*/
458 if( ( mycol == YAd[CSRC_] ) || ( YAd[CSRC_] < 0 ) )
459 {
460/*
461* Make sure I own some data and scale YA
462*/
463 if( Amp > 0 )
464 dascal_( &Amp, ((char *) tbeta), YA, &ione );
465 }
466 }
467 else if( !( notran ) && !( YApbY ) )
468 {
469/*
470* YA resides in (a) process row(s)
471*/
472 if( ( myrow == YAd[RSRC_] ) || ( YAd[RSRC_] < 0 ) )
473 {
474/*
475* Make sure I own some data and scale YA
476*/
477 if( Anq > 0 )
478 dascal_( &Anq, ((char *) tbeta), YA, &YAld );
479 }
480 }
481/*
482* Computational partitioning size is computed as the product of the logical
483* value returned by pilaenv_ and 2 * lcm( nprow, npcol )
484*/
485 nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &utyp->type ) ) *
486 PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) );
487
488 if( upper )
489 {
490 if( notran )
491 {
492 for( k = 0; k < *N; k += nb )
493 {
494 kb = *N - k; kb = MIN( kb, nb );
495 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
496 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
497 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
498 if( Akp > 0 && Anq0 > 0 )
499 {
500 dagemv_( TRANS, &Akp, &Anq0, ((char *) ALPHA),
501 Mptr( Aptr, 0, Akq, Ald, size ), &Ald,
502 Mptr( XA, 0, Akq, XAld, size ), &XAld, one,
503 YA, &ione );
504 }
505 PB_Cptrm( type, utyp, LEFT, UPPER, &TranOp, &DiagA, kb, 1,
506 ((char *) ALPHA), Aptr, k, k, Ad0,
507 Mptr( XA, 0, Akq, XAld, size ), XAld,
508 Mptr( YA, Akp, 0, YAld, usiz ), YAld, PB_Ctzatrmv );
509 }
510 }
511 else
512 {
513 for( k = 0; k < *N; k += nb )
514 {
515 kb = *N - k; kb = MIN( kb, nb );
516 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
517 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
518 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
519 if( Akp > 0 && Anq0 > 0 )
520 {
521 dagemv_( TRANS, &Akp, &Anq0, ((char *) ALPHA),
522 Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XA, &ione,
523 one, Mptr( YA, 0, Akq, YAld, usiz ), &YAld );
524 }
525 PB_Cptrm( type, utyp, LEFT, UPPER, &TranOp, &DiagA, kb, 1,
526 ((char *) ALPHA), Aptr, k, k, Ad0,
527 Mptr( XA, Akp, 0, XAld, size ), XAld,
528 Mptr( YA, 0, Akq, YAld, usiz ), YAld, PB_Ctzatrmv );
529 }
530 }
531 }
532 else
533 {
534 if( notran )
535 {
536 for( k = 0; k < *N; k += nb )
537 {
538 kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) );
539 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
540 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
541 PB_Cptrm( type, utyp, LEFT, LOWER, &TranOp, &DiagA, kb, 1,
542 ((char *) ALPHA), Aptr, k, k, Ad0,
543 Mptr( XA, 0, Akq, XAld, size ), XAld,
544 Mptr( YA, Akp, 0, YAld, usiz ), YAld, PB_Ctzatrmv );
545 Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
546 Amp0 = Amp - Akp;
547 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
548 if( Amp0 > 0 && Anq0 > 0 )
549 {
550 dagemv_( TRANS, &Amp0, &Anq0, ((char *) ALPHA),
551 Mptr( Aptr, Akp, Akq, Ald, size ), &Ald,
552 Mptr( XA, 0, Akq, XAld, size ), &XAld, one,
553 Mptr( YA, Akp, 0, YAld, usiz ), &ione );
554 }
555 }
556 }
557 else
558 {
559 for( k = 0; k < *N; k += nb )
560 {
561 kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) );
562 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
563 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
564 PB_Cptrm( type, utyp, LEFT, LOWER, &TranOp, &DiagA, kb, 1,
565 ((char *) ALPHA), Aptr, k, k, Ad0,
566 Mptr( XA, Akp, 0, XAld, size ), XAld,
567 Mptr( YA, 0, Akq, YAld, usiz ), YAld, PB_Ctzatrmv );
568 Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
569 Amp0 = Amp - Akp;
570 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
571 if( Amp0 > 0 && Anq0 > 0 )
572 {
573 dagemv_( TRANS, &Amp0, &Anq0, one,
574 Mptr( Aptr, Akp, Akq, Ald, size ), &Ald,
575 Mptr( XA, Akp, 0, XAld, size ), &ione, one,
576 Mptr( YA, 0, Akq, YAld, usiz ), &YAld );
577 }
578 }
579 }
580 }
581 }
582 if( XAfr ) free( XA );
583
584 if( notran )
585 {
586/*
587* Combine the partial column results into YA
588*/
589 if( YAsum && ( Amp > 0 ) )
590 {
591 top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET );
592 Cdgsum2d( ctxt, ROW, &top, Amp, 1, YA, YAd[LLD_], myrow,
593 YAd[CSRC_] );
594 }
595 }
596 else
597 {
598/*
599* Combine the partial row results into YA
600*/
601 if( YAsum && ( Anq > 0 ) )
602 {
603 top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET );
604 Cdgsum2d( ctxt, COLUMN, &top, 1, Anq, YA, YAd[LLD_], YAd[RSRC_],
605 mycol );
606 }
607 }
608/*
609* sub( Y ) := beta * sub( Y ) + YA (if necessary)
610*/
611 if( YApbY )
612 {
613/*
614* Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol
615*/
616 PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow,
617 &Ycol );
618
619 if( *INCY == Yd[M_] )
620 {
621/*
622* sub( Y ) resides in (a) process row(s)
623*/
624 if( ( myrow == Yrow ) || ( Yrow < 0 ) )
625 {
626/*
627* Make sure I own some data and scale sub( Y )
628*/
629 Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_],
630 npcol );
631 if( Ynq > 0 )
632 {
633 Yld = Yd[LLD_];
634 dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii,
635 Yjj, Yld, usiz ), &Yld );
636 }
637 }
638 }
639 else
640 {
641/*
642* sub( Y ) resides in (a) process column(s)
643*/
644 if( ( mycol == Ycol ) || ( Ycol < 0 ) )
645 {
646/*
647* Make sure I own some data and scale sub( Y )
648*/
649 Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_],
650 nprow );
651 if( Ynp > 0 )
652 {
653 dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii,
654 Yjj, Yd[LLD_], usiz ), INCY );
655 }
656 }
657 }
658
659 if( notran )
660 {
661 PB_Cpaxpby( utyp, NOCONJG, *N, 1, one, YA, 0, 0, YAd, COLUMN, one,
662 ((char *) Y), Yi, Yj, Yd, &Yroc );
663 }
664 else
665 {
666 PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YA, 0, 0, YAd, ROW, one,
667 ((char *) Y), Yi, Yj, Yd, &Yroc );
668 }
669 }
670 if( YAfr ) free( YA );
671/*
672* End of PDATRMV
673*/
674}
#define Int
Definition Bconfig.h:22
#define REAL_PART
Definition pblas.h:139
#define F2C_CHAR(a)
Definition pblas.h:124
#define C2F_CHAR(a)
Definition pblas.h:125
char * F_CHAR_T
Definition pblas.h:122
#define CCOLUMN
Definition PBblacs.h:20
#define TOP_GET
Definition PBblacs.h:50
#define COLUMN
Definition PBblacs.h:45
#define COMBINE
Definition PBblacs.h:49
#define CROW
Definition PBblacs.h:21
#define ROW
Definition PBblacs.h:46
void Cblacs_gridinfo()
void Cdgsum2d()
#define LEFT
Definition PBblas.h:55
#define CNOUNIT
Definition PBblas.h:33
#define NOCONJG
Definition PBblas.h:45
#define CUPPER
Definition PBblas.h:26
#define CNOTRAN
Definition PBblas.h:18
#define CTRAN
Definition PBblas.h:20
#define LOWER
Definition PBblas.h:51
#define CCOTRAN
Definition PBblas.h:22
#define UPPER
Definition PBblas.h:52
#define CLOWER
Definition PBblas.h:25
#define CUNIT
Definition PBblas.h:32
#define pdatrmv_
Definition PBpblas.h:120
#define pilaenv_
Definition PBpblas.h:44
#define CTXT_
Definition PBtools.h:38
#define MB_
Definition PBtools.h:43
void PB_Cabort()
#define ONE
Definition PBtools.h:64
void PB_Cchkvec()
void PB_Cinfog2l()
void PB_Cptrm()
void PB_Cchkmat()
#define MIN(a_, b_)
Definition PBtools.h:76
#define Mptr(a_, i_, j_, lda_, siz_)
Definition PBtools.h:132
void PB_Cwarn()
#define LLD_
Definition PBtools.h:47
Int PB_Cnumroc()
char * PB_Ctop()
void PB_CInV()
void PB_CInOutV()
void PB_Ctzatrmv()
#define RSRC_
Definition PBtools.h:45
#define M_
Definition PBtools.h:39
#define INB_
Definition PBtools.h:42
void PB_CargFtoC()
#define CSRC_
Definition PBtools.h:46
Int PB_Clcm()
#define IMB_
Definition PBtools.h:41
#define ZERO
Definition PBtools.h:66
#define dascal_
Definition PBtools.h:674
PBTYP_T * PB_Cdtypeset()
#define Mupcase(C)
Definition PBtools.h:83
#define DLEN_
Definition PBtools.h:48
#define NB_
Definition PBtools.h:44
#define dagemv_
Definition PBtools.h:715
void PB_Cpaxpby()
void PB_Cdescribe()
char type
Definition pblas.h:331
Int size
Definition pblas.h:333
char * one
Definition pblas.h:336