SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pdamax_.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "pblas.h"
14#include "PBpblas.h"
15#include "PBtools.h"
16#include "PBblacs.h"
17#include "PBblas.h"
18
19#ifdef __STDC__
20void pdamax_( Int * N, double * AMAX, Int * INDX,
21 double * X, Int * IX, Int * JX, Int * DESCX, Int * INCX )
22#else
23void pdamax_( N, AMAX, INDX, X, IX, JX, DESCX, INCX )
24/*
25* .. Scalar Arguments ..
26*/
27 Int * INCX, * INDX, * IX, * JX, * N;
28 double * AMAX;
29/*
30* .. Array Arguments ..
31*/
32 Int * DESCX;
33 double * X;
34#endif
35{
36/*
37* Purpose
38* =======
39*
40* PDAMAX computes the global index of the maximum element in absolute
41* value of a subvector sub( X ). The global index is returned in INDX
42* and the value of that element is returned in AMAX,
43*
44* where
45*
46* sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
47* X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X.
48*
49* Notes
50* =====
51*
52* A description vector is associated with each 2D block-cyclicly dis-
53* tributed matrix. This vector stores the information required to
54* establish the mapping between a matrix entry and its corresponding
55* process and memory location.
56*
57* In the following comments, the character _ should be read as
58* "of the distributed matrix". Let A be a generic term for any 2D
59* block cyclicly distributed matrix. Its description vector is DESC_A:
60*
61* NOTATION STORED IN EXPLANATION
62* ---------------- --------------- ------------------------------------
63* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
64* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
65* the NPROW x NPCOL BLACS process grid
66* A is distributed over. The context
67* itself is global, but the handle
68* (the integer value) may vary.
69* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
70* ted matrix A, M_A >= 0.
71* N_A (global) DESCA[ N_ ] The number of columns in the distri-
72* buted matrix A, N_A >= 0.
73* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
74* block of the matrix A, IMB_A > 0.
75* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
76* left block of the matrix A,
77* INB_A > 0.
78* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
79* bute the last M_A-IMB_A rows of A,
80* MB_A > 0.
81* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
82* bute the last N_A-INB_A columns of
83* A, NB_A > 0.
84* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
85* row of the matrix A is distributed,
86* NPROW > RSRC_A >= 0.
87* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
88* first column of A is distributed.
89* NPCOL > CSRC_A >= 0.
90* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
91* array storing the local blocks of
92* the distributed matrix A,
93* IF( Lc( 1, N_A ) > 0 )
94* LLD_A >= MAX( 1, Lr( 1, M_A ) )
95* ELSE
96* LLD_A >= 1.
97*
98* Let K be the number of rows of a matrix A starting at the global in-
99* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
100* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
101* receive if these K rows were distributed over NPROW processes. If K
102* is the number of columns of a matrix A starting at the global index
103* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
104* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
105* these K columns were distributed over NPCOL processes.
106*
107* The values of Lr() and Lc() may be determined via a call to the func-
108* tion PB_Cnumroc:
109* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
110* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
111*
112* Arguments
113* =========
114*
115* N (global input) INTEGER
116* On entry, N specifies the length of the subvector sub( X ).
117* N must be at least zero.
118*
119* AMAX (global output) DOUBLE PRECISION array
120* On exit, AMAX specifies the largest entry in absolute value
121* of the subvector sub( X ) only in its scope (See below for
122* further details).
123*
124* INDX (global output) INTEGER
125* On exit, INDX specifies the global index of the maximum ele-
126* ment in absolute value of the subvector sub( X ) only in its
127* scope (See below for further details).
128*
129* X (local input) DOUBLE PRECISION array
130* On entry, X is an array of dimension (LLD_X, Kx), where LLD_X
131* is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and
132* MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least
133* Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise.
134* Before entry, this array contains the local entries of the
135* matrix X.
136*
137* IX (global input) INTEGER
138* On entry, IX specifies X's global row index, which points to
139* the beginning of the submatrix sub( X ).
140*
141* JX (global input) INTEGER
142* On entry, JX specifies X's global column index, which points
143* to the beginning of the submatrix sub( X ).
144*
145* DESCX (global and local input) INTEGER array
146* On entry, DESCX is an integer array of dimension DLEN_. This
147* is the array descriptor for the matrix X.
148*
149* INCX (global input) INTEGER
150* On entry, INCX specifies the global increment for the
151* elements of X. Only two values of INCX are supported in
152* this version, namely 1 and M_X. INCX must not be zero.
153*
154* Further Details
155* ===============
156*
157* When the result of a vector-oriented PBLAS call is a scalar, this
158* scalar is set only within the process scope which owns the vector(s)
159* being operated on. Let sub( X ) be a generic term for the input vec-
160* tor(s). Then, the processes owning the correct the answer is determi-
161* ned as follows: if an operation involves more than one vector, the
162* processes receiving the result will be the union of the following set
163* of processes for each vector:
164*
165* If N = 1, M_X = 1 and INCX = 1, then one cannot determine if a pro-
166* cess row or process column owns the vector operand, therefore only
167* the process owning sub( X ) receives the correct result;
168*
169* If INCX = M_X, then sub( X ) is a vector distributed over a process
170* row. Each process in this row receives the result;
171*
172* If INCX = 1, then sub( X ) is a vector distributed over a process
173* column. Each process in this column receives the result;
174*
175* -- Written on April 1, 1998 by
176* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
177*
178* ---------------------------------------------------------------------
179*/
180/*
181* .. Local Scalars ..
182*/
183 char cbtop, cctop, rbtop, rctop;
184 Int Xcol, Xgindx, Xi, Xii, Ximb, Xinb, Xj, Xjj, Xlindx, Xld, Xmb,
185 Xnb, Xnp, Xnq, Xrow, Xsrc, ctxt, dist, dst, idumm, info, k,
186 maxpos, mycol, mydist, myrow, npcol, nprow, src;
187/*
188* .. Local Arrays ..
189*/
190 Int Xd[DLEN_];
191 double work[4];
192/* ..
193* .. Executable Statements ..
194*
195*/
196 PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd );
197#ifndef NO_ARGCHK
198/*
199* Test the input parameters
200*/
201 Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
202 if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) )
203 PB_Cchkvec( ctxt, "PDAMAX", "X", *N, 1, Xi, Xj, Xd, *INCX, 7, &info );
204 if( info ) { PB_Cabort( ctxt, "PDAMAX", info ); return; }
205#endif
206/*
207* Initialize INDX and AMAX
208*/
209 *INDX = 0; *AMAX = ZERO;
210/*
211* Quick return if possible
212*/
213 if( *N == 0 ) return;
214/*
215* Retrieve process grid information
216*/
217#ifdef NO_ARGCHK
218 Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
219#endif
220/*
221* Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol
222*/
223 PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj,
224 &Xrow, &Xcol );
225/*
226* Handle degenerate case separately, sub( X )'s scope is just one process
227*/
228 if( ( *INCX == 1 ) && ( Xd[M_] == 1 ) && ( *N == 1 ) )
229 {
230/*
231* Make sure I own some data and compute INDX and AMAX
232*/
233 if( ( ( myrow == Xrow ) || ( Xrow < 0 ) ) &&
234 ( ( mycol == Xcol ) || ( Xcol < 0 ) ) )
235 {
236 *INDX = *JX; *AMAX = *Mptr(X,Xii,Xjj,Xd[LLD_],1);
237 }
238 return;
239 }
240 else if( *INCX == Xd[M_] )
241 {
242/*
243* sub( X ) resides in (a) process row(s)
244*/
245 if( ( myrow == Xrow ) || ( Xrow < 0 ) )
246 {
247 rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET );
248
249 if( ( rctop == CTOP_DEFAULT ) || ( rctop == CTOP_TREE1 ) )
250 {
251/*
252* Inline the 1-tree combine for communication savings
253*/
254 Xinb = Xd[INB_ ]; Xnb = Xd[NB_ ]; Xsrc = Xd[CSRC_];
255 Xnq = PB_Cnumroc( *N, Xj, Xinb, Xnb, mycol, Xsrc, npcol );
256/*
257* Make sure I own some data and compute local INDX and AMAX
258*/
259 if( Xnq > 0 )
260 {
261 Xld = Xd[LLD_];
262 Xlindx = Xjj - 1 +
263 idamax_( &Xnq, ((char*)Mptr(X,Xii,Xjj,Xld,1)), &Xld );
264 Mindxl2g( Xgindx, Xlindx, Xinb, Xnb, mycol, Xsrc, npcol );
265 work[0] = *Mptr(X,Xii,Xlindx,Xld,1);
266 work[1] = ((double)( Xgindx+1 ));
267 }
268 else
269 {
270 work[0] = ZERO;
271 work[1] = ZERO;
272 }
273/*
274* Combine the local results using a 1-tree topology within process column 0
275* if npcol > 1 or Xcol >= 0, i.e sub( X ) is distributed.
276*/
277 if( ( npcol >= 2 ) && ( Xcol >= 0 ) )
278 {
279 mydist = mycol;
280 k = 1;
281l_10:
282 if( mydist & 1 )
283 {
284 dist = k * ( mydist - 1 );
285 dst = MPosMod( dist, npcol );
286 Cdgesd2d( ctxt, 2, 1, ((char*)work), 2, myrow, dst );
287 goto l_20;
288 }
289 else
290 {
291 dist = mycol + k;
292 src = MPosMod( dist, npcol );
293
294 if( mycol < src )
295 {
296 Cdgerv2d( ctxt, 2, 1, ((char*) &work[2]), 2, myrow,
297 src );
298 if( ABS( work[0] ) < ABS( work[2] ) )
299 { work[0] = work[2]; work[1] = work[3]; }
300 }
301 mydist >>= 1;
302 }
303 k <<= 1;
304
305 if( k < npcol ) goto l_10;
306l_20:
307/*
308* Process column 0 broadcasts the combined values of INDX and AMAX within
309* their process row.
310*/
311 rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET );
312 if( mycol == 0 )
313 {
314 Cdgebs2d( ctxt, ROW, &rbtop, 2, 1, ((char*)work), 2 );
315 }
316 else
317 {
318 Cdgebr2d( ctxt, ROW, &rbtop, 2, 1, ((char*)work), 2,
319 myrow, 0 );
320 }
321 }
322/*
323* Set INDX and AMAX to the replicated answers contained in work. If AMAX is
324* zero, then select a coherent INDX.
325*/
326 *AMAX = work[0];
327 *INDX = ( ( *AMAX == ZERO ) ? ( *JX ) : ( (Int)(work[1]) ) );
328 }
329 else
330 {
331/*
332* Otherwise use the current topology settings to combine the results
333*/
334 Xinb = Xd[INB_ ]; Xnb = Xd[NB_ ]; Xsrc = Xd[CSRC_];
335 Xnq = PB_Cnumroc( *N, Xj, Xinb, Xnb, mycol, Xsrc, npcol );
336/*
337* Make sure I own some data and compute local INDX and AMAX
338*/
339 if( Xnq > 0 )
340 {
341/*
342* Compute the local maximum and its corresponding local index
343*/
344 Xld = Xd[LLD_];
345 Xlindx = Xjj - 1 +
346 idamax_( &Xnq, ((char*)Mptr(X,Xii,Xjj,Xld,1)), &Xld );
347 *AMAX = *Mptr(X,Xii,Xlindx,Xld,1);
348 }
349 else
350 {
351 *AMAX = ZERO;
352 }
353
354 if( Xcol >= 0 )
355 {
356/*
357* Combine leave on all the local maximum if Xcol >= 0, i.e sub( X ) is
358* distributed
359*/
360 Cdgamx2d( ctxt, ROW, &rctop, 1, 1, ((char*)AMAX), 1,
361 &idumm, &maxpos, 1, -1, mycol );
362/*
363* Broadcast the corresponding global index
364*/
365 if( *AMAX != ZERO )
366 {
367 rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET );
368 if( mycol == maxpos )
369 {
370 Mindxl2g( Xgindx, Xlindx, Xinb, Xnb, mycol, Xsrc, npcol );
371 *INDX = Xgindx + 1;
372 Cigebs2d( ctxt, ROW, &rbtop, 1, 1, ((char*)INDX), 1 );
373 }
374 else
375 {
376 Cigebr2d( ctxt, ROW, &rbtop, 1, 1, ((char*)INDX), 1,
377 myrow, maxpos );
378 }
379 }
380 else
381 {
382/*
383* If AMAX is zero, then select a coherent INDX.
384*/
385 *INDX = *JX;
386 }
387 }
388 else
389 {
390/*
391* sub( X ) is not distributed. If AMAX is zero, then select a coherent INDX.
392*/
393 *INDX = ( ( *AMAX == ZERO ) ? ( *JX ) : Xlindx + 1 );
394 }
395 }
396 }
397 return;
398 }
399 else
400 {
401/*
402* sub( X ) resides in (a) process column(s)
403*/
404 if( ( mycol == Xcol ) || ( Xcol < 0 ) )
405 {
406 cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET );
407
408 if( ( cctop == CTOP_DEFAULT ) || ( cctop == CTOP_TREE1 ) )
409 {
410/*
411* Inline the 1-tree combine for communication savings
412*/
413 Ximb = Xd[IMB_ ]; Xmb = Xd[MB_ ]; Xsrc = Xd[RSRC_];
414 Xnp = PB_Cnumroc( *N, Xi, Ximb, Xmb, myrow, Xsrc, nprow );
415/*
416* Make sure I own some data and compute local INDX and AMAX
417*/
418 if( Xnp > 0 )
419 {
420 Xld = Xd[LLD_];
421 Xlindx = Xii - 1 +
422 idamax_( &Xnp, ((char*)Mptr(X,Xii,Xjj,Xld,1)), INCX );
423 Mindxl2g( Xgindx, Xlindx, Ximb, Xmb, myrow, Xsrc, nprow );
424 work[0] = *Mptr(X,Xlindx,Xjj,Xld,1);
425 work[1] = ((double)( Xgindx+1 ));
426 }
427 else
428 {
429 work[0] = ZERO;
430 work[1] = ZERO;
431 }
432/*
433* Combine the local results using a 1-tree topology within process row 0
434* if nprow > 1 or Xrow >= 0, i.e sub( X ) is distributed.
435*/
436 if( ( nprow >= 2 ) && ( Xrow >= 0 ) )
437 {
438 mydist = myrow;
439 k = 1;
440l_30:
441 if( mydist & 1 )
442 {
443 dist = k * ( mydist - 1 );
444 dst = MPosMod( dist, nprow );
445 Cdgesd2d( ctxt, 2, 1, ((char*)work), 2, dst, mycol );
446 goto l_40;
447 }
448 else
449 {
450 dist = myrow + k;
451 src = MPosMod( dist, nprow );
452
453 if( myrow < src )
454 {
455 Cdgerv2d( ctxt, 2, 1, ((char*) &work[2]), 2,
456 src, mycol );
457 if( ABS( work[0] ) < ABS( work[2] ) )
458 { work[0] = work[2]; work[1] = work[3]; }
459 }
460 mydist >>= 1;
461 }
462 k <<= 1;
463
464 if( k < nprow ) goto l_30;
465l_40:
466/*
467* Process row 0 broadcasts the combined values of INDX and AMAX within their
468* process column.
469*/
470 cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
471 if( myrow == 0 )
472 {
473 Cdgebs2d( ctxt, COLUMN, &cbtop, 2, 1, ((char*)work), 2 );
474 }
475 else
476 {
477 Cdgebr2d( ctxt, COLUMN, &cbtop, 2, 1, ((char*)work), 2,
478 0, mycol );
479 }
480 }
481/*
482* Set INDX and AMAX to the replicated answers contained in work. If AMAX is
483* zero, then select a coherent INDX.
484*/
485 *AMAX = work[0];
486 *INDX = ( ( *AMAX == ZERO ) ? ( *IX ) : ( (Int)(work[1]) ) );
487 }
488 else
489 {
490/*
491* Otherwise use the current topology settings to combine the results
492*/
493 Ximb = Xd[IMB_ ]; Xmb = Xd[MB_ ]; Xsrc = Xd[RSRC_];
494 Xnp = PB_Cnumroc( *N, Xi, Ximb, Xmb, myrow, Xsrc, nprow );
495/*
496* Make sure I own some data and compute local INDX and AMAX
497*/
498
499 if( Xnp > 0 )
500 {
501/*
502* Compute the local maximum and its corresponding local index
503*/
504 Xld = Xd[LLD_];
505 Xlindx = Xii - 1 +
506 idamax_( &Xnp, ((char*)Mptr(X,Xii,Xjj,Xld,1)), INCX );
507 *AMAX = *Mptr(X,Xlindx,Xjj,Xld,1);
508 }
509 else
510 {
511 *AMAX = ZERO;
512 }
513
514 if( Xrow >= 0 )
515 {
516/*
517* Combine leave on all the local maximum if Xrow >= 0, i.e sub( X ) is
518* distributed.
519*/
520 Cdgamx2d( ctxt, COLUMN, &cctop, 1, 1, ((char*)AMAX), 1,
521 &maxpos, &idumm, 1, -1, mycol );
522/*
523* Broadcast the corresponding global index
524*/
525 if( *AMAX != ZERO )
526 {
527 cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
528 if( myrow == maxpos )
529 {
530 Mindxl2g( Xgindx, Xlindx, Ximb, Xmb, myrow, Xsrc, nprow );
531 *INDX = Xgindx + 1;
532 Cigebs2d( ctxt, COLUMN, &cbtop, 1, 1, ((char*)INDX), 1 );
533 }
534 else
535 {
536 Cigebr2d( ctxt, COLUMN, &cbtop, 1, 1, ((char*)INDX), 1,
537 maxpos, mycol );
538 }
539 }
540 else
541 {
542/*
543* If AMAX is zero, then select a coherent INDX.
544*/
545 *INDX = *IX;
546 }
547 }
548 else
549 {
550/*
551* sub( X ) is not distributed. If AMAX is zero, then select a coherent INDX.
552*/
553 *INDX = ( ( *AMAX == ZERO ) ? ( *IX ) : Xlindx + 1 );
554 }
555 }
556 }
557 return;
558 }
559/*
560* End of PDAMAX
561*/
562}
#define Int
Definition Bconfig.h:22
#define TOP_GET
Definition PBblacs.h:50
#define COLUMN
Definition PBblacs.h:45
void Cdgebr2d()
#define COMBINE
Definition PBblacs.h:49
void Cdgerv2d()
void Cdgebs2d()
void Cdgamx2d()
void Cigebs2d()
#define CTOP_TREE1
Definition PBblacs.h:34
void Cdgesd2d()
#define ROW
Definition PBblacs.h:46
#define CTOP_DEFAULT
Definition PBblacs.h:26
void Cblacs_gridinfo()
#define BCAST
Definition PBblacs.h:48
void Cigebr2d()
#define idamax_
Definition PBblas.h:135
#define pdamax_
Definition PBpblas.h:90
#define CTXT_
Definition PBtools.h:38
#define MB_
Definition PBtools.h:43
void PB_Cabort()
void PB_Cchkvec()
void PB_Cinfog2l()
#define Mptr(a_, i_, j_, lda_, siz_)
Definition PBtools.h:132
#define LLD_
Definition PBtools.h:47
Int PB_Cnumroc()
char * PB_Ctop()
#define RSRC_
Definition PBtools.h:45
#define Mindxl2g(ig_, il_, inb_, nb_, proc_, srcproc_, nprocs_)
Definition PBtools.h:170
#define M_
Definition PBtools.h:39
#define INB_
Definition PBtools.h:42
#define MPosMod(I, d)
Definition PBtools.h:95
#define ABS(a_)
Definition PBtools.h:75
void PB_CargFtoC()
#define CSRC_
Definition PBtools.h:46
#define IMB_
Definition PBtools.h:41
#define ZERO
Definition PBtools.h:66
#define DLEN_
Definition PBtools.h:48
#define NB_
Definition PBtools.h:44