ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdlared1d.f
Go to the documentation of this file.
*  PDLARED1D -- replicate a block-cyclic 1D array on every process.
      SUBROUTINE PDLARED1D( N, IA, JA, DESC, BYCOL, BYALL, WORK, LWORK )
*
*  -- ScaLAPACK routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     December 12, 2005
*
      IMPLICIT NONE
*
*     .. Scalar Arguments ..
      INTEGER            IA, JA, LWORK, N
*     ..
*     .. Array Arguments ..
      INTEGER            DESC( * )
      DOUBLE PRECISION   BYALL( * ), BYCOL( * ), WORK( LWORK )
*     ..
*
*  Purpose
*  =======
*
*  PDLARED1D turns a block-cyclically distributed 1D array into a
*  fully replicated one.
*
*  On entry each process holds, in BYCOL, the elements of the
*  length-N array that belong to its process column.  The routine
*  visits the process columns 0, 1, ..., NPCOL-1 in turn.  For each
*  one, the process of the current row that sits in that column
*  copies its piece into WORK and broadcasts it with BLACS scope
*  'R'; the other processes of the row receive it.  Every process
*  then unpacks the blocks held in WORK into their global positions
*  in BYALL.
*
*  Each process row runs its own broadcasts from its own BYCOL
*  data.  BYALL therefore ends up identical on all processes only
*  if every process row enters with the same contents in BYCOL.
*
*  Notes
*  =====
*
*  DESC is a 2D block-cyclic array descriptor.  Its entries are
*  addressed through the index parameters declared below:
*
*  INDEX    MEANING
*  -------  ----------------------------------------------------------
*  DTYPE_   The descriptor type; 1 for this kind of descriptor.
*  CTXT_    The BLACS context handle of the process grid.
*  M_       The number of rows in the global array.
*  N_       The number of columns in the global array.
*  MB_      The blocking factor used to distribute the rows.
*  NB_      The blocking factor used to distribute the columns.
*  RSRC_    The process row holding the first row of the array.
*  CSRC_    The process column holding the first column of the array.
*  LLD_     The leading dimension of the local array.
*
*  This routine reads only DESC( CTXT_ ) and DESC( MB_ ).  In the
*  text below NB stands for the value DESC( MB_ ), and MYCOL and
*  NPCOL are the values returned by BLACS_GRIDINFO.
*
*  NOTE(review): the block size comes from the MB_ entry although
*  the data is split over process columns.  That equals the NB_
*  entry only for square blocks -- confirm that callers guarantee
*  DESC( MB_ ) = DESC( NB_ ).
*
*  The distribution is taken to start in process column 0: the
*  source argument passed to NUMROC is the literal 0 and
*  DESC( CSRC_ ) is not consulted.
*
*  Arguments
*  =========
*
*  N       (global input) INTEGER
*          The number of elements to be redistributed.  N >= 0.
*
*  IA      (global input) INTEGER
*          IA must be equal to 1.  Not referenced.
*
*  JA      (global input) INTEGER
*          JA must be equal to 1.  Not referenced.
*
*  DESC    (global/local input) INTEGER array of dimension DLEN_
*          A 2D array descriptor, which describes BYCOL.
*
*  BYCOL   (local input) DOUBLE PRECISION array
*          global dimension (N), local dimension (NP), where
*          NP = NUMROC( N, NB, MYCOL, 0, NPCOL ) is the number of
*          leading elements of BYCOL read by this process.
*
*  BYALL   (global output) DOUBLE PRECISION array, dimension (N)
*          The complete array, assembled on this process.
*          BYALL( i ) = BYCOL( NUMROC( i, NB, MYCOL, 0, NPCOL ) )
*          as held on the processes whose
*          MYCOL = MOD( ( i-1 )/NB, NPCOL ).
*
*  WORK    (local workspace) DOUBLE PRECISION array,
*          dimension (LWORK)
*          Staging buffer for the piece currently being broadcast.
*
*  LWORK   (local input) INTEGER
*          The size of WORK.
*          LWORK >= NUMROC( N, NB, 0, 0, NPCOL ).
*          The value is not checked by this routine.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1 )
      PARAMETER          ( DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4 )
      PARAMETER          ( MB_ = 5, NB_ = 6, RSRC_ = 7, CSRC_ = 8 )
      PARAMETER          ( LLD_ = 9, DLEN_ = 9 )
*     ..
*     .. Local Scalars ..
*     BLKSZ   block size of the distribution
*     NLOC    number of elements owned by process column SRCCOL
*     LOFF    zero-based offset of the current block inside WORK
*     GOFF    zero-based offset of the current block inside BYALL
*     BLKLEN  length of the current block (the last may be short)
      INTEGER            BLKLEN, BLKSZ, GOFF, LOFF, MYCOL, MYROW,
     $                   NLOC, NPCOL, NPROW, SRCCOL
*     ..
*     .. External Functions ..
      INTEGER            NUMROC
      EXTERNAL           NUMROC
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, DCOPY, DGEBR2D, DGEBS2D
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MIN
*     ..
*     .. Executable Statements ..
*
*     This is just to keep ftnchek happy: it references every
*     descriptor index parameter once.  The product of these positive
*     constants is never negative, so the RETURN is never taken.
*
      IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*
     $    N_*RSRC_.LT.0 ) THEN
         RETURN
      END IF
*
      CALL BLACS_GRIDINFO( DESC( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL )
      BLKSZ = DESC( MB_ )
*
*     One broadcast per process column of the grid.
*
      DO SRCCOL = 0, NPCOL - 1
*
*        NLOC depends only on N, BLKSZ, SRCCOL and NPCOL, so sender
*        and receivers compute the same message length.
*
         NLOC = NUMROC( N, BLKSZ, SRCCOL, 0, NPCOL )
*
         IF( MYCOL.NE.SRCCOL ) THEN
*
*           Receive the piece from the process of my row that sits
*           in column SRCCOL.
*
            CALL DGEBR2D( DESC( CTXT_ ), 'R', ' ', 1, NLOC, WORK, 1,
     $                    MYROW, SRCCOL )
         ELSE
*
*           I own this piece: stage it in WORK and send it along
*           my process row.
*
            CALL DCOPY( NLOC, BYCOL, 1, WORK, 1 )
            CALL DGEBS2D( DESC( CTXT_ ), 'R', ' ', 1, NLOC, WORK, 1 )
         END IF
*
*        Unpack.  Consecutive blocks in WORK are BLKSZ*NPCOL elements
*        apart in BYALL; the first one starts at offset SRCCOL*BLKSZ.
*
         GOFF = SRCCOL*BLKSZ
         DO LOFF = 0, NLOC - 1, BLKSZ
            BLKLEN = MIN( BLKSZ, NLOC-LOFF )
            CALL DCOPY( BLKLEN, WORK( LOFF+1 ), 1, BYALL( GOFF+1 ), 1 )
            GOFF = GOFF + BLKSZ*NPCOL
         END DO
      END DO
*
      RETURN
*
*     End of PDLARED1D
*
      END
pdlared1d
subroutine pdlared1d(N, IA, JA, DESC, BYCOL, BYALL, WORK, LWORK)
Definition: pdlared1d.f:2
min
#define min(A, B)
Definition: pcgemr.c:181