1 COMPLEX FUNCTION pclatra( N, A, IA, JA, DESCA )
3 * -- scalapack auxiliary routine(version 1.7) --
4 * university of tennessee, knoxville, oak ridge national laboratory,
5 * and university of california, berkeley.
8 * .. scalar arguments ..
11 * .. array arguments ..
19 *
pclatra computes the trace of an n-by-n distributed matrix sub( a )
20 * denoting a( ia:ia+n-1, ja:ja+n-1 ). the result is left on every
21 * process of the grid.
26 * each global
data object is described by an
associated description
27 * vector. this vector stores the information required to establish
28 * the mapping between an object element and its corresponding process
29 * and memory location.
31 * let a be a generic term for any 2d block cyclicly distributed array.
32 * such a global array has an
associated description vector desca.
33 * in the following comments, the
character _ should be read as
34 *
"of the global array".
36 * notation stored in explanation
37 * --------------- -------------- --------------------------------------
38 * dtype_a(global) desca( dtype_ )the descriptor type. in this
case,
40 * ctxt_a(global) desca( ctxt_ ) the blacs context handle, indicating
41 * the blacs process grid a is distribu-
42 * ted over. the context itself is glo-
43 * bal, but the handle(the integer
45 * m_a(global) desca( m_ ) the number of rows in the global
47 * n_a(global) desca( n_ ) the number of columns in the global
49 * mb_a(global) desca( mb_ ) the blocking factor used to distribute
50 * the rows of the array.
51 * nb_a(global) desca( nb_ ) the blocking factor used to distribute
52 * the columns of the array.
53 * rsrc_a(global) desca( rsrc_ ) the process row over which the first
54 * row of the array a is distributed.
55 * csrc_a(global) desca( csrc_ ) the process column over which the
56 * first column of the array a is
58 * lld_a(local) desca( lld_ ) the leading dimension of the local
59 * array. lld_a >=
max(1,locr(m_a)).
61 * let k be the number of rows or columns of a distributed matrix,
62 * and assume that its process grid has dimension p x q.
63 * locr( k ) denotes the number of elements of k that a process
64 * would receive
if k were distributed over the p processes of its
66 * similarly, locc( k ) denotes the number of elements of k that a
67 * process would receive
if k were distributed over the q processes of
69 * the values of locr() and locc() may be determined via a
call to the
70 * scalapack tool function,
numroc:
71 * locr( m ) =
numroc( m, mb_a, myrow, rsrc_a, nprow ),
72 * locc( n ) =
numroc( n, nb_a, mycol, csrc_a, npcol ).
73 * an upper bound for these quantities may be computed by:
74 * locr( m ) <= ceil( ceil(m/mb_a)/nprow )*mb_a
75 * locc( n ) <= ceil( ceil(n/nb_a)/npcol )*nb_a
80 * n(global input) integer
81 * the number of rows and columns to be operated on i.e the
82 * order of the distributed submatrix sub( a ). n >= 0.
84 * a(local input)
COMPLEX pointer into the local memory
85 * to an array of
dimension ( LLD_A, LOCc(JA+N-1) ). this array
86 *
contains the local pieces of the distributed matrix the trace
89 * ia(global input) integer
90 * the row index in the global array a indicating the first
93 * ja(global input) integer
94 * the column index in the global array a indicating the
95 * first column of sub( a ).
97 * desca(global and local input)
INTEGER array of dimension dlen_.
98 * the array descriptor for the distributed matrix a.
100 * ====================================================================
103 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
104 $ lld_, mb_, m_, nb_, n_, rsrc_
105 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
106 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
107 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
109 parameter( zero = 0.0e+0 )
111 * .. local scalars ..
112 INTEGER icurcol, icurrow, ii, ioffa, j, jb, jj, jn,
113 $ lda, ll, mycol, myrow, npcol, nprow
116 * ..
External subroutines ..
117 EXTERNAL blacs_gridinfo, cgsum2d,
infog2l
119 * ..
External functions ..
123 * ..
Intrinsic functions ..
126 * .. executable statements ..
128 * get grid parameters
130 CALL blacs_gridinfo( desca( ctxt_ ), nprow, npcol, myrow, mycol )
138 CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, ii, jj,
141 jn =
min(
iceil( ja, desca( nb_ ) ) * desca( nb_ ), ja+n-1 )
144 ioffa = ii + ( jj - 1 ) * lda
146 * handle first diagonal block separately
148 IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol )
THEN
149 DO 10 ll = ioffa, ioffa + (jb-1)*(lda+1), lda+1
150 trace = trace + a( ll )
153 IF( myrow.EQ.icurrow )
155 IF( mycol.EQ.icurcol )
156 $ ioffa = ioffa + jb*lda
157 icurrow = mod( icurrow+1, nprow )
158 icurcol = mod( icurcol+1, npcol )
160 * loop over the remaining block of columns
162 DO 30 j = jn+1, ja+n-1, desca( nb_ )
163 jb =
min( ja+n-j, desca( nb_ ) )
165 IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol )
THEN
166 DO 20 ll = ioffa, ioffa + (jb-1)*(lda+1), lda+1
167 trace = trace + a( ll )
170 IF( myrow.EQ.icurrow )
172 IF( mycol.EQ.icurcol )
173 $ ioffa = ioffa + jb*lda
174 icurrow = mod( icurrow+1, nprow )
175 icurcol = mod( icurcol+1, npcol )
178 CALL cgsum2d( desca( ctxt_ ),
'All',
' ', 1, 1, trace, 1, -1,