1 COMPLEX FUNCTION pclatra( N, A, IA, JA, DESCA )
3* -- scalapack auxiliary routine(version 1.7) --
4* university of tennessee, knoxville, oak ridge national laboratory,
5* and university of california, berkeley.
8* .. scalar arguments ..
11* .. array arguments ..
19*
pclatra computes the trace of an n-by-n distributed matrix sub( a )
20* denoting a( ia:ia+n-1, ja:ja+n-1 ). the result is left on every
26* each global
data object is described by an
associated description
27* vector. this vector stores the information required to establish
28* the mapping between an object element and its corresponding process
31* let a be a generic term for any 2d block cyclicly distributed array.
32* such a global array has an
associated description vector desca.
33* in the following comments, the
character _ should be read as
34*
"of the global array".
36* notation stored in explanation
37* --------------- -------------- --------------------------------------
38* dtype_a(global) desca( dtype_ )the descriptor type. in this
case,
40* ctxt_a(global) desca( ctxt_ ) the blacs context handle, indicating
41* the blacs process grid a is distribu-
42* ted over. the context itself is glo-
43* bal, but the handle(the integer
45* m_a(global) desca( m_ ) the number of rows in the global
47* n_a(global) desca( n_ ) the number of columns in the global
49* mb_a(global) desca( mb_ ) the blocking factor used to distribute
50* the rows of the array.
51* nb_a(global) desca( nb_ ) the blocking factor used to distribute
52* the columns of the array.
53* rsrc_a(global) desca( rsrc_ ) the process row over which the first
54* row of the array a is distributed.
55* csrc_a(global) desca( csrc_ ) the process column over which the
56* first column of the array a is
58* lld_a(local) desca( lld_ ) the leading dimension of the local
59* array. lld_a >=
max(1,locr(m_a)).
61* let k be the number of rows or columns of a distributed matrix,
62* and assume that its process grid has dimension p x q.
63* locr( k ) denotes the number of elements of k that a process
64* would receive
if k were distributed over the p processes of its
66* similarly, locc( k ) denotes the number of elements of k that a
67* process would receive
if k were distributed over the q processes of
69* the values of locr() and locc() may be determined via a
call to the
70* scalapack tool function,
numroc:
71* locr( m ) =
numroc( m, mb_a, myrow, rsrc_a, nprow ),
72* locc( n ) =
numroc( n, nb_a, mycol, csrc_a, npcol ).
73* an upper bound for these quantities may be computed by:
74* locr( m ) <= ceil( ceil(m/mb_a)/nprow )*mb_a
75* locc( n ) <= ceil( ceil(n/nb_a)/npcol )*nb_a
80* n(global input) integer
81* the number of rows and columns to be operated on i.e the
82* order of the distributed submatrix sub( a ). n >= 0.
84* a(local input)
COMPLEX pointer into the local memory
85* to an array of
dimension ( LLD_A, LOCc(JA+N-1) ). this array
86*
contains the local pieces of the distributed matrix the trace
89* ia(global input) integer
90* the row index in the global array a indicating the first
93* ja(global input) integer
94* the column index in the global array a indicating the
95* first column of sub( a ).
97* desca(global and local input)
INTEGER array of dimension dlen_.
98* the array descriptor for the distributed matrix a.
100* ====================================================================
103 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
104 $ lld_, mb_, m_, nb_, n_, rsrc_
105 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
106 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
107 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
109 parameter( zero = 0.0e+0 )
112 INTEGER icurcol, icurrow, ii, ioffa, j, jb, jj, jn,
113 $ lda, ll, mycol, myrow, npcol, nprow
116* ..
External subroutines ..
117 EXTERNAL blacs_gridinfo, cgsum2d,
infog2l
119* ..
External functions ..
123* ..
Intrinsic functions ..
126* .. executable statements ..
130 CALL blacs_gridinfo( desca( ctxt_ ), nprow, npcol, myrow, mycol )
138 CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, ii, jj,
141 jn =
min(
iceil( ja, desca( nb_ ) ) * desca( nb_ ), ja+n-1 )
144 ioffa = ii + ( jj - 1 ) * lda
146* handle first diagonal block separately
148 IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol )
THEN
149 DO 10 ll = ioffa, ioffa + (jb-1)*(lda+1), lda+1
150 trace = trace + a( ll )
153 IF( myrow.EQ.icurrow )
155 IF( mycol.EQ.icurcol )
156 $ ioffa = ioffa + jb*lda
157 icurrow = mod( icurrow+1, nprow )
158 icurcol = mod( icurcol+1, npcol )
160* loop over the remaining block of columns
162 DO 30 j = jn+1, ja+n-1, desca( nb_ )
163 jb =
min( ja+n-j, desca( nb_ ) )
165 IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol )
THEN
166 DO 20 ll = ioffa, ioffa + (jb-1)*(lda+1), lda+1
167 trace = trace + a( ll )
170 IF( myrow.EQ.icurrow )
172 IF( mycol.EQ.icurcol )
173 $ ioffa = ioffa + jb*lda
174 icurrow = mod( icurrow+1, nprow )
175 icurcol = mod( icurcol+1, npcol )
178 CALL cgsum2d( desca( ctxt_ ),
'All',
' ', 1, 1, trace, 1, -1,