ScaLAPACK 2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdlamve.f
      SUBROUTINE PDLAMVE( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB,
     $                    DESCB, DWORK )
*
*     Contribution from the Department of Computing Science and HPC2N,
*     Umea University, Sweden
*
*  -- ScaLAPACK auxiliary routine (version 2.0.2) --
*     Univ. of Tennessee, Univ. of California Berkeley,
*     Univ. of Colorado Denver
*     May 1, 2012
*
      IMPLICIT NONE
*
*     .. Scalar Arguments ..
      CHARACTER          UPLO
      INTEGER            IA, IB, JA, JB, M, N
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * ), DESCB( * )
      DOUBLE PRECISION   A( * ), B( * ), DWORK( * )
*     ..
*
*  Purpose
*  =======
*
*  PDLAMVE copies all or part of a distributed matrix A to another
*  distributed matrix B.  There are no alignment assumptions at all
*  except that A and B are of the same size.
*
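*  As an illustration (a sketch, not part of the original source), a
*  caller holding two M-by-N block-cyclically distributed matrices A
*  and B could copy the lower triangle of sub( A ) into sub( B ) with
*  a call like the following, where DWORK is a local workspace at
*  least as long as the local array B:
*
*        CALL PDLAMVE( 'L', M, N, A, 1, 1, DESCA, B, 1, 1, DESCB,
*       $              DWORK )
*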
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclically distributed
*  array.  Such a global array has an associated description vector
*  DESCA.  In the following comments, the character _ should be read
*  as "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over.  The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
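*  As an illustrative sketch (not part of the original source), such a
*  descriptor is typically set up with the ScaLAPACK tool routine
*  DESCINIT; M, N, MB, NB, LLDA and INFO here stand for assumed
*  caller-side variables:
*
*        CALL DESCINIT( DESCA, M, N, MB, NB, 0, 0, ICTXT, LLDA,
*       $               INFO )
*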
*  Let K be the number of rows or columns of a distributed matrix, and
*  assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process would
*  receive if K were distributed over the p processes of its process
*  column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
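*  As a worked example (illustrative numbers, not part of the original
*  source): for M = 1000, MB_A = 64 and NPROW = 4, the bound above
*  gives
*          LOCr( 1000 ) <= ceil( ceil(1000/64)/4 )*64
*                        = ceil( 16/4 )*64 = 256,
*  so no process holds more than 256 of the 1000 rows locally.
*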
*  Arguments
*  =========
*
*  UPLO    (global input) CHARACTER
*          Specifies the part of the distributed matrix sub( A ) to be
*          copied:
*          = 'U':   Upper triangular part is copied; the strictly
*                   lower triangular part of sub( A ) is not referenced;
*          = 'L':   Lower triangular part is copied; the strictly
*                   upper triangular part of sub( A ) is not referenced;
*          Otherwise:  All of the matrix sub( A ) is copied.
*
*  M       (global input) INTEGER
*          The number of rows to be operated on, i.e., the number of
*          rows of the distributed submatrix sub( A ). M >= 0.
*
*  N       (global input) INTEGER
*          The number of columns to be operated on, i.e., the number
*          of columns of the distributed submatrix sub( A ). N >= 0.
*
*  A       (local input) DOUBLE PRECISION pointer into the local memory
*          to an array of dimension (LLD_A, LOCc(JA+N-1) ).  This array
*          contains the local pieces of the distributed matrix sub( A )
*          to be copied from.
*
*  IA      (global input) INTEGER
*          The row index in the global array A indicating the first
*          row of sub( A ).
*
*  JA      (global input) INTEGER
*          The column index in the global array A indicating the
*          first column of sub( A ).
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*
*  B       (local output) DOUBLE PRECISION pointer into the local
*          memory to an array of dimension (LLD_B, LOCc(JB+N-1) ).
*          This array contains on exit the local pieces of the
*          distributed matrix sub( B ).
*
*  IB      (global input) INTEGER
*          The row index in the global array B indicating the first
*          row of sub( B ).
*
*  JB      (global input) INTEGER
*          The column index in the global array B indicating the
*          first column of sub( B ).
*
*  DESCB   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix B.
*
*  DWORK   (local workspace) DOUBLE PRECISION array
*          If UPLO = 'U' or UPLO = 'L' and the number of processes is
*          greater than 1, the length of DWORK must be at least as
*          large as the length of the local array B.  Otherwise, DWORK
*          is not referenced.
*
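*  As an illustrative sketch (not part of the original source), this
*  length can be bounded from the descriptor of B with the ScaLAPACK
*  tool function NUMROC; BCOLS and LDW are assumed caller-side
*  variables:
*
*        BCOLS = NUMROC( DESCB( N_ ), DESCB( NB_ ), MYCOL,
*       $                DESCB( CSRC_ ), NPCOL )
*        LDW   = MAX( 1, DESCB( LLD_ )*BCOLS )
*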
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
*     ..
*     .. Local Scalars ..
      LOGICAL            UPPER, LOWER, FULL
      INTEGER            ICTXT, NPROW, NPCOL, MYROW, MYCOL, MYPROC,
     $                   NPROCS, AROWS, ACOLS, K, SPROC, SRSRC, SCSRC,
     $                   RPROC, RRSRC, RCSRC, COUNT, J, I, IIA, JJA,
     $                   IIB, JJB, BRSRC, BCSRC, RAROWS, RACOLS,
     $                   INDEX, IDUM, NUMREC, NUMSND
*     ..
*     .. External Subroutines ..
      EXTERNAL           DLAMOV, INFOG2L
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ICEIL, NUMROC, INDXL2G
      EXTERNAL           ICEIL, LSAME, NUMROC, INDXL2G
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MIN, MOD
*     ..
*     .. Executable Statements ..
*
*     Find underlying mesh properties.
*
      ICTXT = DESCA( CTXT_ )
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*     Decode input parameters.  LOWER is initialized explicitly so
*     that FULL is always computed from defined values.
*
      UPPER = LSAME( UPLO, 'U' )
      LOWER = .FALSE.
      IF( .NOT. UPPER ) LOWER = LSAME( UPLO, 'L' )
      FULL = ( .NOT. UPPER ) .AND. ( .NOT. LOWER )
*
*     Compute the total number of processes in the mesh.
*
      NPROCS = NPROW*NPCOL
*
*     Do the redistribution operation.
*
      IF( NPROCS.EQ.1 ) THEN
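*
*        A single process owns both matrices: copy locally with DLAMOV,
*        using the index arithmetic that maps entry (IA,JA) of the
*        column-major local array to its 1D position.
*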
         CALL DLAMOV( UPLO, M, N, A( (JA-1)*DESCA( LLD_ )+IA ),
     $                DESCA( LLD_ ), B( (JB-1)*DESCB( LLD_ )+IB ),
     $                DESCB( LLD_ ) )
      ELSEIF( FULL ) THEN
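*
*        The whole submatrix is requested: PDGEMR2D performs the
*        general redistribution between the two (possibly unaligned)
*        distributions directly.
*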
         CALL PDGEMR2D( M, N, A, IA, JA, DESCA, B, IB, JB, DESCB,
     $                  ICTXT )
      ELSE
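*
*        Triangular case: redistribute all of sub( A ) into the
*        workspace using B's distribution, then copy the requested
*        triangle into B with PDLACPY.
*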
         CALL PDGEMR2D( M, N, A, IA, JA, DESCA, DWORK, IB, JB, DESCB,
     $                  ICTXT )
         CALL PDLACPY( UPLO, M, N, DWORK, IB, JB, DESCB, B, IB, JB,
     $                 DESCB )
      END IF
*
      RETURN
*
*     End of PDLAMVE
*
      END