ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdlacpy.f
Go to the documentation of this file.
*     PDLACPY -- communication-free copy of a distributed submatrix.
*     The full description follows the argument declarations.
      SUBROUTINE PDLACPY( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB,
     $                    DESCB )
*
*  -- ScaLAPACK auxiliary routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     May 1, 1997
*
*     .. Scalar Arguments ..
      CHARACTER          UPLO
      INTEGER            IA, IB, JA, JB, M, N
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * ), DESCB( * )
      DOUBLE PRECISION   A( * ), B( * )
*     ..
*
*  Purpose
*  =======
*
*  PDLACPY copies all or part of a distributed matrix A to another
*  distributed matrix B.  No communication is performed, PDLACPY
*  performs a local copy sub( B ) := sub( A ), where sub( A ) denotes
*  A(IA:IA+M-1,JA:JA+N-1) and sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1).
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclically distributed
*  array.  Such a global array has an associated description vector
*  DESCA.  In the following comments, the character _ should be read
*  as "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Arguments
*  =========
*
*  UPLO    (global input) CHARACTER
*          Specifies the part of the distributed matrix sub( A ) to be
*          copied:
*          = 'U':   Upper triangular part is copied; the strictly
*                   lower triangular part of sub( A ) is not referenced;
*          = 'L':   Lower triangular part is copied; the strictly
*                   upper triangular part of sub( A ) is not referenced;
*          Otherwise:  All of the matrix sub( A ) is copied.
*
*  M       (global input) INTEGER
*          The number of rows to be operated on, i.e. the number of
*          rows of the distributed submatrix sub( A ).  M >= 0.
*          The routine returns immediately, copying nothing, if
*          M <= 0.
*
*  N       (global input) INTEGER
*          The number of columns to be operated on, i.e. the number of
*          columns of the distributed submatrix sub( A ).  N >= 0.
*          The routine returns immediately, copying nothing, if
*          N <= 0.
*
*  A       (local input) DOUBLE PRECISION pointer into the local memory
*          to an array of dimension (LLD_A, LOCc(JA+N-1) ). This array
*          contains the local pieces of the distributed matrix sub( A )
*          to be copied from.
*
*  IA      (global input) INTEGER
*          The row index in the global array A indicating the first
*          row of sub( A ).
*
*  JA      (global input) INTEGER
*          The column index in the global array A indicating the
*          first column of sub( A ).
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*
*  B       (local output) DOUBLE PRECISION pointer into the local memory
*          to an array of dimension (LLD_B, LOCc(JB+N-1) ). This array
*          contains on exit the local pieces of the distributed matrix
*          sub( B ) set as follows:
*
*          if UPLO = 'U', B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1),
*                         1<=i<=min(j,M), 1<=j<=N;
*          if UPLO = 'L', B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1),
*                         j<=i<=M, 1<=j<=N;
*          otherwise,     B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1),
*                         1<=i<=M, 1<=j<=N.
*
*  IB      (global input) INTEGER
*          The row index in the global array B indicating the first
*          row of sub( B ).
*
*  JB      (global input) INTEGER
*          The column index in the global array B indicating the
*          first column of sub( B ).
*
*  DESCB   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix B.
*
*  Further Details
*  ===============
*
*  All copying is delegated to PDLACP2.  When sub( A ) has no more
*  rows than remain in the block row containing IA, or no more
*  columns than remain in the block column containing JA, a single
*  PDLACP2 call is made.  Otherwise sub( A ) is cut into strips at
*  the block boundaries given by DESCA( MB_ ) and DESCA( NB_ ), with
*  one PDLACP2 call per strip:
*
*     UPLO = 'U':  strips of rows; each strip starts at its diagonal
*                  column.  Strips lying entirely below the diagonal
*                  (possible only when M > N) are skipped.
*     UPLO = 'L':  strips of columns; each strip starts at its
*                  diagonal row.  Strips lying entirely above the
*                  diagonal (possible only when N > M) are skipped.
*     otherwise :  strips of rows if M <= N, else strips of columns.
*
*  NOTE(review): the strip boundaries assume that ICEIL( I, NB )
*  returns ceil( I / NB ); ICEIL is not defined in this file.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
*     ..
*     .. Local Scalars ..
      INTEGER            I, IAA, IBB, IBLK, IN, ITMP, J, JAA, JBB,
     $                   JBLK, JN, JTMP, MBA, MN, NBA
*     ..
*     .. External Subroutines ..
      EXTERNAL           PDLACP2
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ICEIL
      EXTERNAL           ICEIL, LSAME
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MIN, MOD
*     ..
*     .. Executable Statements ..
*
*     Quick return if there is nothing to copy.  Non-positive sizes
*     are rejected here so that they never reach PDLACP2.
*
      IF( M.LE.0 .OR. N.LE.0 )
     $   RETURN
*
*     Row and column blocking factors of A, and the order of the
*     largest square leading part of sub( A ).
*
      MBA = DESCA( MB_ )
      NBA = DESCA( NB_ )
      MN = MIN( M, N )
*
*     IN (JN) is the global index of the last row (column) of
*     sub( A ) that lies in the same block as row IA (column JA).
*
      IN = MIN( ICEIL( IA, MBA )*MBA, IA+M-1 )
      JN = MIN( ICEIL( JA, NBA )*NBA, JA+N-1 )
*
      IF( M.LE.( MBA-MOD( IA-1, MBA ) ) .OR.
     $    N.LE.( NBA-MOD( JA-1, NBA ) ) ) THEN
*
*        sub( A ) fits in one block row or one block column:
*        a single call does the whole copy.
*
         CALL PDLACP2( UPLO, M, N, A, IA, JA, DESCA,
     $                 B, IB, JB, DESCB )
*
      ELSE IF( LSAME( UPLO, 'U' ) ) THEN
*
*        Upper triangle.  Copy the first (possibly partial) block
*        row, then one block row at a time, each starting at its
*        diagonal column.  Rows beyond MIN( M, N ) hold no upper
*        triangular entry, so the loop stops there and the column
*        count N-ITMP handed to PDLACP2 is always positive.
*
         CALL PDLACP2( UPLO, IN-IA+1, N, A, IA, JA, DESCA,
     $                 B, IB, JB, DESCB )
         DO 10 I = IN+1, IA+MN-1, MBA
            ITMP = I - IA
            IBLK = MIN( MBA, M-ITMP )
            IBB = IB + ITMP
            JBB = JB + ITMP
            JAA = JA + ITMP
            CALL PDLACP2( UPLO, IBLK, N-ITMP, A, I, JAA, DESCA,
     $                    B, IBB, JBB, DESCB )
   10    CONTINUE
*
      ELSE IF( LSAME( UPLO, 'L' ) ) THEN
*
*        Lower triangle.  Copy the first (possibly partial) block
*        column, then one block column at a time, each starting at
*        its diagonal row.  Columns beyond MIN( M, N ) hold no lower
*        triangular entry, so the loop stops there and the row
*        count M-JTMP handed to PDLACP2 is always positive.
*
         CALL PDLACP2( UPLO, M, JN-JA+1, A, IA, JA, DESCA,
     $                 B, IB, JB, DESCB )
         DO 20 J = JN+1, JA+MN-1, NBA
            JTMP = J - JA
            JBLK = MIN( NBA, N-JTMP )
            IBB = IB + JTMP
            JBB = JB + JTMP
            IAA = IA + JTMP
            CALL PDLACP2( UPLO, M-JTMP, JBLK, A, IAA, J, DESCA,
     $                    B, IBB, JBB, DESCB )
   20    CONTINUE
*
      ELSE IF( M.LE.N ) THEN
*
*        Full matrix, at least as wide as tall: copy it one block
*        row at a time, every strip spanning all N columns.
*
         CALL PDLACP2( UPLO, IN-IA+1, N, A, IA, JA, DESCA,
     $                 B, IB, JB, DESCB )
         DO 30 I = IN+1, IA+M-1, MBA
            ITMP = I - IA
            IBLK = MIN( MBA, M-ITMP )
            IBB = IB + ITMP
            CALL PDLACP2( UPLO, IBLK, N, A, I, JA, DESCA,
     $                    B, IBB, JB, DESCB )
   30    CONTINUE
*
      ELSE
*
*        Full matrix, taller than wide: copy it one block column
*        at a time, every strip spanning all M rows.
*
         CALL PDLACP2( UPLO, M, JN-JA+1, A, IA, JA, DESCA,
     $                 B, IB, JB, DESCB )
         DO 40 J = JN+1, JA+N-1, NBA
            JTMP = J - JA
            JBLK = MIN( NBA, N-JTMP )
            JBB = JB + JTMP
            CALL PDLACP2( UPLO, M, JBLK, A, IA, J, DESCA,
     $                    B, IB, JBB, DESCB )
   40    CONTINUE
*
      END IF
*
      RETURN
*
*     End of PDLACPY
*
      END
pdlacp2
subroutine pdlacp2(UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, DESCB)
Definition: pdlacp2.f:3
pdlacpy
subroutine pdlacpy(UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, DESCB)
Definition: pdlacpy.f:3
min
#define min(A, B)
Definition: pcgemr.c:181