ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdlacp2.f
Go to the documentation of this file.
1  SUBROUTINE pdlacp2( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB,
2  $ DESCB )
3 *
4 * -- ScaLAPACK routine (version 2.0.2) --
5 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
6 * May 1 2012
7 *
8 * .. Scalar Arguments ..
9  CHARACTER UPLO
10  INTEGER IA, IB, JA, JB, M, N
11 * ..
12 * .. Array Arguments ..
13  INTEGER DESCA( * ), DESCB( * )
14  DOUBLE PRECISION A( * ), B( * )
15 * ..
16 *
17 * Purpose
18 * =======
19 *
20 * PDLACP2 copies all or part of a distributed matrix A to another
21 * distributed matrix B. No communication is performed, PDLACP2
22 * performs a local copy sub( A ) := sub( B ), where sub( A ) denotes
23 * A(IA:IA+M-1,JA:JA+N-1) and sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1).
24 * PDLACP2 requires that only dimension of the matrix operands is
25 * distributed.
26 *
27 * Notes
28 * =====
29 *
30 * Each global data object is described by an associated description
31 * vector. This vector stores the information required to establish
32 * the mapping between an object element and its corresponding process
33 * and memory location.
34 *
35 * Let A be a generic term for any 2D block cyclicly distributed array.
36 * Such a global array has an associated description vector DESCA.
37 * In the following comments, the character _ should be read as
38 * "of the global array".
39 *
40 * NOTATION STORED IN EXPLANATION
41 * --------------- -------------- --------------------------------------
42 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
43 * DTYPE_A = 1.
44 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
45 * the BLACS process grid A is distribu-
46 * ted over. The context itself is glo-
47 * bal, but the handle (the integer
48 * value) may vary.
49 * M_A (global) DESCA( M_ ) The number of rows in the global
50 * array A.
51 * N_A (global) DESCA( N_ ) The number of columns in the global
52 * array A.
53 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
54 * the rows of the array.
55 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
56 * the columns of the array.
57 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
58 * row of the array A is distributed.
59 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
60 * first column of the array A is
61 * distributed.
62 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
63 * array. LLD_A >= MAX(1,LOCr(M_A)).
64 *
65 * Let K be the number of rows or columns of a distributed matrix,
66 * and assume that its process grid has dimension p x q.
67 * LOCr( K ) denotes the number of elements of K that a process
68 * would receive if K were distributed over the p processes of its
69 * process column.
70 * Similarly, LOCc( K ) denotes the number of elements of K that a
71 * process would receive if K were distributed over the q processes of
72 * its process row.
73 * The values of LOCr() and LOCc() may be determined via a call to the
74 * ScaLAPACK tool function, NUMROC:
75 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
76 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
77 * An upper bound for these quantities may be computed by:
78 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
79 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
80 *
81 * Arguments
82 * =========
83 *
84 * UPLO (global input) CHARACTER
85 * Specifies the part of the distributed matrix sub( A ) to be
86 * copied:
87 * = 'U': Upper triangular part is copied; the strictly
88 * lower triangular part of sub( A ) is not referenced;
89 * = 'L': Lower triangular part is copied; the strictly
90 * upper triangular part of sub( A ) is not referenced;
91 * Otherwise: All of the matrix sub( A ) is copied.
92 *
93 * M (global input) INTEGER
94 * The number of rows to be operated on i.e the number of rows
95 * of the distributed submatrix sub( A ). M >= 0.
96 *
97 * N (global input) INTEGER
98 * The number of columns to be operated on i.e the number of
99 * columns of the distributed submatrix sub( A ). N >= 0.
100 *
101 * A (local input) DOUBLE PRECISION pointer into the local memory
102 * to an array of dimension (LLD_A, LOCc(JA+N-1) ). This array
103 * contains the local pieces of the distributed matrix sub( A )
104 * to be copied from.
105 *
106 * IA (global input) INTEGER
107 * The row index in the global array A indicating the first
108 * row of sub( A ).
109 *
110 * JA (global input) INTEGER
111 * The column index in the global array A indicating the
112 * first column of sub( A ).
113 *
114 * DESCA (global and local input) INTEGER array of dimension DLEN_.
115 * The array descriptor for the distributed matrix A.
116 *
117 * B (local output) DOUBLE PRECISION pointer into the local memory
118 * to an array of dimension (LLD_B, LOCc(JB+N-1) ). This array
119 * contains on exit the local pieces of the distributed matrix
120 * sub( B ) set as follows:
121 *
122 * if UPLO = 'U', B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1),
123 * 1<=i<=j, 1<=j<=N;
124 * if UPLO = 'L', B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1),
125 * j<=i<=M, 1<=j<=N;
126 * otherwise, B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1),
127 * 1<=i<=M, 1<=j<=N.
128 *
129 * IB (global input) INTEGER
130 * The row index in the global array B indicating the first
131 * row of sub( B ).
132 *
133 * JB (global input) INTEGER
134 * The column index in the global array B indicating the
135 * first column of sub( B ).
136 *
137 * DESCB (global and local input) INTEGER array of dimension DLEN_.
138 * The array descriptor for the distributed matrix B.
139 *
140 * =====================================================================
141 *
142 * .. Parameters ..
143  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
144  $ lld_, mb_, m_, nb_, n_, rsrc_
145  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
146  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
147  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
148 * ..
149 * .. Local Scalars ..
150  INTEGER HEIGHT, IACOL, IAROW, IBASE, IBCOL, IBROW,
151  $ icoffa, iia, iiaa, iib, iibb, iibega, iibegb,
152  $ iienda, iinxta, iinxtb, ileft, iright, iroffa,
153  $ itop, jja, jjaa, jjb, jjbb, jjbega, jjbegb,
154  $ jjenda, jjnxta, jjnxtb, lda, ldb, mba, mp,
155  $ mpaa, mycol, mydist, myrow, nba, npcol, nprow,
156  $ nq, nqaa, wide
157 * ..
158 * .. External Subroutines ..
159  EXTERNAL blacs_gridinfo, dlamov, infog2l
160 * ..
161 * .. External Functions ..
162  LOGICAL LSAME
163  INTEGER ICEIL, NUMROC
164  EXTERNAL iceil, lsame, numroc
165 * ..
166 * .. Intrinsic Functions ..
167  INTRINSIC max, min, mod
168 * ..
169 * .. Executable Statements ..
170 *
171  IF( m.EQ.0 .OR. n.EQ.0 )
172  $ RETURN
173 *
174 * Get grid parameters
175 *
176  CALL blacs_gridinfo( desca( ctxt_ ), nprow, npcol, myrow, mycol )
177 *
178  CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, iia, jja,
179  $ iarow, iacol )
180  CALL infog2l( ib, jb, descb, nprow, npcol, myrow, mycol, iib, jjb,
181  $ ibrow, ibcol )
182 *
183  mba = desca( mb_ )
184  nba = desca( nb_ )
185  lda = desca( lld_ )
186  iroffa = mod( ia-1, mba )
187  icoffa = mod( ja-1, nba )
188  ldb = descb( lld_ )
189 *
190  IF( n.LE.( nba-icoffa ) ) THEN
191 *
192 * It is assumed that the local columns JJA:JJA+N-1 of the matrix
193 * A are in the same process column (IACOL).
194 *
195 * N
196 * JJA JJA+N-1
197 * / --------------------- \
198 * IROFFA| | | |
199 * \ |...................| | ( IAROW )
200 * IIA |x | | MBA = DESCA( MB_ )
201 * | x | |
202 * |--x----------------| /
203 * | x |
204 * | x | ITOP
205 * | x | |
206 * | x | /-------\
207 * |-------x-----------| |-------x-----------|
208 * | x | | x |
209 * | x | | x |
210 * | x | | x |
211 * | x | | x |
212 * |------------x------| |------------x------|
213 * | x | \____________/
214 * | x | |
215 * | x | IBASE
216 * | x |
217 * |-----------------x-| Local picture
218 * | x|
219 * | |
220 * | |
221 * | |
222 * |-------------------|
223 * | |
224 * . .
225 * . .
226 * . (IACOL) .
227 *
228  IF( mycol.EQ.iacol ) THEN
229 *
230  mp = numroc( m+iroffa, mba, myrow, iarow, nprow )
231  IF( mp.LE.0 )
232  $ RETURN
233  IF( myrow.EQ.iarow )
234  $ mp = mp - iroffa
235  mydist = mod( myrow-iarow+nprow, nprow )
236  itop = mydist * mba - iroffa
237 *
238  IF( lsame( uplo, 'U' ) ) THEN
239 *
240  itop = max( 0, itop )
241  iibega = iia
242  iienda = iia + mp - 1
243  iinxta = min( iceil( iibega, mba ) * mba, iienda )
244  iibegb = iib
245  iinxtb = iibegb + iinxta - iibega
246 *
247  10 CONTINUE
248  IF( ( n-itop ).GT.0 ) THEN
249  CALL dlamov( uplo, iinxta-iibega+1, n-itop,
250  $ a( iibega+(jja+itop-1)*lda ), lda,
251  $ b( iibegb+(jjb+itop-1)*ldb ), ldb )
252  mydist = mydist + nprow
253  itop = mydist * mba - iroffa
254  iibega = iinxta + 1
255  iinxta = min( iinxta+mba, iienda )
256  iibegb = iinxtb + 1
257  iinxtb = iibegb + iinxta - iibega
258  GO TO 10
259  END IF
260 *
261  ELSE IF( lsame( uplo, 'L' ) ) THEN
262 *
263  mpaa = mp
264  iiaa = iia
265  jjaa = jja
266  iibb = iib
267  jjbb = jjb
268  ibase = min( itop + mba, n )
269  itop = min( max( 0, itop ), n )
270 *
271  20 CONTINUE
272  IF( jjaa.LE.( jja+n-1 ) ) THEN
273  height = ibase - itop
274  CALL dlamov( 'All', mpaa, itop-jjaa+jja,
275  $ a( iiaa+(jjaa-1)*lda ), lda,
276  $ b( iibb+(jjbb-1)*ldb ), ldb )
277  CALL dlamov( uplo, mpaa, height,
278  $ a( iiaa+(jja+itop-1)*lda ), lda,
279  $ b( iibb+(jjb+itop-1)*ldb ), ldb )
280  mpaa = max( 0, mpaa - height )
281  iiaa = iiaa + height
282  jjaa = jja + ibase
283  iibb = iibb + height
284  jjbb = jjb + ibase
285  mydist = mydist + nprow
286  itop = mydist * mba - iroffa
287  ibase = min( itop + mba, n )
288  itop = min( itop, n )
289  GO TO 20
290  END IF
291 *
292  ELSE
293 *
294  CALL dlamov( 'All', mp, n, a( iia+(jja-1)*lda ),
295  $ lda, b( iib+(jjb-1)*ldb ), ldb )
296 *
297  END IF
298 *
299  END IF
300 *
301  ELSE IF( m.LE.( mba-iroffa ) ) THEN
302 *
303 * It is assumed that the local rows IIA:IIA+M-1 of the matrix A
304 * are in the same process row (IAROW).
305 *
306 * ICOFFA
307 * / \JJA
308 * IIA ------------------ .... --------
309 * | .x | | | / | | \
310 * | . x | | | ILEFT| | | |
311 * | . x | | | | | |
312 * | . x | | \ x | |
313 * | . |x | | |x | | IRIGHT
314 * | . | x | | | x | |
315 * (IAROW) | . | x | | | x | |
316 * | . | x| | | x| |
317 * | . | x | | x /
318 * | . | |x | | |
319 * | . | | x | | |
320 * | . | | x | | |
321 * | . | | x| | |
322 * IIA+M-1 ------------------ .... -------
323 * NB_A
324 * (IACOL) Local picture
325 *
326  IF( myrow.EQ.iarow ) THEN
327 *
328  nq = numroc( n+icoffa, nba, mycol, iacol, npcol )
329  IF( nq.LE.0 )
330  $ RETURN
331  IF( mycol.EQ.iacol )
332  $ nq = nq - icoffa
333  mydist = mod( mycol-iacol+npcol, npcol )
334  ileft = mydist * nba - icoffa
335 *
336  IF( lsame( uplo, 'L' ) ) THEN
337 *
338  ileft = max( 0, ileft )
339  jjbega = jja
340  jjenda = jja + nq - 1
341  jjnxta = min( iceil( jjbega, nba ) * nba, jjenda )
342  jjbegb = jjb
343  jjnxtb = jjbegb + jjnxta - jjbega
344 *
345  30 CONTINUE
346  IF( ( m-ileft ).GT.0 ) THEN
347  CALL dlamov( uplo, m-ileft, jjnxta-jjbega+1,
348  $ a( iia+ileft+(jjbega-1)*lda ), lda,
349  $ b( iib+ileft+(jjbegb-1)*ldb ), ldb )
350  mydist = mydist + npcol
351  ileft = mydist * nba - icoffa
352  jjbega = jjnxta +1
353  jjnxta = min( jjnxta+nba, jjenda )
354  jjbegb = jjnxtb +1
355  jjnxtb = jjbegb + jjnxta - jjbega
356  GO TO 30
357  END IF
358 *
359  ELSE IF( lsame( uplo, 'U' ) ) THEN
360 *
361  nqaa = nq
362  iiaa = iia
363  jjaa = jja
364  iibb = iib
365  jjbb = jjb
366  iright = min( ileft + nba, m )
367  ileft = min( max( 0, ileft ), m )
368 *
369  40 CONTINUE
370  IF( iiaa.LE.( iia+m-1 ) ) THEN
371  wide = iright - ileft
372  CALL dlamov( 'All', ileft-iiaa+iia, nqaa,
373  $ a( iiaa+(jjaa-1)*lda ), lda,
374  $ b( iibb+(jjbb-1)*ldb ), ldb )
375  CALL dlamov( uplo, wide, nqaa,
376  $ a( iia+ileft+(jjaa-1)*lda ), lda,
377  $ b( iib+ileft+(jjbb-1)*ldb ), ldb )
378  nqaa = max( 0, nqaa - wide )
379  iiaa = iia + iright
380  jjaa = jjaa + wide
381  iibb = iib + iright
382  jjbb = jjbb + wide
383  mydist = mydist + npcol
384  ileft = mydist * nba - icoffa
385  iright = min( ileft + nba, m )
386  ileft = min( ileft, m )
387  GO TO 40
388  END IF
389 *
390  ELSE
391 *
392  CALL dlamov( 'All', m, nq, a( iia+(jja-1)*lda ),
393  $ lda, b( iib+(jjb-1)*ldb ), ldb )
394 *
395  END IF
396 *
397  END IF
398 *
399  END IF
400 *
401  RETURN
402 *
403 * End of PDLACP2
404 *
405  END
max
#define max(A, B)
Definition: pcgemr.c:180
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pdlacp2
subroutine pdlacp2(UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, DESCB)
Definition: pdlacp2.f:3
min
#define min(A, B)
Definition: pcgemr.c:181