ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdpotrrv.f
Go to the documentation of this file.
1  SUBROUTINE pdpotrrv( UPLO, N, A, IA, JA, DESCA, WORK )
2 *
3 * -- ScaLAPACK routine (version 1.7) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * May 28, 2001
7 *
8 * .. Scalar Arguments ..
9  CHARACTER UPLO
10  INTEGER IA, JA, N
11 * ..
12 * .. Array Arguments ..
13  INTEGER DESCA( * )
14  DOUBLE PRECISION A( * ), WORK( * )
15 * ..
16 *
17 * Purpose
18 * =======
19 *
20 * PDPOTRRV recomputes sub( A ) = A(IA:IA+N-1,JA:JA+N-1) from L or U
21 * computed by PDPOTRF. The routine performs the Cholesky factorization
22 * in reverse.
23 *
24 * Notes
25 * =====
26 *
27 * Each global data object is described by an associated description
28 * vector. This vector stores the information required to establish
29 * the mapping between an object element and its corresponding process
30 * and memory location.
31 *
32 * Let A be a generic term for any 2D block cyclicly distributed array.
33 * Such a global array has an associated description vector DESCA.
34 * In the following comments, the character _ should be read as
35 * "of the global array".
36 *
37 * NOTATION STORED IN EXPLANATION
38 * --------------- -------------- --------------------------------------
39 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
40 * DTYPE_A = 1.
41 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
42 * the BLACS process grid A is distribu-
43 * ted over. The context itself is glo-
44 * bal, but the handle (the integer
45 * value) may vary.
46 * M_A (global) DESCA( M_ ) The number of rows in the global
47 * array A.
48 * N_A (global) DESCA( N_ ) The number of columns in the global
49 * array A.
50 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
51 * the rows of the array.
52 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
53 * the columns of the array.
54 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
55 * row of the array A is distributed.
56 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
57 * first column of the array A is
58 * distributed.
59 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
60 * array. LLD_A >= MAX(1,LOCr(M_A)).
61 *
62 * Let K be the number of rows or columns of a distributed matrix,
63 * and assume that its process grid has dimension p x q.
64 * LOCr( K ) denotes the number of elements of K that a process
65 * would receive if K were distributed over the p processes of its
66 * process column.
67 * Similarly, LOCc( K ) denotes the number of elements of K that a
68 * process would receive if K were distributed over the q processes of
69 * its process row.
70 * The values of LOCr() and LOCc() may be determined via a call to the
71 * ScaLAPACK tool function, NUMROC:
72 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
73 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
74 * An upper bound for these quantities may be computed by:
75 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
76 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
77 *
78 * Arguments
79 * =========
80 *
81 * UPLO (global input) CHARACTER
82 * Specifies whether the upper or lower triangular part of the
83 * symmetric distributed matrix sub( A ) is stored:
84 * stored:
85 * = 'U': Upper triangular
86 * = 'L': Lower triangular
87 *
88 * N (global input) INTEGER
89 * The number of rows and columns to be operated on, i.e. the
90 * order of the distributed submatrix sub( A ). N >= 0.
91 *
92 * A (local input/local output) DOUBLE PRECISION pointer into the
93 * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
94 * On entry, the local pieces of the factors L or U of the
95 * distributed matrix sub( A ) from the Cholesky factorization.
96 * On exit, the original distributed matrix sub( A ) is
97 * restored.
98 *
99 * IA (global input) INTEGER
100 * The row index in the global array A indicating the first
101 * row of sub( A ).
102 *
103 * JA (global input) INTEGER
104 * The column index in the global array A indicating the
105 * first column of sub( A ).
106 *
107 * DESCA (global and local input) INTEGER array of dimension DLEN_.
108 * The array descriptor for the distributed matrix A.
109 *
110 * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK)
111 * LWORK >= MB_A*NB_A.
112 *
113 * =====================================================================
114 *
115 * .. Parameters ..
116  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
117  $ LLD_, MB_, M_, NB_, N_, RSRC_
118  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
119  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
120  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
121  DOUBLE PRECISION ONE, ZERO
122  parameter( one = 1.0d+0, zero = 0.0d+0 )
123 * ..
124 * .. Local Scalars ..
125  LOGICAL UPPER
126  CHARACTER COLBTOP, ROWBTOP
127  INTEGER IACOL, IAROW, ICTXT, IL, J, JB, JL, JN, MYCOL,
128  $ MYROW, NPCOL, NPROW
129 * .. Local Arrays ..
130  INTEGER DESCW( DLEN_ )
131 * ..
132 * .. External Subroutines ..
133  EXTERNAL blacs_gridinfo, descset, pdlacpy, pdlaset,
134  $ pdsyrk, pdtrmm, pb_topget, pb_topset
135 * ..
136 * .. External Functions ..
137  LOGICAL LSAME
138  INTEGER ICEIL, INDXG2P
139  EXTERNAL iceil, indxg2p, lsame
140 * ..
141 * .. Intrinsic Functions ..
142  INTRINSIC min, mod
143 * ..
144 * .. Executable Statements ..
145 *
146 * Get grid parameters
147 *
148  ictxt = desca( ctxt_ )
149  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
150 *
151  CALL pb_topget( ictxt, 'Broadcast', 'Rowwise', rowbtop )
152  CALL pb_topget( ictxt, 'Broadcast', 'Columnwise', colbtop )
153 *
154  upper = lsame( uplo, 'U' )
155  jn = min( iceil( ja, desca( nb_ ) ) * desca( nb_ ), ja+n-1 )
156  jl = max( ( ( ja+n-2 ) / desca( nb_ ) ) * desca( nb_ ) + 1, ja )
157  il = max( ( ( ia+n-2 ) / desca( mb_ ) ) * desca( mb_ ) + 1, ia )
158  iarow = indxg2p( il, desca( mb_ ), myrow, desca( rsrc_ ), nprow )
159  iacol = indxg2p( jl, desca( nb_ ), mycol, desca( csrc_ ), npcol )
160 *
161 * Define array descriptor for working array WORK
162 *
163  CALL descset( descw, desca( mb_ ), desca( nb_ ), desca( mb_ ),
164  $ desca( nb_ ), iarow, iacol, ictxt, desca( mb_ ) )
165 *
166  IF ( upper ) THEN
167 *
168 * Compute A from the Cholesky factor U : A = U'*U.
169 *
170  CALL pb_topset( ictxt, 'Broadcast', 'Rowwise', ' ' )
171  CALL pb_topset( ictxt, 'Broadcast', 'Columnwise', 'S-ring' )
172 *
173  DO 10 j = jl, jn+1, -desca( nb_ )
174 *
175  jb = min( ja+n-j, desca( nb_ ) )
176 *
177 * Update the trailing matrix, A = A + U'*U
178 *
179  CALL pdsyrk( 'Upper', 'Transpose', ja+n-j-jb, jb, one, a, il,
180  $ j+jb, desca, one, a, il+jb, j+jb, desca )
181 *
182 * Copy current diagonal block of A into workspace
183 *
184  CALL pdlacpy( 'All', jb, jb, a, il, j, desca, work, 1, 1,
185  $ descw )
186 *
187 * Zero strict lower triangular part of diagonal block, to make
188 * it U1.
189 *
190  CALL pdlaset( 'Lower', jb-1, jb, zero, zero, a, il+1, j,
191  $ desca )
192 *
193 * Update the row panel U with the triangular matrix
194 *
195  CALL pdtrmm( 'Left', 'Upper', 'Transpose', 'Non-Unit', jb,
196  $ n-j+ja, one, work, 1, 1, descw, a, il, j,
197  $ desca )
198 *
199 * Restore the strict lower triangular part of diagonal block.
200 *
201  CALL pdlacpy( 'Lower', jb-1, jb, work, 2, 1, descw, a,
202  $ il+1, j, desca )
203 *
204  il = il - desca( mb_ )
205  descw( rsrc_ ) = mod( descw( rsrc_ ) + nprow - 1, nprow )
206  descw( csrc_ ) = mod( descw( csrc_ ) + npcol - 1, npcol )
207 *
208  10 CONTINUE
209 *
210 * Handle first block separately
211 *
212  jb = min( jn-ja+1, desca( nb_ ) )
213 *
214 * Update the trailing matrix, A = A + U'*U
215 *
216  CALL pdsyrk( 'Upper', 'Transpose', n-jb, jb, one, a, ia, ja+jb,
217  $ desca, one, a, ia+jb, ja+jb, desca )
218 *
219 * Copy current diagonal block of A into workspace
220 *
221  CALL pdlacpy( 'All', jb, jb, a, ia, ja, desca, work, 1, 1,
222  $ descw )
223 *
224 * Zero strict lower triangular part of diagonal block, to make
225 * it U1.
226 *
227  CALL pdlaset( 'Lower', jb-1, jb, zero, zero, a, ia+1, ja,
228  $ desca )
229 *
230 * Update the row panel U with the triangular matrix
231 *
232  CALL pdtrmm( 'Left', 'Upper', 'Transpose', 'Non-Unit', jb,
233  $ n, one, work, 1, 1, descw, a, ia, ja, desca )
234 *
235 * Restore the strict lower triangular part of diagonal block.
236 *
237  CALL pdlacpy( 'Lower', jb-1, jb, work, 2, 1, descw, a, ia+1,
238  $ ja, desca )
239 *
240  ELSE
241 *
242 * Compute A from the Cholesky factor L : A = L*L'.
243 *
244  CALL pb_topset( ictxt, 'Broadcast', 'Rowwise', 'S-ring' )
245  CALL pb_topset( ictxt, 'Broadcast', 'Columnwise', ' ' )
246 *
247  DO 20 j = jl, jn+1, -desca( nb_ )
248 *
249  jb = min( ja+n-j, desca( nb_ ) )
250 *
251 * Update the trailing matrix, A = A + L*L'
252 *
253  CALL pdsyrk( 'Lower', 'No transpose', ia+n-il-jb, jb, one, a,
254  $ il+jb, j, desca, one, a, il+jb, j+jb, desca )
255 *
256 * Copy current diagonal block of A into workspace
257 *
258  CALL pdlacpy( 'All', jb, jb, a, il, j, desca, work, 1, 1,
259  $ descw )
260 *
261 * Zero strict upper triangular part of diagonal block, to make
262 * it L1.
263 *
264  CALL pdlaset( 'Upper', jb, jb-1, zero, zero, a, il, j+1,
265  $ desca )
266 *
267 * Update the column panel L with the triangular matrix
268 *
269  CALL pdtrmm( 'Right', 'Lower', 'Transpose', 'Non-Unit',
270  $ ia+n-il, jb, one, work, 1, 1, descw, a, il,
271  $ j, desca )
272 *
273 * Restore the strict upper triangular part of diagonal block.
274 *
275  CALL pdlacpy( 'Upper', jb, jb-1, work, 1, 2, descw, a,
276  $ il, j+1, desca )
277 *
278  il = il - desca( mb_ )
279  descw( rsrc_ ) = mod( descw( rsrc_ ) + nprow - 1, nprow )
280  descw( csrc_ ) = mod( descw( csrc_ ) + npcol - 1, npcol )
281 *
282  20 CONTINUE
283 *
284 * Handle first block separately
285 *
286  jb = min( jn-ja+1, desca( nb_ ) )
287 *
288 * Update the trailing matrix, A = A + L*L'
289 *
290  CALL pdsyrk( 'Lower', 'No transpose', n-jb, jb, one, a,
291  $ ia+jb, ja, desca, one, a, ia+jb, ja+jb, desca )
292 *
293 * Copy current diagonal block of A into workspace
294 *
295  CALL pdlacpy( 'All', jb, jb, a, ia, ja, desca, work, 1, 1,
296  $ descw )
297 *
298 * Zero strict upper triangular part of diagonal block, to make
299 * it L1.
300 *
301  CALL pdlaset( 'Upper', jb, jb-1, zero, zero, a, ia, ja+1,
302  $ desca )
303 *
304 * Update the column panel L with the triangular matrix
305 *
306  CALL pdtrmm( 'Right', 'Lower', 'Transpose', 'Non-Unit', n, jb,
307  $ one, work, 1, 1, descw, a, ia, ja, desca )
308 *
309 * Restore the strict upper triangular part of diagonal block.
310 *
311  CALL pdlacpy( 'Upper', jb, jb-1, work, 1, 2, descw, a, ia,
312  $ ja+1, desca )
313 *
314  END IF
315 *
316  CALL pb_topset( ictxt, 'Broadcast', 'Rowwise', rowbtop )
317  CALL pb_topset( ictxt, 'Broadcast', 'Columnwise', colbtop )
318 *
319  RETURN
320 *
321 * End of PDPOTRRV
322 *
323  END
max
#define max(A, B)
Definition: pcgemr.c:180
descset
subroutine descset(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD)
Definition: descset.f:3
pdlaset
subroutine pdlaset(UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA)
Definition: pdblastst.f:6862
pdpotrrv
subroutine pdpotrrv(UPLO, N, A, IA, JA, DESCA, WORK)
Definition: pdpotrrv.f:2
pdlacpy
subroutine pdlacpy(UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, DESCB)
Definition: pdlacpy.f:3
min
#define min(A, B)
Definition: pcgemr.c:181