SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pdpotrrv.f
Go to the documentation of this file.
1 SUBROUTINE pdpotrrv( UPLO, N, A, IA, JA, DESCA, WORK )
2*
3* -- ScaLAPACK routine (version 1.7) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* May 28, 2001
7*
8* .. Scalar Arguments ..
9 CHARACTER UPLO
10 INTEGER IA, JA, N
11* ..
12* .. Array Arguments ..
13 INTEGER DESCA( * )
14 DOUBLE PRECISION A( * ), WORK( * )
15* ..
16*
17* Purpose
18* =======
19*
20* PDPOTRRV recomputes sub( A ) = A(IA:IA+N-1,JA:JA+N-1) from L or U
21* computed by PDPOTRF. The routine performs the Cholesky factorization
22* in reverse.
23*
24* Notes
25* =====
26*
27* Each global data object is described by an associated description
28* vector. This vector stores the information required to establish
29* the mapping between an object element and its corresponding process
30* and memory location.
31*
32* Let A be a generic term for any 2D block cyclicly distributed array.
33* Such a global array has an associated description vector DESCA.
34* In the following comments, the character _ should be read as
35* "of the global array".
36*
37* NOTATION STORED IN EXPLANATION
38* --------------- -------------- --------------------------------------
39* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
40* DTYPE_A = 1.
41* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
42* the BLACS process grid A is distribu-
43* ted over. The context itself is glo-
44* bal, but the handle (the integer
45* value) may vary.
46* M_A (global) DESCA( M_ ) The number of rows in the global
47* array A.
48* N_A (global) DESCA( N_ ) The number of columns in the global
49* array A.
50* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
51* the rows of the array.
52* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
53* the columns of the array.
54* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
55* row of the array A is distributed.
56* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
57* first column of the array A is
58* distributed.
59* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
60* array. LLD_A >= MAX(1,LOCr(M_A)).
61*
62* Let K be the number of rows or columns of a distributed matrix,
63* and assume that its process grid has dimension p x q.
64* LOCr( K ) denotes the number of elements of K that a process
65* would receive if K were distributed over the p processes of its
66* process column.
67* Similarly, LOCc( K ) denotes the number of elements of K that a
68* process would receive if K were distributed over the q processes of
69* its process row.
70* The values of LOCr() and LOCc() may be determined via a call to the
71* ScaLAPACK tool function, NUMROC:
72* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
73* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
74* An upper bound for these quantities may be computed by:
75* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
76* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
77*
78* Arguments
79* =========
80*
81* UPLO (global input) CHARACTER
82* Specifies whether the upper or lower triangular part of the
83* symmetric distributed matrix sub( A ) is stored:
84* stored:
85* = 'U': Upper triangular
86* = 'L': Lower triangular
87*
88* N (global input) INTEGER
89* The number of rows and columns to be operated on, i.e. the
90* order of the distributed submatrix sub( A ). N >= 0.
91*
92* A (local input/local output) DOUBLE PRECISION pointer into the
93* local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
94* On entry, the local pieces of the factors L or U of the
95* distributed matrix sub( A ) from the Cholesky factorization.
96* On exit, the original distributed matrix sub( A ) is
97* restored.
98*
99* IA (global input) INTEGER
100* The row index in the global array A indicating the first
101* row of sub( A ).
102*
103* JA (global input) INTEGER
104* The column index in the global array A indicating the
105* first column of sub( A ).
106*
107* DESCA (global and local input) INTEGER array of dimension DLEN_.
108* The array descriptor for the distributed matrix A.
109*
110* WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK)
111* LWORK >= MB_A*NB_A.
112*
113* =====================================================================
114*
115* .. Parameters ..
116 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
117 $ LLD_, MB_, M_, NB_, N_, RSRC_
118 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
119 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
120 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
121 DOUBLE PRECISION ONE, ZERO
122 parameter( one = 1.0d+0, zero = 0.0d+0 )
123* ..
124* .. Local Scalars ..
125 LOGICAL UPPER
126 CHARACTER COLBTOP, ROWBTOP
127 INTEGER IACOL, IAROW, ICTXT, IL, J, JB, JL, JN, MYCOL,
128 $ MYROW, NPCOL, NPROW
129* .. Local Arrays ..
130 INTEGER DESCW( DLEN_ )
131* ..
132* .. External Subroutines ..
133 EXTERNAL blacs_gridinfo, descset, pdlacpy, pdlaset,
134 $ pdsyrk, pdtrmm, pb_topget, pb_topset
135* ..
136* .. External Functions ..
137 LOGICAL LSAME
138 INTEGER ICEIL, INDXG2P
139 EXTERNAL iceil, indxg2p, lsame
140* ..
141* .. Intrinsic Functions ..
142 INTRINSIC min, mod
143* ..
144* .. Executable Statements ..
145*
146* Get grid parameters
147*
148 ictxt = desca( ctxt_ )
149 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
150*
151 CALL pb_topget( ictxt, 'Broadcast', 'Rowwise', rowbtop )
152 CALL pb_topget( ictxt, 'Broadcast', 'Columnwise', colbtop )
153*
154 upper = lsame( uplo, 'U' )
155 jn = min( iceil( ja, desca( nb_ ) ) * desca( nb_ ), ja+n-1 )
156 jl = max( ( ( ja+n-2 ) / desca( nb_ ) ) * desca( nb_ ) + 1, ja )
157 il = max( ( ( ia+n-2 ) / desca( mb_ ) ) * desca( mb_ ) + 1, ia )
158 iarow = indxg2p( il, desca( mb_ ), myrow, desca( rsrc_ ), nprow )
159 iacol = indxg2p( jl, desca( nb_ ), mycol, desca( csrc_ ), npcol )
160*
161* Define array descriptor for working array WORK
162*
163 CALL descset( descw, desca( mb_ ), desca( nb_ ), desca( mb_ ),
164 $ desca( nb_ ), iarow, iacol, ictxt, desca( mb_ ) )
165*
166 IF ( upper ) THEN
167*
168* Compute A from the Cholesky factor U : A = U'*U.
169*
170 CALL pb_topset( ictxt, 'Broadcast', 'Rowwise', ' ' )
171 CALL pb_topset( ictxt, 'Broadcast', 'Columnwise', 'S-ring' )
172*
173 DO 10 j = jl, jn+1, -desca( nb_ )
174*
175 jb = min( ja+n-j, desca( nb_ ) )
176*
177* Update the trailing matrix, A = A + U'*U
178*
179 CALL pdsyrk( 'Upper', 'Transpose', ja+n-j-jb, jb, one, a, il,
180 $ j+jb, desca, one, a, il+jb, j+jb, desca )
181*
182* Copy current diagonal block of A into workspace
183*
184 CALL pdlacpy( 'All', jb, jb, a, il, j, desca, work, 1, 1,
185 $ descw )
186*
187* Zero strict lower triangular part of diagonal block, to make
188* it U1.
189*
190 CALL pdlaset( 'Lower', jb-1, jb, zero, zero, a, il+1, j,
191 $ desca )
192*
193* Update the row panel U with the triangular matrix
194*
195 CALL pdtrmm( 'Left', 'Upper', 'Transpose', 'Non-Unit', jb,
196 $ n-j+ja, one, work, 1, 1, descw, a, il, j,
197 $ desca )
198*
199* Restore the strict lower triangular part of diagonal block.
200*
201 CALL pdlacpy( 'Lower', jb-1, jb, work, 2, 1, descw, a,
202 $ il+1, j, desca )
203*
204 il = il - desca( mb_ )
205 descw( rsrc_ ) = mod( descw( rsrc_ ) + nprow - 1, nprow )
206 descw( csrc_ ) = mod( descw( csrc_ ) + npcol - 1, npcol )
207*
208 10 CONTINUE
209*
210* Handle first block separately
211*
212 jb = min( jn-ja+1, desca( nb_ ) )
213*
214* Update the trailing matrix, A = A + U'*U
215*
216 CALL pdsyrk( 'Upper', 'Transpose', n-jb, jb, one, a, ia, ja+jb,
217 $ desca, one, a, ia+jb, ja+jb, desca )
218*
219* Copy current diagonal block of A into workspace
220*
221 CALL pdlacpy( 'All', jb, jb, a, ia, ja, desca, work, 1, 1,
222 $ descw )
223*
224* Zero strict lower triangular part of diagonal block, to make
225* it U1.
226*
227 CALL pdlaset( 'Lower', jb-1, jb, zero, zero, a, ia+1, ja,
228 $ desca )
229*
230* Update the row panel U with the triangular matrix
231*
232 CALL pdtrmm( 'Left', 'Upper', 'Transpose', 'Non-Unit', jb,
233 $ n, one, work, 1, 1, descw, a, ia, ja, desca )
234*
235* Restore the strict lower triangular part of diagonal block.
236*
237 CALL pdlacpy( 'Lower', jb-1, jb, work, 2, 1, descw, a, ia+1,
238 $ ja, desca )
239*
240 ELSE
241*
242* Compute A from the Cholesky factor L : A = L*L'.
243*
244 CALL pb_topset( ictxt, 'Broadcast', 'Rowwise', 'S-ring' )
245 CALL pb_topset( ictxt, 'Broadcast', 'Columnwise', ' ' )
246*
247 DO 20 j = jl, jn+1, -desca( nb_ )
248*
249 jb = min( ja+n-j, desca( nb_ ) )
250*
251* Update the trailing matrix, A = A + L*L'
252*
253 CALL pdsyrk( 'Lower', 'No transpose', ia+n-il-jb, jb, one, a,
254 $ il+jb, j, desca, one, a, il+jb, j+jb, desca )
255*
256* Copy current diagonal block of A into workspace
257*
258 CALL pdlacpy( 'All', jb, jb, a, il, j, desca, work, 1, 1,
259 $ descw )
260*
261* Zero strict upper triangular part of diagonal block, to make
262* it L1.
263*
264 CALL pdlaset( 'Upper', jb, jb-1, zero, zero, a, il, j+1,
265 $ desca )
266*
267* Update the column panel L with the triangular matrix
268*
269 CALL pdtrmm( 'Right', 'Lower', 'Transpose', 'Non-Unit',
270 $ ia+n-il, jb, one, work, 1, 1, descw, a, il,
271 $ j, desca )
272*
273* Restore the strict upper triangular part of diagonal block.
274*
275 CALL pdlacpy( 'Upper', jb, jb-1, work, 1, 2, descw, a,
276 $ il, j+1, desca )
277*
278 il = il - desca( mb_ )
279 descw( rsrc_ ) = mod( descw( rsrc_ ) + nprow - 1, nprow )
280 descw( csrc_ ) = mod( descw( csrc_ ) + npcol - 1, npcol )
281*
282 20 CONTINUE
283*
284* Handle first block separately
285*
286 jb = min( jn-ja+1, desca( nb_ ) )
287*
288* Update the trailing matrix, A = A + L*L'
289*
290 CALL pdsyrk( 'Lower', 'No transpose', n-jb, jb, one, a,
291 $ ia+jb, ja, desca, one, a, ia+jb, ja+jb, desca )
292*
293* Copy current diagonal block of A into workspace
294*
295 CALL pdlacpy( 'All', jb, jb, a, ia, ja, desca, work, 1, 1,
296 $ descw )
297*
298* Zero strict upper triangular part of diagonal block, to make
299* it L1.
300*
301 CALL pdlaset( 'Upper', jb, jb-1, zero, zero, a, ia, ja+1,
302 $ desca )
303*
304* Update the column panel L with the triangular matrix
305*
306 CALL pdtrmm( 'Right', 'Lower', 'Transpose', 'Non-Unit', n, jb,
307 $ one, work, 1, 1, descw, a, ia, ja, desca )
308*
309* Restore the strict upper triangular part of diagonal block.
310*
311 CALL pdlacpy( 'Upper', jb, jb-1, work, 1, 2, descw, a, ia,
312 $ ja+1, desca )
313*
314 END IF
315*
316 CALL pb_topset( ictxt, 'Broadcast', 'Rowwise', rowbtop )
317 CALL pb_topset( ictxt, 'Broadcast', 'Columnwise', colbtop )
318*
319 RETURN
320*
321* End of PDPOTRRV
322*
323 END
subroutine descset(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld)
Definition descset.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pdlaset(uplo, m, n, alpha, beta, a, ia, ja, desca)
Definition pdblastst.f:6862
subroutine pdlacpy(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb)
Definition pdlacpy.f:3
subroutine pdpotrrv(uplo, n, a, ia, ja, desca, work)
Definition pdpotrrv.f:2