SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
All Classes Files Functions Variables Typedefs Macros
pzpotrrv.f
Go to the documentation of this file.
1 SUBROUTINE pzpotrrv( UPLO, N, A, IA, JA, DESCA, WORK )
2*
3* -- ScaLAPACK routine (version 1.7) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* May 28, 2001
7*
8* .. Scalar Arguments ..
9 CHARACTER UPLO
10 INTEGER IA, JA, N
11* ..
12* .. Array Arguments ..
13 INTEGER DESCA( * )
14 COMPLEX*16 A( * ), WORK( * )
15* ..
16*
17* Purpose
18* =======
19*
20* PZPOTRRV recomputes sub( A ) = A(IA:IA+N-1,JA:JA+N-1) from L or U
21* computed by PZPOTRF. The routine performs the Cholesky factorization
22* in reverse.
23*
24* Notes
25* =====
26*
27* Each global data object is described by an associated description
28* vector. This vector stores the information required to establish
29* the mapping between an object element and its corresponding process
30* and memory location.
31*
32* Let A be a generic term for any 2D block cyclicly distributed array.
33* Such a global array has an associated description vector DESCA.
34* In the following comments, the character _ should be read as
35* "of the global array".
36*
37* NOTATION STORED IN EXPLANATION
38* --------------- -------------- --------------------------------------
39* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
40* DTYPE_A = 1.
41* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
42* the BLACS process grid A is distribu-
43* ted over. The context itself is glo-
44* bal, but the handle (the integer
45* value) may vary.
46* M_A (global) DESCA( M_ ) The number of rows in the global
47* array A.
48* N_A (global) DESCA( N_ ) The number of columns in the global
49* array A.
50* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
51* the rows of the array.
52* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
53* the columns of the array.
54* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
55* row of the array A is distributed.
56* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
57* first column of the array A is
58* distributed.
59* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
60* array. LLD_A >= MAX(1,LOCr(M_A)).
61*
62* Let K be the number of rows or columns of a distributed matrix,
63* and assume that its process grid has dimension p x q.
64* LOCr( K ) denotes the number of elements of K that a process
65* would receive if K were distributed over the p processes of its
66* process column.
67* Similarly, LOCc( K ) denotes the number of elements of K that a
68* process would receive if K were distributed over the q processes of
69* its process row.
70* The values of LOCr() and LOCc() may be determined via a call to the
71* ScaLAPACK tool function, NUMROC:
72* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
73* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
74* An upper bound for these quantities may be computed by:
75* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
76* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
77*
78* Arguments
79* =========
80*
81* UPLO (global input) CHARACTER
82* Specifies whether the upper or lower triangular part of the
83* hermitian distributed matrix sub( A ) is stored:
84* stored:
85* = 'U': Upper triangular
86* = 'L': Lower triangular
87*
88* N (global input) INTEGER
89* The number of rows and columns to be operated on, i.e. the
90* order of the distributed submatrix sub( A ). N >= 0.
91*
92* A (local input/local output) COMPLEX*16 pointer into the
93* local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
94* On entry, the local pieces of the factors L or U of the
95* distributed matrix sub( A ) from the Cholesky factorization.
96* On exit, the original distributed matrix sub( A ) is
97* restored.
98*
99* IA (global input) INTEGER
100* The row index in the global array A indicating the first
101* row of sub( A ).
102*
103* JA (global input) INTEGER
104* The column index in the global array A indicating the
105* first column of sub( A ).
106*
107* DESCA (global and local input) INTEGER array of dimension DLEN_.
108* The array descriptor for the distributed matrix A.
109*
110* WORK (local workspace) COMPLEX*16 array, dimension (LWORK)
111* LWORK >= MB_A*NB_A.
112*
113* =====================================================================
114*
115* .. Parameters ..
116 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
117 $ LLD_, MB_, M_, NB_, N_, RSRC_
118 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
119 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
120 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
121 DOUBLE PRECISION ONE
122 parameter( one = 1.0d+0 )
123 COMPLEX*16 CONE, ZERO
124 parameter( cone = ( 1.0d+0, 0.0d+0 ),
125 $ zero = ( 0.0d+0, 0.0d+0 ) )
126* ..
127* .. Local Scalars ..
128 LOGICAL UPPER
129 CHARACTER COLBTOP, ROWBTOP
130 INTEGER IACOL, IAROW, ICTXT, IL, J, JB, JL, JN, MYCOL,
131 $ MYROW, NPCOL, NPROW
132* .. Local Arrays ..
133 INTEGER DESCW( DLEN_ )
134* ..
135* .. External Subroutines ..
136 EXTERNAL blacs_gridinfo, descset, pb_topget, pb_topset,
137 $ pzlacpy, pzlaset, pzherk, pztrmm
138* ..
139* .. External Functions ..
140 LOGICAL LSAME
141 INTEGER ICEIL, INDXG2P
142 EXTERNAL iceil, indxg2p, lsame
143* ..
144* .. Intrinsic Functions ..
145 INTRINSIC min, mod
146* ..
147* .. Executable Statements ..
148*
149* Get grid parameters
150*
151 ictxt = desca( ctxt_ )
152 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
153*
154 CALL pb_topget( ictxt, 'Broadcast', 'Rowwise', rowbtop )
155 CALL pb_topget( ictxt, 'Broadcast', 'Columnwise', colbtop )
156*
157 upper = lsame( uplo, 'U' )
158 jn = min( iceil( ja, desca( nb_ ) ) * desca( nb_ ), ja+n-1 )
159 jl = max( ( ( ja+n-2 ) / desca( nb_ ) ) * desca( nb_ ) + 1, ja )
160 il = max( ( ( ia+n-2 ) / desca( mb_ ) ) * desca( mb_ ) + 1, ia )
161 iarow = indxg2p( il, desca( mb_ ), myrow, desca( rsrc_ ), nprow )
162 iacol = indxg2p( jl, desca( nb_ ), mycol, desca( csrc_ ), npcol )
163*
164* Define array descriptor for working array WORK
165*
166 CALL descset( descw, desca( mb_ ), desca( nb_ ), desca( mb_ ),
167 $ desca( nb_ ), iarow, iacol, ictxt, desca( mb_ ) )
168*
169 IF ( upper ) THEN
170*
171* Compute A from the Cholesky factor U : A = U'*U.
172*
173 CALL pb_topset( ictxt, 'Broadcast', 'Rowwise', ' ' )
174 CALL pb_topset( ictxt, 'Broadcast', 'Columnwise', 'S-ring' )
175*
176 DO 10 j = jl, jn+1, -desca( nb_ )
177*
178 jb = min( ja+n-j, desca( nb_ ) )
179*
180* Update the trailing matrix, A = A + U'*U
181*
182 CALL pzherk( 'Upper', 'Conjugate Transpose', ja+n-j-jb, jb,
183 $ one, a, il, j+jb, desca, one, a, il+jb, j+jb,
184 $ desca )
185*
186* Copy current diagonal block of A into workspace
187*
188 CALL pzlacpy( 'All', jb, jb, a, il, j, desca, work, 1, 1,
189 $ descw )
190*
191* Zero strict lower triangular part of diagonal block, to make
192* it U1.
193*
194 CALL pzlaset( 'Lower', jb-1, jb, zero, zero, a, il+1, j,
195 $ desca )
196*
197* Update the row panel U with the triangular matrix
198*
199 CALL pztrmm( 'Left', 'Upper', 'Conjugate Transpose',
200 $ 'Non-Unit', jb, n-j+ja, cone, work, 1, 1,
201 $ descw, a, il, j, desca )
202*
203* Restore the strict lower triangular part of diagonal block.
204*
205 CALL pzlacpy( 'Lower', jb-1, jb, work, 2, 1, descw, a,
206 $ il+1, j, desca )
207*
208 il = il - desca( mb_ )
209 descw( rsrc_ ) = mod( descw( rsrc_ ) + nprow - 1, nprow )
210 descw( csrc_ ) = mod( descw( csrc_ ) + npcol - 1, npcol )
211*
212 10 CONTINUE
213*
214* Handle first block separately
215*
216 jb = min( jn-ja+1, desca( nb_ ) )
217*
218* Update the trailing matrix, A = A + U'*U
219*
220 CALL pzherk( 'Upper', 'Conjugate Transpose', n-jb, jb, one, a,
221 $ ia, ja+jb, desca, one, a, ia+jb, ja+jb, desca )
222*
223* Copy current diagonal block of A into workspace
224*
225 CALL pzlacpy( 'All', jb, jb, a, ia, ja, desca, work, 1, 1,
226 $ descw )
227*
228* Zero strict lower triangular part of diagonal block, to make
229* it U1.
230*
231 CALL pzlaset( 'Lower', jb-1, jb, zero, zero, a, ia+1, ja,
232 $ desca )
233*
234* Update the row panel U with the triangular matrix
235*
236 CALL pztrmm( 'Left', 'Upper', 'Conjugate Transpose', 'Non-Unit',
237 $ jb, n, cone, work, 1, 1, descw, a, ia, ja, desca )
238*
239* Restore the strict lower triangular part of diagonal block.
240*
241 CALL pzlacpy( 'Lower', jb-1, jb, work, 2, 1, descw, a, ia+1,
242 $ ja, desca )
243*
244 ELSE
245*
246* Compute A from the Cholesky factor L : A = L*L'.
247*
248 CALL pb_topset( ictxt, 'Broadcast', 'Rowwise', 'S-ring' )
249 CALL pb_topset( ictxt, 'Broadcast', 'Columnwise', ' ' )
250*
251 DO 20 j = jl, jn+1, -desca( nb_ )
252*
253 jb = min( ja+n-j, desca( nb_ ) )
254*
255* Update the trailing matrix, A = A + L*L'
256*
257 CALL pzherk( 'Lower', 'No Transpose', ia+n-il-jb, jb, one, a,
258 $ il+jb, j, desca, one, a, il+jb, j+jb, desca )
259*
260* Copy current diagonal block of A into workspace
261*
262 CALL pzlacpy( 'All', jb, jb, a, il, j, desca, work, 1, 1,
263 $ descw )
264*
265* Zero strict upper triangular part of diagonal block, to make
266* it L1.
267*
268 CALL pzlaset( 'Upper', jb, jb-1, zero, zero, a, il, j+1,
269 $ desca )
270*
271* Update the column panel L with the triangular matrix
272*
273 CALL pztrmm( 'Right', 'Lower', 'Conjugate transpose',
274 $ 'Non-Unit', ia+n-il, jb, cone, work, 1, 1,
275 $ descw, a, il, j, desca )
276*
277* Restore the strict upper triangular part of diagonal block.
278*
279 CALL pzlacpy( 'Upper', jb, jb-1, work, 1, 2, descw, a,
280 $ il, j+1, desca )
281*
282 il = il - desca( mb_ )
283 descw( rsrc_ ) = mod( descw( rsrc_ ) + nprow - 1, nprow )
284 descw( csrc_ ) = mod( descw( csrc_ ) + npcol - 1, npcol )
285*
286 20 CONTINUE
287*
288* Handle first block separately
289*
290 jb = min( jn-ja+1, desca( nb_ ) )
291*
292* Update the trailing matrix, A = A + L*L'
293*
294 CALL pzherk( 'Lower', 'No Transpose', n-jb, jb, one, a,
295 $ ia+jb, ja, desca, one, a, ia+jb, ja+jb, desca )
296*
297* Copy current diagonal block of A into workspace
298*
299 CALL pzlacpy( 'All', jb, jb, a, ia, ja, desca, work, 1, 1,
300 $ descw )
301*
302* Zero strict upper triangular part of diagonal block, to make
303* it L1.
304*
305 CALL pzlaset( 'Upper', jb, jb-1, zero, zero, a, ia, ja+1,
306 $ desca )
307*
308* Update the column panel L with the triangular matrix
309*
310 CALL pztrmm( 'Right', 'Lower', 'Conjugate transpose',
311 $ 'Non-Unit', n, jb, cone, work, 1, 1, descw, a,
312 $ ia, ja, desca )
313*
314* Restore the strict upper triangular part of diagonal block.
315*
316 CALL pzlacpy( 'Upper', jb, jb-1, work, 1, 2, descw, a, ia,
317 $ ja+1, desca )
318*
319 END IF
320*
321 CALL pb_topset( ictxt, 'Broadcast', 'Rowwise', rowbtop )
322 CALL pb_topset( ictxt, 'Broadcast', 'Columnwise', colbtop )
323*
324 RETURN
325*
326* End of PZPOTRRV
327*
328 END
subroutine descset(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld)
Definition descset.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pzlaset(uplo, m, n, alpha, beta, a, ia, ja, desca)
Definition pzblastst.f:7509
subroutine pzlacpy(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb)
Definition pzlacpy.f:3
subroutine pzpotrrv(uplo, n, a, ia, ja, desca, work)
Definition pzpotrrv.f:2