ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pzpotrrv.f
Go to the documentation of this file.
1  SUBROUTINE pzpotrrv( UPLO, N, A, IA, JA, DESCA, WORK )
2 *
3 * -- ScaLAPACK routine (version 1.7) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * May 28, 2001
7 *
8 * .. Scalar Arguments ..
9  CHARACTER UPLO
10  INTEGER IA, JA, N
11 * ..
12 * .. Array Arguments ..
13  INTEGER DESCA( * )
14  COMPLEX*16 A( * ), WORK( * )
15 * ..
16 *
17 * Purpose
18 * =======
19 *
20 * PZPOTRRV recomputes sub( A ) = A(IA:IA+N-1,JA:JA+N-1) from L or U
21 * computed by PZPOTRF. The routine performs the Cholesky factorization
22 * in reverse.
23 *
24 * Notes
25 * =====
26 *
27 * Each global data object is described by an associated description
28 * vector. This vector stores the information required to establish
29 * the mapping between an object element and its corresponding process
30 * and memory location.
31 *
32 * Let A be a generic term for any 2D block cyclicly distributed array.
33 * Such a global array has an associated description vector DESCA.
34 * In the following comments, the character _ should be read as
35 * "of the global array".
36 *
37 * NOTATION STORED IN EXPLANATION
38 * --------------- -------------- --------------------------------------
39 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
40 * DTYPE_A = 1.
41 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
42 * the BLACS process grid A is distribu-
43 * ted over. The context itself is glo-
44 * bal, but the handle (the integer
45 * value) may vary.
46 * M_A (global) DESCA( M_ ) The number of rows in the global
47 * array A.
48 * N_A (global) DESCA( N_ ) The number of columns in the global
49 * array A.
50 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
51 * the rows of the array.
52 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
53 * the columns of the array.
54 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
55 * row of the array A is distributed.
56 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
57 * first column of the array A is
58 * distributed.
59 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
60 * array. LLD_A >= MAX(1,LOCr(M_A)).
61 *
62 * Let K be the number of rows or columns of a distributed matrix,
63 * and assume that its process grid has dimension p x q.
64 * LOCr( K ) denotes the number of elements of K that a process
65 * would receive if K were distributed over the p processes of its
66 * process column.
67 * Similarly, LOCc( K ) denotes the number of elements of K that a
68 * process would receive if K were distributed over the q processes of
69 * its process row.
70 * The values of LOCr() and LOCc() may be determined via a call to the
71 * ScaLAPACK tool function, NUMROC:
72 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
73 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
74 * An upper bound for these quantities may be computed by:
75 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
76 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
77 *
78 * Arguments
79 * =========
80 *
81 * UPLO (global input) CHARACTER
82 * Specifies whether the upper or lower triangular part of the
83 * hermitian distributed matrix sub( A ) is stored:
84 * stored:
85 * = 'U': Upper triangular
86 * = 'L': Lower triangular
87 *
88 * N (global input) INTEGER
89 * The number of rows and columns to be operated on, i.e. the
90 * order of the distributed submatrix sub( A ). N >= 0.
91 *
92 * A (local input/local output) COMPLEX*16 pointer into the
93 * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
94 * On entry, the local pieces of the factors L or U of the
95 * distributed matrix sub( A ) from the Cholesky factorization.
96 * On exit, the original distributed matrix sub( A ) is
97 * restored.
98 *
99 * IA (global input) INTEGER
100 * The row index in the global array A indicating the first
101 * row of sub( A ).
102 *
103 * JA (global input) INTEGER
104 * The column index in the global array A indicating the
105 * first column of sub( A ).
106 *
107 * DESCA (global and local input) INTEGER array of dimension DLEN_.
108 * The array descriptor for the distributed matrix A.
109 *
110 * WORK (local workspace) COMPLEX*16 array, dimension (LWORK)
111 * LWORK >= MB_A*NB_A.
112 *
113 * =====================================================================
114 *
115 * .. Parameters ..
116  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
117  $ LLD_, MB_, M_, NB_, N_, RSRC_
118  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
119  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
120  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
121  DOUBLE PRECISION ONE
122  parameter( one = 1.0d+0 )
123  COMPLEX*16 CONE, ZERO
124  parameter( cone = ( 1.0d+0, 0.0d+0 ),
125  $ zero = ( 0.0d+0, 0.0d+0 ) )
126 * ..
127 * .. Local Scalars ..
128  LOGICAL UPPER
129  CHARACTER COLBTOP, ROWBTOP
130  INTEGER IACOL, IAROW, ICTXT, IL, J, JB, JL, JN, MYCOL,
131  $ MYROW, NPCOL, NPROW
132 * .. Local Arrays ..
133  INTEGER DESCW( DLEN_ )
134 * ..
135 * .. External Subroutines ..
136  EXTERNAL blacs_gridinfo, descset, pb_topget, pb_topset,
137  $ pzlacpy, pzlaset, pzherk, pztrmm
138 * ..
139 * .. External Functions ..
140  LOGICAL LSAME
141  INTEGER ICEIL, INDXG2P
142  EXTERNAL iceil, indxg2p, lsame
143 * ..
144 * .. Intrinsic Functions ..
145  INTRINSIC min, mod
146 * ..
147 * .. Executable Statements ..
148 *
149 * Get grid parameters
150 *
151  ictxt = desca( ctxt_ )
152  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
153 *
154  CALL pb_topget( ictxt, 'Broadcast', 'Rowwise', rowbtop )
155  CALL pb_topget( ictxt, 'Broadcast', 'Columnwise', colbtop )
156 *
157  upper = lsame( uplo, 'U' )
158  jn = min( iceil( ja, desca( nb_ ) ) * desca( nb_ ), ja+n-1 )
159  jl = max( ( ( ja+n-2 ) / desca( nb_ ) ) * desca( nb_ ) + 1, ja )
160  il = max( ( ( ia+n-2 ) / desca( mb_ ) ) * desca( mb_ ) + 1, ia )
161  iarow = indxg2p( il, desca( mb_ ), myrow, desca( rsrc_ ), nprow )
162  iacol = indxg2p( jl, desca( nb_ ), mycol, desca( csrc_ ), npcol )
163 *
164 * Define array descriptor for working array WORK
165 *
166  CALL descset( descw, desca( mb_ ), desca( nb_ ), desca( mb_ ),
167  $ desca( nb_ ), iarow, iacol, ictxt, desca( mb_ ) )
168 *
169  IF ( upper ) THEN
170 *
171 * Compute A from the Cholesky factor U : A = U'*U.
172 *
173  CALL pb_topset( ictxt, 'Broadcast', 'Rowwise', ' ' )
174  CALL pb_topset( ictxt, 'Broadcast', 'Columnwise', 'S-ring' )
175 *
176  DO 10 j = jl, jn+1, -desca( nb_ )
177 *
178  jb = min( ja+n-j, desca( nb_ ) )
179 *
180 * Update the trailing matrix, A = A + U'*U
181 *
182  CALL pzherk( 'Upper', 'Conjugate Transpose', ja+n-j-jb, jb,
183  $ one, a, il, j+jb, desca, one, a, il+jb, j+jb,
184  $ desca )
185 *
186 * Copy current diagonal block of A into workspace
187 *
188  CALL pzlacpy( 'All', jb, jb, a, il, j, desca, work, 1, 1,
189  $ descw )
190 *
191 * Zero strict lower triangular part of diagonal block, to make
192 * it U1.
193 *
194  CALL pzlaset( 'Lower', jb-1, jb, zero, zero, a, il+1, j,
195  $ desca )
196 *
197 * Update the row panel U with the triangular matrix
198 *
199  CALL pztrmm( 'Left', 'Upper', 'Conjugate Transpose',
200  $ 'Non-Unit', jb, n-j+ja, cone, work, 1, 1,
201  $ descw, a, il, j, desca )
202 *
203 * Restore the strict lower triangular part of diagonal block.
204 *
205  CALL pzlacpy( 'Lower', jb-1, jb, work, 2, 1, descw, a,
206  $ il+1, j, desca )
207 *
208  il = il - desca( mb_ )
209  descw( rsrc_ ) = mod( descw( rsrc_ ) + nprow - 1, nprow )
210  descw( csrc_ ) = mod( descw( csrc_ ) + npcol - 1, npcol )
211 *
212  10 CONTINUE
213 *
214 * Handle first block separately
215 *
216  jb = min( jn-ja+1, desca( nb_ ) )
217 *
218 * Update the trailing matrix, A = A + U'*U
219 *
220  CALL pzherk( 'Upper', 'Conjugate Transpose', n-jb, jb, one, a,
221  $ ia, ja+jb, desca, one, a, ia+jb, ja+jb, desca )
222 *
223 * Copy current diagonal block of A into workspace
224 *
225  CALL pzlacpy( 'All', jb, jb, a, ia, ja, desca, work, 1, 1,
226  $ descw )
227 *
228 * Zero strict lower triangular part of diagonal block, to make
229 * it U1.
230 *
231  CALL pzlaset( 'Lower', jb-1, jb, zero, zero, a, ia+1, ja,
232  $ desca )
233 *
234 * Update the row panel U with the triangular matrix
235 *
236  CALL pztrmm( 'Left', 'Upper', 'Conjugate Transpose', 'Non-Unit',
237  $ jb, n, cone, work, 1, 1, descw, a, ia, ja, desca )
238 *
239 * Restore the strict lower triangular part of diagonal block.
240 *
241  CALL pzlacpy( 'Lower', jb-1, jb, work, 2, 1, descw, a, ia+1,
242  $ ja, desca )
243 *
244  ELSE
245 *
246 * Compute A from the Cholesky factor L : A = L*L'.
247 *
248  CALL pb_topset( ictxt, 'Broadcast', 'Rowwise', 'S-ring' )
249  CALL pb_topset( ictxt, 'Broadcast', 'Columnwise', ' ' )
250 *
251  DO 20 j = jl, jn+1, -desca( nb_ )
252 *
253  jb = min( ja+n-j, desca( nb_ ) )
254 *
255 * Update the trailing matrix, A = A + L*L'
256 *
257  CALL pzherk( 'Lower', 'No Transpose', ia+n-il-jb, jb, one, a,
258  $ il+jb, j, desca, one, a, il+jb, j+jb, desca )
259 *
260 * Copy current diagonal block of A into workspace
261 *
262  CALL pzlacpy( 'All', jb, jb, a, il, j, desca, work, 1, 1,
263  $ descw )
264 *
265 * Zero strict upper triangular part of diagonal block, to make
266 * it L1.
267 *
268  CALL pzlaset( 'Upper', jb, jb-1, zero, zero, a, il, j+1,
269  $ desca )
270 *
271 * Update the column panel L with the triangular matrix
272 *
273  CALL pztrmm( 'Right', 'Lower', 'Conjugate transpose',
274  $ 'Non-Unit', ia+n-il, jb, cone, work, 1, 1,
275  $ descw, a, il, j, desca )
276 *
277 * Restore the strict upper triangular part of diagonal block.
278 *
279  CALL pzlacpy( 'Upper', jb, jb-1, work, 1, 2, descw, a,
280  $ il, j+1, desca )
281 *
282  il = il - desca( mb_ )
283  descw( rsrc_ ) = mod( descw( rsrc_ ) + nprow - 1, nprow )
284  descw( csrc_ ) = mod( descw( csrc_ ) + npcol - 1, npcol )
285 *
286  20 CONTINUE
287 *
288 * Handle first block separately
289 *
290  jb = min( jn-ja+1, desca( nb_ ) )
291 *
292 * Update the trailing matrix, A = A + L*L'
293 *
294  CALL pzherk( 'Lower', 'No Transpose', n-jb, jb, one, a,
295  $ ia+jb, ja, desca, one, a, ia+jb, ja+jb, desca )
296 *
297 * Copy current diagonal block of A into workspace
298 *
299  CALL pzlacpy( 'All', jb, jb, a, ia, ja, desca, work, 1, 1,
300  $ descw )
301 *
302 * Zero strict upper triangular part of diagonal block, to make
303 * it L1.
304 *
305  CALL pzlaset( 'Upper', jb, jb-1, zero, zero, a, ia, ja+1,
306  $ desca )
307 *
308 * Update the column panel L with the triangular matrix
309 *
310  CALL pztrmm( 'Right', 'Lower', 'Conjugate transpose',
311  $ 'Non-Unit', n, jb, cone, work, 1, 1, descw, a,
312  $ ia, ja, desca )
313 *
314 * Restore the strict upper triangular part of diagonal block.
315 *
316  CALL pzlacpy( 'Upper', jb, jb-1, work, 1, 2, descw, a, ia,
317  $ ja+1, desca )
318 *
319  END IF
320 *
321  CALL pb_topset( ictxt, 'Broadcast', 'Rowwise', rowbtop )
322  CALL pb_topset( ictxt, 'Broadcast', 'Columnwise', colbtop )
323 *
324  RETURN
325 *
326 * End of PZPOTRRV
327 *
328  END
max
#define max(A, B)
Definition: pcgemr.c:180
pzlaset
subroutine pzlaset(UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA)
Definition: pzblastst.f:7509
descset
subroutine descset(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD)
Definition: descset.f:3
pzlacpy
subroutine pzlacpy(UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, DESCB)
Definition: pzlacpy.f:3
pzpotrrv
subroutine pzpotrrv(UPLO, N, A, IA, JA, DESCA, WORK)
Definition: pzpotrrv.f:2
min
#define min(A, B)
Definition: pcgemr.c:181