SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
All Classes Files Functions Variables Typedefs Macros
pclarzt.f
Go to the documentation of this file.
1 SUBROUTINE pclarzt( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU,
2 $ T, WORK )
3*
4* -- ScaLAPACK auxiliary routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* May 1, 1997
8*
9* .. Scalar Arguments ..
10 CHARACTER DIRECT, STOREV
11 INTEGER IV, JV, K, N
12* ..
13* .. Array Arguments ..
14 INTEGER DESCV( * )
15 COMPLEX TAU( * ), T( * ), V( * ), WORK( * )
16* ..
17*
18* Purpose
19* =======
20*
21* PCLARZT forms the triangular factor T of a complex block reflector
22* H of order > n, which is defined as a product of k elementary
23* reflectors as returned by PCTZRZF.
24*
25* If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
26*
27* If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
28*
29* If STOREV = 'C', the vector which defines the elementary reflector
30* H(i) is stored in the i-th column of the array V, and
31*
32* H = I - V * T * V'
33*
34* If STOREV = 'R', the vector which defines the elementary reflector
35* H(i) is stored in the i-th row of the array V, and
36*
37* H = I - V' * T * V
38*
39* Currently, only STOREV = 'R' and DIRECT = 'B' are supported.
40*
41* Notes
42* =====
43*
44* Each global data object is described by an associated description
45* vector. This vector stores the information required to establish
46* the mapping between an object element and its corresponding process
47* and memory location.
48*
49* Let A be a generic term for any 2D block cyclicly distributed array.
50* Such a global array has an associated description vector DESCA.
51* In the following comments, the character _ should be read as
52* "of the global array".
53*
54* NOTATION STORED IN EXPLANATION
55* --------------- -------------- --------------------------------------
56* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
57* DTYPE_A = 1.
58* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
59* the BLACS process grid A is distribu-
60* ted over. The context itself is glo-
61* bal, but the handle (the integer
62* value) may vary.
63* M_A (global) DESCA( M_ ) The number of rows in the global
64* array A.
65* N_A (global) DESCA( N_ ) The number of columns in the global
66* array A.
67* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
68* the rows of the array.
69* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
70* the columns of the array.
71* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
72* row of the array A is distributed.
73* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
74* first column of the array A is
75* distributed.
76* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
77* array. LLD_A >= MAX(1,LOCr(M_A)).
78*
79* Let K be the number of rows or columns of a distributed matrix,
80* and assume that its process grid has dimension p x q.
81* LOCr( K ) denotes the number of elements of K that a process
82* would receive if K were distributed over the p processes of its
83* process column.
84* Similarly, LOCc( K ) denotes the number of elements of K that a
85* process would receive if K were distributed over the q processes of
86* its process row.
87* The values of LOCr() and LOCc() may be determined via a call to the
88* ScaLAPACK tool function, NUMROC:
89* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
90* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
91* An upper bound for these quantities may be computed by:
92* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
93* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
94*
95* Arguments
96* =========
97*
98* DIRECT (global input) CHARACTER
99* Specifies the order in which the elementary reflectors are
100* multiplied to form the block reflector:
101* = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet)
102* = 'B': H = H(k) . . . H(2) H(1) (Backward)
103*
104* STOREV (global input) CHARACTER
105* Specifies how the vectors which define the elementary
106* reflectors are stored (see also Further Details):
107* = 'C': columnwise (not supported yet)
108* = 'R': rowwise
109*
110* N (global input) INTEGER
111* The number of meaningful entries of the block reflector H.
112* N >= 0.
113*
114* K (global input) INTEGER
115* The order of the triangular factor T (= the number of
116* elementary reflectors). 1 <= K <= MB_V (= NB_V).
117*
118* V (input/output) COMPLEX pointer into the local memory
119* to an array of local dimension (LOCr(IV+K-1),LOCc(JV+N-1)).
120* The distributed matrix V contains the Householder vectors.
121* See further details.
122*
123* IV (global input) INTEGER
124* The row index in the global array V indicating the first
125* row of sub( V ).
126*
127* JV (global input) INTEGER
128* The column index in the global array V indicating the
129* first column of sub( V ).
130*
131* DESCV (global and local input) INTEGER array of dimension DLEN_.
132* The array descriptor for the distributed matrix V.
133*
134* TAU (local input) COMPLEX, array, dimension LOCr(IV+K-1)
135* if INCV = M_V, and LOCc(JV+K-1) otherwise. This array
136* contains the Householder scalars related to the Householder
137* vectors. TAU is tied to the distributed matrix V.
138*
139* T (local output) COMPLEX array, dimension (MB_V,MB_V)
140* It contains the k-by-k triangular factor of the block
141* reflector associated with V. T is lower triangular.
142*
143* WORK (local workspace) COMPLEX array,
144* dimension (K*(K-1)/2)
145*
146* Further Details
147* ===============
148*
149* The shape of the matrix V and the storage of the vectors which define
150* the H(i) is best illustrated by the following example with n = 5 and
151* k = 3. The elements equal to 1 are not stored; the corresponding
152* array elements are modified but restored on exit. The rest of the
153* array is not used.
154*
155* DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R':
156*
157* ______V_____
158* ( v1 v2 v3 ) / \
159* ( v1 v2 v3 ) ( v1 v1 v1 v1 v1 . . . . 1 )
160* V = ( v1 v2 v3 ) ( v2 v2 v2 v2 v2 . . . 1 )
161* ( v1 v2 v3 ) ( v3 v3 v3 v3 v3 . . 1 )
162* ( v1 v2 v3 )
163* . . .
164* . . .
165* 1 . .
166* 1 .
167* 1
168*
169* DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R':
170*
171* ______V_____
172* 1 / \
173* . 1 ( 1 . . . . v1 v1 v1 v1 v1 )
174* . . 1 ( . 1 . . . v2 v2 v2 v2 v2 )
175* . . . ( . . 1 . . v3 v3 v3 v3 v3 )
176* . . .
177* ( v1 v2 v3 )
178* ( v1 v2 v3 )
179* V = ( v1 v2 v3 )
180* ( v1 v2 v3 )
181* ( v1 v2 v3 )
182*
183* =====================================================================
184*
185* .. Parameters ..
186 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
187 $ lld_, mb_, m_, nb_, n_, rsrc_
188 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
189 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
190 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
191 COMPLEX ZERO
192 parameter( zero = ( 0.0e+0, 0.0e+0 ) )
193* ..
194* .. Local Scalars ..
195 INTEGER ICOFF, ICTXT, II, IIV, INFO, IVCOL, IVROW,
196 $ itmp0, itmp1, iw, jjv, ldv, mycol, myrow,
197 $ npcol, nprow, nq
198* ..
199* .. External Subroutines ..
200 EXTERNAL blacs_abort, blacs_gridinfo, ccopy, cgemv,
201 $ cgsum2d, clacgv, claset, ctrmv,
203* ..
204* .. External Functions ..
205 LOGICAL LSAME
206 INTEGER NUMROC
207 EXTERNAL lsame, numroc
208* ..
209* .. Intrinsic Functions ..
210 INTRINSIC mod
211* ..
212* .. Executable Statements ..
213*
214* Get grid parameters
215*
216 ictxt = descv( ctxt_ )
217 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
218*
219* Check for currently supported options
220*
221 info = 0
222 IF( .NOT.lsame( direct, 'B' ) ) THEN
223 info = -1
224 ELSE IF( .NOT.lsame( storev, 'R' ) ) THEN
225 info = -2
226 END IF
227 IF( info.NE.0 ) THEN
228 CALL pxerbla( ictxt, 'PCLARZT', -info )
229 CALL blacs_abort( ictxt, 1 )
230 RETURN
231 END IF
232*
233 CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol,
234 $ iiv, jjv, ivrow, ivcol )
235*
236 IF( myrow.EQ.ivrow ) THEN
237 iw = 1
238 itmp0 = 0
239 ldv = descv( lld_ )
240 icoff = mod( jv-1, descv( nb_ ) )
241 nq = numroc( n+icoff, descv( nb_ ), mycol, ivcol, npcol )
242 IF( mycol.EQ.ivcol )
243 $ nq = nq - icoff
244*
245 DO 10 ii = iiv+k-2, iiv, -1
246*
247* T(i+1:k,i) = -tau( iv+i-1 ) *
248* V(iv+i:iv+k-1,jv:jv+n-1) * V(iv+i-1,jv:jv+n-1)'
249*
250 itmp0 = itmp0 + 1
251 IF( nq.GT.0 ) THEN
252 CALL clacgv( nq, v( ii+(jjv-1)*ldv ), ldv )
253 CALL cgemv( 'No transpose', itmp0, nq, -tau( ii ),
254 $ v( ii+1+(jjv-1)*ldv ), ldv,
255 $ v( ii+(jjv-1)*ldv ), ldv, zero, work( iw ),
256 $ 1 )
257 CALL clacgv( nq, v( ii+(jjv-1)*ldv ), ldv )
258 ELSE
259 CALL claset( 'All', itmp0, 1, zero, zero, work( iw ),
260 $ itmp0 )
261 END IF
262 iw = iw + itmp0
263*
264 10 CONTINUE
265*
266 CALL cgsum2d( ictxt, 'Rowwise', ' ', iw-1, 1, work, iw-1,
267 $ myrow, ivcol )
268*
269 IF( mycol.EQ.ivcol ) THEN
270*
271 iw = 1
272 itmp0 = 0
273 itmp1 = k + 1 + (k-1) * descv( mb_ )
274*
275 t( itmp1-1 ) = tau( iiv+k-1 )
276*
277 DO 20 ii = iiv+k-2, iiv, -1
278*
279* T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i)
280*
281 itmp0 = itmp0 + 1
282 itmp1 = itmp1 - descv( mb_ ) - 1
283 CALL ccopy( itmp0, work( iw ), 1, t( itmp1 ), 1 )
284 iw = iw + itmp0
285*
286 CALL ctrmv( 'Lower', 'No transpose', 'Non-unit', itmp0,
287 $ t( itmp1+descv( mb_ ) ), descv( mb_ ),
288 $ t( itmp1 ), 1 )
289 t( itmp1-1 ) = tau( ii )
290*
291 20 CONTINUE
292*
293 END IF
294*
295 END IF
296*
297 RETURN
298*
299* End of PCLARZT
300*
301 END
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
subroutine pclarzt(direct, storev, n, k, v, iv, jv, descv, tau, t, work)
Definition pclarzt.f:3
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2