SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pzlarft.f
Go to the documentation of this file.
1 SUBROUTINE pzlarft( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU,
2 $ T, WORK )
3*
4* -- ScaLAPACK auxiliary routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* May 1, 1997
8*
9* .. Scalar Arguments ..
10 CHARACTER DIRECT, STOREV
11 INTEGER IV, JV, K, N
12* ..
13* .. Array Arguments ..
14 INTEGER DESCV( * )
15 COMPLEX*16 TAU( * ), T( * ), V( * ), WORK( * )
16* ..
17*
18* Purpose
19* =======
20*
21* PZLARFT forms the triangular factor T of a complex block reflector H
22* of order n, which is defined as a product of k elementary reflectors.
23*
24* If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
25*
26* If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
27*
28* If STOREV = 'C', the vector which defines the elementary reflector
29* H(i) is stored in the i-th column of the distributed matrix V, and
30*
31* H = I - V * T * V'
32*
33* If STOREV = 'R', the vector which defines the elementary reflector
34* H(i) is stored in the i-th row of the distributed matrix V, and
35*
36* H = I - V' * T * V
37*
38* Notes
39* =====
40*
41* Each global data object is described by an associated description
42* vector. This vector stores the information required to establish
43* the mapping between an object element and its corresponding process
44* and memory location.
45*
46* Let A be a generic term for any 2D block cyclicly distributed array.
47* Such a global array has an associated description vector DESCA.
48* In the following comments, the character _ should be read as
49* "of the global array".
50*
51* NOTATION STORED IN EXPLANATION
52* --------------- -------------- --------------------------------------
53* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
54* DTYPE_A = 1.
55* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
56* the BLACS process grid A is distribu-
57* ted over. The context itself is glo-
58* bal, but the handle (the integer
59* value) may vary.
60* M_A (global) DESCA( M_ ) The number of rows in the global
61* array A.
62* N_A (global) DESCA( N_ ) The number of columns in the global
63* array A.
64* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
65* the rows of the array.
66* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
67* the columns of the array.
68* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
69* row of the array A is distributed.
70* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
71* first column of the array A is
72* distributed.
73* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
74* array. LLD_A >= MAX(1,LOCr(M_A)).
75*
76* Let K be the number of rows or columns of a distributed matrix,
77* and assume that its process grid has dimension p x q.
78* LOCr( K ) denotes the number of elements of K that a process
79* would receive if K were distributed over the p processes of its
80* process column.
81* Similarly, LOCc( K ) denotes the number of elements of K that a
82* process would receive if K were distributed over the q processes of
83* its process row.
84* The values of LOCr() and LOCc() may be determined via a call to the
85* ScaLAPACK tool function, NUMROC:
86* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
87* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
88* An upper bound for these quantities may be computed by:
89* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
90* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
91*
92* Arguments
93* =========
94*
95* DIRECT (global input) CHARACTER*1
96* Specifies the order in which the elementary reflectors are
97* multiplied to form the block reflector:
98* = 'F': H = H(1) H(2) . . . H(k) (Forward)
99* = 'B': H = H(k) . . . H(2) H(1) (Backward)
100*
101* STOREV (global input) CHARACTER*1
102* Specifies how the vectors which define the elementary
103* reflectors are stored (see also Further Details):
104* = 'C': columnwise
105* = 'R': rowwise
106*
107* N (global input) INTEGER
108* The order of the block reflector H. N >= 0.
109*
110* K (global input) INTEGER
111* The order of the triangular factor T (= the number of
112* elementary reflectors). 1 <= K <= MB_V (= NB_V).
113*
114* V (input/output) COMPLEX*16 pointer into the local memory
115* to an array of local dimension (LOCr(IV+N-1),LOCc(JV+K-1))
116* if STOREV = 'C', and (LOCr(IV+K-1),LOCc(JV+N-1)) if
117* STOREV = 'R'. The distributed matrix V contains the
118* Householder vectors. See further details.
119*
120* IV (global input) INTEGER
121* The row index in the global array V indicating the first
122* row of sub( V ).
123*
124* JV (global input) INTEGER
125* The column index in the global array V indicating the
126* first column of sub( V ).
127*
128* DESCV (global and local input) INTEGER array of dimension DLEN_.
129* The array descriptor for the distributed matrix V.
130*
131* TAU (local input) COMPLEX*16, array, dimension LOCr(IV+K-1)
132* if INCV = M_V, and LOCc(JV+K-1) otherwise. This array
133* contains the Householder scalars related to the Householder
134* vectors. TAU is tied to the distributed matrix V.
135*
136* T (local output) COMPLEX*16 array, dimension (NB_V,NB_V)
137* if STOREV = 'Col', and (MB_V,MB_V) otherwise. It contains
138* the k-by-k triangular factor of the block reflector asso-
139* ciated with V. If DIRECT = 'F', T is upper triangular;
140* if DIRECT = 'B', T is lower triangular.
141*
142* WORK (local workspace) COMPLEX*16 array,
143* dimension (K*(K-1)/2)
144*
145* Further Details
146* ===============
147*
148* The shape of the matrix V and the storage of the vectors which define
149* the H(i) is best illustrated by the following example with n = 5 and
150* k = 3. The elements equal to 1 are not stored; the corresponding
151* array elements are modified but restored on exit. The rest of the
152* array is not used.
153*
154* DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R':
155*
156* V( IV:IV+N-1, ( 1 ) V( IV:IV+K-1, ( 1 v1 v1 v1 v1 )
157* JV:JV+K-1 ) = ( v1 1 ) JV:JV+N-1 ) = ( 1 v2 v2 v2 )
158* ( v1 v2 1 ) ( 1 v3 v3 )
159* ( v1 v2 v3 )
160* ( v1 v2 v3 )
161*
162* DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R':
163*
164* V( IV:IV+N-1, ( v1 v2 v3 ) V( IV:IV+K-1, ( v1 v1 1 )
165* JV:JV+K-1 ) = ( v1 v2 v3 ) JV:JV+N-1 ) = ( v2 v2 v2 1 )
166* ( 1 v2 v3 ) ( v3 v3 v3 v3 1 )
167* ( 1 v3 )
168* ( 1 )
169*
170* =====================================================================
171*
172* .. Parameters ..
173 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
174 $ lld_, mb_, m_, nb_, n_, rsrc_
175 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
176 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
177 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
178 COMPLEX*16 ONE, ZERO
179 parameter( one = ( 1.0d+0, 0.0d+0 ),
180 $ zero = ( 0.0d+0, 0.0d+0 ) )
181* ..
182* .. Local Scalars ..
183 LOGICAL FORWARD
184 INTEGER ICOFF, ICTXT, II, IIV, IROFF, IVCOL, IVROW,
185 $ itmp0, itmp1, iw, jj, jjv, ldv, micol, mirow,
186 $ mycol, myrow, np, npcol, nprow, nq
187 COMPLEX*16 VII
188* ..
189* .. External Subroutines ..
190 EXTERNAL blacs_gridinfo, infog2l, zcopy, zgemv,
191 $ zgsum2d, zlacgv, zlaset, ztrmv
192* ..
193* .. External Functions ..
194 LOGICAL LSAME
195 INTEGER INDXG2P, NUMROC
196 EXTERNAL indxg2p, lsame, numroc
197* ..
198* .. Intrinsic Functions ..
199 INTRINSIC mod
200* ..
201* .. Executable Statements ..
202*
203* Quick return if possible
204*
205 IF( n.LE.0 .OR. k.LE.0 )
206 $ RETURN
207*
208 ictxt = descv( ctxt_ )
209 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
210*
211 forward = lsame( direct, 'F' )
212 CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol,
213 $ iiv, jjv, ivrow, ivcol )
214*
215 IF( lsame( storev, 'C' ) .AND. mycol.EQ.ivcol ) THEN
216*
217 iw = 1
218 ldv = descv( lld_ )
219 iroff = mod( iv-1, descv( mb_ ) )
220*
221 IF( forward ) THEN
222*
223* DIRECT = 'Forward', STOREV = 'Columnwise'
224*
225 np = numroc( n+iroff, descv( mb_ ), myrow, ivrow, nprow )
226 IF( myrow.EQ.ivrow ) THEN
227 np = np - iroff
228 ii = iiv + 1
229 ELSE
230 ii = iiv
231 END IF
232 IF( iroff+1.EQ.descv( mb_ ) ) THEN
233 mirow = mod( ivrow+1, nprow )
234 ELSE
235 mirow = ivrow
236 END IF
237 itmp0 = 0
238*
239 DO 10 jj = jjv+1, jjv+k-1
240*
241 IF( myrow.EQ.mirow ) THEN
242 vii = v( ii+(jj-1)*ldv )
243 v( ii+(jj-1)*ldv ) = one
244 END IF
245*
246* T(1:i-1,i) = -tau( jv+i-1 ) *
247* V(iv+i-1:iv+n-1,jv:jv+i-2)' * V(iv+i-1:iv+n-1,jv+i-1)
248*
249 itmp0 = itmp0 + 1
250 IF( np-ii+iiv.GT.0 ) THEN
251 CALL zgemv( 'Conjugate transpose', np-ii+iiv, itmp0,
252 $ -tau( jj ), v( ii+(jjv-1)*ldv ), ldv,
253 $ v( ii+(jj-1)*ldv ), 1, zero,
254 $ work( iw ), 1 )
255 ELSE
256 CALL zlaset( 'All', itmp0, 1, zero, zero, work( iw ),
257 $ itmp0 )
258 END IF
259*
260 iw = iw + itmp0
261 IF( myrow.EQ.mirow ) THEN
262 v( ii+(jj-1)*ldv ) = vii
263 ii = ii + 1
264 END IF
265*
266 IF( mod( iv+itmp0, descv( mb_ ) ).EQ.0 )
267 $ mirow = mod( mirow+1, nprow )
268*
269 10 CONTINUE
270*
271 CALL zgsum2d( ictxt, 'Columnwise', ' ', iw-1, 1, work, iw-1,
272 $ ivrow, mycol )
273*
274 IF( myrow.EQ.ivrow ) THEN
275*
276 iw = 1
277 itmp0 = 0
278 itmp1 = 1
279*
280 t( itmp1 ) = tau( jjv )
281*
282 DO 20 jj = jjv+1, jjv+k-1
283*
284* T(1:j-1,j) = T(1:j-1,1:j-1) * T(1:j-1,j)
285*
286 itmp0 = itmp0 + 1
287 itmp1 = itmp1 + descv( nb_ )
288 CALL zcopy( itmp0, work( iw ), 1, t( itmp1 ), 1 )
289 iw = iw + itmp0
290*
291 CALL ztrmv( 'Upper', 'No transpose', 'Non-unit',
292 $ itmp0, t, descv( nb_ ), t( itmp1 ), 1 )
293 t(itmp1+itmp0) = tau( jj )
294*
295 20 CONTINUE
296*
297 END IF
298*
299 ELSE
300*
301* DIRECT = 'Backward', STOREV = 'Columnwise'
302*
303 np = numroc( n+iroff-1, descv( mb_ ), myrow, ivrow, nprow )
304 IF( myrow.EQ.ivrow )
305 $ np = np - iroff
306 mirow = indxg2p( iv+n-2, descv( mb_ ), myrow,
307 $ descv( rsrc_ ), nprow )
308 ii = iiv + np - 1
309 itmp0 = 0
310*
311 DO 30 jj = jjv+k-2, jjv, -1
312*
313 IF( myrow.EQ.mirow ) THEN
314 vii = v( ii+(jj-1)*ldv )
315 v( ii+(jj-1)*ldv ) = one
316 END IF
317*
318* T(1:i-1,i) = -tau( jv+i-1 ) *
319* V(iv:iv+n-k+i-1,jv+i:jv+k-1)' * V(iv:iv+n-k+i-1,jv+i-1)
320*
321 itmp0 = itmp0 + 1
322 IF( ii-iiv+1.GT.0 ) THEN
323 CALL zgemv( 'Conjugate transpose', ii-iiv+1, itmp0,
324 $ -tau( jj ), v( iiv+jj*ldv ), ldv,
325 $ v( iiv+(jj-1)*ldv ), 1, zero,
326 $ work( iw ), 1 )
327 ELSE
328 CALL zlaset( 'All', itmp0, 1, zero, zero, work( iw ),
329 $ itmp0 )
330 END IF
331*
332 iw = iw + itmp0
333 IF( myrow.EQ.mirow ) THEN
334 v( ii+(jj-1)*ldv ) = vii
335 ii = ii - 1
336 END IF
337*
338 IF( mod( iv+n-itmp0-2, descv(mb_) ).EQ.0 )
339 $ mirow = mod( mirow+nprow-1, nprow )
340*
341 30 CONTINUE
342*
343 CALL zgsum2d( ictxt, 'Columnwise', ' ', iw-1, 1, work, iw-1,
344 $ ivrow, mycol )
345*
346 IF( myrow.EQ.ivrow ) THEN
347*
348 iw = 1
349 itmp0 = 0
350 itmp1 = k + 1 + (k-1) * descv( nb_ )
351*
352 t( itmp1-1 ) = tau( jjv+k-1 )
353*
354 DO 40 jj = jjv+k-2, jjv, -1
355*
356* T(j+1:k,j) = T(j+1:k,j+1:k) * T(j+1:k,j)
357*
358 itmp0 = itmp0 + 1
359 itmp1 = itmp1 - descv( nb_ ) - 1
360 CALL zcopy( itmp0, work( iw ), 1, t( itmp1 ), 1 )
361 iw = iw + itmp0
362*
363 CALL ztrmv( 'Lower', 'No transpose', 'Non-unit',
364 $ itmp0, t( itmp1+descv( nb_ ) ),
365 $ descv( nb_ ), t( itmp1 ), 1 )
366 t( itmp1-1 ) = tau( jj )
367*
368 40 CONTINUE
369*
370 END IF
371*
372 END IF
373*
374 ELSE IF( lsame( storev, 'R' ) .AND. myrow.EQ.ivrow ) THEN
375*
376 iw = 1
377 ldv = descv( lld_ )
378 icoff = mod( jv-1, descv( nb_ ) )
379*
380 IF( forward ) THEN
381*
382* DIRECT = 'Forward', STOREV = 'Rowwise'
383*
384 nq = numroc( n+icoff, descv( nb_ ), mycol, ivcol, npcol )
385 IF( mycol.EQ.ivcol ) THEN
386 nq = nq - icoff
387 jj = jjv + 1
388 ELSE
389 jj = jjv
390 END IF
391 IF( icoff+1.EQ.descv( nb_ ) ) THEN
392 micol = mod( ivcol+1, npcol )
393 ELSE
394 micol = ivcol
395 END IF
396 itmp0 = 0
397*
398 DO 50 ii = iiv+1, iiv+k-1
399*
400 IF( mycol.EQ.micol ) THEN
401 vii = v( ii+(jj-1)*ldv )
402 v( ii+(jj-1)*ldv ) = one
403 END IF
404*
405* T(1:i-1,i) = -tau( iv+i-1 ) *
406* V(iv+i-1,jv+i-1:jv+n-1) * V(iv:iv+i-2,jv+i-1:jv+n-1)'
407*
408 itmp0 = itmp0 + 1
409 IF( nq-jj+jjv.GT.0 ) THEN
410 CALL zlacgv( nq-jj+jjv, v( ii+(jj-1)*ldv ), ldv )
411 CALL zgemv( 'No transpose', itmp0, nq-jj+jjv,
412 $ -tau(ii), v( iiv+(jj-1)*ldv ), ldv,
413 $ v( ii+(jj-1)*ldv ), ldv, zero,
414 $ work( iw ), 1 )
415 CALL zlacgv( nq-jj+jjv, v( ii+(jj-1)*ldv ), ldv )
416 ELSE
417 CALL zlaset( 'All', itmp0, 1, zero, zero,
418 $ work( iw ), itmp0 )
419 END IF
420*
421 iw = iw + itmp0
422 IF( mycol.EQ.micol ) THEN
423 v( ii+(jj-1)*ldv ) = vii
424 jj = jj + 1
425 END IF
426*
427 IF( mod( jv+itmp0, descv( nb_ ) ).EQ.0 )
428 $ micol = mod( micol+1, npcol )
429*
430 50 CONTINUE
431*
432 CALL zgsum2d( ictxt, 'Rowwise', ' ', iw-1, 1, work, iw-1,
433 $ myrow, ivcol )
434*
435 IF( mycol.EQ.ivcol ) THEN
436*
437 iw = 1
438 itmp0 = 0
439 itmp1 = 1
440*
441 t( itmp1 ) = tau( iiv )
442*
443 DO 60 ii = iiv+1, iiv+k-1
444*
445* T(1:i-1,i) = T(1:i-1,1:i-1) * T(1:i-1,i)
446*
447 itmp0 = itmp0 + 1
448 itmp1 = itmp1 + descv( mb_ )
449 CALL zcopy( itmp0, work( iw ), 1, t( itmp1 ), 1 )
450 iw = iw + itmp0
451*
452 CALL ztrmv( 'Upper', 'No transpose', 'Non-unit',
453 $ itmp0, t, descv( mb_ ), t( itmp1 ), 1 )
454 t( itmp1+itmp0 ) = tau( ii )
455*
456 60 CONTINUE
457*
458 END IF
459*
460 ELSE
461*
462* DIRECT = 'Backward', STOREV = 'Rowwise'
463*
464 nq = numroc( n+icoff-1, descv( nb_ ), mycol, ivcol, npcol )
465 IF( mycol.EQ.ivcol )
466 $ nq = nq - icoff
467 micol = indxg2p( jv+n-2, descv( nb_ ), mycol,
468 $ descv( csrc_ ), npcol )
469 jj = jjv + nq - 1
470 itmp0 = 0
471*
472 DO 70 ii = iiv+k-2, iiv, -1
473*
474 IF( mycol.EQ.micol ) THEN
475 vii = v( ii+(jj-1)*ldv )
476 v( ii+(jj-1)*ldv ) = one
477 END IF
478*
479* T(i+1:k,i) = -tau( iv+i-1 ) *
480* V(iv+i:iv+k-1,jv:jv+n-k+i-1)' * V(iv+i-1,jv:jv+n-k+i-1)'
481*
482 itmp0 = itmp0 + 1
483 IF( jj-jjv+1.GT.0 ) THEN
484 CALL zlacgv( jj-jjv+1, v( ii+(jjv-1)*ldv ), ldv )
485 CALL zgemv( 'No transpose', itmp0, jj-jjv+1,
486 $ -tau( ii ), v( ii+1+(jjv-1)*ldv ), ldv,
487 $ v( ii+(jjv-1)*ldv ), ldv, zero,
488 $ work( iw ), 1 )
489 CALL zlacgv( jj-jjv+1, v( ii+(jjv-1)*ldv ), ldv )
490 ELSE
491 CALL zlaset( 'All', itmp0, 1, zero, zero,
492 $ work( iw ), itmp0 )
493 END IF
494*
495 iw = iw + itmp0
496 IF( mycol.EQ.micol ) THEN
497 v( ii+(jj-1)*ldv ) = vii
498 jj = jj - 1
499 END IF
500*
501 IF( mod( jv+n-itmp0-2, descv( nb_ ) ).EQ.0 )
502 $ micol = mod( micol+npcol-1, npcol )
503*
504 70 CONTINUE
505*
506 CALL zgsum2d( ictxt, 'Rowwise', ' ', iw-1, 1, work, iw-1,
507 $ myrow, ivcol )
508*
509 IF( mycol.EQ.ivcol ) THEN
510*
511 iw = 1
512 itmp0 = 0
513 itmp1 = k + 1 + (k-1) * descv( mb_ )
514*
515 t( itmp1-1 ) = tau( iiv+k-1 )
516*
517 DO 80 ii = iiv+k-2, iiv, -1
518*
519* T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i)
520*
521 itmp0 = itmp0 + 1
522 itmp1 = itmp1 - descv( mb_ ) - 1
523 CALL zcopy( itmp0, work( iw ), 1, t( itmp1 ), 1 )
524 iw = iw + itmp0
525*
526 CALL ztrmv( 'Lower', 'No transpose', 'Non-unit',
527 $ itmp0, t( itmp1+descv( mb_ ) ),
528 $ descv( mb_ ), t( itmp1 ), 1 )
529 t( itmp1-1 ) = tau( ii )
530*
531 80 CONTINUE
532*
533 END IF
534*
535 END IF
536*
537 END IF
538*
539 RETURN
540*
541* End of PZLARFT
542*
543 END
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
subroutine pzlarft(direct, storev, n, k, v, iv, jv, descv, tau, t, work)
Definition pzlarft.f:3