SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pslarft.f
Go to the documentation of this file.
1 SUBROUTINE pslarft( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU,
2 $ T, WORK )
3*
4* -- ScaLAPACK auxiliary routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* May 1, 1997
8*
9* .. Scalar Arguments ..
10 CHARACTER DIRECT, STOREV
11 INTEGER IV, JV, K, N
12* ..
13* .. Array Arguments ..
14 INTEGER DESCV( * )
15 REAL TAU( * ), T( * ), V( * ), WORK( * )
16* ..
17*
18* Purpose
19* =======
20*
21* PSLARFT forms the triangular factor T of a real block reflector H
22* of order n, which is defined as a product of k elementary reflectors.
23*
24* If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
25*
26* If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
27*
28* If STOREV = 'C', the vector which defines the elementary reflector
29* H(i) is stored in the i-th column of the distributed matrix V, and
30*
31* H = I - V * T * V'
32*
33* If STOREV = 'R', the vector which defines the elementary reflector
34* H(i) is stored in the i-th row of the distributed matrix V, and
35*
36* H = I - V' * T * V
37*
38* Notes
39* =====
40*
41* Each global data object is described by an associated description
42* vector. This vector stores the information required to establish
43* the mapping between an object element and its corresponding process
44* and memory location.
45*
46* Let A be a generic term for any 2D block cyclicly distributed array.
47* Such a global array has an associated description vector DESCA.
48* In the following comments, the character _ should be read as
49* "of the global array".
50*
51* NOTATION STORED IN EXPLANATION
52* --------------- -------------- --------------------------------------
53* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
54* DTYPE_A = 1.
55* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
56* the BLACS process grid A is distribu-
57* ted over. The context itself is glo-
58* bal, but the handle (the integer
59* value) may vary.
60* M_A (global) DESCA( M_ ) The number of rows in the global
61* array A.
62* N_A (global) DESCA( N_ ) The number of columns in the global
63* array A.
64* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
65* the rows of the array.
66* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
67* the columns of the array.
68* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
69* row of the array A is distributed.
70* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
71* first column of the array A is
72* distributed.
73* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
74* array. LLD_A >= MAX(1,LOCr(M_A)).
75*
76* Let K be the number of rows or columns of a distributed matrix,
77* and assume that its process grid has dimension p x q.
78* LOCr( K ) denotes the number of elements of K that a process
79* would receive if K were distributed over the p processes of its
80* process column.
81* Similarly, LOCc( K ) denotes the number of elements of K that a
82* process would receive if K were distributed over the q processes of
83* its process row.
84* The values of LOCr() and LOCc() may be determined via a call to the
85* ScaLAPACK tool function, NUMROC:
86* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
87* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
88* An upper bound for these quantities may be computed by:
89* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
90* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
91*
92* Arguments
93* =========
94*
95* DIRECT (global input) CHARACTER*1
96* Specifies the order in which the elementary reflectors are
97* multiplied to form the block reflector:
98* = 'F': H = H(1) H(2) . . . H(k) (Forward)
99* = 'B': H = H(k) . . . H(2) H(1) (Backward)
100*
101* STOREV (global input) CHARACTER*1
102* Specifies how the vectors which define the elementary
103* reflectors are stored (see also Further Details):
104* = 'C': columnwise
105* = 'R': rowwise
106*
107* N (global input) INTEGER
108* The order of the block reflector H. N >= 0.
109*
110* K (global input) INTEGER
111* The order of the triangular factor T (= the number of
112* elementary reflectors). 1 <= K <= MB_V (= NB_V).
113*
114* V (input/output) REAL pointer into the local memory
115* to an array of local dimension (LOCr(IV+N-1),LOCc(JV+K-1))
116* if STOREV = 'C', and (LOCr(IV+K-1),LOCc(JV+N-1)) if
117* STOREV = 'R'. The distributed matrix V contains the
118* Householder vectors. See further details.
119*
120* IV (global input) INTEGER
121* The row index in the global array V indicating the first
122* row of sub( V ).
123*
124* JV (global input) INTEGER
125* The column index in the global array V indicating the
126* first column of sub( V ).
127*
128* DESCV (global and local input) INTEGER array of dimension DLEN_.
129* The array descriptor for the distributed matrix V.
130*
131* TAU (local input) REAL, array, dimension LOCr(IV+K-1)
132* if INCV = M_V, and LOCc(JV+K-1) otherwise. This array
133* contains the Householder scalars related to the Householder
134* vectors. TAU is tied to the distributed matrix V.
135*
136* T (local output) REAL array, dimension (NB_V,NB_V)
137* if STOREV = 'Col', and (MB_V,MB_V) otherwise. It contains
138* the k-by-k triangular factor of the block reflector asso-
139* ciated with V. If DIRECT = 'F', T is upper triangular;
140* if DIRECT = 'B', T is lower triangular.
141*
142* WORK (local workspace) REAL array,
143* dimension (K*(K-1)/2)
144*
145* Further Details
146* ===============
147*
148* The shape of the matrix V and the storage of the vectors which define
149* the H(i) is best illustrated by the following example with n = 5 and
150* k = 3. The elements equal to 1 are not stored; the corresponding
151* array elements are modified but restored on exit. The rest of the
152* array is not used.
153*
154* DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R':
155*
156* V( IV:IV+N-1, ( 1 ) V( IV:IV+K-1, ( 1 v1 v1 v1 v1 )
157* JV:JV+K-1 ) = ( v1 1 ) JV:JV+N-1 ) = ( 1 v2 v2 v2 )
158* ( v1 v2 1 ) ( 1 v3 v3 )
159* ( v1 v2 v3 )
160* ( v1 v2 v3 )
161*
162* DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R':
163*
164* V( IV:IV+N-1, ( v1 v2 v3 ) V( IV:IV+K-1, ( v1 v1 1 )
165* JV:JV+K-1 ) = ( v1 v2 v3 ) JV:JV+N-1 ) = ( v2 v2 v2 1 )
166* ( 1 v2 v3 ) ( v3 v3 v3 v3 1 )
167* ( 1 v3 )
168* ( 1 )
169*
170* =====================================================================
171*
172* .. Parameters ..
173 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
174 $ lld_, mb_, m_, nb_, n_, rsrc_
175 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
176 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
177 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
178 REAL ONE, ZERO
179 parameter( one = 1.0e+0, zero = 0.0e+0 )
180* ..
181* .. Local Scalars ..
182 LOGICAL FORWARD
183 INTEGER ICOFF, ICTXT, II, IIV, IROFF, IVCOL, IVROW,
184 $ itmp0, itmp1, iw, jj, jjv, ldv, micol, mirow,
185 $ mycol, myrow, np, npcol, nprow, nq
186 REAL VII
187* ..
188* .. External Subroutines ..
189 EXTERNAL blacs_gridinfo, infog2l, scopy, sgemv,
190 $ sgsum2d, slaset, strmv
191* ..
192* .. External Functions ..
193 LOGICAL LSAME
194 INTEGER INDXG2P, NUMROC
195 EXTERNAL indxg2p, lsame, numroc
196* ..
197* .. Intrinsic Functions ..
198 INTRINSIC mod
199* ..
200* .. Executable Statements ..
201*
202* Quick return if possible
203*
204 IF( n.LE.0 .OR. k.LE.0 )
205 $ RETURN
206*
207 ictxt = descv( ctxt_ )
208 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
209*
210 forward = lsame( direct, 'F' )
211 CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol,
212 $ iiv, jjv, ivrow, ivcol )
213*
214 IF( lsame( storev, 'C' ) .AND. mycol.EQ.ivcol ) THEN
215*
216 iw = 1
217 ldv = descv( lld_ )
218 iroff = mod( iv-1, descv( mb_ ) )
219*
220 IF( forward ) THEN
221*
222* DIRECT = 'Forward', STOREV = 'Columnwise'
223*
224 np = numroc( n+iroff, descv( mb_ ), myrow, ivrow, nprow )
225 IF( myrow.EQ.ivrow ) THEN
226 np = np - iroff
227 ii = iiv + 1
228 ELSE
229 ii = iiv
230 END IF
231 IF( iroff+1.EQ.descv( mb_ ) ) THEN
232 mirow = mod( ivrow+1, nprow )
233 ELSE
234 mirow = ivrow
235 END IF
236 itmp0 = 0
237*
238 DO 10 jj = jjv+1, jjv+k-1
239*
240 IF( myrow.EQ.mirow ) THEN
241 vii = v( ii+(jj-1)*ldv )
242 v( ii+(jj-1)*ldv ) = one
243 END IF
244*
245* T(1:i-1,i) = -tau( jv+i-1 ) *
246* V(iv+i-1:iv+n-1,jv:jv+i-2)' * V(iv+i-1:iv+n-1,jv+i-1)
247*
248 itmp0 = itmp0 + 1
249 IF( np-ii+iiv.GT.0 ) THEN
250 CALL sgemv( 'Transpose', np-ii+iiv, itmp0,
251 $ -tau( jj ), v( ii+(jjv-1)*ldv ), ldv,
252 $ v( ii+(jj-1)*ldv ), 1, zero,
253 $ work( iw ), 1 )
254 ELSE
255 CALL slaset( 'All', itmp0, 1, zero, zero, work( iw ),
256 $ itmp0 )
257 END IF
258*
259 iw = iw + itmp0
260 IF( myrow.EQ.mirow ) THEN
261 v( ii+(jj-1)*ldv ) = vii
262 ii = ii + 1
263 END IF
264*
265 IF( mod( iv+itmp0, descv( mb_ ) ).EQ.0 )
266 $ mirow = mod( mirow+1, nprow )
267*
268 10 CONTINUE
269*
270 CALL sgsum2d( ictxt, 'Columnwise', ' ', iw-1, 1, work, iw-1,
271 $ ivrow, mycol )
272*
273 IF( myrow.EQ.ivrow ) THEN
274*
275 iw = 1
276 itmp0 = 0
277 itmp1 = 1
278*
279 t( itmp1 ) = tau( jjv )
280*
281 DO 20 jj = jjv+1, jjv+k-1
282*
283* T(1:j-1,j) = T(1:j-1,1:j-1) * T(1:j-1,j)
284*
285 itmp0 = itmp0 + 1
286 itmp1 = itmp1 + descv( nb_ )
287 CALL scopy( itmp0, work( iw ), 1, t( itmp1 ), 1 )
288 iw = iw + itmp0
289*
290 CALL strmv( 'Upper', 'No transpose', 'Non-unit',
291 $ itmp0, t, descv( nb_ ), t( itmp1 ), 1 )
292 t(itmp1+itmp0) = tau( jj )
293*
294 20 CONTINUE
295*
296 END IF
297*
298 ELSE
299*
300* DIRECT = 'Backward', STOREV = 'Columnwise'
301*
302 np = numroc( n+iroff-1, descv( mb_ ), myrow, ivrow, nprow )
303 IF( myrow.EQ.ivrow )
304 $ np = np - iroff
305 mirow = indxg2p( iv+n-2, descv( mb_ ), myrow,
306 $ descv( rsrc_ ), nprow )
307 ii = iiv + np - 1
308 itmp0 = 0
309*
310 DO 30 jj = jjv+k-2, jjv, -1
311*
312 IF( myrow.EQ.mirow ) THEN
313 vii = v( ii+(jj-1)*ldv )
314 v( ii+(jj-1)*ldv ) = one
315 END IF
316*
317* T(1:i-1,i) = -tau( jv+i-1 ) *
318* V(iv:iv+n-k+i-1,jv+i:jv+k-1)' * V(iv:iv+n-k+i-1,jv+i-1)
319*
320 itmp0 = itmp0 + 1
321 IF( ii-iiv+1.GT.0 ) THEN
322 CALL sgemv( 'Transpose', ii-iiv+1, itmp0, -tau( jj ),
323 $ v( iiv+jj*ldv ), ldv,
324 $ v( iiv+(jj-1)*ldv ), 1, zero,
325 $ work( iw ), 1 )
326 ELSE
327 CALL slaset( 'All', itmp0, 1, zero, zero, work( iw ),
328 $ itmp0 )
329 END IF
330*
331 iw = iw + itmp0
332 IF( myrow.EQ.mirow ) THEN
333 v( ii+(jj-1)*ldv ) = vii
334 ii = ii - 1
335 END IF
336*
337 IF( mod( iv+n-itmp0-2, descv(mb_) ).EQ.0 )
338 $ mirow = mod( mirow+nprow-1, nprow )
339*
340 30 CONTINUE
341*
342 CALL sgsum2d( ictxt, 'Columnwise', ' ', iw-1, 1, work, iw-1,
343 $ ivrow, mycol )
344*
345 IF( myrow.EQ.ivrow ) THEN
346*
347 iw = 1
348 itmp0 = 0
349 itmp1 = k + 1 + (k-1) * descv( nb_ )
350*
351 t( itmp1-1 ) = tau( jjv+k-1 )
352*
353 DO 40 jj = jjv+k-2, jjv, -1
354*
355* T(j+1:k,j) = T(j+1:k,j+1:k) * T(j+1:k,j)
356*
357 itmp0 = itmp0 + 1
358 itmp1 = itmp1 - descv( nb_ ) - 1
359 CALL scopy( itmp0, work( iw ), 1, t( itmp1 ), 1 )
360 iw = iw + itmp0
361*
362 CALL strmv( 'Lower', 'No transpose', 'Non-unit',
363 $ itmp0, t( itmp1+descv( nb_ ) ),
364 $ descv( nb_ ), t( itmp1 ), 1 )
365 t( itmp1-1 ) = tau( jj )
366*
367 40 CONTINUE
368*
369 END IF
370*
371 END IF
372*
373 ELSE IF( lsame( storev, 'R' ) .AND. myrow.EQ.ivrow ) THEN
374*
375 iw = 1
376 ldv = descv( lld_ )
377 icoff = mod( jv-1, descv( nb_ ) )
378*
379 IF( forward ) THEN
380*
381* DIRECT = 'Forward', STOREV = 'Rowwise'
382*
383 nq = numroc( n+icoff, descv( nb_ ), mycol, ivcol, npcol )
384 IF( mycol.EQ.ivcol ) THEN
385 nq = nq - icoff
386 jj = jjv + 1
387 ELSE
388 jj = jjv
389 END IF
390 IF( icoff+1.EQ.descv( nb_ ) ) THEN
391 micol = mod( ivcol+1, npcol )
392 ELSE
393 micol = ivcol
394 END IF
395 itmp0 = 0
396*
397 DO 50 ii = iiv+1, iiv+k-1
398*
399 IF( mycol.EQ.micol ) THEN
400 vii = v( ii+(jj-1)*ldv )
401 v( ii+(jj-1)*ldv ) = one
402 END IF
403*
404* T(1:i-1,i) = -tau( iv+i-1 ) *
405* V(iv+i-1,jv+i-1:jv+n-1) * V(iv:iv+i-2,jv+i-1:jv+n-1)'
406*
407 itmp0 = itmp0 + 1
408 IF( nq-jj+jjv.GT.0 ) THEN
409 CALL sgemv( 'No transpose', itmp0, nq-jj+jjv,
410 $ -tau(ii), v( iiv+(jj-1)*ldv ), ldv,
411 $ v( ii+(jj-1)*ldv ), ldv, zero,
412 $ work( iw ), 1 )
413 ELSE
414 CALL slaset( 'All', itmp0, 1, zero, zero,
415 $ work( iw ), itmp0 )
416 END IF
417*
418 iw = iw + itmp0
419 IF( mycol.EQ.micol ) THEN
420 v( ii+(jj-1)*ldv ) = vii
421 jj = jj + 1
422 END IF
423*
424 IF( mod( jv+itmp0, descv( nb_ ) ).EQ.0 )
425 $ micol = mod( micol+1, npcol )
426*
427 50 CONTINUE
428*
429 CALL sgsum2d( ictxt, 'Rowwise', ' ', iw-1, 1, work, iw-1,
430 $ myrow, ivcol )
431*
432 IF( mycol.EQ.ivcol ) THEN
433*
434 iw = 1
435 itmp0 = 0
436 itmp1 = 1
437*
438 t( itmp1 ) = tau( iiv )
439*
440 DO 60 ii = iiv+1, iiv+k-1
441*
442* T(1:i-1,i) = T(1:i-1,1:i-1) * T(1:i-1,i)
443*
444 itmp0 = itmp0 + 1
445 itmp1 = itmp1 + descv( mb_ )
446 CALL scopy( itmp0, work( iw ), 1, t( itmp1 ), 1 )
447 iw = iw + itmp0
448*
449 CALL strmv( 'Upper', 'No transpose', 'Non-unit',
450 $ itmp0, t, descv( mb_ ), t( itmp1 ), 1 )
451 t( itmp1+itmp0 ) = tau( ii )
452*
453 60 CONTINUE
454*
455 END IF
456*
457 ELSE
458*
459* DIRECT = 'Backward', STOREV = 'Rowwise'
460*
461 nq = numroc( n+icoff-1, descv( nb_ ), mycol, ivcol, npcol )
462 IF( mycol.EQ.ivcol )
463 $ nq = nq - icoff
464 micol = indxg2p( jv+n-2, descv( nb_ ), mycol,
465 $ descv( csrc_ ), npcol )
466 jj = jjv + nq - 1
467 itmp0 = 0
468*
469 DO 70 ii = iiv+k-2, iiv, -1
470*
471 IF( mycol.EQ.micol ) THEN
472 vii = v( ii+(jj-1)*ldv )
473 v( ii+(jj-1)*ldv ) = one
474 END IF
475*
476* T(i+1:k,i) = -tau( iv+i-1 ) *
477* V(iv+i:iv+k-1,jv:jv+n-k+i-1)' * V(iv+i-1,jv:jv+n-k+i-1)'
478*
479 itmp0 = itmp0 + 1
480 IF( jj-jjv+1.GT.0 ) THEN
481 CALL sgemv( 'No transpose', itmp0, jj-jjv+1,
482 $ -tau( ii ), v( ii+1+(jjv-1)*ldv ), ldv,
483 $ v( ii+(jjv-1)*ldv ), ldv, zero,
484 $ work( iw ), 1 )
485 ELSE
486 CALL slaset( 'All', itmp0, 1, zero, zero,
487 $ work( iw ), itmp0 )
488 END IF
489*
490 iw = iw + itmp0
491 IF( mycol.EQ.micol ) THEN
492 v( ii+(jj-1)*ldv ) = vii
493 jj = jj - 1
494 END IF
495*
496 IF( mod( jv+n-itmp0-2, descv( nb_ ) ).EQ.0 )
497 $ micol = mod( micol+npcol-1, npcol )
498*
499 70 CONTINUE
500*
501 CALL sgsum2d( ictxt, 'Rowwise', ' ', iw-1, 1, work, iw-1,
502 $ myrow, ivcol )
503*
504 IF( mycol.EQ.ivcol ) THEN
505*
506 iw = 1
507 itmp0 = 0
508 itmp1 = k + 1 + (k-1) * descv( mb_ )
509*
510 t( itmp1-1 ) = tau( iiv+k-1 )
511*
512 DO 80 ii = iiv+k-2, iiv, -1
513*
514* T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i)
515*
516 itmp0 = itmp0 + 1
517 itmp1 = itmp1 - descv( mb_ ) - 1
518 CALL scopy( itmp0, work( iw ), 1, t( itmp1 ), 1 )
519 iw = iw + itmp0
520*
521 CALL strmv( 'Lower', 'No transpose', 'Non-unit',
522 $ itmp0, t( itmp1+descv( mb_ ) ),
523 $ descv( mb_ ), t( itmp1 ), 1 )
524 t( itmp1-1 ) = tau( ii )
525*
526 80 CONTINUE
527*
528 END IF
529*
530 END IF
531*
532 END IF
533*
534 RETURN
535*
536* End of PSLARFT
537*
538 END
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
subroutine pslarft(direct, storev, n, k, v, iv, jv, descv, tau, t, work)
Definition pslarft.f:3