ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pslarft.f
Go to the documentation of this file.
1  SUBROUTINE pslarft( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU,
2  $ T, WORK )
3 *
4 * -- ScaLAPACK auxiliary routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 1, 1997
8 *
9 * .. Scalar Arguments ..
10  CHARACTER DIRECT, STOREV
11  INTEGER IV, JV, K, N
12 * ..
13 * .. Array Arguments ..
14  INTEGER DESCV( * )
15  REAL TAU( * ), T( * ), V( * ), WORK( * )
16 * ..
17 *
18 * Purpose
19 * =======
20 *
21 * PSLARFT forms the triangular factor T of a real block reflector H
22 * of order n, which is defined as a product of k elementary reflectors.
23 *
24 * If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
25 *
26 * If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
27 *
28 * If STOREV = 'C', the vector which defines the elementary reflector
29 * H(i) is stored in the i-th column of the distributed matrix V, and
30 *
31 * H = I - V * T * V'
32 *
33 * If STOREV = 'R', the vector which defines the elementary reflector
34 * H(i) is stored in the i-th row of the distributed matrix V, and
35 *
36 * H = I - V' * T * V
37 *
38 * Notes
39 * =====
40 *
41 * Each global data object is described by an associated description
42 * vector. This vector stores the information required to establish
43 * the mapping between an object element and its corresponding process
44 * and memory location.
45 *
46 * Let A be a generic term for any 2D block cyclicly distributed array.
47 * Such a global array has an associated description vector DESCA.
48 * In the following comments, the character _ should be read as
49 * "of the global array".
50 *
51 * NOTATION STORED IN EXPLANATION
52 * --------------- -------------- --------------------------------------
53 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
54 * DTYPE_A = 1.
55 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
56 * the BLACS process grid A is distribu-
57 * ted over. The context itself is glo-
58 * bal, but the handle (the integer
59 * value) may vary.
60 * M_A (global) DESCA( M_ ) The number of rows in the global
61 * array A.
62 * N_A (global) DESCA( N_ ) The number of columns in the global
63 * array A.
64 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
65 * the rows of the array.
66 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
67 * the columns of the array.
68 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
69 * row of the array A is distributed.
70 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
71 * first column of the array A is
72 * distributed.
73 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
74 * array. LLD_A >= MAX(1,LOCr(M_A)).
75 *
76 * Let K be the number of rows or columns of a distributed matrix,
77 * and assume that its process grid has dimension p x q.
78 * LOCr( K ) denotes the number of elements of K that a process
79 * would receive if K were distributed over the p processes of its
80 * process column.
81 * Similarly, LOCc( K ) denotes the number of elements of K that a
82 * process would receive if K were distributed over the q processes of
83 * its process row.
84 * The values of LOCr() and LOCc() may be determined via a call to the
85 * ScaLAPACK tool function, NUMROC:
86 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
87 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
88 * An upper bound for these quantities may be computed by:
89 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
90 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
91 *
92 * Arguments
93 * =========
94 *
95 * DIRECT (global input) CHARACTER*1
96 * Specifies the order in which the elementary reflectors are
97 * multiplied to form the block reflector:
98 * = 'F': H = H(1) H(2) . . . H(k) (Forward)
99 * = 'B': H = H(k) . . . H(2) H(1) (Backward)
100 *
101 * STOREV (global input) CHARACTER*1
102 * Specifies how the vectors which define the elementary
103 * reflectors are stored (see also Further Details):
104 * = 'C': columnwise
105 * = 'R': rowwise
106 *
107 * N (global input) INTEGER
108 * The order of the block reflector H. N >= 0.
109 *
110 * K (global input) INTEGER
111 * The order of the triangular factor T (= the number of
112 * elementary reflectors). 1 <= K <= MB_V (= NB_V).
113 *
114 * V (input/output) REAL pointer into the local memory
115 * to an array of local dimension (LOCr(IV+N-1),LOCc(JV+K-1))
116 * if STOREV = 'C', and (LOCr(IV+K-1),LOCc(JV+N-1)) if
117 * STOREV = 'R'. The distributed matrix V contains the
118 * Householder vectors. See further details.
119 *
120 * IV (global input) INTEGER
121 * The row index in the global array V indicating the first
122 * row of sub( V ).
123 *
124 * JV (global input) INTEGER
125 * The column index in the global array V indicating the
126 * first column of sub( V ).
127 *
128 * DESCV (global and local input) INTEGER array of dimension DLEN_.
129 * The array descriptor for the distributed matrix V.
130 *
131 * TAU (local input) REAL, array, dimension LOCr(IV+K-1)
132 * if INCV = M_V, and LOCc(JV+K-1) otherwise. This array
133 * contains the Householder scalars related to the Householder
134 * vectors. TAU is tied to the distributed matrix V.
135 *
136 * T (local output) REAL array, dimension (NB_V,NB_V)
137 * if STOREV = 'Col', and (MB_V,MB_V) otherwise. It contains
138 * the k-by-k triangular factor of the block reflector asso-
139 * ciated with V. If DIRECT = 'F', T is upper triangular;
140 * if DIRECT = 'B', T is lower triangular.
141 *
142 * WORK (local workspace) REAL array,
143 * dimension (K*(K-1)/2)
144 *
145 * Further Details
146 * ===============
147 *
148 * The shape of the matrix V and the storage of the vectors which define
149 * the H(i) is best illustrated by the following example with n = 5 and
150 * k = 3. The elements equal to 1 are not stored; the corresponding
151 * array elements are modified but restored on exit. The rest of the
152 * array is not used.
153 *
154 * DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R':
155 *
156 * V( IV:IV+N-1, ( 1 ) V( IV:IV+K-1, ( 1 v1 v1 v1 v1 )
157 * JV:JV+K-1 ) = ( v1 1 ) JV:JV+N-1 ) = ( 1 v2 v2 v2 )
158 * ( v1 v2 1 ) ( 1 v3 v3 )
159 * ( v1 v2 v3 )
160 * ( v1 v2 v3 )
161 *
162 * DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R':
163 *
164 * V( IV:IV+N-1, ( v1 v2 v3 ) V( IV:IV+K-1, ( v1 v1 1 )
165 * JV:JV+K-1 ) = ( v1 v2 v3 ) JV:JV+N-1 ) = ( v2 v2 v2 1 )
166 * ( 1 v2 v3 ) ( v3 v3 v3 v3 1 )
167 * ( 1 v3 )
168 * ( 1 )
169 *
170 * =====================================================================
171 *
172 * .. Parameters ..
173  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
174  $ lld_, mb_, m_, nb_, n_, rsrc_
175  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
176  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
177  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
178  REAL ONE, ZERO
179  parameter( one = 1.0e+0, zero = 0.0e+0 )
180 * ..
181 * .. Local Scalars ..
182  LOGICAL FORWARD
183  INTEGER ICOFF, ICTXT, II, IIV, IROFF, IVCOL, IVROW,
184  $ itmp0, itmp1, iw, jj, jjv, ldv, micol, mirow,
185  $ mycol, myrow, np, npcol, nprow, nq
186  REAL VII
187 * ..
188 * .. External Subroutines ..
189  EXTERNAL blacs_gridinfo, infog2l, scopy, sgemv,
190  $ sgsum2d, slaset, strmv
191 * ..
192 * .. External Functions ..
193  LOGICAL LSAME
194  INTEGER INDXG2P, NUMROC
195  EXTERNAL indxg2p, lsame, numroc
196 * ..
197 * .. Intrinsic Functions ..
198  INTRINSIC mod
199 * ..
200 * .. Executable Statements ..
201 *
202 * Quick return if possible
203 *
204  IF( n.LE.0 .OR. k.LE.0 )
205  $ RETURN
206 *
207  ictxt = descv( ctxt_ )
208  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
209 *
210  forward = lsame( direct, 'F' )
211  CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol,
212  $ iiv, jjv, ivrow, ivcol )
213 *
214  IF( lsame( storev, 'C' ) .AND. mycol.EQ.ivcol ) THEN
215 *
216  iw = 1
217  ldv = descv( lld_ )
218  iroff = mod( iv-1, descv( mb_ ) )
219 *
220  IF( forward ) THEN
221 *
222 * DIRECT = 'Forward', STOREV = 'Columnwise'
223 *
224  np = numroc( n+iroff, descv( mb_ ), myrow, ivrow, nprow )
225  IF( myrow.EQ.ivrow ) THEN
226  np = np - iroff
227  ii = iiv + 1
228  ELSE
229  ii = iiv
230  END IF
231  IF( iroff+1.EQ.descv( mb_ ) ) THEN
232  mirow = mod( ivrow+1, nprow )
233  ELSE
234  mirow = ivrow
235  END IF
236  itmp0 = 0
237 *
238  DO 10 jj = jjv+1, jjv+k-1
239 *
240  IF( myrow.EQ.mirow ) THEN
241  vii = v( ii+(jj-1)*ldv )
242  v( ii+(jj-1)*ldv ) = one
243  END IF
244 *
245 * T(1:i-1,i) = -tau( jv+i-1 ) *
246 * V(iv+i-1:iv+n-1,jv:jv+i-2)' * V(iv+i-1:iv+n-1,jv+i-1)
247 *
248  itmp0 = itmp0 + 1
249  IF( np-ii+iiv.GT.0 ) THEN
250  CALL sgemv( 'Transpose', np-ii+iiv, itmp0,
251  $ -tau( jj ), v( ii+(jjv-1)*ldv ), ldv,
252  $ v( ii+(jj-1)*ldv ), 1, zero,
253  $ work( iw ), 1 )
254  ELSE
255  CALL slaset( 'All', itmp0, 1, zero, zero, work( iw ),
256  $ itmp0 )
257  END IF
258 *
259  iw = iw + itmp0
260  IF( myrow.EQ.mirow ) THEN
261  v( ii+(jj-1)*ldv ) = vii
262  ii = ii + 1
263  END IF
264 *
265  IF( mod( iv+itmp0, descv( mb_ ) ).EQ.0 )
266  $ mirow = mod( mirow+1, nprow )
267 *
268  10 CONTINUE
269 *
270  CALL sgsum2d( ictxt, 'Columnwise', ' ', iw-1, 1, work, iw-1,
271  $ ivrow, mycol )
272 *
273  IF( myrow.EQ.ivrow ) THEN
274 *
275  iw = 1
276  itmp0 = 0
277  itmp1 = 1
278 *
279  t( itmp1 ) = tau( jjv )
280 *
281  DO 20 jj = jjv+1, jjv+k-1
282 *
283 * T(1:j-1,j) = T(1:j-1,1:j-1) * T(1:j-1,j)
284 *
285  itmp0 = itmp0 + 1
286  itmp1 = itmp1 + descv( nb_ )
287  CALL scopy( itmp0, work( iw ), 1, t( itmp1 ), 1 )
288  iw = iw + itmp0
289 *
290  CALL strmv( 'Upper', 'No transpose', 'Non-unit',
291  $ itmp0, t, descv( nb_ ), t( itmp1 ), 1 )
292  t(itmp1+itmp0) = tau( jj )
293 *
294  20 CONTINUE
295 *
296  END IF
297 *
298  ELSE
299 *
300 * DIRECT = 'Backward', STOREV = 'Columnwise'
301 *
302  np = numroc( n+iroff-1, descv( mb_ ), myrow, ivrow, nprow )
303  IF( myrow.EQ.ivrow )
304  $ np = np - iroff
305  mirow = indxg2p( iv+n-2, descv( mb_ ), myrow,
306  $ descv( rsrc_ ), nprow )
307  ii = iiv + np - 1
308  itmp0 = 0
309 *
310  DO 30 jj = jjv+k-2, jjv, -1
311 *
312  IF( myrow.EQ.mirow ) THEN
313  vii = v( ii+(jj-1)*ldv )
314  v( ii+(jj-1)*ldv ) = one
315  END IF
316 *
317 * T(1:i-1,i) = -tau( jv+i-1 ) *
318 * V(iv:iv+n-k+i-1,jv+i:jv+k-1)' * V(iv:iv+n-k+i-1,jv+i-1)
319 *
320  itmp0 = itmp0 + 1
321  IF( ii-iiv+1.GT.0 ) THEN
322  CALL sgemv( 'Transpose', ii-iiv+1, itmp0, -tau( jj ),
323  $ v( iiv+jj*ldv ), ldv,
324  $ v( iiv+(jj-1)*ldv ), 1, zero,
325  $ work( iw ), 1 )
326  ELSE
327  CALL slaset( 'All', itmp0, 1, zero, zero, work( iw ),
328  $ itmp0 )
329  END IF
330 *
331  iw = iw + itmp0
332  IF( myrow.EQ.mirow ) THEN
333  v( ii+(jj-1)*ldv ) = vii
334  ii = ii - 1
335  END IF
336 *
337  IF( mod( iv+n-itmp0-2, descv(mb_) ).EQ.0 )
338  $ mirow = mod( mirow+nprow-1, nprow )
339 *
340  30 CONTINUE
341 *
342  CALL sgsum2d( ictxt, 'Columnwise', ' ', iw-1, 1, work, iw-1,
343  $ ivrow, mycol )
344 *
345  IF( myrow.EQ.ivrow ) THEN
346 *
347  iw = 1
348  itmp0 = 0
349  itmp1 = k + 1 + (k-1) * descv( nb_ )
350 *
351  t( itmp1-1 ) = tau( jjv+k-1 )
352 *
353  DO 40 jj = jjv+k-2, jjv, -1
354 *
355 * T(j+1:k,j) = T(j+1:k,j+1:k) * T(j+1:k,j)
356 *
357  itmp0 = itmp0 + 1
358  itmp1 = itmp1 - descv( nb_ ) - 1
359  CALL scopy( itmp0, work( iw ), 1, t( itmp1 ), 1 )
360  iw = iw + itmp0
361 *
362  CALL strmv( 'Lower', 'No transpose', 'Non-unit',
363  $ itmp0, t( itmp1+descv( nb_ ) ),
364  $ descv( nb_ ), t( itmp1 ), 1 )
365  t( itmp1-1 ) = tau( jj )
366 *
367  40 CONTINUE
368 *
369  END IF
370 *
371  END IF
372 *
373  ELSE IF( lsame( storev, 'R' ) .AND. myrow.EQ.ivrow ) THEN
374 *
375  iw = 1
376  ldv = descv( lld_ )
377  icoff = mod( jv-1, descv( nb_ ) )
378 *
379  IF( forward ) THEN
380 *
381 * DIRECT = 'Forward', STOREV = 'Rowwise'
382 *
383  nq = numroc( n+icoff, descv( nb_ ), mycol, ivcol, npcol )
384  IF( mycol.EQ.ivcol ) THEN
385  nq = nq - icoff
386  jj = jjv + 1
387  ELSE
388  jj = jjv
389  END IF
390  IF( icoff+1.EQ.descv( nb_ ) ) THEN
391  micol = mod( ivcol+1, npcol )
392  ELSE
393  micol = ivcol
394  END IF
395  itmp0 = 0
396 *
397  DO 50 ii = iiv+1, iiv+k-1
398 *
399  IF( mycol.EQ.micol ) THEN
400  vii = v( ii+(jj-1)*ldv )
401  v( ii+(jj-1)*ldv ) = one
402  END IF
403 *
404 * T(1:i-1,i) = -tau( iv+i-1 ) *
405 * V(iv+i-1,jv+i-1:jv+n-1) * V(iv:iv+i-2,jv+i-1:jv+n-1)'
406 *
407  itmp0 = itmp0 + 1
408  IF( nq-jj+jjv.GT.0 ) THEN
409  CALL sgemv( 'No transpose', itmp0, nq-jj+jjv,
410  $ -tau(ii), v( iiv+(jj-1)*ldv ), ldv,
411  $ v( ii+(jj-1)*ldv ), ldv, zero,
412  $ work( iw ), 1 )
413  ELSE
414  CALL slaset( 'All', itmp0, 1, zero, zero,
415  $ work( iw ), itmp0 )
416  END IF
417 *
418  iw = iw + itmp0
419  IF( mycol.EQ.micol ) THEN
420  v( ii+(jj-1)*ldv ) = vii
421  jj = jj + 1
422  END IF
423 *
424  IF( mod( jv+itmp0, descv( nb_ ) ).EQ.0 )
425  $ micol = mod( micol+1, npcol )
426 *
427  50 CONTINUE
428 *
429  CALL sgsum2d( ictxt, 'Rowwise', ' ', iw-1, 1, work, iw-1,
430  $ myrow, ivcol )
431 *
432  IF( mycol.EQ.ivcol ) THEN
433 *
434  iw = 1
435  itmp0 = 0
436  itmp1 = 1
437 *
438  t( itmp1 ) = tau( iiv )
439 *
440  DO 60 ii = iiv+1, iiv+k-1
441 *
442 * T(1:i-1,i) = T(1:i-1,1:i-1) * T(1:i-1,i)
443 *
444  itmp0 = itmp0 + 1
445  itmp1 = itmp1 + descv( mb_ )
446  CALL scopy( itmp0, work( iw ), 1, t( itmp1 ), 1 )
447  iw = iw + itmp0
448 *
449  CALL strmv( 'Upper', 'No transpose', 'Non-unit',
450  $ itmp0, t, descv( mb_ ), t( itmp1 ), 1 )
451  t( itmp1+itmp0 ) = tau( ii )
452 *
453  60 CONTINUE
454 *
455  END IF
456 *
457  ELSE
458 *
459 * DIRECT = 'Backward', STOREV = 'Rowwise'
460 *
461  nq = numroc( n+icoff-1, descv( nb_ ), mycol, ivcol, npcol )
462  IF( mycol.EQ.ivcol )
463  $ nq = nq - icoff
464  micol = indxg2p( jv+n-2, descv( nb_ ), mycol,
465  $ descv( csrc_ ), npcol )
466  jj = jjv + nq - 1
467  itmp0 = 0
468 *
469  DO 70 ii = iiv+k-2, iiv, -1
470 *
471  IF( mycol.EQ.micol ) THEN
472  vii = v( ii+(jj-1)*ldv )
473  v( ii+(jj-1)*ldv ) = one
474  END IF
475 *
476 * T(i+1:k,i) = -tau( iv+i-1 ) *
477 * V(iv+i:iv+k-1,jv:jv+n-k+i-1)' * V(iv+i-1,jv:jv+n-k+i-1)'
478 *
479  itmp0 = itmp0 + 1
480  IF( jj-jjv+1.GT.0 ) THEN
481  CALL sgemv( 'No transpose', itmp0, jj-jjv+1,
482  $ -tau( ii ), v( ii+1+(jjv-1)*ldv ), ldv,
483  $ v( ii+(jjv-1)*ldv ), ldv, zero,
484  $ work( iw ), 1 )
485  ELSE
486  CALL slaset( 'All', itmp0, 1, zero, zero,
487  $ work( iw ), itmp0 )
488  END IF
489 *
490  iw = iw + itmp0
491  IF( mycol.EQ.micol ) THEN
492  v( ii+(jj-1)*ldv ) = vii
493  jj = jj - 1
494  END IF
495 *
496  IF( mod( jv+n-itmp0-2, descv( nb_ ) ).EQ.0 )
497  $ micol = mod( micol+npcol-1, npcol )
498 *
499  70 CONTINUE
500 *
501  CALL sgsum2d( ictxt, 'Rowwise', ' ', iw-1, 1, work, iw-1,
502  $ myrow, ivcol )
503 *
504  IF( mycol.EQ.ivcol ) THEN
505 *
506  iw = 1
507  itmp0 = 0
508  itmp1 = k + 1 + (k-1) * descv( mb_ )
509 *
510  t( itmp1-1 ) = tau( iiv+k-1 )
511 *
512  DO 80 ii = iiv+k-2, iiv, -1
513 *
514 * T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i)
515 *
516  itmp0 = itmp0 + 1
517  itmp1 = itmp1 - descv( mb_ ) - 1
518  CALL scopy( itmp0, work( iw ), 1, t( itmp1 ), 1 )
519  iw = iw + itmp0
520 *
521  CALL strmv( 'Lower', 'No transpose', 'Non-unit',
522  $ itmp0, t( itmp1+descv( mb_ ) ),
523  $ descv( mb_ ), t( itmp1 ), 1 )
524  t( itmp1-1 ) = tau( ii )
525 *
526  80 CONTINUE
527 *
528  END IF
529 *
530  END IF
531 *
532  END IF
533 *
534  RETURN
535 *
536 * End of PSLARFT
537 *
538  END
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pslarft
subroutine pslarft(DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, T, WORK)
Definition: pslarft.f:3