ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pslarfg.f
Go to the documentation of this file.
1  SUBROUTINE pslarfg( N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX,
2  $ TAU )
3 *
4 * -- ScaLAPACK auxiliary routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 1, 1997
8 *
9 * .. Scalar Arguments ..
10  INTEGER IAX, INCX, IX, JAX, JX, N
11  REAL ALPHA
12 * ..
13 * .. Array Arguments ..
14  INTEGER DESCX( * )
15  REAL TAU( * ), X( * )
16 * ..
17 *
18 * Purpose
19 * =======
20 *
21 * PSLARFG generates a real elementary reflector H of order n, such
22 * that
23 *
24 * H * sub( X ) = H * ( x(iax,jax) ) = ( alpha ), H' * H = I.
25 * ( x ) ( 0 )
26 *
27 * where alpha is a scalar, and sub( X ) is an (N-1)-element real
28 * distributed vector X(IX:IX+N-2,JX) if INCX = 1 and X(IX,JX:JX+N-2) if
29 * INCX = DESCX(M_). H is represented in the form
30 *
31 * H = I - tau * ( 1 ) * ( 1 v' ) ,
32 * ( v )
33 *
34 * where tau is a real scalar and v is a real (N-1)-element
35 * vector.
36 *
37 * If the elements of sub( X ) are all zero, then tau = 0 and H is
38 * taken to be the unit matrix.
39 *
40 * Otherwise 1 <= tau <= 2.
41 *
42 * Notes
43 * =====
44 *
45 * Each global data object is described by an associated description
46 * vector. This vector stores the information required to establish
47 * the mapping between an object element and its corresponding process
48 * and memory location.
49 *
50 * Let A be a generic term for any 2D block cyclicly distributed array.
51 * Such a global array has an associated description vector DESCA.
52 * In the following comments, the character _ should be read as
53 * "of the global array".
54 *
55 * NOTATION STORED IN EXPLANATION
56 * --------------- -------------- --------------------------------------
57 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
58 * DTYPE_A = 1.
59 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
60 * the BLACS process grid A is distribu-
61 * ted over. The context itself is glo-
62 * bal, but the handle (the integer
63 * value) may vary.
64 * M_A (global) DESCA( M_ ) The number of rows in the global
65 * array A.
66 * N_A (global) DESCA( N_ ) The number of columns in the global
67 * array A.
68 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
69 * the rows of the array.
70 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
71 * the columns of the array.
72 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
73 * row of the array A is distributed.
74 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
75 * first column of the array A is
76 * distributed.
77 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
78 * array. LLD_A >= MAX(1,LOCr(M_A)).
79 *
80 * Let K be the number of rows or columns of a distributed matrix,
81 * and assume that its process grid has dimension p x q.
82 * LOCr( K ) denotes the number of elements of K that a process
83 * would receive if K were distributed over the p processes of its
84 * process column.
85 * Similarly, LOCc( K ) denotes the number of elements of K that a
86 * process would receive if K were distributed over the q processes of
87 * its process row.
88 * The values of LOCr() and LOCc() may be determined via a call to the
89 * ScaLAPACK tool function, NUMROC:
90 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
91 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
92 * An upper bound for these quantities may be computed by:
93 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
94 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
95 *
96 * Because vectors may be viewed as a subclass of matrices, a
97 * distributed vector is considered to be a distributed matrix.
98 *
99 * Arguments
100 * =========
101 *
102 * N (global input) INTEGER
103 * The global order of the elementary reflector. N >= 0.
104 *
105 * ALPHA (local output) REAL
106 * On exit, alpha is computed in the process scope having the
107 * vector sub( X ).
108 *
109 * IAX (global input) INTEGER
110 * The global row index in X of X(IAX,JAX).
111 *
112 * JAX (global input) INTEGER
113 * The global column index in X of X(IAX,JAX).
114 *
115 * X (local input/local output) REAL, pointer into the
116 * local memory to an array of dimension (LLD_X,*). This array
117 * contains the local pieces of the distributed vector sub( X ).
118 * Before entry, the incremented array sub( X ) must contain
119 * the vector x. On exit, it is overwritten with the vector v.
120 *
121 * IX (global input) INTEGER
122 * The row index in the global array X indicating the first
123 * row of sub( X ).
124 *
125 * JX (global input) INTEGER
126 * The column index in the global array X indicating the
127 * first column of sub( X ).
128 *
129 * DESCX (global and local input) INTEGER array of dimension DLEN_.
130 * The array descriptor for the distributed matrix X.
131 *
132 * INCX (global input) INTEGER
133 * The global increment for the elements of X. Only two values
134 * of INCX are supported in this version, namely 1 and M_X.
135 * INCX must not be zero.
136 *
137 * TAU (local output) REAL, array, dimension LOCc(JX)
138 * if INCX = 1, and LOCr(IX) otherwise. This array contains the
139 * Householder scalars related to the Householder vectors.
140 * TAU is tied to the distributed matrix X.
141 *
142 * =====================================================================
143 *
144 * .. Parameters ..
145  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
146  $ lld_, mb_, m_, nb_, n_, rsrc_
147  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
148  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
149  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
150  REAL ONE, ZERO
151  parameter( one = 1.0e+0, zero = 0.0e+0 )
152 * ..
153 * .. Local Scalars ..
154  INTEGER ICTXT, IIAX, INDXTAU, IXCOL, IXROW, J, JJAX,
155  $ knt, mycol, myrow, npcol, nprow
156  REAL BETA, RSAFMN, SAFMIN, XNORM
157 * ..
158 * .. External Subroutines ..
159  EXTERNAL blacs_gridinfo, infog2l, psnrm2, sgebr2d,
160  $ sgebs2d, psscal
161 * ..
162 * .. External Functions ..
163  REAL SLAMCH, SLAPY2
164  EXTERNAL slamch, slapy2
165 * ..
166 * .. Intrinsic Functions ..
167  INTRINSIC abs, sign
168 * ..
169 * .. Executable Statements ..
170 *
171 * Get grid parameters.
172 *
173  ictxt = descx( ctxt_ )
174  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
175 *
176  IF( incx.EQ.descx( m_ ) ) THEN
177 *
178 * sub( X ) is distributed across a process row.
179 *
180  CALL infog2l( ix, jax, descx, nprow, npcol, myrow, mycol,
181  $ iiax, jjax, ixrow, ixcol )
182 *
183  IF( myrow.NE.ixrow )
184  $ RETURN
185 *
186 * Broadcast X(IAX,JAX) across the process row.
187 *
188  IF( mycol.EQ.ixcol ) THEN
189  j = iiax+(jjax-1)*descx( lld_ )
190  CALL sgebs2d( ictxt, 'Rowwise', ' ', 1, 1, x( j ), 1 )
191  alpha = x( j )
192  ELSE
193  CALL sgebr2d( ictxt, 'Rowwise', ' ', 1, 1, alpha, 1,
194  $ myrow, ixcol )
195  END IF
196 *
197  indxtau = iiax
198 *
199  ELSE
200 *
201 * sub( X ) is distributed across a process column.
202 *
203  CALL infog2l( iax, jx, descx, nprow, npcol, myrow, mycol,
204  $ iiax, jjax, ixrow, ixcol )
205 *
206  IF( mycol.NE.ixcol )
207  $ RETURN
208 *
209 * Broadcast X(IAX,JAX) across the process column.
210 *
211  IF( myrow.EQ.ixrow ) THEN
212  j = iiax+(jjax-1)*descx( lld_ )
213  CALL sgebs2d( ictxt, 'Columnwise', ' ', 1, 1, x( j ), 1 )
214  alpha = x( j )
215  ELSE
216  CALL sgebr2d( ictxt, 'Columnwise', ' ', 1, 1, alpha, 1,
217  $ ixrow, mycol )
218  END IF
219 *
220  indxtau = jjax
221 *
222  END IF
223 *
224  IF( n.LE.0 ) THEN
225  tau( indxtau ) = zero
226  RETURN
227  END IF
228 *
229  CALL psnrm2( n-1, xnorm, x, ix, jx, descx, incx )
230 *
231  IF( xnorm.EQ.zero ) THEN
232 *
233 * H = I
234 *
235  tau( indxtau ) = zero
236 *
237  ELSE
238 *
239 * General case
240 *
241  beta = -sign( slapy2( alpha, xnorm ), alpha )
242  safmin = slamch( 'S' )
243  rsafmn = one / safmin
244  IF( abs( beta ).LT.safmin ) THEN
245 *
246 * XNORM, BETA may be inaccurate; scale X and recompute them
247 *
248  knt = 0
249  10 CONTINUE
250  knt = knt + 1
251  CALL psscal( n-1, rsafmn, x, ix, jx, descx, incx )
252  beta = beta*rsafmn
253  alpha = alpha*rsafmn
254  IF( abs( beta ).LT.safmin )
255  $ GO TO 10
256 *
257 * New BETA is at most 1, at least SAFMIN
258 *
259  CALL psnrm2( n-1, xnorm, x, ix, jx, descx, incx )
260  beta = -sign( slapy2( alpha, xnorm ), alpha )
261  tau( indxtau ) = ( beta-alpha ) / beta
262  CALL psscal( n-1, one/(alpha-beta), x, ix, jx, descx, incx )
263 *
264 * If ALPHA is subnormal, it may lose relative accuracy
265 *
266  alpha = beta
267  DO 20 j = 1, knt
268  alpha = alpha*safmin
269  20 CONTINUE
270  ELSE
271  tau( indxtau ) = ( beta-alpha ) / beta
272  CALL psscal( n-1, one/(alpha-beta), x, ix, jx, descx, incx )
273  alpha = beta
274  END IF
275  END IF
276 *
277  RETURN
278 *
279 * End of PSLARFG
280 *
281  END
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pslarfg
subroutine pslarfg(N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, TAU)
Definition: pslarfg.f:3