SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pslahrd.f
Go to the documentation of this file.
1 SUBROUTINE pslahrd( N, K, NB, A, IA, JA, DESCA, TAU, T, Y, IY, JY,
2 $ DESCY, WORK )
3*
4* -- ScaLAPACK auxiliary routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* January 30, 2006
8*
9* .. Scalar Arguments ..
10 INTEGER IA, IY, JA, JY, K, N, NB
11* ..
12* .. Array Arguments ..
13 INTEGER DESCA( * ), DESCY( * )
14 REAL A( * ), T( * ), TAU( * ), WORK( * ), Y( * )
15* ..
16*
17* Purpose
18* =======
19*
20* PSLAHRD reduces the first NB columns of a real general N-by-(N-K+1)
21* distributed matrix A(IA:IA+N-1,JA:JA+N-K) so that elements below the
22* k-th subdiagonal are zero. The reduction is performed by an orthogo-
23* nal similarity transformation Q' * A * Q. The routine returns the
24* matrices V and T which determine Q as a block reflector I - V*T*V',
25* and also the matrix Y = A * V * T.
26*
27* This is an auxiliary routine called by PSGEHRD. In the following
28* comments sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1).
29*
30* Arguments
31* =========
32*
33* N (global input) INTEGER
34* The number of rows and columns to be operated on, i.e. the
35* order of the distributed submatrix sub( A ).
36* N >= 0.
37*
38* K (global input) INTEGER
39* The offset for the reduction. Elements below the k-th
40* subdiagonal in the first NB columns are reduced to zero.
41*
42* NB (global input) INTEGER
43* The number of columns to be reduced.
44*
45* A (local input/local output) REAL pointer into
46* the local memory to an array of dimension (LLD_A,
47* LOCc(JA+N-K)). On entry, this array contains the the local
48* pieces of the N-by-(N-K+1) general distributed matrix
49* A(IA:IA+N-1,JA:JA+N-K). On exit, the elements on and above
50* the k-th subdiagonal in the first NB columns are overwritten
51* with the corresponding elements of the reduced distributed
52* matrix; the elements below the k-th subdiagonal, with the
53* array TAU, represent the matrix Q as a product of elementary
54* reflectors. The other columns of A(IA:IA+N-1,JA:JA+N-K) are
55* unchanged. See Further Details.
56*
57* IA (global input) INTEGER
58* The row index in the global array A indicating the first
59* row of sub( A ).
60*
61* JA (global input) INTEGER
62* The column index in the global array A indicating the
63* first column of sub( A ).
64*
65* DESCA (global and local input) INTEGER array of dimension DLEN_.
66* The array descriptor for the distributed matrix A.
67*
68* TAU (local output) REAL array, dimension LOCc(JA+N-2)
69* The scalar factors of the elementary reflectors (see Further
70* Details). TAU is tied to the distributed matrix A.
71*
72* T (local output) REAL array, dimension (NB_A,NB_A)
73* The upper triangular matrix T.
74*
75* Y (local output) REAL pointer into the local memory
76* to an array of dimension (LLD_Y,NB_A). On exit, this array
77* contains the local pieces of the N-by-NB distributed
78* matrix Y. LLD_Y >= LOCr(IA+N-1).
79*
80* IY (global input) INTEGER
81* The row index in the global array Y indicating the first
82* row of sub( Y ).
83*
84* JY (global input) INTEGER
85* The column index in the global array Y indicating the
86* first column of sub( Y ).
87*
88* DESCY (global and local input) INTEGER array of dimension DLEN_.
89* The array descriptor for the distributed matrix Y.
90*
91* WORK (local workspace) REAL array, dimension (NB)
92*
93* Further Details
94* ===============
95*
96* The matrix Q is represented as a product of nb elementary reflectors
97*
98* Q = H(1) H(2) . . . H(nb).
99*
100* Each H(i) has the form
101*
102* H(i) = I - tau * v * v'
103*
104* where tau is a real scalar, and v is a real vector with
105* v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in
106* A(ia+i+k:ia+n-1,ja+i-1), and tau in TAU(ja+i-1).
107*
108* The elements of the vectors v together form the (n-k+1)-by-nb matrix
109* V which is needed, with T and Y, to apply the transformation to the
110* unreduced part of the matrix, using an update of the form:
111* A(ia:ia+n-1,ja:ja+n-k) := (I-V*T*V')*(A(ia:ia+n-1,ja:ja+n-k)-Y*V').
112*
113* The contents of A(ia:ia+n-1,ja:ja+n-k) on exit are illustrated by the
114* following example with n = 7, k = 3 and nb = 2:
115*
116* ( a h a a a )
117* ( a h a a a )
118* ( a h a a a )
119* ( h h a a a )
120* ( v1 h a a a )
121* ( v1 v2 a a a )
122* ( v1 v2 a a a )
123*
124* where a denotes an element of the original matrix
125* A(ia:ia+n-1,ja:ja+n-k), h denotes a modified element of the upper
126* Hessenberg matrix H, and vi denotes an element of the vector
127* defining H(i).
128*
129* =====================================================================
130*
131* .. Parameters ..
132 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
133 $ lld_, mb_, m_, nb_, n_, rsrc_
134 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
135 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
136 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
137 REAL ONE, ZERO
138 parameter( one = 1.0e+0, zero = 0.0e+0 )
139* ..
140* .. Local Scalars ..
141 LOGICAL IPROC
142 INTEGER I, IACOL, IAROW, ICTXT, IOFF, II, J, JJ, JL,
143 $ jt, jw, l, myrow, mycol, npcol, nprow, nq
144 REAL EI
145* ..
146* .. Local Arrays ..
147 INTEGER DESCW( DLEN_ )
148* ..
149* .. External Functions ..
150 INTEGER NUMROC
151 EXTERNAL numroc
152* ..
153* .. External Subroutines ..
154 EXTERNAL blacs_gridinfo, descset, infog2l, pselset,
155 $ psgemv, pslarfg, psscal, saxpy,
156 $ scopy, sscal, strmv
157* ..
158* .. Intrinsic Functions ..
159 INTRINSIC min, mod
160* ..
161* .. Executable Statements ..
162*
163* Quick return if possible
164*
165 IF( n.LE.1 )
166 $ RETURN
167*
168 ictxt = desca( ctxt_ )
169 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
170*
171 ioff = mod( ja-1, desca( nb_ ) )
172 CALL infog2l( ia+k, ja, desca, nprow, npcol, myrow, mycol, ii,
173 $ jj, iarow, iacol )
174*
175 iproc = ( myrow.EQ.iarow .AND. mycol.EQ.iacol )
176 nq = numroc( n+ja-1, desca( nb_ ), mycol, iacol, npcol )
177 IF( mycol.EQ.iacol )
178 $ nq = nq - ioff
179*
180 ei = zero
181
182 jw = ioff + 1
183 CALL descset( descw, 1, desca( mb_ ), 1, desca( mb_ ), iarow,
184 $ iacol, ictxt, 1 )
185*
186 DO 10 l = 1, nb
187 i = ia + k + l - 2
188 j = ja + l - 1
189*
190 IF( l.GT.1 ) THEN
191*
192* Update A(ia:ia+n-1,j)
193*
194* Compute i-th column of A - Y * V'
195*
196 CALL psgemv( 'No transpose', n, l-1, -one, y, iy, jy, descy,
197 $ a, i, ja, desca, desca( m_ ), one, a, ia, j,
198 $ desca, 1 )
199*
200* Apply I - V * T' * V' to this column (call it b) from the
201* left, using the last column of T as workspace
202*
203* Let V = ( V1 ) and b = ( b1 ) (first I-1 rows)
204* ( V2 ) ( b2 )
205*
206* where V1 is unit lower triangular
207*
208* w := V1' * b1
209*
210 IF( iproc ) THEN
211 CALL scopy( l-1, a( (jj+l-2)*desca( lld_ )+ii ), 1,
212 $ work( jw ), 1 )
213 CALL strmv( 'Lower', 'Transpose', 'Unit', l-1,
214 $ a( (jj-1)*desca( lld_ )+ii ), desca( lld_ ),
215 $ work( jw ), 1 )
216 END IF
217*
218* w := w + V2'*b2
219*
220 CALL psgemv( 'Transpose', n-k-l+1, l-1, one, a, i+1, ja,
221 $ desca, a, i+1, j, desca, 1, one, work, 1, jw,
222 $ descw, descw( m_ ) )
223*
224* w := T'*w
225*
226 IF( iproc )
227 $ CALL strmv( 'Upper', 'Transpose', 'Non-unit', l-1, t,
228 $ desca( nb_ ), work( jw ), 1 )
229*
230* b2 := b2 - V2*w
231*
232 CALL psgemv( 'No transpose', n-k-l+1, l-1, -one, a, i+1, ja,
233 $ desca, work, 1, jw, descw, descw( m_ ), one,
234 $ a, i+1, j, desca, 1 )
235*
236* b1 := b1 - V1*w
237*
238 IF( iproc ) THEN
239 CALL strmv( 'Lower', 'No transpose', 'Unit', l-1,
240 $ a( (jj-1)*desca( lld_ )+ii ), desca( lld_ ),
241 $ work( jw ), 1 )
242 CALL saxpy( l-1, -one, work( jw ), 1,
243 $ a( ( jj+l-2 )*desca( lld_ )+ii ), 1 )
244 END IF
245 CALL pselset( a, i, j-1, desca, ei )
246 END IF
247*
248* Generate the elementary reflector H(i) to annihilate
249* A(ia+k+i:ia+n-1,j)
250*
251 CALL pslarfg( n-k-l+1, ei, i+1, j, a, min( i+2, n+ia-1 ), j,
252 $ desca, 1, tau )
253 CALL pselset( a, i+1, j, desca, one )
254*
255* Compute Y(iy:y+n-1,jy+l-1)
256*
257 CALL psgemv( 'No transpose', n, n-k-l+1, one, a, ia, j+1,
258 $ desca, a, i+1, j, desca, 1, zero, y, iy, jy+l-1,
259 $ descy, 1 )
260 CALL psgemv( 'Transpose', n-k-l+1, l-1, one, a, i+1, ja, desca,
261 $ a, i+1, j, desca, 1, zero, work, 1, jw, descw,
262 $ descw( m_ ) )
263 CALL psgemv( 'No transpose', n, l-1, -one, y, iy, jy, descy,
264 $ work, 1, jw, descw, descw( m_ ), one, y, iy,
265 $ jy+l-1, descy, 1 )
266 jl = min( jj+l-1, ja+nq-1 )
267 CALL psscal( n, tau( jl ), y, iy, jy+l-1, descy, 1 )
268*
269* Compute T(1:i,i)
270*
271 IF( iproc ) THEN
272 jt = ( l-1 ) * desca( nb_ )
273 CALL sscal( l-1, -tau( jl ), work( jw ), 1 )
274 CALL scopy( l-1, work( jw ), 1, t( jt+1 ), 1 )
275 CALL strmv( 'Upper', 'No transpose', 'Non-unit', l-1, t,
276 $ desca( nb_ ), t( jt+1 ), 1 )
277 t( jt+l ) = tau( jl )
278 END IF
279 10 CONTINUE
280*
281 CALL pselset( a, k+nb+ia-1, j, desca, ei )
282*
283 RETURN
284*
285* End of PSLAHRD
286*
287 END
subroutine descset(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld)
Definition descset.f:3
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
#define min(A, B)
Definition pcgemr.c:181
subroutine pselset(a, ia, ja, desca, alpha)
Definition pselset.f:2
subroutine pslahrd(n, k, nb, a, ia, ja, desca, tau, t, y, iy, jy, descy, work)
Definition pslahrd.f:3
subroutine pslarfg(n, alpha, iax, jax, x, ix, jx, descx, incx, tau)
Definition pslarfg.f:3