SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
All Classes Files Functions Variables Typedefs Macros
pclahrd.f
Go to the documentation of this file.
1 SUBROUTINE pclahrd( N, K, NB, A, IA, JA, DESCA, TAU, T, Y, IY, JY,
2 $ DESCY, WORK )
3*
4* -- ScaLAPACK auxiliary routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* May 1, 1997
8*
9* .. Scalar Arguments ..
10 INTEGER IA, IY, JA, JY, K, N, NB
11* ..
12* .. Array Arguments ..
13 INTEGER DESCA( * ), DESCY( * )
14 COMPLEX A( * ), T( * ), TAU( * ), WORK( * ), Y( * )
15* ..
16*
17* Purpose
18* =======
19*
20* PCLAHRD reduces the first NB columns of a complex general
21* N-by-(N-K+1) distributed matrix A(IA:IA+N-1,JA:JA+N-K) so that
22* elements below the k-th subdiagonal are zero. The reduction is
23* performed by an unitary similarity transformation Q' * A * Q. The
24* routine returns the matrices V and T which determine Q as a block
25* reflector I - V*T*V', and also the matrix Y = A * V * T.
26*
27* This is an auxiliary routine called by PCGEHRD. In the following
28* comments sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1).
29*
30* Arguments
31* =========
32*
33* N (global input) INTEGER
34* The number of rows and columns to be operated on, i.e. the
35* order of the distributed submatrix sub( A ).
36* N >= 0.
37*
38* K (global input) INTEGER
39* The offset for the reduction. Elements below the k-th
40* subdiagonal in the first NB columns are reduced to zero.
41*
42* NB (global input) INTEGER
43* The number of columns to be reduced.
44*
45* A (local input/local output) COMPLEX pointer into
46* the local memory to an array of dimension (LLD_A,
47* LOCc(JA+N-K)). On entry, this array contains the the local
48* pieces of the N-by-(N-K+1) general distributed matrix
49* A(IA:IA+N-1,JA:JA+N-K). On exit, the elements on and above
50* the k-th subdiagonal in the first NB columns are overwritten
51* with the corresponding elements of the reduced distributed
52* matrix; the elements below the k-th subdiagonal, with the
53* array TAU, represent the matrix Q as a product of elementary
54* reflectors. The other columns of A(IA:IA+N-1,JA:JA+N-K) are
55* unchanged. See Further Details.
56*
57* IA (global input) INTEGER
58* The row index in the global array A indicating the first
59* row of sub( A ).
60*
61* JA (global input) INTEGER
62* The column index in the global array A indicating the
63* first column of sub( A ).
64*
65* DESCA (global and local input) INTEGER array of dimension DLEN_.
66* The array descriptor for the distributed matrix A.
67*
68* TAU (local output) COMPLEX array, dimension LOCc(JA+N-2)
69* The scalar factors of the elementary reflectors (see Further
70* Details). TAU is tied to the distributed matrix A.
71*
72* T (local output) COMPLEX array, dimension (NB_A,NB_A)
73* The upper triangular matrix T.
74*
75* Y (local output) COMPLEX pointer into the local memory
76* to an array of dimension (LLD_Y,NB_A). On exit, this array
77* contains the local pieces of the N-by-NB distributed
78* matrix Y. LLD_Y >= LOCr(IA+N-1).
79*
80* IY (global input) INTEGER
81* The row index in the global array Y indicating the first
82* row of sub( Y ).
83*
84* JY (global input) INTEGER
85* The column index in the global array Y indicating the
86* first column of sub( Y ).
87*
88* DESCY (global and local input) INTEGER array of dimension DLEN_.
89* The array descriptor for the distributed matrix Y.
90*
91* WORK (local workspace) COMPLEX array, dimension (NB)
92*
93* Further Details
94* ===============
95*
96* The matrix Q is represented as a product of nb elementary reflectors
97*
98* Q = H(1) H(2) . . . H(nb).
99*
100* Each H(i) has the form
101*
102* H(i) = I - tau * v * v'
103*
104* where tau is a complex scalar, and v is a complex vector with
105* v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in
106* A(ia+i+k:ia+n-1,ja+i-1), and tau in TAU(ja+i-1).
107*
108* The elements of the vectors v together form the (n-k+1)-by-nb matrix
109* V which is needed, with T and Y, to apply the transformation to the
110* unreduced part of the matrix, using an update of the form:
111* A(ia:ia+n-1,ja:ja+n-k) := (I-V*T*V')*(A(ia:ia+n-1,ja:ja+n-k)-Y*V').
112*
113* The contents of A(ia:ia+n-1,ja:ja+n-k) on exit are illustrated by the
114* following example with n = 7, k = 3 and nb = 2:
115*
116* ( a h a a a )
117* ( a h a a a )
118* ( a h a a a )
119* ( h h a a a )
120* ( v1 h a a a )
121* ( v1 v2 a a a )
122* ( v1 v2 a a a )
123*
124* where a denotes an element of the original matrix
125* A(ia:ia+n-1,ja:ja+n-k), h denotes a modified element of the upper
126* Hessenberg matrix H, and vi denotes an element of the vector
127* defining H(i).
128*
129* =====================================================================
130*
131* .. Parameters ..
132 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
133 $ lld_, mb_, m_, nb_, n_, rsrc_
134 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
135 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
136 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
137 COMPLEX ONE, ZERO
138 parameter( one = ( 1.0e+0, 0.0e+0 ),
139 $ zero = ( 0.0e+0, 0.0e+0 ) )
140* ..
141* .. Local Scalars ..
142 LOGICAL IPROC
143 INTEGER I, IACOL, IAROW, ICTXT, IOFF, II, J, JJ, JL,
144 $ jt, jw, l, myrow, mycol, npcol, nprow, nq
145 COMPLEX EI
146* ..
147* .. Local Arrays ..
148 INTEGER DESCW( DLEN_ )
149* ..
150* .. External Functions ..
151 INTEGER NUMROC
152 EXTERNAL numroc
153* ..
154* .. External Subroutines ..
155 EXTERNAL blacs_gridinfo, caxpy, ccopy, cscal,
156 $ ctrmv, descset, infog2l, pcelset,
157 $ pcgemv, pclacgv, pclarfg, pcscal
158* ..
159* .. Intrinsic Functions ..
160 INTRINSIC min, mod
161* ..
162* .. Executable Statements ..
163*
164* Quick return if possible
165*
166 IF( n.LE.1 )
167 $ RETURN
168*
169 ictxt = desca( ctxt_ )
170 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
171*
172 ioff = mod( ja-1, desca( nb_ ) )
173 CALL infog2l( ia+k, ja, desca, nprow, npcol, myrow, mycol, ii,
174 $ jj, iarow, iacol )
175*
176 iproc = ( myrow.EQ.iarow .AND. mycol.EQ.iacol )
177 nq = numroc( n+ja-1, desca( nb_ ), mycol, iacol, npcol )
178 IF( mycol.EQ.iacol )
179 $ nq = nq - ioff
180*
181 ei = zero
182
183 jw = ioff + 1
184 CALL descset( descw, 1, desca( mb_ ), 1, desca( mb_ ), iarow,
185 $ iacol, ictxt, 1 )
186*
187 DO 10 l = 1, nb
188 i = ia + k + l - 2
189 j = ja + l - 1
190*
191 IF( l.GT.1 ) THEN
192*
193* Update A(ia:ia+n-1,j)
194*
195* Compute i-th column of A - Y * V'
196*
197 CALL pclacgv( l-1, a, i, ja, desca, desca( m_ ) )
198 CALL pcgemv( 'No transpose', n, l-1, -one, y, iy, jy, descy,
199 $ a, i, ja, desca, desca( m_ ), one, a, ia, j,
200 $ desca, 1 )
201 CALL pclacgv( l-1, a, i, ja, desca, desca( m_ ) )
202*
203* Apply I - V * T' * V' to this column (call it b) from the
204* left, using the last column of T as workspace
205*
206* Let V = ( V1 ) and b = ( b1 ) (first I-1 rows)
207* ( V2 ) ( b2 )
208*
209* where V1 is unit lower triangular
210*
211* w := V1' * b1
212*
213 IF( iproc ) THEN
214 CALL ccopy( l-1, a( (jj+l-2)*desca( lld_ )+ii ), 1,
215 $ work( jw ), 1 )
216 CALL ctrmv( 'Lower', 'Conjugate transpose', 'Unit', l-1,
217 $ a( (jj-1)*desca( lld_ )+ii ), desca( lld_ ),
218 $ work( jw ), 1 )
219 END IF
220*
221* w := w + V2'*b2
222*
223 CALL pcgemv( 'Conjugate transpose', n-k-l+1, l-1, one, a,
224 $ i+1, ja, desca, a, i+1, j, desca, 1, one, work,
225 $ 1, jw, descw, descw( m_ ) )
226*
227* w := T'*w
228*
229 IF( iproc )
230 $ CALL ctrmv( 'Upper', 'Conjugate transpose', 'Non-unit',
231 $ l-1, t, desca( nb_ ), work( jw ), 1 )
232*
233* b2 := b2 - V2*w
234*
235 CALL pcgemv( 'No transpose', n-k-l+1, l-1, -one, a, i+1, ja,
236 $ desca, work, 1, jw, descw, descw( m_ ), one,
237 $ a, i+1, j, desca, 1 )
238*
239* b1 := b1 - V1*w
240*
241 IF( iproc ) THEN
242 CALL ctrmv( 'Lower', 'No transpose', 'Unit', l-1,
243 $ a( (jj-1)*desca( lld_ )+ii ), desca( lld_ ),
244 $ work( jw ), 1 )
245 CALL caxpy( l-1, -one, work( jw ), 1,
246 $ a( ( jj+l-2 )*desca( lld_ )+ii ), 1 )
247 END IF
248 CALL pcelset( a, i, j-1, desca, ei )
249 END IF
250*
251* Generate the elementary reflector H(i) to annihilate
252* A(ia+k+i:ia+n-1,j)
253*
254 CALL pclarfg( n-k-l+1, ei, i+1, j, a, min( i+2, n+ia-1 ), j,
255 $ desca, 1, tau )
256 CALL pcelset( a, i+1, j, desca, one )
257*
258* Compute Y(iy:y+n-1,jy+l-1)
259*
260 CALL pcgemv( 'No transpose', n, n-k-l+1, one, a, ia, j+1,
261 $ desca, a, i+1, j, desca, 1, zero, y, iy, jy+l-1,
262 $ descy, 1 )
263 CALL pcgemv( 'Conjugate transpose', n-k-l+1, l-1, one, a, i+1,
264 $ ja, desca, a, i+1, j, desca, 1, zero, work, 1, jw,
265 $ descw, descw( m_ ) )
266 CALL pcgemv( 'No transpose', n, l-1, -one, y, iy, jy, descy,
267 $ work, 1, jw, descw, descw( m_ ), one, y, iy,
268 $ jy+l-1, descy, 1 )
269 jl = min( jj+l-1, ja+nq-1 )
270 CALL pcscal( n, tau( jl ), y, iy, jy+l-1, descy, 1 )
271*
272* Compute T(1:i,i)
273*
274 IF( iproc ) THEN
275 jt = ( l-1 ) * desca( nb_ )
276 CALL cscal( l-1, -tau( jl ), work( jw ), 1 )
277 CALL ccopy( l-1, work( jw ), 1, t( jt+1 ), 1 )
278 CALL ctrmv( 'Upper', 'No transpose', 'Non-unit', l-1, t,
279 $ desca( nb_ ), t( jt+1 ), 1 )
280 t( jt+l ) = tau( jl )
281 END IF
282 10 CONTINUE
283*
284 CALL pcelset( a, k+nb+ia-1, j, desca, ei )
285*
286 RETURN
287*
288* End of PCLAHRD
289*
290 END
subroutine descset(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld)
Definition descset.f:3
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
subroutine pcelset(a, ia, ja, desca, alpha)
Definition pcelset.f:2
#define min(A, B)
Definition pcgemr.c:181
subroutine pclacgv(n, x, ix, jx, descx, incx)
Definition pclacgv.f:2
subroutine pclahrd(n, k, nb, a, ia, ja, desca, tau, t, y, iy, jy, descy, work)
Definition pclahrd.f:3
subroutine pclarfg(n, alpha, iax, jax, x, ix, jx, descx, incx, tau)
Definition pclarfg.f:3