SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
psgebrd.f
Go to the documentation of this file.
1 SUBROUTINE psgebrd( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP,
2 $ WORK, LWORK, INFO )
3*
4* -- ScaLAPACK routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* May 25, 2001
8*
9* .. Scalar Arguments ..
10 INTEGER IA, INFO, JA, LWORK, M, N
11* ..
12* .. Array Arguments ..
13 INTEGER DESCA( * )
14 REAL A( * ), D( * ), E( * ), TAUP( * ), TAUQ( * ),
15 $ work( * )
16* ..
17*
18* Purpose
19* =======
20*
21* PSGEBRD reduces a real general M-by-N distributed matrix
22* sub( A ) = A(IA:IA+M-1,JA:JA+N-1) to upper or lower bidiagonal
23* form B by an orthogonal transformation: Q' * sub( A ) * P = B.
24*
25* If M >= N, B is upper bidiagonal; if M < N, B is lower bidiagonal.
26*
27* Notes
28* =====
29*
30* Each global data object is described by an associated description
31* vector. This vector stores the information required to establish
32* the mapping between an object element and its corresponding process
33* and memory location.
34*
35* Let A be a generic term for any 2D block cyclicly distributed array.
36* Such a global array has an associated description vector DESCA.
37* In the following comments, the character _ should be read as
38* "of the global array".
39*
40* NOTATION STORED IN EXPLANATION
41* --------------- -------------- --------------------------------------
42* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
43* DTYPE_A = 1.
44* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
45* the BLACS process grid A is distribu-
46* ted over. The context itself is glo-
47* bal, but the handle (the integer
48* value) may vary.
49* M_A (global) DESCA( M_ ) The number of rows in the global
50* array A.
51* N_A (global) DESCA( N_ ) The number of columns in the global
52* array A.
53* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
54* the rows of the array.
55* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
56* the columns of the array.
57* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
58* row of the array A is distributed.
59* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
60* first column of the array A is
61* distributed.
62* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
63* array. LLD_A >= MAX(1,LOCr(M_A)).
64*
65* Let K be the number of rows or columns of a distributed matrix,
66* and assume that its process grid has dimension p x q.
67* LOCr( K ) denotes the number of elements of K that a process
68* would receive if K were distributed over the p processes of its
69* process column.
70* Similarly, LOCc( K ) denotes the number of elements of K that a
71* process would receive if K were distributed over the q processes of
72* its process row.
73* The values of LOCr() and LOCc() may be determined via a call to the
74* ScaLAPACK tool function, NUMROC:
75* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
76* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
77* An upper bound for these quantities may be computed by:
78* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
79* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
80*
81* Arguments
82* =========
83*
84* M (global input) INTEGER
85* The number of rows to be operated on, i.e. the number of rows
86* of the distributed submatrix sub( A ). M >= 0.
87*
88* N (global input) INTEGER
89* The number of columns to be operated on, i.e. the number of
90* columns of the distributed submatrix sub( A ). N >= 0.
91*
92* A (local input/local output) REAL pointer into the
93* local memory to an array of dimension (LLD_A,LOCc(JA+N-1)).
94* On entry, this array contains the local pieces of the
95* general distributed matrix sub( A ). On exit, if M >= N,
96* the diagonal and the first superdiagonal of sub( A ) are
97* overwritten with the upper bidiagonal matrix B; the elements
98* below the diagonal, with the array TAUQ, represent the
99* orthogonal matrix Q as a product of elementary reflectors,
100* and the elements above the first superdiagonal, with the
101* array TAUP, represent the orthogonal matrix P as a product
102* of elementary reflectors. If M < N, the diagonal and the
103* first subdiagonal are overwritten with the lower bidiagonal
104* matrix B; the elements below the first subdiagonal, with the
105* array TAUQ, represent the orthogonal matrix Q as a product of
106* elementary reflectors, and the elements above the diagonal,
107* with the array TAUP, represent the orthogonal matrix P as a
108* product of elementary reflectors. See Further Details.
109*
110* IA (global input) INTEGER
111* The row index in the global array A indicating the first
112* row of sub( A ).
113*
114* JA (global input) INTEGER
115* The column index in the global array A indicating the
116* first column of sub( A ).
117*
118* DESCA (global and local input) INTEGER array of dimension DLEN_.
119* The array descriptor for the distributed matrix A.
120*
121* D (local output) REAL array, dimension
122* LOCc(JA+MIN(M,N)-1) if M >= N; LOCr(IA+MIN(M,N)-1) otherwise.
123* The distributed diagonal elements of the bidiagonal matrix
124* B: D(i) = A(i,i). D is tied to the distributed matrix A.
125*
126* E (local output) REAL array, dimension
127* LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise.
128* The distributed off-diagonal elements of the bidiagonal
129* distributed matrix B:
130* if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
131* if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
132* E is tied to the distributed matrix A.
133*
134* TAUQ (local output) REAL array dimension
135* LOCc(JA+MIN(M,N)-1). The scalar factors of the elementary
136* reflectors which represent the orthogonal matrix Q. TAUQ
137* is tied to the distributed matrix A. See Further Details.
138*
139* TAUP (local output) REAL array, dimension
140* LOCr(IA+MIN(M,N)-1). The scalar factors of the elementary
141* reflectors which represent the orthogonal matrix P. TAUP
142* is tied to the distributed matrix A. See Further Details.
143*
144* WORK (local workspace/local output) REAL array,
145* dimension (LWORK)
146* On exit, WORK( 1 ) returns the minimal and optimal LWORK.
147*
148* LWORK (local or global input) INTEGER
149* The dimension of the array WORK.
150* LWORK is local input and must be at least
151* LWORK >= NB*( MpA0 + NqA0 + 1 ) + NqA0
152*
153* where NB = MB_A = NB_A,
154* IROFFA = MOD( IA-1, NB ), ICOFFA = MOD( JA-1, NB ),
155* IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ),
156* IACOL = INDXG2P( JA, NB, MYCOL, CSRC_A, NPCOL ),
157* MpA0 = NUMROC( M+IROFFA, NB, MYROW, IAROW, NPROW ),
158* NqA0 = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ).
159*
160* INDXG2P and NUMROC are ScaLAPACK tool functions;
161* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
162* the subroutine BLACS_GRIDINFO.
163*
164* If LWORK = -1, then LWORK is global input and a workspace
165* query is assumed; the routine only calculates the minimum
166* and optimal size for all work arrays. Each of these
167* values is returned in the first entry of the corresponding
168* work array, and no error message is issued by PXERBLA.
169*
170* INFO (global output) INTEGER
171* = 0: successful exit
172* < 0: If the i-th argument is an array and the j-entry had
173* an illegal value, then INFO = -(i*100+j), if the i-th
174* argument is a scalar and had an illegal value, then
175* INFO = -i.
176*
177* Further Details
178* ===============
179*
180* The matrices Q and P are represented as products of elementary
181* reflectors:
182*
183* If m >= n,
184*
185* Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1)
186*
187* Each H(i) and G(i) has the form:
188*
189* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
190*
191* where tauq and taup are real scalars, and v and u are real vectors;
192* v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in
193* A(ia+i:ia+m-1,ja+i-1);
194* u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in
195* A(ia+i-1,ja+i+1:ja+n-1);
196* tauq is stored in TAUQ(ja+i-1) and taup in TAUP(ia+i-1).
197*
198* If m < n,
199*
200* Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m)
201*
202* Each H(i) and G(i) has the form:
203*
204* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
205*
206* where tauq and taup are real scalars, and v and u are real vectors;
207* v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in
208* A(ia+i+1:ia+m-1,ja+i-1);
209* u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in
210* A(ia+i-1,ja+i:ja+n-1);
211* tauq is stored in TAUQ(ja+i-1) and taup in TAUP(ia+i-1).
212*
213* The contents of sub( A ) on exit are illustrated by the following
214* examples:
215*
216* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
217*
218* ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 )
219* ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 )
220* ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 )
221* ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 )
222* ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 )
223* ( v1 v2 v3 v4 v5 )
224*
225* where d and e denote diagonal and off-diagonal elements of B, vi
226* denotes an element of the vector defining H(i), and ui an element of
227* the vector defining G(i).
228*
229* Alignment requirements
230* ======================
231*
232* The distributed submatrix sub( A ) must verify some alignment proper-
233* ties, namely the following expressions should be true:
234* ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA )
235*
236* =====================================================================
237*
238* .. Parameters ..
239 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
240 $ lld_, mb_, m_, nb_, n_, rsrc_
241 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
242 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
243 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
244 REAL ONE
245 parameter( one = 1.0e+0 )
246* ..
247* .. Local Scalars ..
248 LOGICAL LQUERY
249 CHARACTER COLCTOP, ROWCTOP
250 INTEGER I, IACOL, IAROW, ICTXT, IINFO, IOFF, IPW, IPY,
251 $ iw, j, jb, js, jw, k, l, lwmin, mn, mp, mycol,
252 $ myrow, nb, npcol, nprow, nq
253* ..
254* .. Local Arrays ..
255 INTEGER DESCWX( DLEN_ ), DESCWY( DLEN_ ), IDUM1( 1 ),
256 $ idum2( 1 )
257* ..
258* .. External Subroutines ..
259 EXTERNAL blacs_gridinfo, chk1mat, descset, pchk1mat,
260 $ pselset, psgebd2, psgemm, pslabrd,
261 $ pb_topget, pb_topset, pxerbla
262* ..
263* .. External Functions ..
264 INTEGER INDXG2L, INDXG2P, NUMROC
265 EXTERNAL indxg2l, indxg2p, numroc
266* ..
267* .. Intrinsic Functions ..
268 INTRINSIC max, min, mod, real
269* ..
270* .. Executable Statements ..
271*
272* Get grid parameters
273*
274 ictxt = desca( ctxt_ )
275 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
276*
277* Test the input parameters
278*
279 info = 0
280 IF( nprow.EQ.-1 ) THEN
281 info = -(600+ctxt_)
282 ELSE
283 CALL chk1mat( m, 1, n, 2, ia, ja, desca, 6, info )
284 IF( info.EQ.0 ) THEN
285 nb = desca( mb_ )
286 ioff = mod( ia-1, desca( mb_ ) )
287 iarow = indxg2p( ia, nb, myrow, desca( rsrc_ ), nprow )
288 iacol = indxg2p( ja, nb, mycol, desca( csrc_ ), npcol )
289 mp = numroc( m+ioff, nb, myrow, iarow, nprow )
290 nq = numroc( n+ioff, nb, mycol, iacol, npcol )
291 lwmin = nb*( mp+nq+1 ) + nq
292*
293 work( 1 ) = real( lwmin )
294 lquery = ( lwork.EQ.-1 )
295 IF( ioff.NE.mod( ja-1, desca( nb_ ) ) ) THEN
296 info = -5
297 ELSE IF( nb.NE.desca( nb_ ) ) THEN
298 info = -(600+nb_)
299 ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
300 info = -12
301 END IF
302 END IF
303 IF( lquery ) THEN
304 idum1( 1 ) = -1
305 ELSE
306 idum1( 1 ) = 1
307 END IF
308 idum2( 1 ) = 12
309 CALL pchk1mat( m, 1, n, 2, ia, ja, desca, 6, 1, idum1, idum2,
310 $ info )
311 END IF
312*
313 IF( info.LT.0 ) THEN
314 CALL pxerbla( ictxt, 'PSGEBRD', -info )
315 RETURN
316 ELSE IF( lquery ) THEN
317 RETURN
318 END IF
319*
320* Quick return if possible
321*
322 mn = min( m, n )
323 IF( mn.EQ.0 )
324 $ RETURN
325*
326* Initialize parameters.
327*
328 CALL pb_topget( ictxt, 'Combine', 'Columnwise', colctop )
329 CALL pb_topget( ictxt, 'Combine', 'Rowwise', rowctop )
330 CALL pb_topset( ictxt, 'Combine', 'Columnwise', '1-tree' )
331 CALL pb_topset( ictxt, 'Combine', 'Rowwise', '1-tree' )
332*
333 ipy = mp * nb + 1
334 ipw = nq * nb + ipy
335*
336 CALL descset( descwx, m+ioff, nb, nb, nb, iarow, iacol, ictxt,
337 $ max( 1, mp ) )
338 CALL descset( descwy, nb, n+ioff, nb, nb, iarow, iacol, ictxt,
339 $ nb )
340*
341 mp = numroc( m+ia-1, nb, myrow, desca( rsrc_ ), nprow )
342 nq = numroc( n+ja-1, nb, mycol, desca( csrc_ ), npcol )
343 k = 1
344 jb = nb - ioff
345 iw = ioff + 1
346 jw = ioff + 1
347*
348 DO 10 l = 1, mn+ioff-nb, nb
349 i = ia + k - 1
350 j = ja + k - 1
351*
352* Reduce rows and columns i:i+nb-1 to bidiagonal form and return
353* the matrices X and Y which are needed to update the unreduced
354* part of the matrix.
355*
356 CALL pslabrd( m-k+1, n-k+1, jb, a, i, j, desca, d, e, tauq,
357 $ taup, work, iw, jw, descwx, work( ipy ), iw,
358 $ jw, descwy, work( ipw ) )
359*
360* Update the trailing submatrix A(i+nb:ia+m-1,j+nb:ja+n-1), using
361* an update of the form A := A - V*Y' - X*U'.
362*
363 CALL psgemm( 'No transpose', 'No transpose', m-k-jb+1,
364 $ n-k-jb+1, jb, -one, a, i+jb, j, desca,
365 $ work( ipy ), iw, jw+jb, descwy, one, a, i+jb,
366 $ j+jb, desca )
367 CALL psgemm( 'No transpose', 'No transpose', m-k-jb+1,
368 $ n-k-jb+1, jb, -one, work, iw+jb, jw, descwx, a, i,
369 $ j+jb, desca, one, a, i+jb, j+jb, desca )
370*
371* Copy last off-diagonal elements of B back into sub( A ).
372*
373 IF( m.GE.n ) THEN
374 js = min( indxg2l( i+jb-1, nb, 0, desca( rsrc_ ), nprow ),
375 $ mp )
376 IF( js.GT.0 )
377 $ CALL pselset( a, i+jb-1, j+jb, desca, e( js ) )
378 ELSE
379 js = min( indxg2l( j+jb-1, nb, 0, desca( csrc_ ), npcol ),
380 $ nq )
381 IF( js.GT.0 )
382 $ CALL pselset( a, i+jb, j+jb-1, desca, e( js ) )
383 END IF
384*
385 k = k + jb
386 jb = nb
387 iw = 1
388 jw = 1
389 descwx( m_ ) = descwx( m_ ) - jb
390 descwx( rsrc_ ) = mod( descwx( rsrc_ ) + 1, nprow )
391 descwx( csrc_ ) = mod( descwx( csrc_ ) + 1, npcol )
392 descwy( n_ ) = descwy( n_ ) - jb
393 descwy( rsrc_ ) = mod( descwy( rsrc_ ) + 1, nprow )
394 descwy( csrc_ ) = mod( descwy( csrc_ ) + 1, npcol )
395*
396 10 CONTINUE
397*
398* Use unblocked code to reduce the remainder of the matrix.
399*
400 CALL psgebd2( m-k+1, n-k+1, a, ia+k-1, ja+k-1, desca, d, e, tauq,
401 $ taup, work, lwork, iinfo )
402*
403 CALL pb_topset( ictxt, 'Combine', 'Columnwise', colctop )
404 CALL pb_topset( ictxt, 'Combine', 'Rowwise', rowctop )
405*
406 work( 1 ) = real( lwmin )
407*
408 RETURN
409*
410* End of PSGEBRD
411*
412 END
subroutine chk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, info)
Definition chk1mat.f:3
subroutine descset(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld)
Definition descset.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pchk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, nextra, ex, expos, info)
Definition pchkxmat.f:3
subroutine pselset(a, ia, ja, desca, alpha)
Definition pselset.f:2
subroutine psgebd2(m, n, a, ia, ja, desca, d, e, tauq, taup, work, lwork, info)
Definition psgebd2.f:3
subroutine psgebrd(m, n, a, ia, ja, desca, d, e, tauq, taup, work, lwork, info)
Definition psgebrd.f:3
subroutine pslabrd(m, n, nb, a, ia, ja, desca, d, e, tauq, taup, x, ix, jx, descx, y, iy, jy, descy, work)
Definition pslabrd.f:3
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2