SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
psgebd2.f
Go to the documentation of this file.
1 SUBROUTINE psgebd2( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP,
2 $ WORK, LWORK, INFO )
3*
4* -- ScaLAPACK auxiliary routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* May 1, 1997
8*
9* .. Scalar Arguments ..
10 INTEGER IA, INFO, JA, LWORK, M, N
11* ..
12* .. Array Arguments ..
13 INTEGER DESCA( * )
14 REAL A( * ), D( * ), E( * ), TAUP( * ), TAUQ( * ),
15 $ work( * )
16* ..
17*
18* Purpose
19* =======
20*
21* PSGEBD2 reduces a real general M-by-N distributed matrix
22* sub( A ) = A(IA:IA+M-1,JA:JA+N-1) to upper or lower bidiagonal
23* form B by an orthogonal transformation: Q' * sub( A ) * P = B.
24*
25* If M >= N, B is upper bidiagonal; if M < N, B is lower bidiagonal.
26*
27* Notes
28* =====
29*
30* Each global data object is described by an associated description
31* vector. This vector stores the information required to establish
32* the mapping between an object element and its corresponding process
33* and memory location.
34*
35* Let A be a generic term for any 2D block cyclicly distributed array.
36* Such a global array has an associated description vector DESCA.
37* In the following comments, the character _ should be read as
38* "of the global array".
39*
40* NOTATION STORED IN EXPLANATION
41* --------------- -------------- --------------------------------------
42* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
43* DTYPE_A = 1.
44* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
45* the BLACS process grid A is distribu-
46* ted over. The context itself is glo-
47* bal, but the handle (the integer
48* value) may vary.
49* M_A (global) DESCA( M_ ) The number of rows in the global
50* array A.
51* N_A (global) DESCA( N_ ) The number of columns in the global
52* array A.
53* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
54* the rows of the array.
55* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
56* the columns of the array.
57* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
58* row of the array A is distributed.
59* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
60* first column of the array A is
61* distributed.
62* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
63* array. LLD_A >= MAX(1,LOCr(M_A)).
64*
65* Let K be the number of rows or columns of a distributed matrix,
66* and assume that its process grid has dimension p x q.
67* LOCr( K ) denotes the number of elements of K that a process
68* would receive if K were distributed over the p processes of its
69* process column.
70* Similarly, LOCc( K ) denotes the number of elements of K that a
71* process would receive if K were distributed over the q processes of
72* its process row.
73* The values of LOCr() and LOCc() may be determined via a call to the
74* ScaLAPACK tool function, NUMROC:
75* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
76* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
77* An upper bound for these quantities may be computed by:
78* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
79* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
80*
81* Arguments
82* =========
83*
84* M (global input) INTEGER
85* The number of rows to be operated on, i.e. the number of rows
86* of the distributed submatrix sub( A ). M >= 0.
87*
88* N (global input) INTEGER
89* The number of columns to be operated on, i.e. the number of
90* columns of the distributed submatrix sub( A ). N >= 0.
91*
92* A (local input/local output) REAL pointer into the
93* local memory to an array of dimension (LLD_A,LOCc(JA+N-1)).
94* On entry, this array contains the local pieces of the
95* general distributed matrix sub( A ). On exit, if M >= N,
96* the diagonal and the first superdiagonal of sub( A ) are
97* overwritten with the upper bidiagonal matrix B; the elements
98* below the diagonal, with the array TAUQ, represent the
99* orthogonal matrix Q as a product of elementary reflectors,
100* and the elements above the first superdiagonal, with the
101* array TAUP, represent the orthogonal matrix P as a product
102* of elementary reflectors. If M < N, the diagonal and the
103* first subdiagonal are overwritten with the lower bidiagonal
104* matrix B; the elements below the first subdiagonal, with the
105* array TAUQ, represent the orthogonal matrix Q as a product of
106* elementary reflectors, and the elements above the diagonal,
107* with the array TAUP, represent the orthogonal matrix P as a
108* product of elementary reflectors. See Further Details.
109*
110* IA (global input) INTEGER
111* The row index in the global array A indicating the first
112* row of sub( A ).
113*
114* JA (global input) INTEGER
115* The column index in the global array A indicating the
116* first column of sub( A ).
117*
118* DESCA (global and local input) INTEGER array of dimension DLEN_.
119* The array descriptor for the distributed matrix A.
120*
121* D (local output) REAL array, dimension
122* LOCc(JA+MIN(M,N)-1) if M >= N; LOCr(IA+MIN(M,N)-1) otherwise.
123* The distributed diagonal elements of the bidiagonal matrix
124* B: D(i) = A(i,i). D is tied to the distributed matrix A.
125*
126* E (local output) REAL array, dimension
127* LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise.
128* The distributed off-diagonal elements of the bidiagonal
129* distributed matrix B:
130* if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
131* if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
132* E is tied to the distributed matrix A.
133*
134* TAUQ (local output) REAL array dimension
135* LOCc(JA+MIN(M,N)-1). The scalar factors of the elementary
136* reflectors which represent the orthogonal matrix Q. TAUQ
137* is tied to the distributed matrix A. See Further Details.
138*
139* TAUP (local output) REAL array, dimension
140* LOCr(IA+MIN(M,N)-1). The scalar factors of the elementary
141* reflectors which represent the orthogonal matrix P. TAUP
142* is tied to the distributed matrix A. See Further Details.
143*
144* WORK (local workspace/local output) REAL array,
145* dimension (LWORK)
146* On exit, WORK(1) returns the minimal and optimal LWORK.
147*
148* LWORK (local or global input) INTEGER
149* The dimension of the array WORK.
150* LWORK is local input and must be at least
151* LWORK >= MAX( MpA0, NqA0 )
152*
153* where NB = MB_A = NB_A, IROFFA = MOD( IA-1, NB )
154* IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ),
155* IACOL = INDXG2P( JA, NB, MYCOL, CSRC_A, NPCOL ),
156* MpA0 = NUMROC( M+IROFFA, NB, MYROW, IAROW, NPROW ),
157* NqA0 = NUMROC( N+IROFFA, NB, MYCOL, IACOL, NPCOL ).
158*
159* INDXG2P and NUMROC are ScaLAPACK tool functions;
160* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
161* the subroutine BLACS_GRIDINFO.
162*
163* If LWORK = -1, then LWORK is global input and a workspace
164* query is assumed; the routine only calculates the minimum
165* and optimal size for all work arrays. Each of these
166* values is returned in the first entry of the corresponding
167* work array, and no error message is issued by PXERBLA.
168*
169* INFO (local output) INTEGER
170* = 0: successful exit
171* < 0: If the i-th argument is an array and the j-entry had
172* an illegal value, then INFO = -(i*100+j), if the i-th
173* argument is a scalar and had an illegal value, then
174* INFO = -i.
175*
176* Further Details
177* ===============
178*
179* The matrices Q and P are represented as products of elementary
180* reflectors:
181*
182* If m >= n,
183*
184* Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1)
185*
186* Each H(i) and G(i) has the form:
187*
188* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
189*
190* where tauq and taup are real scalars, and v and u are real vectors;
191* v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in
192* A(ia+i:ia+m-1,ja+i-1);
193* u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in
194* A(ia+i-1,ja+i+1:ja+n-1);
195* tauq is stored in TAUQ(ja+i-1) and taup in TAUP(ia+i-1).
196*
197* If m < n,
198*
199* Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m)
200*
201* Each H(i) and G(i) has the form:
202*
203* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
204*
205* where tauq and taup are real scalars, and v and u are real vectors;
206* v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in
207* A(ia+i+1:ia+m-1,ja+i-1);
208* u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in
209* A(ia+i-1,ja+i:ja+n-1);
210* tauq is stored in TAUQ(ja+i-1) and taup in TAUP(ia+i-1).
211*
212* The contents of sub( A ) on exit are illustrated by the following
213* examples:
214*
215* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
216*
217* ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 )
218* ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 )
219* ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 )
220* ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 )
221* ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 )
222* ( v1 v2 v3 v4 v5 )
223*
224* where d and e denote diagonal and off-diagonal elements of B, vi
225* denotes an element of the vector defining H(i), and ui an element of
226* the vector defining G(i).
227*
228* Alignment requirements
229* ======================
230*
231* The distributed submatrix sub( A ) must verify some alignment proper-
232* ties, namely the following expressions should be true:
233* ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA )
234*
235* =====================================================================
236*
237* .. Parameters ..
238 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
239 $ lld_, mb_, m_, nb_, n_, rsrc_
240 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
241 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
242 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
243 REAL ONE, ZERO
244 parameter( one = 1.0e+0, zero = 0.0e+0 )
245* ..
246* .. Local Scalars ..
247 LOGICAL LQUERY
248 INTEGER I, IACOL, IAROW, ICOFFA, ICTXT, II, IROFFA, J,
249 $ jj, k, lwmin, mpa0, mycol, myrow, npcol, nprow,
250 $ nqa0
251 REAL ALPHA
252* ..
253* .. Local Arrays ..
254 INTEGER DESCD( DLEN_ ), DESCE( DLEN_ )
255* ..
256* .. External Subroutines ..
257 EXTERNAL blacs_abort, blacs_gridinfo, chk1mat, descset,
259 $ pxerbla, sgebr2d, sgebs2d, slarfg
260* ..
261* .. External Functions ..
262 INTEGER INDXG2P, NUMROC
263 EXTERNAL indxg2p, numroc
264* ..
265* .. Intrinsic Functions ..
266 INTRINSIC max, min, mod, real
267* ..
268* .. Executable Statements ..
269*
270* Test the input parameters
271*
272 ictxt = desca( ctxt_ )
273 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
274*
275* Test the input parameters
276*
277 info = 0
278 IF( nprow.EQ.-1 ) THEN
279 info = -(600+ctxt_)
280 ELSE
281 CALL chk1mat( m, 1, n, 2, ia, ja, desca, 6, info )
282 IF( info.EQ.0 ) THEN
283 iroffa = mod( ia-1, desca( mb_ ) )
284 icoffa = mod( ja-1, desca( nb_ ) )
285 iarow = indxg2p( ia, desca( mb_ ), myrow, desca( rsrc_ ),
286 $ nprow )
287 iacol = indxg2p( ja, desca( nb_ ), mycol, desca( csrc_ ),
288 $ npcol )
289 mpa0 = numroc( m+iroffa, desca( mb_ ), myrow, iarow, nprow )
290 nqa0 = numroc( n+icoffa, desca( nb_ ), mycol, iacol, npcol )
291 lwmin = max( mpa0, nqa0 )
292*
293 work( 1 ) = real( lwmin )
294 lquery = ( lwork.EQ.-1 )
295 IF( iroffa.NE.icoffa ) THEN
296 info = -5
297 ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
298 info = -(600+nb_)
299 ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
300 info = -12
301 END IF
302 END IF
303 END IF
304*
305 IF( info.LT.0 ) THEN
306 CALL pxerbla( ictxt, 'PSGEBD2', -info )
307 CALL blacs_abort( ictxt, 1 )
308 RETURN
309 ELSE IF( lquery ) THEN
310 RETURN
311 END IF
312*
313 CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, ii, jj,
314 $ iarow, iacol )
315*
316 IF( m.EQ.1 .AND. n.EQ.1 ) THEN
317 IF( mycol.EQ.iacol ) THEN
318 IF( myrow.EQ.iarow ) THEN
319 i = ii+(jj-1)*desca( lld_ )
320 CALL slarfg( 1, a( i ), a( i ), 1, tauq( jj ) )
321 d( jj ) = a( i )
322 CALL sgebs2d( ictxt, 'Columnwise', ' ', 1, 1, d( jj ),
323 $ 1 )
324 CALL sgebs2d( ictxt, 'Columnwise', ' ', 1, 1, tauq( jj ),
325 $ 1 )
326 ELSE
327 CALL sgebr2d( ictxt, 'Columnwise', ' ', 1, 1, d( jj ),
328 $ 1, iarow, iacol )
329 CALL sgebr2d( ictxt, 'Columnwise', ' ', 1, 1, tauq( jj ),
330 $ 1, iarow, iacol )
331 END IF
332 END IF
333 IF( myrow.EQ.iarow )
334 $ taup( ii ) = zero
335 RETURN
336 END IF
337*
338 alpha = zero
339*
340 IF( m.GE.n ) THEN
341*
342* Reduce to upper bidiagonal form
343*
344 CALL descset( descd, 1, ja+min(m,n)-1, 1, desca( nb_ ), myrow,
345 $ desca( csrc_ ), desca( ctxt_ ), 1 )
346 CALL descset( desce, ia+min(m,n)-1, 1, desca( mb_ ), 1,
347 $ desca( rsrc_ ), mycol, desca( ctxt_ ),
348 $ desca( lld_ ) )
349 DO 10 k = 1, n
350 i = ia + k - 1
351 j = ja + k - 1
352*
353* Generate elementary reflector H(j) to annihilate
354* A(ia+i:ia+m-1,j)
355*
356 CALL pslarfg( m-k+1, alpha, i, j, a, min( i+1, m+ia-1 ),
357 $ j, desca, 1, tauq )
358 CALL pselset( d, 1, j, descd, alpha )
359 CALL pselset( a, i, j, desca, one )
360*
361* Apply H(i) to A(i:ia+m-1,i+1:ja+n-1) from the left
362*
363 CALL pslarf( 'Left', m-k+1, n-k, a, i, j, desca, 1, tauq, a,
364 $ i, j+1, desca, work )
365 CALL pselset( a, i, j, desca, alpha )
366*
367 IF( k.LT.n ) THEN
368*
369* Generate elementary reflector G(i) to annihilate
370* A(i,ja+j+1:ja+n-1)
371*
372 CALL pslarfg( n-k, alpha, i, j+1, a, i,
373 $ min( j+2, ja+n-1 ), desca, desca( m_ ),
374 $ taup )
375 CALL pselset( e, i, 1, desce, alpha )
376 CALL pselset( a, i, j+1, desca, one )
377*
378* Apply G(i) to A(i+1:ia+m-1,i+1:ja+n-1) from the right
379*
380 CALL pslarf( 'Right', m-k, n-k, a, i, j+1, desca,
381 $ desca( m_ ), taup, a, i+1, j+1, desca,
382 $ work )
383 CALL pselset( a, i, j+1, desca, alpha )
384 ELSE
385 CALL pselset( taup, i, 1, desce, zero )
386 END IF
387 10 CONTINUE
388*
389 ELSE
390*
391* Reduce to lower bidiagonal form
392*
393 CALL descset( descd, ia+min(m,n)-1, 1, desca( mb_ ), 1,
394 $ desca( rsrc_ ), mycol, desca( ctxt_ ),
395 $ desca( lld_ ) )
396 CALL descset( desce, 1, ja+min(m,n)-1, 1, desca( nb_ ), myrow,
397 $ desca( csrc_ ), desca( ctxt_ ), 1 )
398 DO 20 k = 1, m
399 i = ia + k - 1
400 j = ja + k - 1
401*
402* Generate elementary reflector G(i) to annihilate
403* A(i,ja+j:ja+n-1)
404*
405 CALL pslarfg( n-k+1, alpha, i, j, a, i,
406 $ min( j+1, ja+n-1 ), desca, desca( m_ ), taup )
407 CALL pselset( d, i, 1, descd, alpha )
408 CALL pselset( a, i, j, desca, one )
409*
410* Apply G(i) to A(i:ia+m-1,j:ja+n-1) from the right
411*
412 CALL pslarf( 'Right', m-k, n-k+1, a, i, j, desca,
413 $ desca( m_ ), taup, a, min( i+1, ia+m-1 ), j,
414 $ desca, work )
415 CALL pselset( a, i, j, desca, alpha )
416*
417 IF( k.LT.m ) THEN
418*
419* Generate elementary reflector H(i) to annihilate
420* A(i+2:ia+m-1,j)
421*
422 CALL pslarfg( m-k, alpha, i+1, j, a,
423 $ min( i+2, ia+m-1 ), j, desca, 1, tauq )
424 CALL pselset( e, 1, j, desce, alpha )
425 CALL pselset( a, i+1, j, desca, one )
426*
427* Apply H(i) to A(i+1:ia+m-1,j+1:ja+n-1) from the left
428*
429 CALL pslarf( 'Left', m-k, n-k, a, i+1, j, desca, 1, tauq,
430 $ a, i+1, j+1, desca, work )
431 CALL pselset( a, i+1, j, desca, alpha )
432 ELSE
433 CALL pselset( tauq, 1, j, desce, zero )
434 END IF
435 20 CONTINUE
436 END IF
437*
438 work( 1 ) = real( lwmin )
439*
440 RETURN
441*
442* End of PSGEBD2
443*
444 END
subroutine chk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, info)
Definition chk1mat.f:3
subroutine descset(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld)
Definition descset.f:3
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pselset(a, ia, ja, desca, alpha)
Definition pselset.f:2
subroutine psgebd2(m, n, a, ia, ja, desca, d, e, tauq, taup, work, lwork, info)
Definition psgebd2.f:3
subroutine pslarf(side, m, n, v, iv, jv, descv, incv, tau, c, ic, jc, descc, work)
Definition pslarf.f:3
subroutine pslarfg(n, alpha, iax, jax, x, ix, jx, descx, incx, tau)
Definition pslarfg.f:3
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2