SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pcgebd2.f
Go to the documentation of this file.
1 SUBROUTINE pcgebd2( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP,
2 $ WORK, LWORK, INFO )
3*
4* -- ScaLAPACK auxiliary routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* May 1, 1997
8*
9* .. Scalar Arguments ..
10 INTEGER IA, INFO, JA, LWORK, M, N
11* ..
12* .. Array Arguments ..
13 INTEGER DESCA( * )
14 REAL D( * ), E( * )
15 COMPLEX A( * ), TAUP( * ), TAUQ( * ), WORK( * )
16* ..
17*
18* Purpose
19* =======
20*
21* PCGEBD2 reduces a complex general M-by-N distributed matrix
22* sub( A ) = A(IA:IA+M-1,JA:JA+N-1) to upper or lower bidiagonal
23* form B by an unitary transformation: Q' * sub( A ) * P = B.
24*
25* If M >= N, B is upper bidiagonal; if M < N, B is lower bidiagonal.
26*
27* Notes
28* =====
29*
30* Each global data object is described by an associated description
31* vector. This vector stores the information required to establish
32* the mapping between an object element and its corresponding process
33* and memory location.
34*
35* Let A be a generic term for any 2D block cyclicly distributed array.
36* Such a global array has an associated description vector DESCA.
37* In the following comments, the character _ should be read as
38* "of the global array".
39*
40* NOTATION STORED IN EXPLANATION
41* --------------- -------------- --------------------------------------
42* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
43* DTYPE_A = 1.
44* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
45* the BLACS process grid A is distribu-
46* ted over. The context itself is glo-
47* bal, but the handle (the integer
48* value) may vary.
49* M_A (global) DESCA( M_ ) The number of rows in the global
50* array A.
51* N_A (global) DESCA( N_ ) The number of columns in the global
52* array A.
53* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
54* the rows of the array.
55* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
56* the columns of the array.
57* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
58* row of the array A is distributed.
59* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
60* first column of the array A is
61* distributed.
62* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
63* array. LLD_A >= MAX(1,LOCr(M_A)).
64*
65* Let K be the number of rows or columns of a distributed matrix,
66* and assume that its process grid has dimension p x q.
67* LOCr( K ) denotes the number of elements of K that a process
68* would receive if K were distributed over the p processes of its
69* process column.
70* Similarly, LOCc( K ) denotes the number of elements of K that a
71* process would receive if K were distributed over the q processes of
72* its process row.
73* The values of LOCr() and LOCc() may be determined via a call to the
74* ScaLAPACK tool function, NUMROC:
75* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
76* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
77* An upper bound for these quantities may be computed by:
78* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
79* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
80*
81* Arguments
82* =========
83*
84* M (global input) INTEGER
85* The number of rows to be operated on, i.e. the number of rows
86* of the distributed submatrix sub( A ). M >= 0.
87*
88* N (global input) INTEGER
89* The number of columns to be operated on, i.e. the number of
90* columns of the distributed submatrix sub( A ). N >= 0.
91*
92* A (local input/local output) COMPLEX pointer into the
93* local memory to an array of dimension (LLD_A,LOCc(JA+N-1)).
94* On entry, this array contains the local pieces of the
95* general distributed matrix sub( A ). On exit, if M >= N,
96* the diagonal and the first superdiagonal of sub( A ) are
97* overwritten with the upper bidiagonal matrix B; the elements
98* below the diagonal, with the array TAUQ, represent the
99* unitary matrix Q as a product of elementary reflectors, and
100* the elements above the first superdiagonal, with the array
101* TAUP, represent the orthogonal matrix P as a product of
102* elementary reflectors. If M < N, the diagonal and the first
103* subdiagonal are overwritten with the lower bidiagonal
104* matrix B; the elements below the first subdiagonal, with the
105* array TAUQ, represent the unitary matrix Q as a product of
106* elementary reflectors, and the elements above the diagonal,
107* with the array TAUP, represent the orthogonal matrix P as a
108* product of elementary reflectors. See Further Details.
109*
110* IA (global input) INTEGER
111* The row index in the global array A indicating the first
112* row of sub( A ).
113*
114* JA (global input) INTEGER
115* The column index in the global array A indicating the
116* first column of sub( A ).
117*
118* DESCA (global and local input) INTEGER array of dimension DLEN_.
119* The array descriptor for the distributed matrix A.
120*
121* D (local output) REAL array, dimension
122* LOCc(JA+MIN(M,N)-1) if M >= N; LOCr(IA+MIN(M,N)-1) otherwise.
123* The distributed diagonal elements of the bidiagonal matrix
124* B: D(i) = A(i,i). D is tied to the distributed matrix A.
125*
126* E (local output) REAL array, dimension
127* LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise.
128* The distributed off-diagonal elements of the bidiagonal
129* distributed matrix B:
130* if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
131* if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
132* E is tied to the distributed matrix A.
133*
134* TAUQ (local output) COMPLEX array dimension
135* LOCc(JA+MIN(M,N)-1). The scalar factors of the elementary
136* reflectors which represent the unitary matrix Q. TAUQ is
137* tied to the distributed matrix A. See Further Details.
138*
139* TAUP (local output) COMPLEX array, dimension
140* LOCr(IA+MIN(M,N)-1). The scalar factors of the elementary
141* reflectors which represent the unitary matrix P. TAUP is
142* tied to the distributed matrix A. See Further Details.
143*
144* WORK (local workspace/local output) COMPLEX array,
145* dimension (LWORK)
146* On exit, WORK(1) returns the minimal and optimal LWORK.
147*
148* LWORK (local or global input) INTEGER
149* The dimension of the array WORK.
150* LWORK is local input and must be at least
151* LWORK >= MAX( MpA0, NqA0 )
152*
153* where NB = MB_A = NB_A, IROFFA = MOD( IA-1, NB )
154* IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ),
155* IACOL = INDXG2P( JA, NB, MYCOL, CSRC_A, NPCOL ),
156* MpA0 = NUMROC( M+IROFFA, NB, MYROW, IAROW, NPROW ),
157* NqA0 = NUMROC( N+IROFFA, NB, MYCOL, IACOL, NPCOL ).
158*
159* INDXG2P and NUMROC are ScaLAPACK tool functions;
160* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
161* the subroutine BLACS_GRIDINFO.
162*
163* If LWORK = -1, then LWORK is global input and a workspace
164* query is assumed; the routine only calculates the minimum
165* and optimal size for all work arrays. Each of these
166* values is returned in the first entry of the corresponding
167* work array, and no error message is issued by PXERBLA.
168*
169* INFO (local output) INTEGER
170* = 0: successful exit
171* < 0: If the i-th argument is an array and the j-entry had
172* an illegal value, then INFO = -(i*100+j), if the i-th
173* argument is a scalar and had an illegal value, then
174* INFO = -i.
175*
176* Further Details
177* ===============
178*
179* The matrices Q and P are represented as products of elementary
180* reflectors:
181*
182* If m >= n,
183*
184* Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1)
185*
186* Each H(i) and G(i) has the form:
187*
188* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
189*
190* where tauq and taup are complex scalars, and v and u are complex
191* vectors;
192* v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in
193* A(ia+i:ia+m-1,ja+i-1);
194* u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in
195* A(ia+i-1,ja+i+1:ja+n-1);
196* tauq is stored in TAUQ(ja+i-1) and taup in TAUP(ia+i-1).
197*
198* If m < n,
199*
200* Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m)
201*
202* Each H(i) and G(i) has the form:
203*
204* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
205*
206* where tauq and taup are complex scalars, and v and u are complex
207* vectors;
208* v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in
209* A(ia+i+1:ia+m-1,ja+i-1);
210* u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in
211* A(ia+i-1,ja+i:ja+n-1);
212* tauq is stored in TAUQ(ja+i-1) and taup in TAUP(ia+i-1).
213*
214* The contents of sub( A ) on exit are illustrated by the following
215* examples:
216*
217* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
218*
219* ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 )
220* ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 )
221* ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 )
222* ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 )
223* ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 )
224* ( v1 v2 v3 v4 v5 )
225*
226* where d and e denote diagonal and off-diagonal elements of B, vi
227* denotes an element of the vector defining H(i), and ui an element of
228* the vector defining G(i).
229*
230* Alignment requirements
231* ======================
232*
233* The distributed submatrix sub( A ) must verify some alignment proper-
234* ties, namely the following expressions should be true:
235* ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA )
236*
237* =====================================================================
238*
239* .. Parameters ..
240 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
241 $ lld_, mb_, m_, nb_, n_, rsrc_
242 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
243 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
244 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
245 COMPLEX ONE, ZERO
246 parameter( one = ( 1.0e+0, 0.0e+0 ),
247 $ zero = ( 0.0e+0, 0.0e+0 ) )
248* ..
249* .. Local Scalars ..
250 LOGICAL LQUERY
251 INTEGER I, IACOL, IAROW, ICOFFA, ICTXT, II, IROFFA, J,
252 $ jj, k, lwmin, mpa0, mycol, myrow, npcol, nprow,
253 $ nqa0
254 COMPLEX ALPHA
255* ..
256* .. Local Arrays ..
257 INTEGER DESCD( DLEN_ ), DESCE( DLEN_ )
258* ..
259* .. External Subroutines ..
260 EXTERNAL blacs_abort, blacs_gridinfo, cgebr2d,
261 $ cgebs2d, chk1mat, clarfg, descset, infog2l,
263 $ pclarfg, pselset, pxerbla, sgebr2d,
264 $ sgebs2d
265* ..
266* .. External Functions ..
267 INTEGER INDXG2P, NUMROC
268 EXTERNAL indxg2p, numroc
269* ..
270* .. Intrinsic Functions ..
271 INTRINSIC cmplx, max, min, mod, real
272* ..
273* .. Executable Statements ..
274*
275* Test the input parameters
276*
277 ictxt = desca( ctxt_ )
278 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
279*
280* Test the input parameters
281*
282 info = 0
283 IF( nprow.EQ.-1 ) THEN
284 info = -(600+ctxt_)
285 ELSE
286 CALL chk1mat( m, 1, n, 2, ia, ja, desca, 6, info )
287 IF( info.EQ.0 ) THEN
288 iroffa = mod( ia-1, desca( mb_ ) )
289 icoffa = mod( ja-1, desca( nb_ ) )
290 iarow = indxg2p( ia, desca( mb_ ), myrow, desca( rsrc_ ),
291 $ nprow )
292 iacol = indxg2p( ja, desca( nb_ ), mycol, desca( csrc_ ),
293 $ npcol )
294 mpa0 = numroc( m+iroffa, desca( mb_ ), myrow, iarow, nprow )
295 nqa0 = numroc( n+icoffa, desca( nb_ ), mycol, iacol, npcol )
296 lwmin = max( mpa0, nqa0 )
297*
298 work( 1 ) = cmplx( real( lwmin ) )
299 lquery = ( lwork.EQ.-1 )
300 IF( iroffa.NE.icoffa ) THEN
301 info = -5
302 ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
303 info = -(600+nb_)
304 ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
305 info = -12
306 END IF
307 END IF
308 END IF
309*
310 IF( info.LT.0 ) THEN
311 CALL pxerbla( ictxt, 'PCGEBD2', -info )
312 CALL blacs_abort( ictxt, 1 )
313 RETURN
314 ELSE IF( lquery ) THEN
315 RETURN
316 END IF
317*
318 CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, ii, jj,
319 $ iarow, iacol )
320*
321 IF( m.EQ.1 .AND. n.EQ.1 ) THEN
322 IF( mycol.EQ.iacol ) THEN
323 IF( myrow.EQ.iarow ) THEN
324 i = ii+(jj-1)*desca( lld_ )
325 CALL clarfg( 1, a( i ), a( i ), 1, tauq( jj ) )
326 d( jj ) = real( a( i ) )
327 CALL sgebs2d( ictxt, 'Columnwise', ' ', 1, 1, d( jj ),
328 $ 1 )
329 CALL cgebs2d( ictxt, 'Columnwise', ' ', 1, 1, tauq( jj ),
330 $ 1 )
331 ELSE
332 CALL sgebr2d( ictxt, 'Columnwise', ' ', 1, 1, d( jj ),
333 $ 1, iarow, iacol )
334 CALL cgebr2d( ictxt, 'Columnwise', ' ', 1, 1, tauq( jj ),
335 $ 1, iarow, iacol )
336 END IF
337 END IF
338 IF( myrow.EQ.iarow )
339 $ taup( ii ) = zero
340 RETURN
341 END IF
342*
343 alpha = zero
344*
345 IF( m.GE.n ) THEN
346*
347* Reduce to upper bidiagonal form
348*
349 CALL descset( descd, 1, ja+min(m,n)-1, 1, desca( nb_ ), myrow,
350 $ desca( csrc_ ), desca( ctxt_ ), 1 )
351 CALL descset( desce, ia+min(m,n)-1, 1, desca( mb_ ), 1,
352 $ desca( rsrc_ ), mycol, desca( ctxt_ ),
353 $ desca( lld_ ) )
354 DO 10 k = 1, n
355 i = ia + k - 1
356 j = ja + k - 1
357*
358* Generate elementary reflector H(j) to annihilate
359* A(ia+i:ia+m-1,j)
360*
361 CALL pclarfg( m-k+1, alpha, i, j, a, min( i+1, m+ia-1 ),
362 $ j, desca, 1, tauq )
363 CALL pselset( d, 1, j, descd, real( alpha ) )
364 CALL pcelset( a, i, j, desca, one )
365*
366* Apply H(i) to A(i:ia+m-1,i+1:ja+n-1) from the left
367*
368 CALL pclarfc( 'Left', m-k+1, n-k, a, i, j, desca, 1, tauq,
369 $ a, i, j+1, desca, work )
370 CALL pcelset( a, i, j, desca, cmplx( real( alpha ) ) )
371*
372 IF( k.LT.n ) THEN
373*
374* Generate elementary reflector G(i) to annihilate
375* A(i,ja+j+1:ja+n-1)
376*
377 CALL pclacgv( n-k, a, i, j+1, desca, desca( m_ ) )
378 CALL pclarfg( n-k, alpha, i, j+1, a, i,
379 $ min( j+2, ja+n-1 ), desca, desca( m_ ),
380 $ taup )
381 CALL pselset( e, i, 1, desce, real( alpha ) )
382 CALL pcelset( a, i, j+1, desca, one )
383*
384* Apply G(i) to A(i+1:ia+m-1,i+1:ja+n-1) from the right
385*
386 CALL pclarf( 'Right', m-k, n-k, a, i, j+1, desca,
387 $ desca( m_ ), taup, a, i+1, j+1, desca,
388 $ work )
389 CALL pcelset( a, i, j+1, desca, cmplx( real( alpha ) ) )
390 CALL pclacgv( n-k, a, i, j+1, desca, desca( m_ ) )
391 ELSE
392 CALL pcelset( taup, i, 1, desce, zero )
393 END IF
394 10 CONTINUE
395*
396 ELSE
397*
398* Reduce to lower bidiagonal form
399*
400 CALL descset( descd, ia+min(m,n)-1, 1, desca( mb_ ), 1,
401 $ desca( rsrc_ ), mycol, desca( ctxt_ ),
402 $ desca( lld_ ) )
403 CALL descset( desce, 1, ja+min(m,n)-1, 1, desca( nb_ ), myrow,
404 $ desca( csrc_ ), desca( ctxt_ ), 1 )
405 DO 20 k = 1, m
406 i = ia + k - 1
407 j = ja + k - 1
408*
409* Generate elementary reflector G(i) to annihilate
410* A(i,ja+j:ja+n-1)
411*
412 CALL pclacgv( n-k+1, a, i, j, desca, desca( m_ ) )
413 CALL pclarfg( n-k+1, alpha, i, j, a, i,
414 $ min( j+1, ja+n-1 ), desca, desca( m_ ), taup )
415 CALL pselset( d, i, 1, descd, real( alpha ) )
416 CALL pcelset( a, i, j, desca, one )
417*
418* Apply G(i) to A(i:ia+m-1,j:ja+n-1) from the right
419*
420 CALL pclarf( 'Right', m-k, n-k+1, a, i, j, desca,
421 $ desca( m_ ), taup, a, min( i+1, ia+m-1 ), j,
422 $ desca, work )
423 CALL pcelset( a, i, j, desca, cmplx( real( alpha ) ) )
424 CALL pclacgv( n-k+1, a, i, j, desca, desca( m_ ) )
425*
426 IF( k.LT.m ) THEN
427*
428* Generate elementary reflector H(i) to annihilate
429* A(i+2:ia+m-1,j)
430*
431 CALL pclarfg( m-k, alpha, i+1, j, a,
432 $ min( i+2, ia+m-1 ), j, desca, 1, tauq )
433 CALL pselset( e, 1, j, desce, real( alpha ) )
434 CALL pcelset( a, i+1, j, desca, one )
435*
436* Apply H(i) to A(i+1:ia+m-1,j+1:ja+n-1) from the left
437*
438 CALL pclarfc( 'Left', m-k, n-k, a, i+1, j, desca, 1,
439 $ tauq, a, i+1, j+1, desca, work )
440 CALL pcelset( a, i+1, j, desca, cmplx( real( alpha ) ) )
441 ELSE
442 CALL pcelset( tauq, 1, j, desce, zero )
443 END IF
444 20 CONTINUE
445 END IF
446*
447 work( 1 ) = cmplx( real( lwmin ) )
448*
449 RETURN
450*
451* End of PCGEBD2
452*
453 END
float cmplx[2]
Definition pblas.h:136
subroutine chk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, info)
Definition chk1mat.f:3
subroutine descset(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld)
Definition descset.f:3
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
subroutine pcelset(a, ia, ja, desca, alpha)
Definition pcelset.f:2
subroutine pcgebd2(m, n, a, ia, ja, desca, d, e, tauq, taup, work, lwork, info)
Definition pcgebd2.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pclacgv(n, x, ix, jx, descx, incx)
Definition pclacgv.f:2
subroutine pclarf(side, m, n, v, iv, jv, descv, incv, tau, c, ic, jc, descc, work)
Definition pclarf.f:3
subroutine pclarfc(side, m, n, v, iv, jv, descv, incv, tau, c, ic, jc, descc, work)
Definition pclarfc.f:3
subroutine pclarfg(n, alpha, iax, jax, x, ix, jx, descx, incx, tau)
Definition pclarfg.f:3
subroutine pselset(a, ia, ja, desca, alpha)
Definition pselset.f:2
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2