SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
psggrqf.f
Go to the documentation of this file.
1 SUBROUTINE psggrqf( M, P, N, A, IA, JA, DESCA, TAUA, B, IB, JB,
2 $ DESCB, TAUB, WORK, LWORK, INFO )
3*
4* -- ScaLAPACK routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* May 1, 1997
8*
9* .. Scalar Arguments ..
10 INTEGER IA, IB, INFO, JA, JB, LWORK, M, N, P
11* ..
12* .. Array Arguments ..
13 INTEGER DESCA( * ), DESCB( * )
14 REAL A( * ), B( * ), TAUA( * ), TAUB( * ), WORK( * )
15* ..
16*
17* Purpose
18* =======
19*
20* PSGGRQF computes a generalized RQ factorization of
21* an M-by-N matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1)
22* and a P-by-N matrix sub( B ) = B(IB:IB+P-1,JB:JB+N-1):
23*
24* sub( A ) = R*Q, sub( B ) = Z*T*Q,
25*
26* where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal
27* matrix, and R and T assume one of the forms:
28*
29* if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N,
30* N-M M ( R21 ) N
31* N
32*
33* where R12 or R21 is upper triangular, and
34*
35* if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P,
36* ( 0 ) P-N P N-P
37* N
38*
39* where T11 is upper triangular.
40*
41* In particular, if sub( B ) is square and nonsingular, the GRQ
42* factorization of sub( A ) and sub( B ) implicitly gives the RQ
43* factorization of sub( A )*inv( sub( B ) ):
44*
45* sub( A )*inv( sub( B ) ) = (R*inv(T))*Z'
46*
47* where inv( sub( B ) ) denotes the inverse of the matrix sub( B ),
48* and Z' denotes the transpose of matrix Z.
49*
50* Notes
51* =====
52*
53* Each global data object is described by an associated description
54* vector. This vector stores the information required to establish
55* the mapping between an object element and its corresponding process
56* and memory location.
57*
58* Let A be a generic term for any 2D block cyclicly distributed array.
59* Such a global array has an associated description vector DESCA.
60* In the following comments, the character _ should be read as
61* "of the global array".
62*
63* NOTATION STORED IN EXPLANATION
64* --------------- -------------- --------------------------------------
65* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
66* DTYPE_A = 1.
67* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
68* the BLACS process grid A is distribu-
69* ted over. The context itself is glo-
70* bal, but the handle (the integer
71* value) may vary.
72* M_A (global) DESCA( M_ ) The number of rows in the global
73* array A.
74* N_A (global) DESCA( N_ ) The number of columns in the global
75* array A.
76* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
77* the rows of the array.
78* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
79* the columns of the array.
80* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
81* row of the array A is distributed.
82* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
83* first column of the array A is
84* distributed.
85* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
86* array. LLD_A >= MAX(1,LOCr(M_A)).
87*
88* Let K be the number of rows or columns of a distributed matrix,
89* and assume that its process grid has dimension p x q.
90* LOCr( K ) denotes the number of elements of K that a process
91* would receive if K were distributed over the p processes of its
92* process column.
93* Similarly, LOCc( K ) denotes the number of elements of K that a
94* process would receive if K were distributed over the q processes of
95* its process row.
96* The values of LOCr() and LOCc() may be determined via a call to the
97* ScaLAPACK tool function, NUMROC:
98* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
99* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
100* An upper bound for these quantities may be computed by:
101* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
102* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
103*
104* Arguments
105* =========
106*
107* M (global input) INTEGER
108* The number of rows to be operated on i.e the number of
109* rows of the distributed submatrix sub( A ). M >= 0.
110*
111* P (global input) INTEGER
112* The number of rows to be operated on i.e the number of
113* rows of the distributed submatrix sub( B ). P >= 0.
114*
115* N (global input) INTEGER
116* The number of columns to be operated on i.e the number of
117* columns of the distributed submatrices sub( A ) and sub( B ).
118* N >= 0.
119*
120* A (local input/local output) REAL pointer into the
121* local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
122* On entry, the local pieces of the M-by-N distributed matrix
123* sub( A ) which is to be factored. On exit, if M <= N, the
124* upper triangle of A( IA:IA+M-1, JA+N-M:JA+N-1 ) contains the
125* M by M upper triangular matrix R; if M >= N, the elements on
126* and above the (M-N)-th subdiagonal contain the M by N upper
127* trapezoidal matrix R; the remaining elements, with the array
128* TAUA, represent the orthogonal matrix Q as a product of
129* elementary reflectors (see Further Details).
130*
131* IA (global input) INTEGER
132* The row index in the global array A indicating the first
133* row of sub( A ).
134*
135* JA (global input) INTEGER
136* The column index in the global array A indicating the
137* first column of sub( A ).
138*
139* DESCA (global and local input) INTEGER array of dimension DLEN_.
140* The array descriptor for the distributed matrix A.
141*
142* TAUA (local output) REAL, array, dimension LOCr(IA+M-1)
143* This array contains the scalar factors of the elementary
144* reflectors which represent the orthogonal unitary matrix Q.
145* TAUA is tied to the distributed matrix A (see Further
146* Details).
147*
148* B (local input/local output) REAL pointer into the
149* local memory to an array of dimension (LLD_B, LOCc(JB+N-1)).
150* On entry, the local pieces of the P-by-N distributed matrix
151* sub( B ) which is to be factored. On exit, the elements on
152* and above the diagonal of sub( B ) contain the min(P,N) by N
153* upper trapezoidal matrix T (T is upper triangular if P >= N);
154* the elements below the diagonal, with the array TAUB,
155* represent the orthogonal matrix Z as a product of elementary
156* reflectors (see Further Details).
157*
158* IB (global input) INTEGER
159* The row index in the global array B indicating the first
160* row of sub( B ).
161*
162* JB (global input) INTEGER
163* The column index in the global array B indicating the
164* first column of sub( B ).
165*
166* DESCB (global and local input) INTEGER array of dimension DLEN_.
167* The array descriptor for the distributed matrix B.
168*
169* TAUB (local output) REAL, array, dimension
170* LOCc(JB+MIN(P,N)-1). This array contains the scalar factors
171* TAUB of the elementary reflectors which represent the
172* orthogonal matrix Z. TAUB is tied to the distributed matrix
173* B (see Further Details).
174*
175* WORK (local workspace/local output) REAL array,
176* dimension (LWORK)
177* On exit, WORK(1) returns the minimal and optimal LWORK.
178*
179* LWORK (local or global input) INTEGER
180* The dimension of the array WORK.
181* LWORK is local input and must be at least
182* LWORK >= MAX( MB_A * ( MpA0 + NqA0 + MB_A ),
183* MAX( (MB_A*(MB_A-1))/2, (PpB0 + NqB0)*MB_A ) +
184* MB_A * MB_A,
185* NB_B * ( PpB0 + NqB0 + NB_B ) ), where
186*
187* IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ),
188* IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
189* IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
190* MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ),
191* NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ),
192*
193* IROFFB = MOD( IB-1, MB_B ), ICOFFB = MOD( JB-1, NB_B ),
194* IBROW = INDXG2P( IB, MB_B, MYROW, RSRC_B, NPROW ),
195* IBCOL = INDXG2P( JB, NB_B, MYCOL, CSRC_B, NPCOL ),
196* PpB0 = NUMROC( P+IROFFB, MB_B, MYROW, IBROW, NPROW ),
197* NqB0 = NUMROC( N+ICOFFB, NB_B, MYCOL, IBCOL, NPCOL ),
198*
199* and NUMROC, INDXG2P are ScaLAPACK tool functions;
200* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
201* the subroutine BLACS_GRIDINFO.
202*
203* If LWORK = -1, then LWORK is global input and a workspace
204* query is assumed; the routine only calculates the minimum
205* and optimal size for all work arrays. Each of these
206* values is returned in the first entry of the corresponding
207* work array, and no error message is issued by PXERBLA.
208*
209* INFO (global output) INTEGER
210* = 0: successful exit
211* < 0: If the i-th argument is an array and the j-entry had
212* an illegal value, then INFO = -(i*100+j), if the i-th
213* argument is a scalar and had an illegal value, then
214* INFO = -i.
215*
216* Further Details
217* ===============
218*
219* The matrix Q is represented as a product of elementary reflectors
220*
221* Q = H(ia) H(ia+1) . . . H(ia+k-1), where k = min(m,n).
222*
223* Each H(i) has the form
224*
225* H(i) = I - taua * v * v'
226*
227* where taua is a real scalar, and v is a real vector with
228* v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in
229* A(ia+m-k+i-1,ja:ja+n-k+i-2), and taua in TAUA(ia+m-k+i-1).
230* To form Q explicitly, use ScaLAPACK subroutine PSORGRQ.
231* To use Q to update another matrix, use ScaLAPACK subroutine PSORMRQ.
232*
233* The matrix Z is represented as a product of elementary reflectors
234*
235* Z = H(jb) H(jb+1) . . . H(jb+k-1), where k = min(p,n).
236*
237* Each H(i) has the form
238*
239* H(i) = I - taub * v * v'
240*
241* where taub is a real scalar, and v is a real vector with
242* v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in
243* B(ib+i:ib+p-1,jb+i-1), and taub in TAUB(jb+i-1).
244* To form Z explicitly, use ScaLAPACK subroutine PSORGQR.
245* To use Z to update another matrix, use ScaLAPACK subroutine PSORMQR.
246*
247* Alignment requirements
248* ======================
249*
250* The distributed submatrices sub( A ) and sub( B ) must verify some
251* alignment properties, namely the following expression should be true:
252*
253* ( NB_A.EQ.NB_B .AND. ICOFFA.EQ.ICOFFB .AND. IACOL.EQ.IBCOL )
254*
255* =====================================================================
256*
257* .. Parameters ..
258 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
259 $ lld_, mb_, m_, nb_, n_, rsrc_
260 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
261 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
262 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
263* .. Local Scalars ..
264 LOGICAL LQUERY
265 INTEGER IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB,
266 $ ictxt, iroffa, iroffb, lwmin, mpa0, mycol,
267 $ myrow, npcol, nprow, nqa0, nqb0, ppb0
268* ..
269* .. External Subroutines ..
270 EXTERNAL blacs_gridinfo, chk1mat, pchk2mat, psgeqrf,
272* ..
273* .. Local Arrays ..
274 INTEGER IDUM1( 1 ), IDUM2( 1 )
275* ..
276* .. External Functions ..
277 INTEGER INDXG2P, NUMROC
278 EXTERNAL indxg2p, numroc
279* ..
280* .. Intrinsic Functions ..
281 INTRINSIC int, max, min, mod, real
282* ..
283* .. Executable Statements ..
284*
285* Get grid parameters
286*
287 ictxt = desca( ctxt_ )
288 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
289*
290* Test the input parameters
291*
292 info = 0
293 IF( nprow.EQ.-1 ) THEN
294 info = -707
295 ELSE
296 CALL chk1mat( m, 1, n, 3, ia, ja, desca, 7, info )
297 CALL chk1mat( p, 2, n, 3, ib, jb, descb, 12, info )
298 IF( info.EQ.0 ) THEN
299 iroffa = mod( ia-1, desca( mb_ ) )
300 icoffa = mod( ja-1, desca( nb_ ) )
301 iroffb = mod( ib-1, descb( mb_ ) )
302 icoffb = mod( jb-1, descb( nb_ ) )
303 iarow = indxg2p( ia, desca( mb_ ), myrow, desca( rsrc_ ),
304 $ nprow )
305 iacol = indxg2p( ja, desca( nb_ ), mycol, desca( csrc_ ),
306 $ npcol )
307 ibrow = indxg2p( ib, descb( mb_ ), myrow, descb( rsrc_ ),
308 $ nprow )
309 ibcol = indxg2p( jb, descb( nb_ ), mycol, descb( csrc_ ),
310 $ npcol )
311 mpa0 = numroc( m+iroffa, desca( mb_ ), myrow, iarow, nprow )
312 nqa0 = numroc( n+icoffa, desca( nb_ ), mycol, iacol, npcol )
313 ppb0 = numroc( p+iroffb, descb( mb_ ), myrow, ibrow, nprow )
314 nqb0 = numroc( n+icoffb, descb( nb_ ), mycol, ibcol, npcol )
315 lwmin = max( desca( mb_ ) * ( mpa0 + nqa0 + desca( mb_ ) ),
316 $ max( max( ( desca( mb_ )*( desca( mb_ ) - 1 ) ) / 2,
317 $ ( ppb0 + nqb0 ) * desca( mb_ ) ) +
318 $ desca( mb_ ) * desca( mb_ ),
319 $ descb( nb_ ) * ( ppb0 + nqb0 + descb( nb_ ) ) ) )
320*
321 work( 1 ) = real( lwmin )
322 lquery = ( lwork.EQ.-1 )
323 IF( iacol.NE.ibcol .OR. icoffa.NE.icoffb ) THEN
324 info = -11
325 ELSE IF( desca( nb_ ).NE.descb( nb_ ) ) THEN
326 info = -1204
327 ELSE IF( ictxt.NE.descb( ctxt_ ) ) THEN
328 info = -1207
329 ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
330 info = -15
331 END IF
332 END IF
333 IF( lwork.EQ.-1 ) THEN
334 idum1( 1 ) = -1
335 ELSE
336 idum1( 1 ) = 1
337 END IF
338 idum2( 1 ) = 15
339 CALL pchk2mat( m, 1, n, 3, ia, ja, desca, 7, p, 2, n, 3, ib,
340 $ jb, descb, 12, 1, idum1, idum2, info )
341 END IF
342*
343 IF( info.NE.0 ) THEN
344 CALL pxerbla( ictxt, 'PSGGRQF', -info )
345 RETURN
346 ELSE IF( lquery ) THEN
347 RETURN
348 END IF
349*
350* RQ factorization of M-by-N matrix sub( A ): sub( A ) = R*Q
351*
352 CALL psgerqf( m, n, a, ia, ja, desca, taua, work, lwork, info )
353 lwmin = int( work( 1 ) )
354*
355* Update sub( B ) := sub( B )*Q'
356*
357 CALL psormrq( 'Right', 'Transpose', p, n, min( m, n ), a,
358 $ max( ia, ia+m-n ), ja, desca, taua, b, ib, jb,
359 $ descb, work, lwork, info )
360 lwmin = max( lwmin, int( work( 1 ) ) )
361*
362* QR factorization of P-by-N matrix sub( B ): sub( B ) = Z*T
363*
364 CALL psgeqrf( p, n, b, ib, jb, descb, taub, work, lwork, info )
365 work( 1 ) = real( max( lwmin, int( work( 1 ) ) ) )
366*
367 RETURN
368*
369* End of PSGGRQF
370*
371 END
subroutine chk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, info)
Definition chk1mat.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pchk2mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, mb, mbpos0, nb, nbpos0, ib, jb, descb, descbpos0, nextra, ex, expos, info)
Definition pchkxmat.f:175
subroutine psgeqrf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition psgeqrf.f:3
subroutine psgerqf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition psgerqf.f:3
subroutine psggrqf(m, p, n, a, ia, ja, desca, taua, b, ib, jb, descb, taub, work, lwork, info)
Definition psggrqf.f:3
subroutine psormrq(side, trans, m, n, k, a, ia, ja, desca, tau, c, ic, jc, descc, work, lwork, info)
Definition psormrq.f:3
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2