ScaLAPACK 2.1  2.1 ScaLAPACK: Scalable Linear Algebra PACKage
pzggrqf.f
Go to the documentation of this file.
1  SUBROUTINE pzggrqf( M, P, N, A, IA, JA, DESCA, TAUA, B, IB, JB,
2  \$ DESCB, TAUB, WORK, LWORK, INFO )
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 1, 1997
8 *
9 * .. Scalar Arguments ..
10  INTEGER IA, IB, INFO, JA, JB, LWORK, M, N, P
11 * ..
12 * .. Array Arguments ..
13  INTEGER DESCA( * ), DESCB( * )
14  COMPLEX*16 A( * ), B( * ), TAUA( * ), TAUB( * ), WORK( * )
15 * ..
16 *
17 * Purpose
18 * =======
19 *
20 * PZGGRQF computes a generalized RQ factorization of
21 * an M-by-N matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1)
22 * and a P-by-N matrix sub( B ) = B(IB:IB+P-1,JB:JB+N-1):
23 *
24 * sub( A ) = R*Q, sub( B ) = Z*T*Q,
25 *
26 * where Q is an N-by-N unitary matrix, Z is a P-by-P unitary matrix,
27 * and R and T assume one of the forms:
28 *
29 * if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N,
30 * N-M M ( R21 ) N
31 * N
32 *
33 * where R12 or R21 is upper triangular, and
34 *
35 * if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P,
36 * ( 0 ) P-N P N-P
37 * N
38 *
39 * where T11 is upper triangular.
40 *
41 * In particular, if sub( B ) is square and nonsingular, the GRQ
42 * factorization of sub( A ) and sub( B ) implicitly gives the RQ
43 * factorization of sub( A )*inv( sub( B ) ):
44 *
45 * sub( A )*inv( sub( B ) ) = (R*inv(T))*Z'
46 *
47 * where inv( sub( B ) ) denotes the inverse of the matrix sub( B ),
48 * and Z' denotes the conjugate transpose of matrix Z.
49 *
50 * Notes
51 * =====
52 *
53 * Each global data object is described by an associated description
54 * vector. This vector stores the information required to establish
55 * the mapping between an object element and its corresponding process
56 * and memory location.
57 *
58 * Let A be a generic term for any 2D block cyclicly distributed array.
59 * Such a global array has an associated description vector DESCA.
60 * In the following comments, the character _ should be read as
61 * "of the global array".
62 *
63 * NOTATION STORED IN EXPLANATION
64 * --------------- -------------- --------------------------------------
65 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
66 * DTYPE_A = 1.
67 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
68 * the BLACS process grid A is distribu-
69 * ted over. The context itself is glo-
70 * bal, but the handle (the integer
71 * value) may vary.
72 * M_A (global) DESCA( M_ ) The number of rows in the global
73 * array A.
74 * N_A (global) DESCA( N_ ) The number of columns in the global
75 * array A.
76 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
77 * the rows of the array.
78 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
79 * the columns of the array.
80 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
81 * row of the array A is distributed.
82 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
83 * first column of the array A is
84 * distributed.
85 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
86 * array. LLD_A >= MAX(1,LOCr(M_A)).
87 *
88 * Let K be the number of rows or columns of a distributed matrix,
89 * and assume that its process grid has dimension p x q.
90 * LOCr( K ) denotes the number of elements of K that a process
91 * would receive if K were distributed over the p processes of its
92 * process column.
93 * Similarly, LOCc( K ) denotes the number of elements of K that a
94 * process would receive if K were distributed over the q processes of
95 * its process row.
96 * The values of LOCr() and LOCc() may be determined via a call to the
97 * ScaLAPACK tool function, NUMROC:
98 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
99 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
100 * An upper bound for these quantities may be computed by:
101 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
102 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
103 *
104 * Arguments
105 * =========
106 *
107 * M (global input) INTEGER
108 * The number of rows to be operated on i.e the number of
109 * rows of the distributed submatrix sub( A ). M >= 0.
110 *
111 * P (global input) INTEGER
112 * The number of rows to be operated on i.e the number of
113 * rows of the distributed submatrix sub( B ). P >= 0.
114 *
115 * N (global input) INTEGER
116 * The number of columns to be operated on i.e the number of
117 * columns of the distributed submatrices sub( A ) and sub( B ).
118 * N >= 0.
119 *
120 * A (local input/local output) COMPLEX*16 pointer into the
121 * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
122 * On entry, the local pieces of the M-by-N distributed matrix
123 * sub( A ) which is to be factored. On exit, if M <= N, the
124 * upper triangle of A( IA:IA+M-1, JA+N-M:JA+N-1 ) contains the
125 * M by M upper triangular matrix R; if M >= N, the elements on
126 * and above the (M-N)-th subdiagonal contain the M by N upper
127 * trapezoidal matrix R; the remaining elements, with the array
128 * TAUA, represent the unitary matrix Q as a product of
129 * elementary reflectors (see Further Details).
130 *
131 * IA (global input) INTEGER
132 * The row index in the global array A indicating the first
133 * row of sub( A ).
134 *
135 * JA (global input) INTEGER
136 * The column index in the global array A indicating the
137 * first column of sub( A ).
138 *
139 * DESCA (global and local input) INTEGER array of dimension DLEN_.
140 * The array descriptor for the distributed matrix A.
141 *
142 * TAUA (local output) COMPLEX*16, array, dimension LOCr(IA+M-1)
143 * This array contains the scalar factors of the elementary
144 * reflectors which represent the unitary matrix Q. TAUA is
145 * tied to the distributed matrix A (see Further Details).
146 *
147 * B (local input/local output) COMPLEX*16 pointer into the
148 * local memory to an array of dimension (LLD_B, LOCc(JB+N-1)).
149 * On entry, the local pieces of the P-by-N distributed matrix
150 * sub( B ) which is to be factored. On exit, the elements on
151 * and above the diagonal of sub( B ) contain the min(P,N) by N
152 * upper trapezoidal matrix T (T is upper triangular if P >= N);
153 * the elements below the diagonal, with the array TAUB,
154 * represent the unitary matrix Z as a product of elementary
155 * reflectors (see Further Details).
156 *
157 * IB (global input) INTEGER
158 * The row index in the global array B indicating the first
159 * row of sub( B ).
160 *
161 * JB (global input) INTEGER
162 * The column index in the global array B indicating the
163 * first column of sub( B ).
164 *
165 * DESCB (global and local input) INTEGER array of dimension DLEN_.
166 * The array descriptor for the distributed matrix B.
167 *
168 * TAUB (local output) COMPLEX*16, array, dimension
169 * LOCc(JB+MIN(P,N)-1). This array contains the scalar factors
170 * TAUB of the elementary reflectors which represent the unitary
171 * matrix Z. TAUB is tied to the distributed matrix B (see
172 * Further Details).
173 *
174 * WORK (local workspace/local output) COMPLEX*16 array,
175 * dimension (LWORK)
176 * On exit, WORK(1) returns the minimal and optimal LWORK.
177 *
178 * LWORK (local or global input) INTEGER
179 * The dimension of the array WORK.
180 * LWORK is local input and must be at least
181 * LWORK >= MAX( MB_A * ( MpA0 + NqA0 + MB_A ),
182 * MAX( (MB_A*(MB_A-1))/2, (PpB0 + NqB0)*MB_A ) +
183 * MB_A * MB_A,
184 * NB_B * ( PpB0 + NqB0 + NB_B ) ), where
185 *
186 * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ),
187 * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
188 * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
189 * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ),
190 * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ),
191 *
192 * IROFFB = MOD( IB-1, MB_B ), ICOFFB = MOD( JB-1, NB_B ),
193 * IBROW = INDXG2P( IB, MB_B, MYROW, RSRC_B, NPROW ),
194 * IBCOL = INDXG2P( JB, NB_B, MYCOL, CSRC_B, NPCOL ),
195 * PpB0 = NUMROC( P+IROFFB, MB_B, MYROW, IBROW, NPROW ),
196 * NqB0 = NUMROC( N+ICOFFB, NB_B, MYCOL, IBCOL, NPCOL ),
197 *
198 * and NUMROC, INDXG2P are ScaLAPACK tool functions;
199 * MYROW, MYCOL, NPROW and NPCOL can be determined by calling
200 * the subroutine BLACS_GRIDINFO.
201 *
202 * If LWORK = -1, then LWORK is global input and a workspace
203 * query is assumed; the routine only calculates the minimum
204 * and optimal size for all work arrays. Each of these
205 * values is returned in the first entry of the corresponding
206 * work array, and no error message is issued by PXERBLA.
207 *
208 * INFO (global output) INTEGER
209 * = 0: successful exit
210 * < 0: If the i-th argument is an array and the j-entry had
211 * an illegal value, then INFO = -(i*100+j), if the i-th
212 * argument is a scalar and had an illegal value, then
213 * INFO = -i.
214 *
215 * Further Details
216 * ===============
217 *
218 * The matrix Q is represented as a product of elementary reflectors
219 *
220 * Q = H(ia)' H(ia+1)' . . . H(ia+k-1)', where k = min(m,n).
221 *
222 * Each H(i) has the form
223 *
224 * H(i) = I - taua * v * v'
225 *
226 * where taua is a complex scalar, and v is a complex vector with
227 * v(n-k+i+1:n) = 0 and v(n-k+i) = 1; conjg(v(1:n-k+i-1)) is stored on
228 * exit in A(ia+m-k+i-1,ja:ja+n-k+i-2), and taua in TAUA(ia+m-k+i-1).
229 * To form Q explicitly, use ScaLAPACK subroutine PZUNGRQ.
230 * To use Q to update another matrix, use ScaLAPACK subroutine PZUNMRQ.
231 *
232 * The matrix Z is represented as a product of elementary reflectors
233 *
234 * Z = H(jb) H(jb+1) . . . H(jb+k-1), where k = min(p,n).
235 *
236 * Each H(i) has the form
237 *
238 * H(i) = I - taub * v * v'
239 *
240 * where taub is a complex scalar, and v is a complex vector with
241 * v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in
242 * B(ib+i:ib+p-1,jb+i-1), and taub in TAUB(jb+i-1).
243 * To form Z explicitly, use ScaLAPACK subroutine PZUNGQR.
244 * To use Z to update another matrix, use ScaLAPACK subroutine PZUNMQR.
245 *
246 * Alignment requirements
247 * ======================
248 *
249 * The distributed submatrices sub( A ) and sub( B ) must verify some
250 * alignment properties, namely the following expression should be true:
251 *
252 * ( NB_A.EQ.NB_B .AND. ICOFFA.EQ.ICOFFB .AND. IACOL.EQ.IBCOL )
253 *
254 * =====================================================================
255 *
256 * .. Parameters ..
257  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
258  \$ lld_, mb_, m_, nb_, n_, rsrc_
259  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
260  \$ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
261  \$ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
262 * .. Local Scalars ..
263  LOGICAL LQUERY
264  INTEGER IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB,
265  \$ ictxt, iroffa, iroffb, lwmin, mpa0, mycol,
266  \$ myrow, npcol, nprow, nqa0, nqb0, ppb0
267 * ..
268 * .. External Subroutines ..
269  EXTERNAL blacs_gridinfo, chk1mat, pchk2mat, pxerbla,
271 * ..
272 * .. Local Arrays ..
273  INTEGER IDUM1( 1 ), IDUM2( 1 )
274 * ..
275 * .. External Functions ..
276  INTEGER INDXG2P, NUMROC
277  EXTERNAL indxg2p, numroc
278 * ..
279 * .. Intrinsic Functions ..
280  INTRINSIC dble, dcmplx, int, max, min, mod
281 * ..
282 * .. Executable Statements ..
283 *
284 * Get grid parameters
285 *
286  ictxt = desca( ctxt_ )
287  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
288 *
289 * Test the input parameters
290 *
291  info = 0
292  IF( nprow.EQ.-1 ) THEN
293  info = -707
294  ELSE
295  CALL chk1mat( m, 1, n, 3, ia, ja, desca, 7, info )
296  CALL chk1mat( p, 2, n, 3, ib, jb, descb, 12, info )
297  IF( info.EQ.0 ) THEN
298  iroffa = mod( ia-1, desca( mb_ ) )
299  icoffa = mod( ja-1, desca( nb_ ) )
300  iroffb = mod( ib-1, descb( mb_ ) )
301  icoffb = mod( jb-1, descb( nb_ ) )
302  iarow = indxg2p( ia, desca( mb_ ), myrow, desca( rsrc_ ),
303  \$ nprow )
304  iacol = indxg2p( ja, desca( nb_ ), mycol, desca( csrc_ ),
305  \$ npcol )
306  ibrow = indxg2p( ib, descb( mb_ ), myrow, descb( rsrc_ ),
307  \$ nprow )
308  ibcol = indxg2p( jb, descb( nb_ ), mycol, descb( csrc_ ),
309  \$ npcol )
310  mpa0 = numroc( m+iroffa, desca( mb_ ), myrow, iarow, nprow )
311  nqa0 = numroc( n+icoffa, desca( nb_ ), mycol, iacol, npcol )
312  ppb0 = numroc( p+iroffb, descb( mb_ ), myrow, ibrow, nprow )
313  nqb0 = numroc( n+icoffb, descb( nb_ ), mycol, ibcol, npcol )
314  lwmin = max( desca( mb_ ) * ( mpa0 + nqa0 + desca( mb_ ) ),
315  \$ max( max( ( desca( mb_ )*( desca( mb_ ) - 1 ) ) / 2,
316  \$ ( ppb0 + nqb0 ) * desca( mb_ ) ) +
317  \$ desca( mb_ ) * desca( mb_ ),
318  \$ descb( nb_ ) * ( ppb0 + nqb0 + descb( nb_ ) ) ) )
319 *
320  work( 1 ) = dcmplx( dble( lwmin ) )
321  lquery = ( lwork.EQ.-1 )
322  IF( iacol.NE.ibcol .OR. icoffa.NE.icoffb ) THEN
323  info = -11
324  ELSE IF( desca( nb_ ).NE.descb( nb_ ) ) THEN
325  info = -1204
326  ELSE IF( ictxt.NE.descb( ctxt_ ) ) THEN
327  info = -1207
328  ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
329  info = -15
330  END IF
331  END IF
332  IF( lwork.EQ.-1 ) THEN
333  idum1( 1 ) = -1
334  ELSE
335  idum1( 1 ) = 1
336  END IF
337  idum2( 1 ) = 15
338  CALL pchk2mat( m, 1, n, 3, ia, ja, desca, 7, p, 2, n, 3, ib,
339  \$ jb, descb, 12, 1, idum1, idum2, info )
340  END IF
341 *
342  IF( info.NE.0 ) THEN
343  CALL pxerbla( ictxt, 'PZGGRQF', -info )
344  RETURN
345  ELSE IF( lquery ) THEN
346  RETURN
347  END IF
348 *
349 * RQ factorization of M-by-N matrix sub( A ): sub( A ) = R*Q
350 *
351  CALL pzgerqf( m, n, a, ia, ja, desca, taua, work, lwork, info )
352  lwmin = int( work( 1 ) )
353 *
354 * Update sub( B ) := sub( B )*Q'
355 *
356  CALL pzunmrq( 'Right', 'Conjugate Transpose', p, n, min( m, n ),
357  \$ a, max( ia, ia+m-n ), ja, desca, taua, b, ib, jb,
358  \$ descb, work, lwork, info )
359  lwmin = max( lwmin, int( work( 1 ) ) )
360 *
361 * QR factorization of P-by-N matrix sub( B ): sub( B ) = Z*T
362 *
363  CALL pzgeqrf( p, n, b, ib, jb, descb, taub, work, lwork, info )
364  work( 1 ) = dcmplx( dble( max( lwmin, int( work( 1 ) ) ) ) )
365 *
366  RETURN
367 *
368 * End of PZGGRQF
369 *
370  END
pzgeqrf
subroutine pzgeqrf(M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: pzgeqrf.f:3
max
#define max(A, B)
Definition: pcgemr.c:180
pchk2mat
subroutine pchk2mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, MB, MBPOS0, NB, NBPOS0, IB, JB, DESCB, DESCBPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:175
pzunmrq
subroutine pzunmrq(SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, C, IC, JC, DESCC, WORK, LWORK, INFO)
Definition: pzunmrq.f:3
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pzgerqf
subroutine pzgerqf(M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: pzgerqf.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
pzggrqf
subroutine pzggrqf(M, P, N, A, IA, JA, DESCA, TAUA, B, IB, JB, DESCB, TAUB, WORK, LWORK, INFO)
Definition: pzggrqf.f:3
min
#define min(A, B)
Definition: pcgemr.c:181