ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pzlarzc.f
Go to the documentation of this file.
1  SUBROUTINE pzlarzc( SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C,
2  $ IC, JC, DESCC, WORK )
3 *
4 * -- ScaLAPACK auxiliary routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 25, 2001
8 *
9 * .. Scalar Arguments ..
10  CHARACTER SIDE
11  INTEGER IC, INCV, IV, JC, JV, L, M, N
12 * ..
13 * .. Array Arguments ..
14  INTEGER DESCC( * ), DESCV( * )
15  COMPLEX*16 C( * ), TAU( * ), V( * ), WORK( * )
16 * ..
17 *
18 * Purpose
19 * =======
20 *
21 * PZLARZC applies a complex elementary reflector Q**H to a
22 * complex M-by-N distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1),
23 * from either the left or the right. Q is represented in the form
24 *
25 * Q = I - tau * v * v'
26 *
27 * where tau is a complex scalar and v is a complex vector.
28 *
29 * If tau = 0, then Q is taken to be the unit matrix.
30 *
31 * Q is a product of k elementary reflectors as returned by PZTZRZF.
32 *
33 * Notes
34 * =====
35 *
36 * Each global data object is described by an associated description
37 * vector. This vector stores the information required to establish
38 * the mapping between an object element and its corresponding process
39 * and memory location.
40 *
41 * Let A be a generic term for any 2D block cyclicly distributed array.
42 * Such a global array has an associated description vector DESCA.
43 * In the following comments, the character _ should be read as
44 * "of the global array".
45 *
46 * NOTATION STORED IN EXPLANATION
47 * --------------- -------------- --------------------------------------
48 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
49 * DTYPE_A = 1.
50 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
51 * the BLACS process grid A is distribu-
52 * ted over. The context itself is glo-
53 * bal, but the handle (the integer
54 * value) may vary.
55 * M_A (global) DESCA( M_ ) The number of rows in the global
56 * array A.
57 * N_A (global) DESCA( N_ ) The number of columns in the global
58 * array A.
59 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
60 * the rows of the array.
61 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
62 * the columns of the array.
63 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
64 * row of the array A is distributed.
65 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
66 * first column of the array A is
67 * distributed.
68 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
69 * array. LLD_A >= MAX(1,LOCr(M_A)).
70 *
71 * Let K be the number of rows or columns of a distributed matrix,
72 * and assume that its process grid has dimension p x q.
73 * LOCr( K ) denotes the number of elements of K that a process
74 * would receive if K were distributed over the p processes of its
75 * process column.
76 * Similarly, LOCc( K ) denotes the number of elements of K that a
77 * process would receive if K were distributed over the q processes of
78 * its process row.
79 * The values of LOCr() and LOCc() may be determined via a call to the
80 * ScaLAPACK tool function, NUMROC:
81 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
82 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
83 * An upper bound for these quantities may be computed by:
84 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
85 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
86 *
87 * Because vectors may be viewed as a subclass of matrices, a
88 * distributed vector is considered to be a distributed matrix.
89 *
90 * Restrictions
91 * ============
92 *
93 * If SIDE = 'Left' and INCV = 1, then the row process having the first
94 * entry V(IV,JV) must also own C(IC+M-L,JC:JC+N-1). Moreover,
95 * MOD(IV-1,MB_V) must be equal to MOD(IC+N-L-1,MB_C), if INCV=M_V, only
96 * the last equality must be satisfied.
97 *
98 * If SIDE = 'Right' and INCV = M_V then the column process having the
99 * first entry V(IV,JV) must also own C(IC:IC+M-1,JC+N-L) and
100 * MOD(JV-1,NB_V) must be equal to MOD(JC+N-L-1,NB_C), if INCV = 1 only
101 * the last equality must be satisfied.
102 *
103 * Arguments
104 * =========
105 *
106 * SIDE (global input) CHARACTER
107 * = 'L': form Q**H * sub( C ),
108 * = 'R': form sub( C ) * Q**H.
109 *
110 * M (global input) INTEGER
111 * The number of rows to be operated on i.e the number of rows
112 * of the distributed submatrix sub( C ). M >= 0.
113 *
114 * N (global input) INTEGER
115 * The number of columns to be operated on i.e the number of
116 * columns of the distributed submatrix sub( C ). N >= 0.
117 *
118 * L (global input) INTEGER
119 * The columns of the distributed submatrix sub( A ) containing
120 * the meaningful part of the Householder reflectors.
121 * If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
122 *
123 * V (local input) COMPLEX*16 pointer into the local memory
124 * to an array of dimension (LLD_V,*) containing the local
125 * pieces of the distributed vectors V representing the
126 * Householder transformation Q,
127 * V(IV:IV+L-1,JV) if SIDE = 'L' and INCV = 1,
128 * V(IV,JV:JV+L-1) if SIDE = 'L' and INCV = M_V,
129 * V(IV:IV+L-1,JV) if SIDE = 'R' and INCV = 1,
130 * V(IV,JV:JV+L-1) if SIDE = 'R' and INCV = M_V,
131 *
132 * The vector v in the representation of Q. V is not used if
133 * TAU = 0.
134 *
135 * IV (global input) INTEGER
136 * The row index in the global array V indicating the first
137 * row of sub( V ).
138 *
139 * JV (global input) INTEGER
140 * The column index in the global array V indicating the
141 * first column of sub( V ).
142 *
143 * DESCV (global and local input) INTEGER array of dimension DLEN_.
144 * The array descriptor for the distributed matrix V.
145 *
146 * INCV (global input) INTEGER
147 * The global increment for the elements of V. Only two values
148 * of INCV are supported in this version, namely 1 and M_V.
149 * INCV must not be zero.
150 *
151 * TAU (local input) COMPLEX*16, array, dimension LOCc(JV) if
152 * INCV = 1, and LOCr(IV) otherwise. This array contains the
153 * Householder scalars related to the Householder vectors.
154 * TAU is tied to the distributed matrix V.
155 *
156 * C (local input/local output) COMPLEX*16 pointer into the
157 * local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ),
158 * containing the local pieces of sub( C ). On exit, sub( C )
159 * is overwritten by the Q**H * sub( C ) if SIDE = 'L', or
160 * sub( C ) * Q**H if SIDE = 'R'.
161 *
162 * IC (global input) INTEGER
163 * The row index in the global array C indicating the first
164 * row of sub( C ).
165 *
166 * JC (global input) INTEGER
167 * The column index in the global array C indicating the
168 * first column of sub( C ).
169 *
170 * DESCC (global and local input) INTEGER array of dimension DLEN_.
171 * The array descriptor for the distributed matrix C.
172 *
173 * WORK (local workspace) COMPLEX*16 array, dimension (LWORK)
174 * If INCV = 1,
175 * if SIDE = 'L',
176 * if IVCOL = ICCOL,
177 * LWORK >= NqC0
178 * else
179 * LWORK >= MpC0 + MAX( 1, NqC0 )
180 * end if
181 * else if SIDE = 'R',
182 * LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC(
183 * N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) )
184 * end if
185 * else if INCV = M_V,
186 * if SIDE = 'L',
187 * LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC(
188 * M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) )
189 * else if SIDE = 'R',
190 * if IVROW = ICROW,
191 * LWORK >= MpC0
192 * else
193 * LWORK >= NqC0 + MAX( 1, MpC0 )
194 * end if
195 * end if
196 * end if
197 *
198 * where LCM is the least common multiple of NPROW and NPCOL and
199 * LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW,
200 * LCMQ = LCM / NPCOL,
201 *
202 * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
203 * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
204 * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
205 * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
206 * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
207 *
208 * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
209 * MYROW, MYCOL, NPROW and NPCOL can be determined by calling
210 * the subroutine BLACS_GRIDINFO.
211 *
212 * Alignment requirements
213 * ======================
214 *
215 * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
216 * must verify some alignment properties, namely the following
217 * expressions should be true:
218 *
219 * MB_V = NB_V,
220 *
221 * If INCV = 1,
222 * If SIDE = 'Left',
223 * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
224 * If SIDE = 'Right',
225 * ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
226 * else if INCV = M_V,
227 * If SIDE = 'Left',
228 * ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
229 * If SIDE = 'Right',
230 * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
231 * end if
232 *
233 * =====================================================================
234 *
235 * .. Parameters ..
236  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
237  $ lld_, mb_, m_, nb_, n_, rsrc_
238  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
239  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
240  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
241  COMPLEX*16 ONE, ZERO
242  parameter( one = ( 1.0d+0, 0.0d+0 ),
243  $ zero = ( 0.0d+0, 0.0d+0 ) )
244 * ..
245 * .. Local Scalars ..
246  LOGICAL CCBLCK, CRBLCK, LEFT
247  CHARACTER COLBTOP, ROWBTOP
248  INTEGER ICCOL1, ICCOL2, ICOFFC1, ICOFFC2, ICOFFV,
249  $ icrow1, icrow2, ictxt, iic1, iic2, iiv, ioffc1,
250  $ ioffc2, ioffv, ipw, iroffc1, iroffc2, iroffv,
251  $ ivcol, ivrow, jjc1, jjc2, jjv, ldc, ldv, mpc2,
252  $ mpv, mycol, myrow, ncc, ncv, npcol, nprow,
253  $ nqc2, nqv, rdest
254  COMPLEX*16 TAULOC
255 * ..
256 * .. External Subroutines ..
257  EXTERNAL blacs_gridinfo, infog2l, pb_topget, pbztrnv,
258  $ zaxpy, zcopy, zgebr2d, zgebs2d,
259  $ zgemv, zgerc, zgerv2d, zgesd2d,
260  $ zgsum2d, zlaset
261 * ..
262 * .. External Functions ..
263  LOGICAL LSAME
264  INTEGER NUMROC
265  EXTERNAL lsame, numroc
266 * ..
267 * .. Intrinsic Functions ..
268  INTRINSIC min, mod
269 * ..
270 * .. Executable Statements ..
271 *
272 * Quick return if possible
273 *
274  IF( m.LE.0 .OR. n.LE.0 )
275  $ RETURN
276 *
277 * Get grid parameters.
278 *
279  ictxt = descc( ctxt_ )
280  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
281 *
282 * Figure local indexes
283 *
284  left = lsame( side, 'L' )
285  CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol, iiv, jjv,
286  $ ivrow, ivcol )
287  iroffv = mod( iv-1, descv( nb_ ) )
288  mpv = numroc( l+iroffv, descv( mb_ ), myrow, ivrow, nprow )
289  IF( myrow.EQ.ivrow )
290  $ mpv = mpv - iroffv
291  icoffv = mod( jv-1, descv( nb_ ) )
292  nqv = numroc( l+icoffv, descv( nb_ ), mycol, ivcol, npcol )
293  IF( mycol.EQ.ivcol )
294  $ nqv = nqv - icoffv
295  ldv = descv( lld_ )
296  ncv = numroc( descv( n_ ), descv( nb_ ), mycol, descv( csrc_ ),
297  $ npcol )
298  ldv = descv( lld_ )
299  iiv = min( iiv, ldv )
300  jjv = min( jjv, ncv )
301  ioffv = iiv+(jjv-1)*ldv
302  ncc = numroc( descc( n_ ), descc( nb_ ), mycol, descc( csrc_ ),
303  $ npcol )
304  CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol,
305  $ iic1, jjc1, icrow1, iccol1 )
306  iroffc1 = mod( ic-1, descc( mb_ ) )
307  icoffc1 = mod( jc-1, descc( nb_ ) )
308  ldc = descc( lld_ )
309  iic1 = min( iic1, ldc )
310  jjc1 = min( jjc1, max( 1, ncc ) )
311  ioffc1 = iic1 + ( jjc1-1 ) * ldc
312 *
313  IF( left ) THEN
314  CALL infog2l( ic+m-l, jc, descc, nprow, npcol, myrow, mycol,
315  $ iic2, jjc2, icrow2, iccol2 )
316  iroffc2 = mod( ic+m-l-1, descc( mb_ ) )
317  icoffc2 = mod( jc-1, descc( nb_ ) )
318  nqc2 = numroc( n+icoffc2, descc( nb_ ), mycol, iccol2, npcol )
319  IF( mycol.EQ.iccol2 )
320  $ nqc2 = nqc2 - icoffc2
321  ELSE
322  CALL infog2l( ic, jc+n-l, descc, nprow, npcol, myrow, mycol,
323  $ iic2, jjc2, icrow2, iccol2 )
324  iroffc2 = mod( ic-1, descc( mb_ ) )
325  mpc2 = numroc( m+iroffc2, descc( mb_ ), myrow, icrow2, nprow )
326  IF( myrow.EQ.icrow2 )
327  $ mpc2 = mpc2 - iroffc2
328  icoffc2 = mod( jc+n-l-1, descc( nb_ ) )
329  END IF
330  iic2 = min( iic2, ldc )
331  jjc2 = min( jjc2, ncc )
332  ioffc2 = iic2 + ( jjc2-1 ) * ldc
333 *
334 * Is sub( C ) only distributed over a process row ?
335 *
336  crblck = ( m.LE.(descc( mb_ )-iroffc1) )
337 *
338 * Is sub( C ) only distributed over a process column ?
339 *
340  ccblck = ( n.LE.(descc( nb_ )-icoffc1) )
341 *
342  IF( left ) THEN
343 *
344  IF( crblck ) THEN
345  rdest = icrow2
346  ELSE
347  rdest = -1
348  END IF
349 *
350  IF( ccblck ) THEN
351 *
352 * sub( C ) is distributed over a process column
353 *
354  IF( descv( m_ ).EQ.incv ) THEN
355 *
356 * Transpose row vector V (ICOFFV = IROFFC2)
357 *
358  ipw = mpv+1
359  CALL pbztrnv( ictxt, 'Rowwise', 'Transpose', m,
360  $ descv( nb_ ), iroffc2, v( ioffv ), ldv,
361  $ zero,
362  $ work, 1, ivrow, ivcol, icrow2, iccol2,
363  $ work( ipw ) )
364 *
365 * Perform the local computation within a process column
366 *
367  IF( mycol.EQ.iccol2 ) THEN
368 *
369  IF( myrow.EQ.ivrow ) THEN
370 *
371  CALL zgebs2d( ictxt, 'Columnwise', ' ', 1, 1,
372  $ tau( iiv ), 1 )
373  tauloc = dconjg( tau( iiv ) )
374 *
375  ELSE
376 *
377  CALL zgebr2d( ictxt, 'Columnwise', ' ', 1, 1,
378  $ tauloc, 1, ivrow, mycol )
379  tauloc = dconjg( tauloc )
380 *
381  END IF
382 *
383  IF( tauloc.NE.zero ) THEN
384 *
385 * w := sub( C )' * v
386 *
387  IF( mpv.GT.0 ) THEN
388  CALL zgemv( 'Conjugate transpose', mpv, nqc2,
389  $ one, c( ioffc2 ), ldc, work, 1,
390  $ zero, work( ipw ), 1 )
391  ELSE
392  CALL zlaset( 'All', nqc2, 1, zero, zero,
393  $ work( ipw ), max( 1, nqc2 ) )
394  END IF
395  IF( myrow.EQ.icrow1 )
396  $ CALL zaxpy( nqc2, one, c( ioffc1 ), ldc,
397  $ work( ipw ), max( 1, nqc2 ) )
398 *
399  CALL zgsum2d( ictxt, 'Columnwise', ' ', nqc2, 1,
400  $ work( ipw ), max( 1, nqc2 ), rdest,
401  $ mycol )
402 *
403 * sub( C ) := sub( C ) - v * w'
404 *
405  IF( myrow.EQ.icrow1 )
406  $ CALL zaxpy( nqc2, -tauloc, work( ipw ),
407  $ max( 1, nqc2 ), c( ioffc1 ), ldc )
408  CALL zgerc( mpv, nqc2, -tauloc, work, 1,
409  $ work( ipw ), 1, c( ioffc2 ), ldc )
410  END IF
411 *
412  END IF
413 *
414  ELSE
415 *
416 * V is a column vector
417 *
418  IF( ivcol.EQ.iccol2 ) THEN
419 *
420 * Perform the local computation within a process column
421 *
422  IF( mycol.EQ.iccol2 ) THEN
423 *
424  tauloc = dconjg( tau( jjv ) )
425 *
426  IF( tauloc.NE.zero ) THEN
427 *
428 * w := sub( C )' * v
429 *
430  IF( mpv.GT.0 ) THEN
431  CALL zgemv( 'Conjugate transpose', mpv, nqc2,
432  $ one, c( ioffc2 ), ldc, v( ioffv ),
433  $ 1, zero, work, 1 )
434  ELSE
435  CALL zlaset( 'All', nqc2, 1, zero, zero,
436  $ work, max( 1, nqc2 ) )
437  END IF
438  IF( myrow.EQ.icrow1 )
439  $ CALL zaxpy( nqc2, one, c( ioffc1 ), ldc,
440  $ work, max( 1, nqc2 ) )
441 *
442  CALL zgsum2d( ictxt, 'Columnwise', ' ', nqc2, 1,
443  $ work, max( 1, nqc2 ), rdest,
444  $ mycol )
445 *
446 * sub( C ) := sub( C ) - v * w'
447 *
448  IF( myrow.EQ.icrow1 )
449  $ CALL zaxpy( nqc2, -tauloc, work,
450  $ max( 1, nqc2 ), c( ioffc1 ),
451  $ ldc )
452  CALL zgerc( mpv, nqc2, -tauloc, v( ioffv ), 1,
453  $ work, 1, c( ioffc2 ), ldc )
454  END IF
455 *
456  END IF
457 *
458  ELSE
459 *
460 * Send V and TAU to the process column ICCOL2
461 *
462  IF( mycol.EQ.ivcol ) THEN
463 *
464  ipw = mpv+1
465  CALL zcopy( mpv, v( ioffv ), 1, work, 1 )
466  work( ipw ) = tau( jjv )
467  CALL zgesd2d( ictxt, ipw, 1, work, ipw, myrow,
468  $ iccol2 )
469 *
470  ELSE IF( mycol.EQ.iccol2 ) THEN
471 *
472  ipw = mpv+1
473  CALL zgerv2d( ictxt, ipw, 1, work, ipw, myrow,
474  $ ivcol )
475  tauloc = dconjg( work( ipw ) )
476 *
477  IF( tauloc.NE.zero ) THEN
478 *
479 * w := sub( C )' * v
480 *
481  IF( mpv.GT.0 ) THEN
482  CALL zgemv( 'Conjugate transpose', mpv, nqc2,
483  $ one, c( ioffc2 ), ldc, work, 1,
484  $ zero, work( ipw ), 1 )
485  ELSE
486  CALL zlaset( 'All', nqc2, 1, zero, zero,
487  $ work( ipw ), max( 1, nqc2 ) )
488  END IF
489  IF( myrow.EQ.icrow1 )
490  $ CALL zaxpy( nqc2, one, c( ioffc1 ), ldc,
491  $ work( ipw ), max( 1, nqc2 ) )
492 *
493  CALL zgsum2d( ictxt, 'Columnwise', ' ', nqc2, 1,
494  $ work( ipw ), max( 1, nqc2 ),
495  $ rdest, mycol )
496 *
497 * sub( C ) := sub( C ) - v * w'
498 *
499  IF( myrow.EQ.icrow1 )
500  $ CALL zaxpy( nqc2, -tauloc, work( ipw ),
501  $ max( 1, nqc2 ), c( ioffc1 ),
502  $ ldc )
503  CALL zgerc( mpv, nqc2, -tauloc, work, 1,
504  $ work( ipw ), 1, c( ioffc2 ), ldc )
505  END IF
506 *
507  END IF
508 *
509  END IF
510 *
511  END IF
512 *
513  ELSE
514 *
515 * sub( C ) is a proper distributed matrix
516 *
517  IF( descv( m_ ).EQ.incv ) THEN
518 *
519 * Transpose and broadcast row vector V (ICOFFV=IROFFC2)
520 *
521  ipw = mpv+1
522  CALL pbztrnv( ictxt, 'Rowwise', 'Transpose', m,
523  $ descv( nb_ ), iroffc2, v( ioffv ), ldv,
524  $ zero,
525  $ work, 1, ivrow, ivcol, icrow2, -1,
526  $ work( ipw ) )
527 *
528 * Perform the local computation within a process column
529 *
530  IF( myrow.EQ.ivrow ) THEN
531 *
532  CALL zgebs2d( ictxt, 'Columnwise', ' ', 1, 1,
533  $ tau( iiv ), 1 )
534  tauloc = dconjg( tau( iiv ) )
535 *
536  ELSE
537 *
538  CALL zgebr2d( ictxt, 'Columnwise', ' ', 1, 1, tauloc,
539  $ 1, ivrow, mycol )
540  tauloc = dconjg( tauloc )
541 *
542  END IF
543 *
544  IF( tauloc.NE.zero ) THEN
545 *
546 * w := sub( C )' * v
547 *
548  IF( mpv.GT.0 ) THEN
549  CALL zgemv( 'Conjugate transpose', mpv, nqc2, one,
550  $ c( ioffc2 ), ldc, work, 1, zero,
551  $ work( ipw ), 1 )
552  ELSE
553  CALL zlaset( 'All', nqc2, 1, zero, zero,
554  $ work( ipw ), max( 1, nqc2 ) )
555  END IF
556  IF( myrow.EQ.icrow1 )
557  $ CALL zaxpy( nqc2, one, c( ioffc1 ), ldc,
558  $ work( ipw ), max( 1, nqc2 ) )
559 *
560  CALL zgsum2d( ictxt, 'Columnwise', ' ', nqc2, 1,
561  $ work( ipw ), max( 1, nqc2 ), rdest,
562  $ mycol )
563 *
564 * sub( C ) := sub( C ) - v * w'
565 *
566  IF( myrow.EQ.icrow1 )
567  $ CALL zaxpy( nqc2, -tauloc, work( ipw ),
568  $ max( 1, nqc2 ), c( ioffc1 ), ldc )
569  CALL zgerc( mpv, nqc2, -tauloc, work, 1, work( ipw ),
570  $ 1, c( ioffc2 ), ldc )
571  END IF
572 *
573  ELSE
574 *
575 * Broadcast column vector V
576 *
577  CALL pb_topget( ictxt, 'Broadcast', 'Rowwise', rowbtop )
578  IF( mycol.EQ.ivcol ) THEN
579 *
580  ipw = mpv+1
581  CALL zcopy( mpv, v( ioffv ), 1, work, 1 )
582  work( ipw ) = tau( jjv )
583  CALL zgebs2d( ictxt, 'Rowwise', rowbtop, ipw, 1,
584  $ work, ipw )
585  tauloc = dconjg( tau( jjv ) )
586 *
587  ELSE
588 *
589  ipw = mpv+1
590  CALL zgebr2d( ictxt, 'Rowwise', rowbtop, ipw, 1, work,
591  $ ipw, myrow, ivcol )
592  tauloc = dconjg( work( ipw ) )
593 *
594  END IF
595 *
596  IF( tauloc.NE.zero ) THEN
597 *
598 * w := sub( C )' * v
599 *
600  IF( mpv.GT.0 ) THEN
601  CALL zgemv( 'Conjugate transpose', mpv, nqc2, one,
602  $ c( ioffc2 ), ldc, work, 1, zero,
603  $ work( ipw ), 1 )
604  ELSE
605  CALL zlaset( 'All', nqc2, 1, zero, zero,
606  $ work( ipw ), max( 1, nqc2 ) )
607  END IF
608  IF( myrow.EQ.icrow1 )
609  $ CALL zaxpy( nqc2, one, c( ioffc1 ), ldc,
610  $ work( ipw ), max( 1, nqc2 ) )
611 *
612  CALL zgsum2d( ictxt, 'Columnwise', ' ', nqc2, 1,
613  $ work( ipw ), max( 1, nqc2 ), rdest,
614  $ mycol )
615 *
616 * sub( C ) := sub( C ) - v * w'
617 *
618  IF( myrow.EQ.icrow1 )
619  $ CALL zaxpy( nqc2, -tauloc, work( ipw ),
620  $ max( 1, nqc2 ), c( ioffc1 ), ldc )
621  CALL zgerc( mpv, nqc2, -tauloc, work, 1, work( ipw ),
622  $ 1, c( ioffc2 ), ldc )
623  END IF
624 *
625  END IF
626 *
627  END IF
628 *
629  ELSE
630 *
631  IF( ccblck ) THEN
632  rdest = myrow
633  ELSE
634  rdest = -1
635  END IF
636 *
637  IF( crblck ) THEN
638 *
639 * sub( C ) is distributed over a process row
640 *
641  IF( descv( m_ ).EQ.incv ) THEN
642 *
643 * V is a row vector
644 *
645  IF( ivrow.EQ.icrow2 ) THEN
646 *
647 * Perform the local computation within a process row
648 *
649  IF( myrow.EQ.icrow2 ) THEN
650 *
651  tauloc = dconjg( tau( iiv ) )
652 *
653  IF( tauloc.NE.zero ) THEN
654 *
655 * w := sub( C ) * v
656 *
657  IF( nqv.GT.0 ) THEN
658  CALL zgemv( 'No transpose', mpc2, nqv, one,
659  $ c( ioffc2 ), ldc, v( ioffv ),
660  $ ldv, zero, work, 1 )
661  ELSE
662  CALL zlaset( 'All', mpc2, 1, zero, zero,
663  $ work, max( 1, mpc2 ) )
664  END IF
665  IF( mycol.EQ.iccol1 )
666  $ CALL zaxpy( mpc2, one, c( ioffc1 ), 1,
667  $ work, 1 )
668 *
669  CALL zgsum2d( ictxt, 'Rowwise', ' ', mpc2, 1,
670  $ work, max( 1, mpc2 ), rdest,
671  $ iccol2 )
672 *
673  IF( mycol.EQ.iccol1 )
674  $ CALL zaxpy( mpc2, -tauloc, work, 1,
675  $ c( ioffc1 ), 1 )
676 *
677 * sub( C ) := sub( C ) - w * v'
678 *
679  CALL zgerc( mpc2, nqv, -tauloc, work, 1,
680  $ v( ioffv ), ldv, c( ioffc2 ), ldc )
681  END IF
682 *
683  END IF
684 *
685  ELSE
686 *
687 * Send V and TAU to the process row ICROW2
688 *
689  IF( myrow.EQ.ivrow ) THEN
690 *
691  ipw = nqv+1
692  CALL zcopy( nqv, v( ioffv ), ldv, work, 1 )
693  work( ipw ) = tau( iiv )
694  CALL zgesd2d( ictxt, ipw, 1, work, ipw, icrow2,
695  $ mycol )
696 *
697  ELSE IF( myrow.EQ.icrow2 ) THEN
698 *
699  ipw = nqv+1
700  CALL zgerv2d( ictxt, ipw, 1, work, ipw, ivrow,
701  $ mycol )
702  tauloc = dconjg( work( ipw ) )
703 *
704  IF( tauloc.NE.zero ) THEN
705 *
706 * w := sub( C ) * v
707 *
708  IF( nqv.GT.0 ) THEN
709  CALL zgemv( 'No transpose', mpc2, nqv, one,
710  $ c( ioffc2 ), ldc, work, 1, zero,
711  $ work( ipw ), 1 )
712  ELSE
713  CALL zlaset( 'All', mpc2, 1, zero, zero,
714  $ work( ipw ), max( 1, mpc2 ) )
715  END IF
716  IF( mycol.EQ.iccol1 )
717  $ CALL zaxpy( mpc2, one, c( ioffc1 ), 1,
718  $ work( ipw ), 1 )
719  CALL zgsum2d( ictxt, 'Rowwise', ' ', mpc2, 1,
720  $ work( ipw ), max( 1, mpc2 ),
721  $ rdest, iccol2 )
722  IF( mycol.EQ.iccol1 )
723  $ CALL zaxpy( mpc2, -tauloc, work( ipw ), 1,
724  $ c( ioffc1 ), 1 )
725 *
726 * sub( C ) := sub( C ) - w * v'
727 *
728  CALL zgerc( mpc2, nqv, -tauloc, work( ipw ), 1,
729  $ work, 1, c( ioffc2 ), ldc )
730  END IF
731 *
732  END IF
733 *
734  END IF
735 *
736  ELSE
737 *
738 * Transpose column vector V (IROFFV = ICOFFC2)
739 *
740  ipw = nqv+1
741  CALL pbztrnv( ictxt, 'Columnwise', 'Transpose', n,
742  $ descv( mb_ ), icoffc2, v( ioffv ), 1, zero,
743  $ work, 1, ivrow, ivcol, icrow2, iccol2,
744  $ work( ipw ) )
745 *
746 * Perform the local computation within a process column
747 *
748  IF( myrow.EQ.icrow2 ) THEN
749 *
750  IF( mycol.EQ.ivcol ) THEN
751 *
752  CALL zgebs2d( ictxt, 'Rowwise', ' ', 1, 1,
753  $ tau( jjv ), 1 )
754  tauloc = dconjg( tau( jjv ) )
755 *
756  ELSE
757 *
758  CALL zgebr2d( ictxt, 'Rowwise', ' ', 1, 1, tauloc,
759  $ 1, myrow, ivcol )
760  tauloc = dconjg( tauloc )
761 *
762  END IF
763 *
764  IF( tauloc.NE.zero ) THEN
765 *
766 * w := sub( C ) * v
767 *
768  IF( nqv.GT.0 ) THEN
769  CALL zgemv( 'No transpose', mpc2, nqv, one,
770  $ c( ioffc2 ), ldc, work, 1, zero,
771  $ work( ipw ), 1 )
772  ELSE
773  CALL zlaset( 'All', mpc2, 1, zero, zero,
774  $ work( ipw ), max( 1, mpc2 ) )
775  END IF
776  IF( mycol.EQ.iccol1 )
777  $ CALL zaxpy( mpc2, one, c( ioffc1 ), 1,
778  $ work( ipw ), 1 )
779  CALL zgsum2d( ictxt, 'Rowwise', ' ', mpc2, 1,
780  $ work( ipw ), max( 1, mpc2 ), rdest,
781  $ iccol2 )
782  IF( mycol.EQ.iccol1 )
783  $ CALL zaxpy( mpc2, -tauloc, work( ipw ), 1,
784  $ c( ioffc1 ), 1 )
785 *
786 * sub( C ) := sub( C ) - w * v'
787 *
788  CALL zgerc( mpc2, nqv, -tauloc, work( ipw ), 1,
789  $ work, 1, c( ioffc2 ), ldc )
790  END IF
791 *
792  END IF
793 *
794  END IF
795 *
796  ELSE
797 *
798 * sub( C ) is a proper distributed matrix
799 *
800  IF( descv( m_ ).EQ.incv ) THEN
801 *
802 * Broadcast row vector V
803 *
804  CALL pb_topget( ictxt, 'Broadcast', 'Columnwise',
805  $ colbtop )
806  IF( myrow.EQ.ivrow ) THEN
807 *
808  ipw = nqv+1
809  CALL zcopy( nqv, v( ioffv ), ldv, work, 1 )
810  work( ipw ) = tau( iiv )
811  CALL zgebs2d( ictxt, 'Columnwise', colbtop, ipw, 1,
812  $ work, ipw )
813  tauloc = dconjg( tau( iiv ) )
814 *
815  ELSE
816 *
817  ipw = nqv+1
818  CALL zgebr2d( ictxt, 'Columnwise', colbtop, ipw, 1,
819  $ work, ipw, ivrow, mycol )
820  tauloc = dconjg( work( ipw ) )
821 *
822  END IF
823 *
824  IF( tauloc.NE.zero ) THEN
825 *
826 * w := sub( C ) * v
827 *
828  IF( nqv.GT.0 ) THEN
829  CALL zgemv( 'No Transpose', mpc2, nqv, one,
830  $ c( ioffc2 ), ldc, work, 1, zero,
831  $ work( ipw ), 1 )
832  ELSE
833  CALL zlaset( 'All', mpc2, 1, zero, zero,
834  $ work( ipw ), max( 1, mpc2 ) )
835  END IF
836  IF( mycol.EQ.iccol1 )
837  $ CALL zaxpy( mpc2, one, c( ioffc1 ), 1,
838  $ work( ipw ), 1 )
839 *
840  CALL zgsum2d( ictxt, 'Rowwise', ' ', mpc2, 1,
841  $ work( ipw ), max( 1, mpc2 ), rdest,
842  $ iccol2 )
843  IF( mycol.EQ.iccol1 )
844  $ CALL zaxpy( mpc2, -tauloc, work( ipw ), 1,
845  $ c( ioffc1 ), 1 )
846 *
847 * sub( C ) := sub( C ) - w * v'
848 *
849  CALL zgerc( mpc2, nqv, -tauloc, work( ipw ), 1, work,
850  $ 1, c( ioffc2 ), ldc )
851  END IF
852 *
853  ELSE
854 *
855 * Transpose and broadcast column vector V (ICOFFC2=IROFFV)
856 *
857  ipw = nqv+1
858  CALL pbztrnv( ictxt, 'Columnwise', 'Transpose', n,
859  $ descv( mb_ ), icoffc2, v( ioffv ), 1, zero,
860  $ work, 1, ivrow, ivcol, -1, iccol2,
861  $ work( ipw ) )
862 *
863 * Perform the local computation within a process column
864 *
865  IF( mycol.EQ.ivcol ) THEN
866 *
867  CALL zgebs2d( ictxt, 'Rowwise', ' ', 1, 1, tau( jjv ),
868  $ 1 )
869  tauloc = dconjg( tau( jjv ) )
870 *
871  ELSE
872 *
873  CALL zgebr2d( ictxt, 'Rowwise', ' ', 1, 1, tauloc, 1,
874  $ myrow, ivcol )
875  tauloc = dconjg( tauloc )
876 *
877  END IF
878 *
879  IF( tauloc.NE.zero ) THEN
880 *
881 * w := sub( C ) * v
882 *
883  IF( nqv.GT.0 ) THEN
884  CALL zgemv( 'No transpose', mpc2, nqv, one,
885  $ c( ioffc2 ), ldc, work, 1, zero,
886  $ work( ipw ), 1 )
887  ELSE
888  CALL zlaset( 'All', mpc2, 1, zero, zero,
889  $ work( ipw ), max( 1, mpc2 ) )
890  END IF
891  IF( mycol.EQ.iccol1 )
892  $ CALL zaxpy( mpc2, one, c( ioffc1 ), 1,
893  $ work( ipw ), 1 )
894  CALL zgsum2d( ictxt, 'Rowwise', ' ', mpc2, 1,
895  $ work( ipw ), max( 1, mpc2 ), rdest,
896  $ iccol2 )
897  IF( mycol.EQ.iccol1 )
898  $ CALL zaxpy( mpc2, -tauloc, work( ipw ), 1,
899  $ c( ioffc1 ), 1 )
900 *
901 * sub( C ) := sub( C ) - w * v'
902 *
903  CALL zgerc( mpc2, nqv, -tauloc, work( ipw ), 1, work,
904  $ 1, c( ioffc2 ), ldc )
905  END IF
906 *
907  END IF
908 *
909  END IF
910 *
911  END IF
912 *
913  RETURN
914 *
915 * End of PZLARZC
916 *
917  END
max
#define max(A, B)
Definition: pcgemr.c:180
pbztrnv
subroutine pbztrnv(ICONTXT, XDIST, TRANS, N, NB, NZ, X, INCX, BETA, Y, INCY, IXROW, IXCOL, IYROW, IYCOL, WORK)
Definition: pbztrnv.f:4
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pzlarzc
subroutine pzlarzc(SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C, IC, JC, DESCC, WORK)
Definition: pzlarzc.f:3
min
#define min(A, B)
Definition: pcgemr.c:181