ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdlarz.f
Go to the documentation of this file.
1  SUBROUTINE pdlarz( SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C,
2  $ IC, JC, DESCC, WORK )
3 *
4 * -- ScaLAPACK auxiliary routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 25, 2001
8 *
9 * .. Scalar Arguments ..
10  CHARACTER SIDE
11  INTEGER IC, INCV, IV, JC, JV, L, M, N
12 * ..
13 * .. Array Arguments ..
14  INTEGER DESCC( * ), DESCV( * )
15  DOUBLE PRECISION C( * ), TAU( * ), V( * ), WORK( * )
16 * ..
17 *
18 * Purpose
19 * =======
20 *
21 * PDLARZ applies a real elementary reflector Q (or Q**T) to a real
22 * M-by-N distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), from
23 * either the left or the right. Q is represented in the form
24 *
25 * Q = I - tau * v * v'
26 *
27 * where tau is a real scalar and v is a real vector.
28 *
29 * If tau = 0, then Q is taken to be the unit matrix.
30 *
31 * Q is a product of k elementary reflectors as returned by PDTZRZF.
32 *
33 * Notes
34 * =====
35 *
36 * Each global data object is described by an associated description
37 * vector. This vector stores the information required to establish
38 * the mapping between an object element and its corresponding process
39 * and memory location.
40 *
41 * Let A be a generic term for any 2D block cyclicly distributed array.
42 * Such a global array has an associated description vector DESCA.
43 * In the following comments, the character _ should be read as
44 * "of the global array".
45 *
46 * NOTATION STORED IN EXPLANATION
47 * --------------- -------------- --------------------------------------
48 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
49 * DTYPE_A = 1.
50 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
51 * the BLACS process grid A is distribu-
52 * ted over. The context itself is glo-
53 * bal, but the handle (the integer
54 * value) may vary.
55 * M_A (global) DESCA( M_ ) The number of rows in the global
56 * array A.
57 * N_A (global) DESCA( N_ ) The number of columns in the global
58 * array A.
59 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
60 * the rows of the array.
61 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
62 * the columns of the array.
63 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
64 * row of the array A is distributed.
65 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
66 * first column of the array A is
67 * distributed.
68 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
69 * array. LLD_A >= MAX(1,LOCr(M_A)).
70 *
71 * Let K be the number of rows or columns of a distributed matrix,
72 * and assume that its process grid has dimension p x q.
73 * LOCr( K ) denotes the number of elements of K that a process
74 * would receive if K were distributed over the p processes of its
75 * process column.
76 * Similarly, LOCc( K ) denotes the number of elements of K that a
77 * process would receive if K were distributed over the q processes of
78 * its process row.
79 * The values of LOCr() and LOCc() may be determined via a call to the
80 * ScaLAPACK tool function, NUMROC:
81 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
82 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
83 * An upper bound for these quantities may be computed by:
84 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
85 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
86 *
87 * Because vectors may be viewed as a subclass of matrices, a
88 * distributed vector is considered to be a distributed matrix.
89 *
90 * Restrictions
91 * ============
92 *
93 * If SIDE = 'Left' and INCV = 1, then the row process having the first
94 * entry V(IV,JV) must also own C(IC+M-L,JC:JC+N-1). Moreover,
95 * MOD(IV-1,MB_V) must be equal to MOD(IC+N-L-1,MB_C), if INCV=M_V, only
96 * the last equality must be satisfied.
97 *
98 * If SIDE = 'Right' and INCV = M_V then the column process having the
99 * first entry V(IV,JV) must also own C(IC:IC+M-1,JC+N-L) and
100 * MOD(JV-1,NB_V) must be equal to MOD(JC+N-L-1,NB_C), if INCV = 1 only
101 * the last equality must be satisfied.
102 *
103 * Arguments
104 * =========
105 *
106 * SIDE (global input) CHARACTER
107 * = 'L': form Q * sub( C ),
108 * = 'R': form sub( C ) * Q, Q = Q**T.
109 *
110 * M (global input) INTEGER
111 * The number of rows to be operated on i.e the number of rows
112 * of the distributed submatrix sub( C ). M >= 0.
113 *
114 * N (global input) INTEGER
115 * The number of columns to be operated on i.e the number of
116 * columns of the distributed submatrix sub( C ). N >= 0.
117 *
118 * L (global input) INTEGER
119 * The columns of the distributed submatrix sub( A ) containing
120 * the meaningful part of the Householder reflectors.
121 * If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
122 *
123 * V (local input) DOUBLE PRECISION pointer into the local memory
124 * to an array of dimension (LLD_V,*) containing the local
125 * pieces of the distributed vectors V representing the
126 * Householder transformation Q,
127 * V(IV:IV+L-1,JV) if SIDE = 'L' and INCV = 1,
128 * V(IV,JV:JV+L-1) if SIDE = 'L' and INCV = M_V,
129 * V(IV:IV+L-1,JV) if SIDE = 'R' and INCV = 1,
130 * V(IV,JV:JV+L-1) if SIDE = 'R' and INCV = M_V,
131 *
132 * The vector v in the representation of Q. V is not used if
133 * TAU = 0.
134 *
135 * IV (global input) INTEGER
136 * The row index in the global array V indicating the first
137 * row of sub( V ).
138 *
139 * JV (global input) INTEGER
140 * The column index in the global array V indicating the
141 * first column of sub( V ).
142 *
143 * DESCV (global and local input) INTEGER array of dimension DLEN_.
144 * The array descriptor for the distributed matrix V.
145 *
146 * INCV (global input) INTEGER
147 * The global increment for the elements of V. Only two values
148 * of INCV are supported in this version, namely 1 and M_V.
149 * INCV must not be zero.
150 *
151 * TAU (local input) DOUBLE PRECISION array, dimension LOCc(JV) if
152 * INCV = 1, and LOCr(IV) otherwise. This array contains the
153 * Householder scalars related to the Householder vectors.
154 * TAU is tied to the distributed matrix V.
155 *
156 * C (local input/local output) DOUBLE PRECISION pointer into the
157 * local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ),
158 * containing the local pieces of sub( C ). On exit, sub( C )
159 * is overwritten by the Q * sub( C ) if SIDE = 'L', or
160 * sub( C ) * Q if SIDE = 'R'.
161 *
162 * IC (global input) INTEGER
163 * The row index in the global array C indicating the first
164 * row of sub( C ).
165 *
166 * JC (global input) INTEGER
167 * The column index in the global array C indicating the
168 * first column of sub( C ).
169 *
170 * DESCC (global and local input) INTEGER array of dimension DLEN_.
171 * The array descriptor for the distributed matrix C.
172 *
173 * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK)
174 * If INCV = 1,
175 * if SIDE = 'L',
176 * if IVCOL = ICCOL,
177 * LWORK >= NqC0
178 * else
179 * LWORK >= MpC0 + MAX( 1, NqC0 )
180 * end if
181 * else if SIDE = 'R',
182 * LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC(
183 * N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) )
184 * end if
185 * else if INCV = M_V,
186 * if SIDE = 'L',
187 * LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC(
188 * M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) )
189 * else if SIDE = 'R',
190 * if IVROW = ICROW,
191 * LWORK >= MpC0
192 * else
193 * LWORK >= NqC0 + MAX( 1, MpC0 )
194 * end if
195 * end if
196 * end if
197 *
198 * where LCM is the least common multiple of NPROW and NPCOL and
199 * LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW,
200 * LCMQ = LCM / NPCOL,
201 *
202 * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
203 * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
204 * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
205 * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
206 * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
207 *
208 * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
209 * MYROW, MYCOL, NPROW and NPCOL can be determined by calling
210 * the subroutine BLACS_GRIDINFO.
211 *
212 * Alignment requirements
213 * ======================
214 *
215 * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
216 * must verify some alignment properties, namely the following
217 * expressions should be true:
218 *
219 * MB_V = NB_V,
220 *
221 * If INCV = 1,
222 * If SIDE = 'Left',
223 * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
224 * If SIDE = 'Right',
225 * ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
226 * else if INCV = M_V,
227 * If SIDE = 'Left',
228 * ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
229 * If SIDE = 'Right',
230 * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
231 * end if
232 *
233 * =====================================================================
234 *
235 * .. Parameters ..
236  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
237  $ lld_, mb_, m_, nb_, n_, rsrc_
238  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
239  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
240  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
241  DOUBLE PRECISION ONE, ZERO
242  parameter( one = 1.0d+0, zero = 0.0d+0 )
243 * ..
244 * .. Local Scalars ..
245  LOGICAL CCBLCK, CRBLCK, LEFT
246  CHARACTER COLBTOP, ROWBTOP
247  INTEGER ICCOL1, ICCOL2, ICOFFC1, ICOFFC2, ICOFFV,
248  $ icrow1, icrow2, ictxt, iic1, iic2, iiv, ioffc1,
249  $ ioffc2, ioffv, ipw, iroffc1, iroffc2, iroffv,
250  $ ivcol, ivrow, jjc1, jjc2, jjv, ldc, ldv, mpc2,
251  $ mpv, mycol, myrow, ncc, ncv, npcol, nprow,
252  $ nqc2, nqv, rdest
253  DOUBLE PRECISION TAULOC
254 * ..
255 * .. External Subroutines ..
256  EXTERNAL blacs_gridinfo, daxpy, dcopy, dgebr2d,
257  $ dgebs2d, dgemv, dger, dgerv2d,
258  $ dgesd2d, dgsum2d, dlaset, infog2l,
259  $ pb_topget, pbdtrnv
260 * ..
261 * .. External Functions ..
262  LOGICAL LSAME
263  INTEGER NUMROC
264  EXTERNAL lsame, numroc
265 * ..
266 * .. Intrinsic Functions ..
267  INTRINSIC min, mod
268 * ..
269 * .. Executable Statements ..
270 *
271 * Quick return if possible
272 *
273  IF( m.LE.0 .OR. n.LE.0 )
274  $ RETURN
275 *
276 * Get grid parameters.
277 *
278  ictxt = descc( ctxt_ )
279  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
280 *
281 * Figure local indexes
282 *
283  left = lsame( side, 'L' )
284  CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol, iiv, jjv,
285  $ ivrow, ivcol )
286  iroffv = mod( iv-1, descv( nb_ ) )
287  mpv = numroc( l+iroffv, descv( mb_ ), myrow, ivrow, nprow )
288  IF( myrow.EQ.ivrow )
289  $ mpv = mpv - iroffv
290  icoffv = mod( jv-1, descv( nb_ ) )
291  nqv = numroc( l+icoffv, descv( nb_ ), mycol, ivcol, npcol )
292  IF( mycol.EQ.ivcol )
293  $ nqv = nqv - icoffv
294  ldv = descv( lld_ )
295  ncv = numroc( descv( n_ ), descv( nb_ ), mycol, descv( csrc_ ),
296  $ npcol )
297  ldv = descv( lld_ )
298  iiv = min( iiv, ldv )
299  jjv = min( jjv, ncv )
300  ioffv = iiv+(jjv-1)*ldv
301  ncc = numroc( descc( n_ ), descc( nb_ ), mycol, descc( csrc_ ),
302  $ npcol )
303  CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol,
304  $ iic1, jjc1, icrow1, iccol1 )
305  iroffc1 = mod( ic-1, descc( mb_ ) )
306  icoffc1 = mod( jc-1, descc( nb_ ) )
307  ldc = descc( lld_ )
308  iic1 = min( iic1, ldc )
309  jjc1 = min( jjc1, max( 1, ncc ) )
310  ioffc1 = iic1 + ( jjc1-1 ) * ldc
311 *
312  IF( left ) THEN
313  CALL infog2l( ic+m-l, jc, descc, nprow, npcol, myrow, mycol,
314  $ iic2, jjc2, icrow2, iccol2 )
315  iroffc2 = mod( ic+m-l-1, descc( mb_ ) )
316  icoffc2 = mod( jc-1, descc( nb_ ) )
317  nqc2 = numroc( n+icoffc2, descc( nb_ ), mycol, iccol2, npcol )
318  IF( mycol.EQ.iccol2 )
319  $ nqc2 = nqc2 - icoffc2
320  ELSE
321  CALL infog2l( ic, jc+n-l, descc, nprow, npcol, myrow, mycol,
322  $ iic2, jjc2, icrow2, iccol2 )
323  iroffc2 = mod( ic-1, descc( mb_ ) )
324  mpc2 = numroc( m+iroffc2, descc( mb_ ), myrow, icrow2, nprow )
325  IF( myrow.EQ.icrow2 )
326  $ mpc2 = mpc2 - iroffc2
327  icoffc2 = mod( jc+n-l-1, descc( nb_ ) )
328  END IF
329  iic2 = min( iic2, ldc )
330  jjc2 = min( jjc2, ncc )
331  ioffc2 = iic2 + ( jjc2-1 ) * ldc
332 *
333 * Is sub( C ) only distributed over a process row ?
334 *
335  crblck = ( m.LE.(descc( mb_ )-iroffc1) )
336 *
337 * Is sub( C ) only distributed over a process column ?
338 *
339  ccblck = ( n.LE.(descc( nb_ )-icoffc1) )
340 *
341  IF( left ) THEN
342 *
343  IF( crblck ) THEN
344  rdest = icrow2
345  ELSE
346  rdest = -1
347  END IF
348 *
349  IF( ccblck ) THEN
350 *
351 * sub( C ) is distributed over a process column
352 *
353  IF( descv( m_ ).EQ.incv ) THEN
354 *
355 * Transpose row vector V (ICOFFV = IROFFC2)
356 *
357  ipw = mpv+1
358  CALL pbdtrnv( ictxt, 'Rowwise', 'Transpose', m,
359  $ descv( nb_ ), iroffc2, v( ioffv ), ldv,
360  $ zero,
361  $ work, 1, ivrow, ivcol, icrow2, iccol2,
362  $ work( ipw ) )
363 *
364 * Perform the local computation within a process column
365 *
366  IF( mycol.EQ.iccol2 ) THEN
367 *
368  IF( myrow.EQ.ivrow ) THEN
369 *
370  CALL dgebs2d( ictxt, 'Columnwise', ' ', 1, 1,
371  $ tau( iiv ), 1 )
372  tauloc = tau( iiv )
373 *
374  ELSE
375 *
376  CALL dgebr2d( ictxt, 'Columnwise', ' ', 1, 1,
377  $ tauloc, 1, ivrow, mycol )
378 *
379  END IF
380 *
381  IF( tauloc.NE.zero ) THEN
382 *
383 * w := sub( C )' * v
384 *
385  IF( mpv.GT.0 ) THEN
386  CALL dgemv( 'Transpose', mpv, nqc2, one,
387  $ c( ioffc2 ), ldc, work, 1, zero,
388  $ work( ipw ), 1 )
389  ELSE
390  CALL dlaset( 'All', nqc2, 1, zero, zero,
391  $ work( ipw ), max( 1, nqc2 ) )
392  END IF
393  IF( myrow.EQ.icrow1 )
394  $ CALL daxpy( nqc2, one, c( ioffc1 ), ldc,
395  $ work( ipw ), max( 1, nqc2 ) )
396 *
397  CALL dgsum2d( ictxt, 'Columnwise', ' ', nqc2, 1,
398  $ work( ipw ), max( 1, nqc2 ), rdest,
399  $ mycol )
400 *
401 * sub( C ) := sub( C ) - v * w'
402 *
403  IF( myrow.EQ.icrow1 )
404  $ CALL daxpy( nqc2, -tauloc, work( ipw ),
405  $ max( 1, nqc2 ), c( ioffc1 ), ldc )
406  CALL dger( mpv, nqc2, -tauloc, work, 1,
407  $ work( ipw ), 1, c( ioffc2 ), ldc )
408  END IF
409 *
410  END IF
411 *
412  ELSE
413 *
414 * V is a column vector
415 *
416  IF( ivcol.EQ.iccol2 ) THEN
417 *
418 * Perform the local computation within a process column
419 *
420  IF( mycol.EQ.iccol2 ) THEN
421 *
422  tauloc = tau( jjv )
423 *
424  IF( tauloc.NE.zero ) THEN
425 *
426 * w := sub( C )' * v
427 *
428  IF( mpv.GT.0 ) THEN
429  CALL dgemv( 'Transpose', mpv, nqc2, one,
430  $ c( ioffc2 ), ldc, v( ioffv ), 1,
431  $ zero, work, 1 )
432  ELSE
433  CALL dlaset( 'All', nqc2, 1, zero, zero,
434  $ work, max( 1, nqc2 ) )
435  END IF
436  IF( myrow.EQ.icrow1 )
437  $ CALL daxpy( nqc2, one, c( ioffc1 ), ldc,
438  $ work, max( 1, nqc2 ) )
439 *
440  CALL dgsum2d( ictxt, 'Columnwise', ' ', nqc2, 1,
441  $ work, max( 1, nqc2 ), rdest,
442  $ mycol )
443 *
444 * sub( C ) := sub( C ) - v * w'
445 *
446  IF( myrow.EQ.icrow1 )
447  $ CALL daxpy( nqc2, -tauloc, work,
448  $ max( 1, nqc2 ), c( ioffc1 ),
449  $ ldc )
450  CALL dger( mpv, nqc2, -tauloc, v( ioffv ), 1,
451  $ work, 1, c( ioffc2 ), ldc )
452  END IF
453 *
454  END IF
455 *
456  ELSE
457 *
458 * Send V and TAU to the process column ICCOL2
459 *
460  IF( mycol.EQ.ivcol ) THEN
461 *
462  ipw = mpv+1
463  CALL dcopy( mpv, v( ioffv ), 1, work, 1 )
464  work( ipw ) = tau( jjv )
465  CALL dgesd2d( ictxt, ipw, 1, work, ipw, myrow,
466  $ iccol2 )
467 *
468  ELSE IF( mycol.EQ.iccol2 ) THEN
469 *
470  ipw = mpv+1
471  CALL dgerv2d( ictxt, ipw, 1, work, ipw, myrow,
472  $ ivcol )
473  tauloc = work( ipw )
474 *
475  IF( tauloc.NE.zero ) THEN
476 *
477 * w := sub( C )' * v
478 *
479  IF( mpv.GT.0 ) THEN
480  CALL dgemv( 'Transpose', mpv, nqc2, one,
481  $ c( ioffc2 ), ldc, work, 1, zero,
482  $ work( ipw ), 1 )
483  ELSE
484  CALL dlaset( 'All', nqc2, 1, zero, zero,
485  $ work( ipw ), max( 1, nqc2 ) )
486  END IF
487  IF( myrow.EQ.icrow1 )
488  $ CALL daxpy( nqc2, one, c( ioffc1 ), ldc,
489  $ work( ipw ), max( 1, nqc2 ) )
490 *
491  CALL dgsum2d( ictxt, 'Columnwise', ' ', nqc2, 1,
492  $ work( ipw ), max( 1, nqc2 ),
493  $ rdest, mycol )
494 *
495 * sub( C ) := sub( C ) - v * w'
496 *
497  IF( myrow.EQ.icrow1 )
498  $ CALL daxpy( nqc2, -tauloc, work( ipw ),
499  $ max( 1, nqc2 ), c( ioffc1 ),
500  $ ldc )
501  CALL dger( mpv, nqc2, -tauloc, work, 1,
502  $ work( ipw ), 1, c( ioffc2 ), ldc )
503  END IF
504 *
505  END IF
506 *
507  END IF
508 *
509  END IF
510 *
511  ELSE
512 *
513 * sub( C ) is a proper distributed matrix
514 *
515  IF( descv( m_ ).EQ.incv ) THEN
516 *
517 * Transpose and broadcast row vector V (ICOFFV=IROFFC2)
518 *
519  ipw = mpv+1
520  CALL pbdtrnv( ictxt, 'Rowwise', 'Transpose', m,
521  $ descv( nb_ ), iroffc2, v( ioffv ), ldv,
522  $ zero,
523  $ work, 1, ivrow, ivcol, icrow2, -1,
524  $ work( ipw ) )
525 *
526 * Perform the local computation within a process column
527 *
528  IF( myrow.EQ.ivrow ) THEN
529 *
530  CALL dgebs2d( ictxt, 'Columnwise', ' ', 1, 1,
531  $ tau( iiv ), 1 )
532  tauloc = tau( iiv )
533 *
534  ELSE
535 *
536  CALL dgebr2d( ictxt, 'Columnwise', ' ', 1, 1, tauloc,
537  $ 1, ivrow, mycol )
538 *
539  END IF
540 *
541  IF( tauloc.NE.zero ) THEN
542 *
543 * w := sub( C )' * v
544 *
545  IF( mpv.GT.0 ) THEN
546  CALL dgemv( 'Transpose', mpv, nqc2, one,
547  $ c( ioffc2 ), ldc, work, 1, zero,
548  $ work( ipw ), 1 )
549  ELSE
550  CALL dlaset( 'All', nqc2, 1, zero, zero,
551  $ work( ipw ), max( 1, nqc2 ) )
552  END IF
553  IF( myrow.EQ.icrow1 )
554  $ CALL daxpy( nqc2, one, c( ioffc1 ), ldc,
555  $ work( ipw ), max( 1, nqc2 ) )
556 *
557  CALL dgsum2d( ictxt, 'Columnwise', ' ', nqc2, 1,
558  $ work( ipw ), max( 1, nqc2 ), rdest,
559  $ mycol )
560 *
561 * sub( C ) := sub( C ) - v * w'
562 *
563  IF( myrow.EQ.icrow1 )
564  $ CALL daxpy( nqc2, -tauloc, work( ipw ),
565  $ max( 1, nqc2 ), c( ioffc1 ), ldc )
566  CALL dger( mpv, nqc2, -tauloc, work, 1, work( ipw ),
567  $ 1, c( ioffc2 ), ldc )
568  END IF
569 *
570  ELSE
571 *
572 * Broadcast column vector V
573 *
574  CALL pb_topget( ictxt, 'Broadcast', 'Rowwise', rowbtop )
575  IF( mycol.EQ.ivcol ) THEN
576 *
577  ipw = mpv+1
578  CALL dcopy( mpv, v( ioffv ), 1, work, 1 )
579  work( ipw ) = tau( jjv )
580  CALL dgebs2d( ictxt, 'Rowwise', rowbtop, ipw, 1,
581  $ work, ipw )
582  tauloc = tau( jjv )
583 *
584  ELSE
585 *
586  ipw = mpv+1
587  CALL dgebr2d( ictxt, 'Rowwise', rowbtop, ipw, 1, work,
588  $ ipw, myrow, ivcol )
589  tauloc = work( ipw )
590 *
591  END IF
592 *
593  IF( tauloc.NE.zero ) THEN
594 *
595 * w := sub( C )' * v
596 *
597  IF( mpv.GT.0 ) THEN
598  CALL dgemv( 'Transpose', mpv, nqc2, one,
599  $ c( ioffc2 ), ldc, work, 1, zero,
600  $ work( ipw ), 1 )
601  ELSE
602  CALL dlaset( 'All', nqc2, 1, zero, zero,
603  $ work( ipw ), max( 1, nqc2 ) )
604  END IF
605  IF( myrow.EQ.icrow1 )
606  $ CALL daxpy( nqc2, one, c( ioffc1 ), ldc,
607  $ work( ipw ), max( 1, nqc2 ) )
608 *
609  CALL dgsum2d( ictxt, 'Columnwise', ' ', nqc2, 1,
610  $ work( ipw ), max( 1, nqc2 ), rdest,
611  $ mycol )
612 *
613 * sub( C ) := sub( C ) - v * w'
614 *
615  IF( myrow.EQ.icrow1 )
616  $ CALL daxpy( nqc2, -tauloc, work( ipw ),
617  $ max( 1, nqc2 ), c( ioffc1 ), ldc )
618  CALL dger( mpv, nqc2, -tauloc, work, 1, work( ipw ),
619  $ 1, c( ioffc2 ), ldc )
620  END IF
621 *
622  END IF
623 *
624  END IF
625 *
626  ELSE
627 *
628  IF( ccblck ) THEN
629  rdest = myrow
630  ELSE
631  rdest = -1
632  END IF
633 *
634  IF( crblck ) THEN
635 *
636 * sub( C ) is distributed over a process row
637 *
638  IF( descv( m_ ).EQ.incv ) THEN
639 *
640 * V is a row vector
641 *
642  IF( ivrow.EQ.icrow2 ) THEN
643 *
644 * Perform the local computation within a process row
645 *
646  IF( myrow.EQ.icrow2 ) THEN
647 *
648  tauloc = tau( iiv )
649 *
650  IF( tauloc.NE.zero ) THEN
651 *
652 * w := sub( C ) * v
653 *
654  IF( nqv.GT.0 ) THEN
655  CALL dgemv( 'No transpose', mpc2, nqv, one,
656  $ c( ioffc2 ), ldc, v( ioffv ),
657  $ ldv, zero, work, 1 )
658  ELSE
659  CALL dlaset( 'All', mpc2, 1, zero, zero,
660  $ work, max( 1, mpc2 ) )
661  END IF
662  IF( mycol.EQ.iccol1 )
663  $ CALL daxpy( mpc2, one, c( ioffc1 ), 1,
664  $ work, 1 )
665 *
666  CALL dgsum2d( ictxt, 'Rowwise', ' ', mpc2, 1,
667  $ work, max( 1, mpc2 ), rdest,
668  $ iccol2 )
669 *
670  IF( mycol.EQ.iccol1 )
671  $ CALL daxpy( mpc2, -tauloc, work, 1,
672  $ c( ioffc1 ), 1 )
673 *
674 * sub( C ) := sub( C ) - w * v'
675 *
676  IF( mpc2.GT.0 .AND. nqv.GT.0 )
677  $ CALL dger( mpc2, nqv, -tauloc, work, 1,
678  $ v( ioffv ), ldv, c( ioffc2 ),
679  $ ldc )
680  END IF
681 *
682  END IF
683 *
684  ELSE
685 *
686 * Send V and TAU to the process row ICROW2
687 *
688  IF( myrow.EQ.ivrow ) THEN
689 *
690  ipw = nqv+1
691  CALL dcopy( nqv, v( ioffv ), ldv, work, 1 )
692  work( ipw ) = tau( iiv )
693  CALL dgesd2d( ictxt, ipw, 1, work, ipw, icrow2,
694  $ mycol )
695 *
696  ELSE IF( myrow.EQ.icrow2 ) THEN
697 *
698  ipw = nqv+1
699  CALL dgerv2d( ictxt, ipw, 1, work, ipw, ivrow,
700  $ mycol )
701  tauloc = work( ipw )
702 *
703  IF( tauloc.NE.zero ) THEN
704 *
705 * w := sub( C ) * v
706 *
707  IF( nqv.GT.0 ) THEN
708  CALL dgemv( 'No transpose', mpc2, nqv, one,
709  $ c( ioffc2 ), ldc, work, 1, zero,
710  $ work( ipw ), 1 )
711  ELSE
712  CALL dlaset( 'All', mpc2, 1, zero, zero,
713  $ work( ipw ), max( 1, mpc2 ) )
714  END IF
715  IF( mycol.EQ.iccol1 )
716  $ CALL daxpy( mpc2, one, c( ioffc1 ), 1,
717  $ work( ipw ), 1 )
718  CALL dgsum2d( ictxt, 'Rowwise', ' ', mpc2, 1,
719  $ work( ipw ), max( 1, mpc2 ),
720  $ rdest, iccol2 )
721  IF( mycol.EQ.iccol1 )
722  $ CALL daxpy( mpc2, -tauloc, work( ipw ), 1,
723  $ c( ioffc1 ), 1 )
724 *
725 * sub( C ) := sub( C ) - w * v'
726 *
727  CALL dger( mpc2, nqv, -tauloc, work( ipw ), 1,
728  $ work, 1, c( ioffc2 ), ldc )
729  END IF
730 *
731  END IF
732 *
733  END IF
734 *
735  ELSE
736 *
737 * Transpose column vector V (IROFFV = ICOFFC2)
738 *
739  ipw = nqv+1
740  CALL pbdtrnv( ictxt, 'Columnwise', 'Transpose', n,
741  $ descv( mb_ ), icoffc2, v( ioffv ), 1, zero,
742  $ work, 1, ivrow, ivcol, icrow2, iccol2,
743  $ work( ipw ) )
744 *
745 * Perform the local computation within a process column
746 *
747  IF( myrow.EQ.icrow2 ) THEN
748 *
749  IF( mycol.EQ.ivcol ) THEN
750 *
751  CALL dgebs2d( ictxt, 'Rowwise', ' ', 1, 1,
752  $ tau( jjv ), 1 )
753  tauloc = tau( jjv )
754 *
755  ELSE
756 *
757  CALL dgebr2d( ictxt, 'Rowwise', ' ', 1, 1, tauloc,
758  $ 1, myrow, ivcol )
759 *
760  END IF
761 *
762  IF( tauloc.NE.zero ) THEN
763 *
764 * w := sub( C ) * v
765 *
766  IF( nqv.GT.0 ) THEN
767  CALL dgemv( 'No transpose', mpc2, nqv, one,
768  $ c( ioffc2 ), ldc, work, 1, zero,
769  $ work( ipw ), 1 )
770  ELSE
771  CALL dlaset( 'All', mpc2, 1, zero, zero,
772  $ work( ipw ), max( 1, mpc2 ) )
773  END IF
774  IF( mycol.EQ.iccol1 )
775  $ CALL daxpy( mpc2, one, c( ioffc1 ), 1,
776  $ work( ipw ), 1 )
777  CALL dgsum2d( ictxt, 'Rowwise', ' ', mpc2, 1,
778  $ work( ipw ), max( 1, mpc2 ), rdest,
779  $ iccol2 )
780  IF( mycol.EQ.iccol1 )
781  $ CALL daxpy( mpc2, -tauloc, work( ipw ), 1,
782  $ c( ioffc1 ), 1 )
783 *
784 * sub( C ) := sub( C ) - w * v'
785 *
786  CALL dger( mpc2, nqv, -tauloc, work( ipw ), 1,
787  $ work, 1, c( ioffc2 ), ldc )
788  END IF
789 *
790  END IF
791 *
792  END IF
793 *
794  ELSE
795 *
796 * sub( C ) is a proper distributed matrix
797 *
798  IF( descv( m_ ).EQ.incv ) THEN
799 *
800 * Broadcast row vector V
801 *
802  CALL pb_topget( ictxt, 'Broadcast', 'Columnwise',
803  $ colbtop )
804  IF( myrow.EQ.ivrow ) THEN
805 *
806  ipw = nqv+1
807  CALL dcopy( nqv, v( ioffv ), ldv, work, 1 )
808  work( ipw ) = tau( iiv )
809  CALL dgebs2d( ictxt, 'Columnwise', colbtop, ipw, 1,
810  $ work, ipw )
811  tauloc = tau( iiv )
812 *
813  ELSE
814 *
815  ipw = nqv+1
816  CALL dgebr2d( ictxt, 'Columnwise', colbtop, ipw, 1,
817  $ work, ipw, ivrow, mycol )
818  tauloc = work( ipw )
819 *
820  END IF
821 *
822  IF( tauloc.NE.zero ) THEN
823 *
824 * w := sub( C ) * v
825 *
826  IF( nqv.GT.0 ) THEN
827  CALL dgemv( 'No Transpose', mpc2, nqv, one,
828  $ c( ioffc2 ), ldc, work, 1, zero,
829  $ work( ipw ), 1 )
830  ELSE
831  CALL dlaset( 'All', mpc2, 1, zero, zero,
832  $ work( ipw ), max( 1, mpc2 ) )
833  END IF
834  IF( mycol.EQ.iccol1 )
835  $ CALL daxpy( mpc2, one, c( ioffc1 ), 1,
836  $ work( ipw ), 1 )
837 *
838  CALL dgsum2d( ictxt, 'Rowwise', ' ', mpc2, 1,
839  $ work( ipw ), max( 1, mpc2 ), rdest,
840  $ iccol2 )
841  IF( mycol.EQ.iccol1 )
842  $ CALL daxpy( mpc2, -tauloc, work( ipw ), 1,
843  $ c( ioffc1 ), 1 )
844 *
845 * sub( C ) := sub( C ) - w * v'
846 *
847  CALL dger( mpc2, nqv, -tauloc, work( ipw ), 1, work,
848  $ 1, c( ioffc2 ), ldc )
849  END IF
850 *
851  ELSE
852 *
853 * Transpose and broadcast column vector V (ICOFFC2=IROFFV)
854 *
855  ipw = nqv+1
856  CALL pbdtrnv( ictxt, 'Columnwise', 'Transpose', n,
857  $ descv( mb_ ), icoffc2, v( ioffv ), 1, zero,
858  $ work, 1, ivrow, ivcol, -1, iccol2,
859  $ work( ipw ) )
860 *
861 * Perform the local computation within a process column
862 *
863  IF( mycol.EQ.ivcol ) THEN
864 *
865  CALL dgebs2d( ictxt, 'Rowwise', ' ', 1, 1, tau( jjv ),
866  $ 1 )
867  tauloc = tau( jjv )
868 *
869  ELSE
870 *
871  CALL dgebr2d( ictxt, 'Rowwise', ' ', 1, 1, tauloc, 1,
872  $ myrow, ivcol )
873 *
874  END IF
875 *
876  IF( tauloc.NE.zero ) THEN
877 *
878 * w := sub( C ) * v
879 *
880  IF( nqv.GT.0 ) THEN
881  CALL dgemv( 'No transpose', mpc2, nqv, one,
882  $ c( ioffc2 ), ldc, work, 1, zero,
883  $ work( ipw ), 1 )
884  ELSE
885  CALL dlaset( 'All', mpc2, 1, zero, zero,
886  $ work( ipw ), max( 1, mpc2 ) )
887  END IF
888  IF( mycol.EQ.iccol1 )
889  $ CALL daxpy( mpc2, one, c( ioffc1 ), 1,
890  $ work( ipw ), 1 )
891  CALL dgsum2d( ictxt, 'Rowwise', ' ', mpc2, 1,
892  $ work( ipw ), max( 1, mpc2 ), rdest,
893  $ iccol2 )
894  IF( mycol.EQ.iccol1 )
895  $ CALL daxpy( mpc2, -tauloc, work( ipw ), 1,
896  $ c( ioffc1 ), 1 )
897 *
898 * sub( C ) := sub( C ) - w * v'
899 *
900  CALL dger( mpc2, nqv, -tauloc, work( ipw ), 1, work,
901  $ 1, c( ioffc2 ), ldc )
902  END IF
903 *
904  END IF
905 *
906  END IF
907 *
908  END IF
909 *
910  RETURN
911 *
912 * End of PDLARZ
913 *
914  END
max
#define max(A, B)
Definition: pcgemr.c:180
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pdlarz
subroutine pdlarz(SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C, IC, JC, DESCC, WORK)
Definition: pdlarz.f:3
pbdtrnv
subroutine pbdtrnv(ICONTXT, XDIST, TRANS, N, NB, NZ, X, INCX, BETA, Y, INCY, IXROW, IXCOL, IYROW, IYCOL, WORK)
Definition: pbdtrnv.f:4
min
#define min(A, B)
Definition: pcgemr.c:181