ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pclarf.f
Go to the documentation of this file.
1  SUBROUTINE pclarf( SIDE, M, N, V, IV, JV, DESCV, INCV, TAU,
2  $ C, IC, JC, DESCC, WORK )
3 *
4 * -- ScaLAPACK auxiliary routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 25, 2001
8 *
9 * .. Scalar Arguments ..
10  CHARACTER SIDE
11  INTEGER IC, INCV, IV, JC, JV, M, N
12 * ..
13 * .. Array Arguments ..
14  INTEGER DESCC( * ), DESCV( * )
15  COMPLEX C( * ), TAU( * ), V( * ), WORK( * )
16 * ..
17 *
18 * Purpose
19 * =======
20 *
21 * PCLARF applies a complex elementary reflector Q to a complex M-by-N
22 * distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), from either the
23 * left or the right. Q is represented in the form
24 *
25 * Q = I - tau * v * v'
26 *
27 * where tau is a complex scalar and v is a complex vector.
28 *
29 * If tau = 0, then Q is taken to be the unit matrix.
30 *
31 * Notes
32 * =====
33 *
34 * Each global data object is described by an associated description
35 * vector. This vector stores the information required to establish
36 * the mapping between an object element and its corresponding process
37 * and memory location.
38 *
39 * Let A be a generic term for any 2D block cyclicly distributed array.
40 * Such a global array has an associated description vector DESCA.
41 * In the following comments, the character _ should be read as
42 * "of the global array".
43 *
44 * NOTATION STORED IN EXPLANATION
45 * --------------- -------------- --------------------------------------
46 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
47 * DTYPE_A = 1.
48 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
49 * the BLACS process grid A is distribu-
50 * ted over. The context itself is glo-
51 * bal, but the handle (the integer
52 * value) may vary.
53 * M_A (global) DESCA( M_ ) The number of rows in the global
54 * array A.
55 * N_A (global) DESCA( N_ ) The number of columns in the global
56 * array A.
57 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
58 * the rows of the array.
59 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
60 * the columns of the array.
61 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
62 * row of the array A is distributed.
63 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
64 * first column of the array A is
65 * distributed.
66 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
67 * array. LLD_A >= MAX(1,LOCr(M_A)).
68 *
69 * Let K be the number of rows or columns of a distributed matrix,
70 * and assume that its process grid has dimension p x q.
71 * LOCr( K ) denotes the number of elements of K that a process
72 * would receive if K were distributed over the p processes of its
73 * process column.
74 * Similarly, LOCc( K ) denotes the number of elements of K that a
75 * process would receive if K were distributed over the q processes of
76 * its process row.
77 * The values of LOCr() and LOCc() may be determined via a call to the
78 * ScaLAPACK tool function, NUMROC:
79 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
80 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
81 * An upper bound for these quantities may be computed by:
82 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
83 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
84 *
85 * Because vectors may be viewed as a subclass of matrices, a
86 * distributed vector is considered to be a distributed matrix.
87 *
88 * Restrictions
89 * ============
90 *
91 * If SIDE = 'Left' and INCV = 1, then the row process having the first
92 * entry V(IV,JV) must also have the first row of sub( C ). Moreover,
93 * MOD(IV-1,MB_V) must be equal to MOD(IC-1,MB_C), if INCV=M_V, only
94 * the last equality must be satisfied.
95 *
96 * If SIDE = 'Right' and INCV = M_V then the column process having the
97 * first entry V(IV,JV) must also have the first column of sub( C ) and
98 * MOD(JV-1,NB_V) must be equal to MOD(JC-1,NB_C), if INCV = 1 only the
99 * last equality must be satisfied.
100 *
101 * Arguments
102 * =========
103 *
104 * SIDE (global input) CHARACTER
105 * = 'L': form Q * sub( C ),
106 * = 'R': form sub( C ) * Q.
107 *
108 * M (global input) INTEGER
109 * The number of rows to be operated on i.e the number of rows
110 * of the distributed submatrix sub( C ). M >= 0.
111 *
112 * N (global input) INTEGER
113 * The number of columns to be operated on i.e the number of
114 * columns of the distributed submatrix sub( C ). N >= 0.
115 *
116 * V (local input) COMPLEX pointer into the local memory
117 * to an array of dimension (LLD_V,*) containing the local
118 * pieces of the distributed vectors V representing the
119 * Householder transformation Q,
120 * V(IV:IV+M-1,JV) if SIDE = 'L' and INCV = 1,
121 * V(IV,JV:JV+M-1) if SIDE = 'L' and INCV = M_V,
122 * V(IV:IV+N-1,JV) if SIDE = 'R' and INCV = 1,
123 * V(IV,JV:JV+N-1) if SIDE = 'R' and INCV = M_V,
124 *
125 * The vector v in the representation of Q. V is not used if
126 * TAU = 0.
127 *
128 * IV (global input) INTEGER
129 * The row index in the global array V indicating the first
130 * row of sub( V ).
131 *
132 * JV (global input) INTEGER
133 * The column index in the global array V indicating the
134 * first column of sub( V ).
135 *
136 * DESCV (global and local input) INTEGER array of dimension DLEN_.
137 * The array descriptor for the distributed matrix V.
138 *
139 * INCV (global input) INTEGER
140 * The global increment for the elements of V. Only two values
141 * of INCV are supported in this version, namely 1 and M_V.
142 * INCV must not be zero.
143 *
144 * TAU (local input) COMPLEX, array, dimension LOCc(JV) if
145 * INCV = 1, and LOCr(IV) otherwise. This array contains the
146 * Householder scalars related to the Householder vectors.
147 * TAU is tied to the distributed matrix V.
148 *
149 * C (local input/local output) COMPLEX pointer into the
150 * local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ),
151 * containing the local pieces of sub( C ). On exit, sub( C )
152 * is overwritten by the Q * sub( C ) if SIDE = 'L', or
153 * sub( C ) * Q if SIDE = 'R'.
154 *
155 * IC (global input) INTEGER
156 * The row index in the global array C indicating the first
157 * row of sub( C ).
158 *
159 * JC (global input) INTEGER
160 * The column index in the global array C indicating the
161 * first column of sub( C ).
162 *
163 * DESCC (global and local input) INTEGER array of dimension DLEN_.
164 * The array descriptor for the distributed matrix C.
165 *
166 * WORK (local workspace) COMPLEX array, dimension (LWORK)
167 * If INCV = 1,
168 * if SIDE = 'L',
169 * if IVCOL = ICCOL,
170 * LWORK >= NqC0
171 * else
172 * LWORK >= MpC0 + MAX( 1, NqC0 )
173 * end if
174 * else if SIDE = 'R',
175 * LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC(
176 * N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) )
177 * end if
178 * else if INCV = M_V,
179 * if SIDE = 'L',
180 * LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC(
181 * M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) )
182 * else if SIDE = 'R',
183 * if IVROW = ICROW,
184 * LWORK >= MpC0
185 * else
186 * LWORK >= NqC0 + MAX( 1, MpC0 )
187 * end if
188 * end if
189 * end if
190 *
191 * where LCM is the least common multiple of NPROW and NPCOL and
192 * LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW,
193 * LCMQ = LCM / NPCOL,
194 *
195 * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
196 * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
197 * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
198 * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
199 * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
200 *
201 * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
202 * MYROW, MYCOL, NPROW and NPCOL can be determined by calling
203 * the subroutine BLACS_GRIDINFO.
204 *
205 * Alignment requirements
206 * ======================
207 *
208 * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
209 * must verify some alignment properties, namely the following
210 * expressions should be true:
211 *
212 * MB_V = NB_V,
213 *
214 * If INCV = 1,
215 * If SIDE = 'Left',
216 * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
217 * If SIDE = 'Right',
218 * ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
219 * else if INCV = M_V,
220 * If SIDE = 'Left',
221 * ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
222 * If SIDE = 'Right',
223 * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
224 * end if
225 *
226 * =====================================================================
227 *
228 * .. Parameters ..
229  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
230  $ lld_, mb_, m_, nb_, n_, rsrc_
231  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
232  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
233  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
234  COMPLEX ONE, ZERO
235  parameter( one = ( 1.0e+0, 0.0e+0 ),
236  $ zero = ( 0.0e+0, 0.0e+0 ) )
237 * ..
238 * .. Local Scalars ..
239  LOGICAL CCBLCK, CRBLCK
240  CHARACTER COLBTOP, ROWBTOP
241  INTEGER ICCOL, ICOFF, ICROW, ICTXT, IIC, IIV, IOFFC,
242  $ ioffv, ipw, iroff, ivcol, ivrow, jjc, jjv, ldc,
243  $ ldv, mycol, myrow, mp, ncc, ncv, npcol, nprow,
244  $ nq, rdest
245  COMPLEX TAULOC
246 * ..
247 * .. External Subroutines ..
248  EXTERNAL blacs_gridinfo, ccopy, cgebr2d, cgebs2d,
249  $ cgemv, cgerc, cgerv2d, cgesd2d,
250  $ cgsum2d, claset, infog2l, pb_topget,
251  $ pbctrnv
252 * ..
253 * .. External Functions ..
254  LOGICAL LSAME
255  INTEGER NUMROC
256  EXTERNAL lsame, numroc
257 * ..
258 * .. Intrinsic Functions ..
259  INTRINSIC min, mod
260 * ..
261 * .. Executable Statements ..
262 *
263 * Quick return if possible
264 *
265  IF( m.LE.0 .OR. n.LE.0 )
266  $ RETURN
267 *
268 * Get grid parameters.
269 *
270  ictxt = descc( ctxt_ )
271  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
272 *
273 * Figure local indexes
274 *
275  CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol, iic, jjc,
276  $ icrow, iccol )
277  CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol, iiv, jjv,
278  $ ivrow, ivcol )
279  ncc = numroc( descc( n_ ), descc( nb_ ), mycol, descc( csrc_ ),
280  $ npcol )
281  ncv = numroc( descv( n_ ), descv( nb_ ), mycol, descv( csrc_ ),
282  $ npcol )
283  ldc = descc( lld_ )
284  ldv = descv( lld_ )
285  iic = min( iic, ldc )
286  iiv = min( iiv, ldv )
287  jjc = min( jjc, ncc )
288  jjv = min( jjv, ncv )
289  ioffc = iic+(jjc-1)*ldc
290  ioffv = iiv+(jjv-1)*ldv
291 *
292  iroff = mod( ic-1, descc( mb_ ) )
293  icoff = mod( jc-1, descc( nb_ ) )
294  mp = numroc( m+iroff, descc( mb_ ), myrow, icrow, nprow )
295  nq = numroc( n+icoff, descc( nb_ ), mycol, iccol, npcol )
296  IF( myrow.EQ.icrow )
297  $ mp = mp - iroff
298  IF( mycol.EQ.iccol )
299  $ nq = nq - icoff
300 *
301 * Is sub( C ) only distributed over a process row ?
302 *
303  crblck = ( m.LE.(descc( mb_ )-iroff) )
304 *
305 * Is sub( C ) only distributed over a process column ?
306 *
307  ccblck = ( n.LE.(descc( nb_ )-icoff) )
308 *
309  IF( lsame( side, 'L' ) ) THEN
310 *
311  IF( crblck ) THEN
312  rdest = icrow
313  ELSE
314  rdest = -1
315  END IF
316 *
317  IF( ccblck ) THEN
318 *
319 * sub( C ) is distributed over a process column
320 *
321  IF( descv( m_ ).EQ.incv ) THEN
322 *
323 * Transpose row vector V
324 *
325  ipw = mp+1
326  CALL pbctrnv( ictxt, 'Rowwise', 'Transpose', m,
327  $ descv( nb_ ), iroff, v( ioffv ), ldv, zero,
328  $ work, 1, ivrow, ivcol, icrow, iccol,
329  $ work( ipw ) )
330 *
331 * Perform the local computation within a process column
332 *
333  IF( mycol.EQ.iccol ) THEN
334 *
335  IF( myrow.EQ.ivrow ) THEN
336 *
337  CALL cgebs2d( ictxt, 'Columnwise', ' ', 1, 1,
338  $ tau( iiv ), 1 )
339  tauloc = tau( iiv )
340 *
341  ELSE
342 *
343  CALL cgebr2d( ictxt, 'Columnwise', ' ', 1, 1,
344  $ tauloc, 1, ivrow, mycol )
345 *
346  END IF
347 *
348  IF( tauloc.NE.zero ) THEN
349 *
350 * w := sub( C )' * v
351 *
352  IF( mp.GT.0 ) THEN
353  CALL cgemv( 'Conjugate transpose', mp, nq, one,
354  $ c( ioffc ), ldc, work, 1, zero,
355  $ work( ipw ), 1 )
356  ELSE
357  CALL claset( 'All', nq, 1, zero, zero,
358  $ work( ipw ), max( 1, nq ) )
359  END IF
360  CALL cgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
361  $ work( ipw ), max( 1, nq ), rdest,
362  $ mycol )
363 *
364 * sub( C ) := sub( C ) - v * w'
365 *
366  CALL cgerc( mp, nq, -tauloc, work, 1, work( ipw ),
367  $ 1, c( ioffc ), ldc )
368  END IF
369 *
370  END IF
371 *
372  ELSE
373 *
374 * V is a column vector
375 *
376  IF( ivcol.EQ.iccol ) THEN
377 *
378 * Perform the local computation within a process column
379 *
380  IF( mycol.EQ.iccol ) THEN
381 *
382  tauloc = tau( jjv )
383 *
384  IF( tauloc.NE.zero ) THEN
385 *
386 * w := sub( C )' * v
387 *
388  IF( mp.GT.0 ) THEN
389  CALL cgemv( 'Conjugate transpose', mp, nq,
390  $ one, c( ioffc ), ldc, v( ioffv ), 1,
391  $ zero, work, 1 )
392  ELSE
393  CALL claset( 'All', nq, 1, zero, zero,
394  $ work, max( 1, nq ) )
395  END IF
396  CALL cgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
397  $ work, max( 1, nq ), rdest, mycol )
398 *
399 * sub( C ) := sub( C ) - v * w'
400 *
401  CALL cgerc( mp, nq, -tauloc, v( ioffv ), 1,
402  $ work, 1, c( ioffc ), ldc )
403  END IF
404 *
405  END IF
406 *
407  ELSE
408 *
409 * Send V and TAU to the process column ICCOL
410 *
411  IF( mycol.EQ.ivcol ) THEN
412 *
413  ipw = mp+1
414  CALL ccopy( mp, v( ioffv ), 1, work, 1 )
415  work( ipw ) = tau( jjv )
416  CALL cgesd2d( ictxt, ipw, 1, work, ipw, myrow,
417  $ iccol )
418 *
419  ELSE IF( mycol.EQ.iccol ) THEN
420 *
421  ipw = mp+1
422  CALL cgerv2d( ictxt, ipw, 1, work, ipw, myrow,
423  $ ivcol )
424  tauloc = work( ipw )
425 *
426  IF( tauloc.NE.zero ) THEN
427 *
428 * w := sub( C )' * v
429 *
430  IF( mp.GT.0 ) THEN
431  CALL cgemv( 'Conjugate transpose', mp, nq,
432  $ one, c( ioffc ), ldc, work, 1,
433  $ zero, work( ipw ), 1 )
434  ELSE
435  CALL claset( 'All', nq, 1, zero, zero,
436  $ work( ipw ), max( 1, nq ) )
437  END IF
438  CALL cgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
439  $ work( ipw ), max( 1, nq ), rdest,
440  $ mycol )
441 *
442 * sub( C ) := sub( C ) - v * w'
443 *
444  CALL cgerc( mp, nq, -tauloc, work, 1,
445  $ work( ipw ), 1, c( ioffc ), ldc )
446  END IF
447 *
448  END IF
449 *
450  END IF
451 *
452  END IF
453 *
454  ELSE
455 *
456 * sub( C ) is a proper distributed matrix
457 *
458  IF( descv( m_ ).EQ.incv ) THEN
459 *
460 * Transpose and broadcast row vector V
461 *
462  ipw = mp+1
463  CALL pbctrnv( ictxt, 'Rowwise', 'Transpose', m,
464  $ descv( nb_ ), iroff, v( ioffv ), ldv, zero,
465  $ work, 1, ivrow, ivcol, icrow, -1,
466  $ work( ipw ) )
467 *
468 * Perform the local computation within a process column
469 *
470  IF( myrow.EQ.ivrow ) THEN
471 *
472  CALL cgebs2d( ictxt, 'Columnwise', ' ', 1, 1,
473  $ tau( iiv ), 1 )
474  tauloc = tau( iiv )
475 *
476  ELSE
477 *
478  CALL cgebr2d( ictxt, 'Columnwise', ' ', 1, 1, tauloc,
479  $ 1, ivrow, mycol )
480 *
481  END IF
482 *
483  IF( tauloc.NE.zero ) THEN
484 *
485 * w := sub( C )' * v
486 *
487  IF( mp.GT.0 ) THEN
488  IF( ioffc.GT.0 )
489  $ CALL cgemv( 'Conjugate transpose', mp, nq, one,
490  $ c( ioffc ), ldc, work, 1, zero,
491  $ work( ipw ), 1 )
492  ELSE
493  CALL claset( 'All', nq, 1, zero, zero,
494  $ work( ipw ), max( 1, nq ) )
495  END IF
496  CALL cgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
497  $ work( ipw ), max( 1, nq ), rdest,
498  $ mycol )
499 *
500 * sub( C ) := sub( C ) - v * w'
501 *
502  IF( ioffc.GT.0 )
503  $ CALL cgerc( mp, nq, -tauloc, work, 1, work( ipw ),
504  $ 1, c( ioffc ), ldc )
505  END IF
506 *
507  ELSE
508 *
509 * Broadcast column vector V
510 *
511  CALL pb_topget( ictxt, 'Broadcast', 'Rowwise', rowbtop )
512  IF( mycol.EQ.ivcol ) THEN
513 *
514  ipw = mp+1
515  CALL ccopy( mp, v( ioffv ), 1, work, 1 )
516  work(ipw) = tau( jjv )
517  CALL cgebs2d( ictxt, 'Rowwise', rowbtop, ipw, 1,
518  $ work, ipw )
519  tauloc = tau( jjv )
520 *
521  ELSE
522 *
523  ipw = mp+1
524  CALL cgebr2d( ictxt, 'Rowwise', rowbtop, ipw, 1, work,
525  $ ipw, myrow, ivcol )
526  tauloc = work( ipw )
527 *
528  END IF
529 *
530  IF( tauloc.NE.zero ) THEN
531 *
532 * w := sub( C )' * v
533 *
534  IF( mp.GT.0 ) THEN
535  IF( ioffc.GT.0 )
536  $ CALL cgemv( 'Conjugate transpose', mp, nq, one,
537  $ c( ioffc ), ldc, work, 1, zero,
538  $ work( ipw ), 1 )
539  ELSE
540  CALL claset( 'All', nq, 1, zero, zero,
541  $ work( ipw ), max( 1, nq ) )
542  END IF
543  CALL cgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
544  $ work( ipw ), max( 1, nq ), rdest,
545  $ mycol )
546 *
547 * sub( C ) := sub( C ) - v * w'
548 *
549  IF( ioffc.GT.0 )
550  $ CALL cgerc( mp, nq, -tauloc, work, 1, work( ipw ),
551  $ 1, c( ioffc ), ldc )
552  END IF
553 *
554  END IF
555 *
556  END IF
557 *
558  ELSE
559 *
560  IF( ccblck ) THEN
561  rdest = myrow
562  ELSE
563  rdest = -1
564  END IF
565 *
566  IF( crblck ) THEN
567 *
568 * sub( C ) is distributed over a process row
569 *
570  IF( descv( m_ ).EQ.incv ) THEN
571 *
572 * V is a row vector
573 *
574  IF( ivrow.EQ.icrow ) THEN
575 *
576 * Perform the local computation within a process row
577 *
578  IF( myrow.EQ.icrow ) THEN
579 *
580  tauloc = tau( iiv )
581 *
582  IF( tauloc.NE.zero ) THEN
583 *
584 * w := sub( C ) * v
585 *
586  IF( nq.GT.0 ) THEN
587  CALL cgemv( 'No transpose', mp, nq, one,
588  $ c( ioffc ), ldc, v( ioffv ), ldv,
589  $ zero, work, 1 )
590  ELSE
591  CALL claset( 'All', mp, 1, zero, zero,
592  $ work, max( 1, mp ) )
593  END IF
594  CALL cgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
595  $ work, max( 1, mp ), rdest, iccol )
596 *
597 * sub( C ) := sub( C ) - w * v'
598 *
599  IF( ioffv.GT.0 .AND. ioffc.GT.0 )
600  $ CALL cgerc( mp, nq, -tauloc, work, 1,
601  $ v( ioffv ), ldv, c( ioffc ),
602  $ ldc )
603  END IF
604 *
605  END IF
606 *
607  ELSE
608 *
609 * Send V and TAU to the process row ICROW
610 *
611  IF( myrow.EQ.ivrow ) THEN
612 *
613  ipw = nq+1
614  CALL ccopy( nq, v( ioffv ), ldv, work, 1 )
615  work(ipw) = tau( iiv )
616  CALL cgesd2d( ictxt, ipw, 1, work, ipw, icrow,
617  $ mycol )
618 *
619  ELSE IF( myrow.EQ.icrow ) THEN
620 *
621  ipw = nq+1
622  CALL cgerv2d( ictxt, ipw, 1, work, ipw, ivrow,
623  $ mycol )
624  tauloc = work( ipw )
625 *
626  IF( tauloc.NE.zero ) THEN
627 *
628 * w := sub( C ) * v
629 *
630  IF( nq.GT.0 ) THEN
631  CALL cgemv( 'No transpose', mp, nq, one,
632  $ c( ioffc ), ldc, work, 1, zero,
633  $ work( ipw ), 1 )
634  ELSE
635  CALL claset( 'All', mp, 1, zero, zero,
636  $ work( ipw ), max( 1, mp ) )
637  END IF
638  CALL cgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
639  $ work( ipw ), max( 1, mp ), rdest,
640  $ iccol )
641 *
642 * sub( C ) := sub( C ) - w * v'
643 *
644  CALL cgerc( mp, nq, -tauloc, work( ipw ), 1,
645  $ work, 1, c( ioffc ), ldc )
646  END IF
647 *
648  END IF
649 *
650  END IF
651 *
652  ELSE
653 *
654 * Transpose column vector V
655 *
656  ipw = nq+1
657  CALL pbctrnv( ictxt, 'Columnwise', 'Transpose', n,
658  $ descv( mb_ ), icoff, v( ioffv ), 1, zero,
659  $ work, 1, ivrow, ivcol, icrow, iccol,
660  $ work( ipw ) )
661 *
662 * Perform the local computation within a process column
663 *
664  IF( myrow.EQ.icrow ) THEN
665 *
666  IF( mycol.EQ.ivcol ) THEN
667 *
668  CALL cgebs2d( ictxt, 'Rowwise', ' ', 1, 1,
669  $ tau( jjv ), 1 )
670  tauloc = tau( jjv )
671 *
672  ELSE
673 *
674  CALL cgebr2d( ictxt, 'Rowwise', ' ', 1, 1, tauloc,
675  $ 1, myrow, ivcol )
676 *
677  END IF
678 *
679  IF( tauloc.NE.zero ) THEN
680 *
681 * w := sub( C ) * v
682 *
683  IF( nq.GT.0 ) THEN
684  CALL cgemv( 'No transpose', mp, nq, one,
685  $ c( ioffc ), ldc, work, 1, zero,
686  $ work( ipw ), 1 )
687  ELSE
688  CALL claset( 'All', mp, 1, zero, zero,
689  $ work( ipw ), max( 1, mp ) )
690  END IF
691  CALL cgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
692  $ work( ipw ), max( 1, mp ), rdest,
693  $ iccol )
694 *
695 * sub( C ) := sub( C ) - w * v'
696 *
697  CALL cgerc( mp, nq, -tauloc, work( ipw ), 1, work,
698  $ 1, c( ioffc ), ldc )
699  END IF
700 *
701  END IF
702 *
703  END IF
704 *
705  ELSE
706 *
707 * sub( C ) is a proper distributed matrix
708 *
709  IF( descv( m_ ).EQ.incv ) THEN
710 *
711 * Broadcast row vector V
712 *
713  CALL pb_topget( ictxt, 'Broadcast', 'Columnwise',
714  $ colbtop )
715  IF( myrow.EQ.ivrow ) THEN
716 *
717  ipw = nq+1
718  IF( ioffv.GT.0 )
719  $ CALL ccopy( nq, v( ioffv ), ldv, work, 1 )
720  work(ipw) = tau( iiv )
721  CALL cgebs2d( ictxt, 'Columnwise', colbtop, ipw, 1,
722  $ work, ipw )
723  tauloc = tau( iiv )
724 *
725  ELSE
726 *
727  ipw = nq+1
728  CALL cgebr2d( ictxt, 'Columnwise', colbtop, ipw, 1,
729  $ work, ipw, ivrow, mycol )
730  tauloc = work( ipw )
731 *
732  END IF
733 *
734  IF( tauloc.NE.zero ) THEN
735 *
736 * w := sub( C ) * v
737 *
738  IF( nq.GT.0 ) THEN
739  CALL cgemv( 'No Transpose', mp, nq, one,
740  $ c( ioffc ), ldc, work, 1, zero,
741  $ work( ipw ), 1 )
742  ELSE
743  CALL claset( 'All', mp, 1, zero, zero,
744  $ work( ipw ), max( 1, mp ) )
745  END IF
746  CALL cgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
747  $ work( ipw ), max( 1, mp ), rdest,
748  $ iccol )
749 *
750 * sub( C ) := sub( C ) - w * v'
751 *
752  IF( ioffc.GT.0 )
753  $ CALL cgerc( mp, nq, -tauloc, work( ipw ), 1, work,
754  $ 1, c( ioffc ), ldc )
755  END IF
756 *
757  ELSE
758 *
759 * Transpose and broadcast column vector V
760 *
761  ipw = nq+1
762  CALL pbctrnv( ictxt, 'Columnwise', 'Transpose', n,
763  $ descv( mb_ ), icoff, v( ioffv ), 1, zero,
764  $ work, 1, ivrow, ivcol, -1, iccol,
765  $ work( ipw ) )
766 *
767 * Perform the local computation within a process column
768 *
769  IF( mycol.EQ.ivcol ) THEN
770 *
771  CALL cgebs2d( ictxt, 'Rowwise', ' ', 1, 1, tau( jjv ),
772  $ 1 )
773  tauloc = tau( jjv )
774 *
775  ELSE
776 *
777  CALL cgebr2d( ictxt, 'Rowwise', ' ', 1, 1, tauloc, 1,
778  $ myrow, ivcol )
779 *
780  END IF
781 *
782  IF( tauloc.NE.zero ) THEN
783 *
784 * w := sub( C ) * v
785 *
786  IF( nq.GT.0 ) THEN
787  CALL cgemv( 'No transpose', mp, nq, one,
788  $ c( ioffc ), ldc, work, 1, zero,
789  $ work( ipw ), 1 )
790  ELSE
791  CALL claset( 'All', mp, 1, zero, zero, work( ipw ),
792  $ max( 1, mp ) )
793  END IF
794  CALL cgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
795  $ work( ipw ), max( 1, mp ), rdest,
796  $ iccol )
797 *
798 * sub( C ) := sub( C ) - w * v'
799 *
800  CALL cgerc( mp, nq, -tauloc, work( ipw ), 1, work, 1,
801  $ c( ioffc ), ldc )
802  END IF
803 *
804  END IF
805 *
806  END IF
807 *
808  END IF
809 *
810  RETURN
811 *
812 * End of PCLARF
813 *
814  END
max
#define max(A, B)
Definition: pcgemr.c:180
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pbctrnv
subroutine pbctrnv(ICONTXT, XDIST, TRANS, N, NB, NZ, X, INCX, BETA, Y, INCY, IXROW, IXCOL, IYROW, IYCOL, WORK)
Definition: pbctrnv.f:4
pclarf
subroutine pclarf(SIDE, M, N, V, IV, JV, DESCV, INCV, TAU, C, IC, JC, DESCC, WORK)
Definition: pclarf.f:3
min
#define min(A, B)
Definition: pcgemr.c:181