ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pclarfc.f
Go to the documentation of this file.
1  SUBROUTINE pclarfc( SIDE, M, N, V, IV, JV, DESCV, INCV, TAU,
2  $ C, IC, JC, DESCC, WORK )
3 *
4 * -- ScaLAPACK auxiliary routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 25, 2001
8 *
9 * .. Scalar Arguments ..
10  CHARACTER SIDE
11  INTEGER IC, INCV, IV, JC, JV, M, N
12 * ..
13 * .. Array Arguments ..
14  INTEGER DESCC( * ), DESCV( * )
15  COMPLEX C( * ), TAU( * ), V( * ), WORK( * )
16 * ..
17 *
18 * Purpose
19 * =======
20 *
21 * PCLARFC applies a complex elementary reflector Q**H to a
22 * complex M-by-N distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1),
23 * from either the left or the right. Q is represented in the form
24 *
25 * Q = I - tau * v * v'
26 *
27 * where tau is a complex scalar and v is a complex vector.
28 *
29 * If tau = 0, then Q is taken to be the unit matrix.
30 *
31 * Notes
32 * =====
33 *
34 * Each global data object is described by an associated description
35 * vector. This vector stores the information required to establish
36 * the mapping between an object element and its corresponding process
37 * and memory location.
38 *
39 * Let A be a generic term for any 2D block cyclicly distributed array.
40 * Such a global array has an associated description vector DESCA.
41 * In the following comments, the character _ should be read as
42 * "of the global array".
43 *
44 * NOTATION STORED IN EXPLANATION
45 * --------------- -------------- --------------------------------------
46 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
47 * DTYPE_A = 1.
48 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
49 * the BLACS process grid A is distribu-
50 * ted over. The context itself is glo-
51 * bal, but the handle (the integer
52 * value) may vary.
53 * M_A (global) DESCA( M_ ) The number of rows in the global
54 * array A.
55 * N_A (global) DESCA( N_ ) The number of columns in the global
56 * array A.
57 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
58 * the rows of the array.
59 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
60 * the columns of the array.
61 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
62 * row of the array A is distributed.
63 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
64 * first column of the array A is
65 * distributed.
66 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
67 * array. LLD_A >= MAX(1,LOCr(M_A)).
68 *
69 * Let K be the number of rows or columns of a distributed matrix,
70 * and assume that its process grid has dimension p x q.
71 * LOCr( K ) denotes the number of elements of K that a process
72 * would receive if K were distributed over the p processes of its
73 * process column.
74 * Similarly, LOCc( K ) denotes the number of elements of K that a
75 * process would receive if K were distributed over the q processes of
76 * its process row.
77 * The values of LOCr() and LOCc() may be determined via a call to the
78 * ScaLAPACK tool function, NUMROC:
79 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
80 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
81 * An upper bound for these quantities may be computed by:
82 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
83 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
84 *
85 * Because vectors may be viewed as a subclass of matrices, a
86 * distributed vector is considered to be a distributed matrix.
87 *
88 * Restrictions
89 * ============
90 *
91 * If SIDE = 'Left' and INCV = 1, then the row process having the first
92 * entry V(IV,JV) must also have the first row of sub( C ). Moreover,
93 * MOD(IV-1,MB_V) must be equal to MOD(IC-1,MB_C), if INCV=M_V, only
94 * the last equality must be satisfied.
95 *
96 * If SIDE = 'Right' and INCV = M_V then the column process having the
97 * first entry V(IV,JV) must also have the first column of sub( C ) and
98 * MOD(JV-1,NB_V) must be equal to MOD(JC-1,NB_C), if INCV = 1 only the
99 * last equality must be satisfied.
100 *
101 * Arguments
102 * =========
103 *
104 * SIDE (global input) CHARACTER
105 * = 'L': form Q**H * sub( C ),
106 * = 'R': form sub( C ) * Q**H.
107 *
108 * M (global input) INTEGER
109 * The number of rows to be operated on i.e the number of rows
110 * of the distributed submatrix sub( C ). M >= 0.
111 *
112 * N (global input) INTEGER
113 * The number of columns to be operated on i.e the number of
114 * columns of the distributed submatrix sub( C ). N >= 0.
115 *
116 * V (local input) COMPLEX pointer into the local memory
117 * to an array of dimension (LLD_V,*) containing the local
118 * pieces of the distributed vectors V representing the
119 * Householder transformation Q,
120 * V(IV:IV+M-1,JV) if SIDE = 'L' and INCV = 1,
121 * V(IV,JV:JV+M-1) if SIDE = 'L' and INCV = M_V,
122 * V(IV:IV+N-1,JV) if SIDE = 'R' and INCV = 1,
123 * V(IV,JV:JV+N-1) if SIDE = 'R' and INCV = M_V,
124 *
125 * The vector v in the representation of Q. V is not used if
126 * TAU = 0.
127 *
128 * IV (global input) INTEGER
129 * The row index in the global array V indicating the first
130 * row of sub( V ).
131 *
132 * JV (global input) INTEGER
133 * The column index in the global array V indicating the
134 * first column of sub( V ).
135 *
136 * DESCV (global and local input) INTEGER array of dimension DLEN_.
137 * The array descriptor for the distributed matrix V.
138 *
139 * INCV (global input) INTEGER
140 * The global increment for the elements of V. Only two values
141 * of INCV are supported in this version, namely 1 and M_V.
142 * INCV must not be zero.
143 *
144 * TAU (local input) COMPLEX, array, dimension LOCc(JV) if
145 * INCV = 1, and LOCr(IV) otherwise. This array contains the
146 * Householder scalars related to the Householder vectors.
147 * TAU is tied to the distributed matrix V.
148 *
149 * C (local input/local output) COMPLEX pointer into the
150 * local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ),
151 * containing the local pieces of sub( C ). On exit, sub( C )
152 * is overwritten by the Q**H * sub( C ) if SIDE = 'L', or
153 * sub( C ) * Q**H if SIDE = 'R'.
154 *
155 * IC (global input) INTEGER
156 * The row index in the global array C indicating the first
157 * row of sub( C ).
158 *
159 * JC (global input) INTEGER
160 * The column index in the global array C indicating the
161 * first column of sub( C ).
162 *
163 * DESCC (global and local input) INTEGER array of dimension DLEN_.
164 * The array descriptor for the distributed matrix C.
165 *
166 * WORK (local workspace) COMPLEX array, dimension (LWORK)
167 * If INCV = 1,
168 * if SIDE = 'L',
169 * if IVCOL = ICCOL,
170 * LWORK >= NqC0
171 * else
172 * LWORK >= MpC0 + MAX( 1, NqC0 )
173 * end if
174 * else if SIDE = 'R',
175 * LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC(
176 * N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) )
177 * end if
178 * else if INCV = M_V,
179 * if SIDE = 'L',
180 * LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC(
181 * M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) )
182 * else if SIDE = 'R',
183 * if IVROW = ICROW,
184 * LWORK >= MpC0
185 * else
186 * LWORK >= NqC0 + MAX( 1, MpC0 )
187 * end if
188 * end if
189 * end if
190 *
191 * where LCM is the least common multiple of NPROW and NPCOL and
192 * LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW,
193 * LCMQ = LCM / NPCOL,
194 *
195 * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
196 * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
197 * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
198 * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
199 * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
200 *
201 * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
202 * MYROW, MYCOL, NPROW and NPCOL can be determined by calling
203 * the subroutine BLACS_GRIDINFO.
204 *
205 * Alignment requirements
206 * ======================
207 *
208 * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
209 * must verify some alignment properties, namely the following
210 * expressions should be true:
211 *
212 * MB_V = NB_V,
213 *
214 * If INCV = 1,
215 * If SIDE = 'Left',
216 * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
217 * If SIDE = 'Right',
218 * ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
219 * else if INCV = M_V,
220 * If SIDE = 'Left',
221 * ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
222 * If SIDE = 'Right',
223 * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
224 * end if
225 *
226 * =====================================================================
227 *
228 * .. Parameters ..
229  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
230  $ lld_, mb_, m_, nb_, n_, rsrc_
231  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
232  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
233  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
234  COMPLEX ONE, ZERO
235  parameter( one = ( 1.0e+0, 0.0e+0 ),
236  $ zero = ( 0.0e+0, 0.0e+0 ) )
237 * ..
238 * .. Local Scalars ..
239  LOGICAL CCBLCK, CRBLCK
240  CHARACTER COLBTOP, ROWBTOP
241  INTEGER ICCOL, ICOFF, ICROW, ICTXT, IIC, IIV, IOFFC,
242  $ ioffv, ipw, iroff, ivcol, ivrow, jjc, jjv, ldc,
243  $ ldv, mycol, myrow, mp, ncc, ncv, npcol, nprow,
244  $ nq, rdest
245  COMPLEX TAULOC
246 * ..
247 * .. External Subroutines ..
248  EXTERNAL blacs_gridinfo, ccopy, cgebr2d, cgebs2d,
249  $ cgemv, cgerc, cgerv2d, cgesd2d,
250  $ cgsum2d, claset, infog2l, pb_topget,
251  $ pbctrnv
252 * ..
253 * .. External Functions ..
254  LOGICAL LSAME
255  INTEGER NUMROC
256  EXTERNAL lsame, numroc
257 * ..
258 * .. Intrinsic Functions ..
259  INTRINSIC min, mod
260 * ..
261 * .. Executable Statements ..
262 *
263 * Quick return if possible
264 *
265  IF( m.LE.0 .OR. n.LE.0 )
266  $ RETURN
267 *
268 * Get grid parameters.
269 *
270  ictxt = descc( ctxt_ )
271  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
272 *
273 * Figure local indexes
274 *
275  CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol, iic, jjc,
276  $ icrow, iccol )
277  CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol, iiv, jjv,
278  $ ivrow, ivcol )
279  ncc = numroc( descc( n_ ), descc( nb_ ), mycol, descc( csrc_ ),
280  $ npcol )
281  ncv = numroc( descv( n_ ), descv( nb_ ), mycol, descv( csrc_ ),
282  $ npcol )
283  ldc = descc( lld_ )
284  ldv = descv( lld_ )
285  iic = min( iic, ldc )
286  iiv = min( iiv, ldv )
287  jjc = min( jjc, ncc )
288  jjv = min( jjv, ncv )
289  ioffc = iic+(jjc-1)*ldc
290  ioffv = iiv+(jjv-1)*ldv
291 *
292  iroff = mod( ic-1, descc( mb_ ) )
293  icoff = mod( jc-1, descc( nb_ ) )
294  mp = numroc( m+iroff, descc( mb_ ), myrow, icrow, nprow )
295  nq = numroc( n+icoff, descc( nb_ ), mycol, iccol, npcol )
296  IF( myrow.EQ.icrow )
297  $ mp = mp - iroff
298  IF( mycol.EQ.iccol )
299  $ nq = nq - icoff
300 *
301 * Is sub( C ) only distributed over a process row ?
302 *
303  crblck = ( m.LE.(descc( mb_ )-iroff) )
304 *
305 * Is sub( C ) only distributed over a process column ?
306 *
307  ccblck = ( n.LE.(descc( nb_ )-icoff) )
308 *
309  IF( lsame( side, 'L' ) ) THEN
310 *
311  IF( crblck ) THEN
312  rdest = icrow
313  ELSE
314  rdest = -1
315  END IF
316 *
317  IF( ccblck ) THEN
318 *
319 * sub( C ) is distributed over a process column
320 *
321  IF( descv( m_ ).EQ.incv ) THEN
322 *
323 * Transpose row vector V
324 *
325  ipw = mp+1
326  CALL pbctrnv( ictxt, 'Rowwise', 'Transpose', m,
327  $ descv( nb_ ), iroff, v( ioffv ), ldv, zero,
328  $ work, 1, ivrow, ivcol, icrow, iccol,
329  $ work( ipw ) )
330 *
331 * Perform the local computation within a process column
332 *
333  IF( mycol.EQ.iccol ) THEN
334 *
335  IF( myrow.EQ.ivrow ) THEN
336 *
337  CALL cgebs2d( ictxt, 'Columnwise', ' ', 1, 1,
338  $ tau( iiv ), 1 )
339  tauloc = conjg( tau( iiv ) )
340 *
341  ELSE
342 *
343  CALL cgebr2d( ictxt, 'Columnwise', ' ', 1, 1,
344  $ tauloc, 1, ivrow, mycol )
345  tauloc = conjg( tauloc )
346 *
347  END IF
348 *
349  IF( tauloc.NE.zero ) THEN
350 *
351 * w := sub( C )' * v
352 *
353  IF( mp.GT.0 ) THEN
354  CALL cgemv( 'Conjugate transpose', mp, nq, one,
355  $ c( ioffc ), ldc, work, 1, zero,
356  $ work( ipw ), 1 )
357  ELSE
358  CALL claset( 'All', nq, 1, zero, zero,
359  $ work( ipw ), max( 1, nq ) )
360  END IF
361  CALL cgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
362  $ work( ipw ), max( 1, nq ), rdest,
363  $ mycol )
364 *
365 * sub( C ) := sub( C ) - v * w'
366 *
367  CALL cgerc( mp, nq, -tauloc, work, 1, work( ipw ),
368  $ 1, c( ioffc ), ldc )
369  END IF
370 *
371  END IF
372 *
373  ELSE
374 *
375 * V is a column vector
376 *
377  IF( ivcol.EQ.iccol ) THEN
378 *
379 * Perform the local computation within a process column
380 *
381  IF( mycol.EQ.iccol ) THEN
382 *
383  tauloc = conjg( tau( jjv ) )
384 *
385  IF( tauloc.NE.zero ) THEN
386 *
387 * w := sub( C )' * v
388 *
389  IF( mp.GT.0 ) THEN
390  CALL cgemv( 'Conjugate transpose', mp, nq,
391  $ one, c( ioffc ), ldc, v( ioffv ), 1,
392  $ zero, work, 1 )
393  ELSE
394  CALL claset( 'All', nq, 1, zero, zero,
395  $ work, max( 1, nq ) )
396  END IF
397  CALL cgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
398  $ work, max( 1, nq ), rdest, mycol )
399 *
400 * sub( C ) := sub( C ) - v * w'
401 *
402  CALL cgerc( mp, nq, -tauloc, v( ioffv ), 1,
403  $ work, 1, c( ioffc ), ldc )
404  END IF
405 *
406  END IF
407 *
408  ELSE
409 *
410 * Send V and TAU to the process column ICCOL
411 *
412  IF( mycol.EQ.ivcol ) THEN
413 *
414  ipw = mp+1
415  CALL ccopy( mp, v( ioffv ), 1, work, 1 )
416  work( ipw ) = tau( jjv )
417  CALL cgesd2d( ictxt, ipw, 1, work, ipw, myrow,
418  $ iccol )
419 *
420  ELSE IF( mycol.EQ.iccol ) THEN
421 *
422  ipw = mp+1
423  CALL cgerv2d( ictxt, ipw, 1, work, ipw, myrow,
424  $ ivcol )
425  tauloc = conjg( work( ipw ) )
426 *
427  IF( tauloc.NE.zero ) THEN
428 *
429 * w := sub( C )' * v
430 *
431  IF( mp.GT.0 ) THEN
432  CALL cgemv( 'Conjugate transpose', mp, nq,
433  $ one, c( ioffc ), ldc, work, 1,
434  $ zero, work( ipw ), 1 )
435  ELSE
436  CALL claset( 'All', nq, 1, zero, zero,
437  $ work( ipw ), max( 1, nq ) )
438  END IF
439  CALL cgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
440  $ work( ipw ), max( 1, nq ), rdest,
441  $ mycol )
442 *
443 * sub( C ) := sub( C ) - v * w'
444 *
445  CALL cgerc( mp, nq, -tauloc, work, 1,
446  $ work( ipw ), 1, c( ioffc ), ldc )
447  END IF
448 *
449  END IF
450 *
451  END IF
452 *
453  END IF
454 *
455  ELSE
456 *
457 * sub( C ) is a proper distributed matrix
458 *
459  IF( descv( m_ ).EQ.incv ) THEN
460 *
461 * Transpose and broadcast row vector V
462 *
463  ipw = mp+1
464  CALL pbctrnv( ictxt, 'Rowwise', 'Transpose', m,
465  $ descv( nb_ ), iroff, v( ioffv ), ldv, zero,
466  $ work, 1, ivrow, ivcol, icrow, -1,
467  $ work( ipw ) )
468 *
469 * Perform the local computation within a process column
470 *
471  IF( myrow.EQ.ivrow ) THEN
472 *
473  CALL cgebs2d( ictxt, 'Columnwise', ' ', 1, 1,
474  $ tau( iiv ), 1 )
475  tauloc = conjg( tau( iiv ) )
476 *
477  ELSE
478 *
479  CALL cgebr2d( ictxt, 'Columnwise', ' ', 1, 1, tauloc,
480  $ 1, ivrow, mycol )
481  tauloc = conjg( tauloc )
482 *
483  END IF
484 *
485  IF( tauloc.NE.zero ) THEN
486 *
487 * w := sub( C )' * v
488 *
489  IF( mp.GT.0 ) THEN
490  CALL cgemv( 'Conjugate transpose', mp, nq, one,
491  $ c( ioffc ), ldc, work, 1, zero,
492  $ work( ipw ), 1 )
493  ELSE
494  CALL claset( 'All', nq, 1, zero, zero,
495  $ work( ipw ), max( 1, nq ) )
496  END IF
497  CALL cgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
498  $ work( ipw ), max( 1, nq ), rdest,
499  $ mycol )
500 *
501 * sub( C ) := sub( C ) - v * w'
502 *
503  CALL cgerc( mp, nq, -tauloc, work, 1, work( ipw ), 1,
504  $ c( ioffc ), ldc )
505  END IF
506 *
507  ELSE
508 *
509 * Broadcast column vector V
510 *
511  CALL pb_topget( ictxt, 'Broadcast', 'Rowwise', rowbtop )
512  IF( mycol.EQ.ivcol ) THEN
513 *
514  ipw = mp+1
515  CALL ccopy( mp, v( ioffv ), 1, work, 1 )
516  work(ipw) = tau( jjv )
517  CALL cgebs2d( ictxt, 'Rowwise', rowbtop, ipw, 1,
518  $ work, ipw )
519  tauloc = conjg( tau( jjv ) )
520 *
521  ELSE
522 *
523  ipw = mp+1
524  CALL cgebr2d( ictxt, 'Rowwise', rowbtop, ipw, 1, work,
525  $ ipw, myrow, ivcol )
526  tauloc = conjg( work( ipw ) )
527 *
528  END IF
529 *
530  IF( tauloc.NE.zero ) THEN
531 *
532 * w := sub( C )' * v
533 *
534  IF( mp.GT.0 ) THEN
535  CALL cgemv( 'Conjugate transpose', mp, nq, one,
536  $ c( ioffc ), ldc, work, 1, zero,
537  $ work( ipw ), 1 )
538  ELSE
539  CALL claset( 'All', nq, 1, zero, zero,
540  $ work( ipw ), max( 1, nq ) )
541  END IF
542  CALL cgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
543  $ work( ipw ), max( 1, nq ), rdest,
544  $ mycol )
545 *
546 * sub( C ) := sub( C ) - v * w'
547 *
548  CALL cgerc( mp, nq, -tauloc, work, 1, work( ipw ), 1,
549  $ c( ioffc ), ldc )
550  END IF
551 *
552  END IF
553 *
554  END IF
555 *
556  ELSE
557 *
558  IF( ccblck ) THEN
559  rdest = myrow
560  ELSE
561  rdest = -1
562  END IF
563 *
564  IF( crblck ) THEN
565 *
566 * sub( C ) is distributed over a process row
567 *
568  IF( descv( m_ ).EQ.incv ) THEN
569 *
570 * V is a row vector
571 *
572  IF( ivrow.EQ.icrow ) THEN
573 *
574 * Perform the local computation within a process row
575 *
576  IF( myrow.EQ.icrow ) THEN
577 *
578  tauloc = conjg( tau( iiv ) )
579 *
580  IF( tauloc.NE.zero ) THEN
581 *
582 * w := sub( C ) * v
583 *
584  IF( nq.GT.0 ) THEN
585  CALL cgemv( 'No transpose', mp, nq, one,
586  $ c( ioffc ), ldc, v( ioffv ), ldv,
587  $ zero, work, 1 )
588  ELSE
589  CALL claset( 'All', mp, 1, zero, zero,
590  $ work, max( 1, mp ) )
591  END IF
592  CALL cgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
593  $ work, max( 1, mp ), rdest, iccol )
594 *
595 * sub( C ) := sub( C ) - w * v'
596 *
597  CALL cgerc( mp, nq, -tauloc, work, 1,
598  $ v( ioffv ), ldv, c( ioffc ), ldc )
599  END IF
600 *
601  END IF
602 *
603  ELSE
604 *
605 * Send V and TAU to the process row ICROW
606 *
607  IF( myrow.EQ.ivrow ) THEN
608 *
609  ipw = nq+1
610  CALL ccopy( nq, v( ioffv ), ldv, work, 1 )
611  work(ipw) = tau( iiv )
612  CALL cgesd2d( ictxt, ipw, 1, work, ipw, icrow,
613  $ mycol )
614 *
615  ELSE IF( myrow.EQ.icrow ) THEN
616 *
617  ipw = nq+1
618  CALL cgerv2d( ictxt, ipw, 1, work, ipw, ivrow,
619  $ mycol )
620  tauloc = conjg( work( ipw ) )
621 *
622  IF( tauloc.NE.zero ) THEN
623 *
624 * w := sub( C ) * v
625 *
626  IF( nq.GT.0 ) THEN
627  CALL cgemv( 'No transpose', mp, nq, one,
628  $ c( ioffc ), ldc, work, 1, zero,
629  $ work( ipw ), 1 )
630  ELSE
631  CALL claset( 'All', mp, 1, zero, zero,
632  $ work( ipw ), max( 1, mp ) )
633  END IF
634  CALL cgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
635  $ work( ipw ), max( 1, mp ), rdest,
636  $ iccol )
637 *
638 * sub( C ) := sub( C ) - w * v'
639 *
640  CALL cgerc( mp, nq, -tauloc, work( ipw ), 1,
641  $ work, 1, c( ioffc ), ldc )
642  END IF
643 *
644  END IF
645 *
646  END IF
647 *
648  ELSE
649 *
650 * Transpose column vector V
651 *
652  ipw = nq+1
653  CALL pbctrnv( ictxt, 'Columnwise', 'Transpose', n,
654  $ descv( mb_ ), icoff, v( ioffv ), 1, zero,
655  $ work, 1, ivrow, ivcol, icrow, iccol,
656  $ work( ipw ) )
657 *
658 * Perform the local computation within a process column
659 *
660  IF( myrow.EQ.icrow ) THEN
661 *
662  IF( mycol.EQ.ivcol ) THEN
663 *
664  CALL cgebs2d( ictxt, 'Rowwise', ' ', 1, 1,
665  $ tau( jjv ), 1 )
666  tauloc = conjg( tau( jjv ) )
667 *
668  ELSE
669 *
670  CALL cgebr2d( ictxt, 'Rowwise', ' ', 1, 1, tauloc,
671  $ 1, myrow, ivcol )
672  tauloc = conjg( tauloc )
673 *
674  END IF
675 *
676  IF( tauloc.NE.zero ) THEN
677 *
678 * w := sub( C ) * v
679 *
680  IF( nq.GT.0 ) THEN
681  CALL cgemv( 'No transpose', mp, nq, one,
682  $ c( ioffc ), ldc, work, 1, zero,
683  $ work( ipw ), 1 )
684  ELSE
685  CALL claset( 'All', mp, 1, zero, zero,
686  $ work( ipw ), max( 1, mp ) )
687  END IF
688  CALL cgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
689  $ work( ipw ), max( 1, mp ), rdest,
690  $ iccol )
691 *
692 * sub( C ) := sub( C ) - w * v'
693 *
694  CALL cgerc( mp, nq, -tauloc, work( ipw ), 1, work,
695  $ 1, c( ioffc ), ldc )
696  END IF
697 *
698  END IF
699 *
700  END IF
701 *
702  ELSE
703 *
704 * sub( C ) is a proper distributed matrix
705 *
706  IF( descv( m_ ).EQ.incv ) THEN
707 *
708 * Broadcast row vector V
709 *
710  CALL pb_topget( ictxt, 'Broadcast', 'Columnwise',
711  $ colbtop )
712  IF( myrow.EQ.ivrow ) THEN
713 *
714  ipw = nq+1
715  CALL ccopy( nq, v( ioffv ), ldv, work, 1 )
716  work(ipw) = tau( iiv )
717  CALL cgebs2d( ictxt, 'Columnwise', colbtop, ipw, 1,
718  $ work, ipw )
719  tauloc = conjg( tau( iiv ) )
720 *
721  ELSE
722 *
723  ipw = nq+1
724  CALL cgebr2d( ictxt, 'Columnwise', colbtop, ipw, 1,
725  $ work, ipw, ivrow, mycol )
726  tauloc = conjg( work( ipw ) )
727 *
728  END IF
729 *
730  IF( tauloc.NE.zero ) THEN
731 *
732 * w := sub( C ) * v
733 *
734  IF( nq.GT.0 ) THEN
735  CALL cgemv( 'No Transpose', mp, nq, one,
736  $ c( ioffc ), ldc, work, 1, zero,
737  $ work( ipw ), 1 )
738  ELSE
739  CALL claset( 'All', mp, 1, zero, zero,
740  $ work( ipw ), max( 1, mp ) )
741  END IF
742  CALL cgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
743  $ work( ipw ), max( 1, mp ), rdest,
744  $ iccol )
745 *
746 * sub( C ) := sub( C ) - w * v'
747 *
748  CALL cgerc( mp, nq, -tauloc, work( ipw ), 1, work, 1,
749  $ c( ioffc ), ldc )
750  END IF
751 *
752  ELSE
753 *
754 * Transpose and broadcast column vector V
755 *
756  ipw = nq+1
757  CALL pbctrnv( ictxt, 'Columnwise', 'Transpose', n,
758  $ descv( mb_ ), icoff, v( ioffv ), 1, zero,
759  $ work, 1, ivrow, ivcol, -1, iccol,
760  $ work( ipw ) )
761 *
762 * Perform the local computation within a process column
763 *
764  IF( mycol.EQ.ivcol ) THEN
765 *
766  CALL cgebs2d( ictxt, 'Rowwise', ' ', 1, 1, tau( jjv ),
767  $ 1 )
768  tauloc = conjg( tau( jjv ) )
769 *
770  ELSE
771 *
772  CALL cgebr2d( ictxt, 'Rowwise', ' ', 1, 1, tauloc, 1,
773  $ myrow, ivcol )
774  tauloc = conjg( tauloc )
775 *
776  END IF
777 *
778  IF( tauloc.NE.zero ) THEN
779 *
780 * w := sub( C ) * v
781 *
782  IF( nq.GT.0 ) THEN
783  CALL cgemv( 'No transpose', mp, nq, one,
784  $ c( ioffc ), ldc, work, 1, zero,
785  $ work( ipw ), 1 )
786  ELSE
787  CALL claset( 'All', mp, 1, zero, zero, work( ipw ),
788  $ max( 1, mp ) )
789  END IF
790  CALL cgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
791  $ work( ipw ), max( 1, mp ), rdest,
792  $ iccol )
793 *
794 * sub( C ) := sub( C ) - w * v'
795 *
796  CALL cgerc( mp, nq, -tauloc, work( ipw ), 1, work, 1,
797  $ c( ioffc ), ldc )
798  END IF
799 *
800  END IF
801 *
802  END IF
803 *
804  END IF
805 *
806  RETURN
807 *
808 * End of PCLARFC
809 *
810  END
max
#define max(A, B)
Definition: pcgemr.c:180
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pclarfc
subroutine pclarfc(SIDE, M, N, V, IV, JV, DESCV, INCV, TAU, C, IC, JC, DESCC, WORK)
Definition: pclarfc.f:3
pbctrnv
subroutine pbctrnv(ICONTXT, XDIST, TRANS, N, NB, NZ, X, INCX, BETA, Y, INCY, IXROW, IXCOL, IYROW, IYCOL, WORK)
Definition: pbctrnv.f:4
min
#define min(A, B)
Definition: pcgemr.c:181