SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pclarfc.f
Go to the documentation of this file.
1 SUBROUTINE pclarfc( SIDE, M, N, V, IV, JV, DESCV, INCV, TAU,
2 $ C, IC, JC, DESCC, WORK )
3*
4* -- ScaLAPACK auxiliary routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* May 25, 2001
8*
9* .. Scalar Arguments ..
10 CHARACTER SIDE
11 INTEGER IC, INCV, IV, JC, JV, M, N
12* ..
13* .. Array Arguments ..
14 INTEGER DESCC( * ), DESCV( * )
15 COMPLEX C( * ), TAU( * ), V( * ), WORK( * )
16* ..
17*
18* Purpose
19* =======
20*
21* PCLARFC applies a complex elementary reflector Q**H to a
22* complex M-by-N distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1),
23* from either the left or the right. Q is represented in the form
24*
25* Q = I - tau * v * v'
26*
27* where tau is a complex scalar and v is a complex vector.
28*
29* If tau = 0, then Q is taken to be the unit matrix.
30*
31* Notes
32* =====
33*
34* Each global data object is described by an associated description
35* vector. This vector stores the information required to establish
36* the mapping between an object element and its corresponding process
37* and memory location.
38*
39* Let A be a generic term for any 2D block cyclicly distributed array.
40* Such a global array has an associated description vector DESCA.
41* In the following comments, the character _ should be read as
42* "of the global array".
43*
44* NOTATION STORED IN EXPLANATION
45* --------------- -------------- --------------------------------------
46* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
47* DTYPE_A = 1.
48* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
49* the BLACS process grid A is distribu-
50* ted over. The context itself is glo-
51* bal, but the handle (the integer
52* value) may vary.
53* M_A (global) DESCA( M_ ) The number of rows in the global
54* array A.
55* N_A (global) DESCA( N_ ) The number of columns in the global
56* array A.
57* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
58* the rows of the array.
59* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
60* the columns of the array.
61* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
62* row of the array A is distributed.
63* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
64* first column of the array A is
65* distributed.
66* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
67* array. LLD_A >= MAX(1,LOCr(M_A)).
68*
69* Let K be the number of rows or columns of a distributed matrix,
70* and assume that its process grid has dimension p x q.
71* LOCr( K ) denotes the number of elements of K that a process
72* would receive if K were distributed over the p processes of its
73* process column.
74* Similarly, LOCc( K ) denotes the number of elements of K that a
75* process would receive if K were distributed over the q processes of
76* its process row.
77* The values of LOCr() and LOCc() may be determined via a call to the
78* ScaLAPACK tool function, NUMROC:
79* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
80* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
81* An upper bound for these quantities may be computed by:
82* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
83* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
84*
85* Because vectors may be viewed as a subclass of matrices, a
86* distributed vector is considered to be a distributed matrix.
87*
88* Restrictions
89* ============
90*
91* If SIDE = 'Left' and INCV = 1, then the row process having the first
92* entry V(IV,JV) must also have the first row of sub( C ). Moreover,
93* MOD(IV-1,MB_V) must be equal to MOD(IC-1,MB_C), if INCV=M_V, only
94* the last equality must be satisfied.
95*
96* If SIDE = 'Right' and INCV = M_V then the column process having the
97* first entry V(IV,JV) must also have the first column of sub( C ) and
98* MOD(JV-1,NB_V) must be equal to MOD(JC-1,NB_C), if INCV = 1 only the
99* last equality must be satisfied.
100*
101* Arguments
102* =========
103*
104* SIDE (global input) CHARACTER
105* = 'L': form Q**H * sub( C ),
106* = 'R': form sub( C ) * Q**H.
107*
108* M (global input) INTEGER
109* The number of rows to be operated on i.e the number of rows
110* of the distributed submatrix sub( C ). M >= 0.
111*
112* N (global input) INTEGER
113* The number of columns to be operated on i.e the number of
114* columns of the distributed submatrix sub( C ). N >= 0.
115*
116* V (local input) COMPLEX pointer into the local memory
117* to an array of dimension (LLD_V,*) containing the local
118* pieces of the distributed vectors V representing the
119* Householder transformation Q,
120* V(IV:IV+M-1,JV) if SIDE = 'L' and INCV = 1,
121* V(IV,JV:JV+M-1) if SIDE = 'L' and INCV = M_V,
122* V(IV:IV+N-1,JV) if SIDE = 'R' and INCV = 1,
123* V(IV,JV:JV+N-1) if SIDE = 'R' and INCV = M_V,
124*
125* The vector v in the representation of Q. V is not used if
126* TAU = 0.
127*
128* IV (global input) INTEGER
129* The row index in the global array V indicating the first
130* row of sub( V ).
131*
132* JV (global input) INTEGER
133* The column index in the global array V indicating the
134* first column of sub( V ).
135*
136* DESCV (global and local input) INTEGER array of dimension DLEN_.
137* The array descriptor for the distributed matrix V.
138*
139* INCV (global input) INTEGER
140* The global increment for the elements of V. Only two values
141* of INCV are supported in this version, namely 1 and M_V.
142* INCV must not be zero.
143*
144* TAU (local input) COMPLEX, array, dimension LOCc(JV) if
145* INCV = 1, and LOCr(IV) otherwise. This array contains the
146* Householder scalars related to the Householder vectors.
147* TAU is tied to the distributed matrix V.
148*
149* C (local input/local output) COMPLEX pointer into the
150* local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ),
151* containing the local pieces of sub( C ). On exit, sub( C )
152* is overwritten by the Q**H * sub( C ) if SIDE = 'L', or
153* sub( C ) * Q**H if SIDE = 'R'.
154*
155* IC (global input) INTEGER
156* The row index in the global array C indicating the first
157* row of sub( C ).
158*
159* JC (global input) INTEGER
160* The column index in the global array C indicating the
161* first column of sub( C ).
162*
163* DESCC (global and local input) INTEGER array of dimension DLEN_.
164* The array descriptor for the distributed matrix C.
165*
166* WORK (local workspace) COMPLEX array, dimension (LWORK)
167* If INCV = 1,
168* if SIDE = 'L',
169* if IVCOL = ICCOL,
170* LWORK >= NqC0
171* else
172* LWORK >= MpC0 + MAX( 1, NqC0 )
173* end if
174* else if SIDE = 'R',
175* LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC(
176* N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) )
177* end if
178* else if INCV = M_V,
179* if SIDE = 'L',
180* LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC(
181* M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) )
182* else if SIDE = 'R',
183* if IVROW = ICROW,
184* LWORK >= MpC0
185* else
186* LWORK >= NqC0 + MAX( 1, MpC0 )
187* end if
188* end if
189* end if
190*
191* where LCM is the least common multiple of NPROW and NPCOL and
192* LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW,
193* LCMQ = LCM / NPCOL,
194*
195* IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
196* ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
197* ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
198* MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
199* NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
200*
201* ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
202* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
203* the subroutine BLACS_GRIDINFO.
204*
205* Alignment requirements
206* ======================
207*
208* The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
209* must verify some alignment properties, namely the following
210* expressions should be true:
211*
212* MB_V = NB_V,
213*
214* If INCV = 1,
215* If SIDE = 'Left',
216* ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
217* If SIDE = 'Right',
218* ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
219* else if INCV = M_V,
220* If SIDE = 'Left',
221* ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
222* If SIDE = 'Right',
223* ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
224* end if
225*
226* =====================================================================
227*
228* .. Parameters ..
229 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
230 $ lld_, mb_, m_, nb_, n_, rsrc_
231 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
232 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
233 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
234 COMPLEX ONE, ZERO
235 parameter( one = ( 1.0e+0, 0.0e+0 ),
236 $ zero = ( 0.0e+0, 0.0e+0 ) )
237* ..
238* .. Local Scalars ..
239 LOGICAL CCBLCK, CRBLCK
240 CHARACTER COLBTOP, ROWBTOP
241 INTEGER ICCOL, ICOFF, ICROW, ICTXT, IIC, IIV, IOFFC,
242 $ ioffv, ipw, iroff, ivcol, ivrow, jjc, jjv, ldc,
243 $ ldv, mycol, myrow, mp, ncc, ncv, npcol, nprow,
244 $ nq, rdest
245 COMPLEX TAULOC( 1 )
246* ..
247* .. External Subroutines ..
248 EXTERNAL blacs_gridinfo, ccopy, cgebr2d, cgebs2d,
249 $ cgemv, cgerc, cgerv2d, cgesd2d,
250 $ cgsum2d, claset, infog2l, pb_topget,
251 $ pbctrnv
252* ..
253* .. External Functions ..
254 LOGICAL LSAME
255 INTEGER NUMROC
256 EXTERNAL lsame, numroc
257* ..
258* .. Intrinsic Functions ..
259 INTRINSIC min, mod
260* ..
261* .. Executable Statements ..
262*
263* Quick return if possible
264*
265 IF( m.LE.0 .OR. n.LE.0 )
266 $ RETURN
267*
268* Get grid parameters.
269*
270 ictxt = descc( ctxt_ )
271 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
272*
273* Figure local indexes
274*
275 CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol, iic, jjc,
276 $ icrow, iccol )
277 CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol, iiv, jjv,
278 $ ivrow, ivcol )
279 ncc = numroc( descc( n_ ), descc( nb_ ), mycol, descc( csrc_ ),
280 $ npcol )
281 ncv = numroc( descv( n_ ), descv( nb_ ), mycol, descv( csrc_ ),
282 $ npcol )
283 ldc = descc( lld_ )
284 ldv = descv( lld_ )
285 iic = min( iic, ldc )
286 iiv = min( iiv, ldv )
287 jjc = min( jjc, ncc )
288 jjv = min( jjv, ncv )
289 ioffc = iic+(jjc-1)*ldc
290 ioffv = iiv+(jjv-1)*ldv
291*
292 iroff = mod( ic-1, descc( mb_ ) )
293 icoff = mod( jc-1, descc( nb_ ) )
294 mp = numroc( m+iroff, descc( mb_ ), myrow, icrow, nprow )
295 nq = numroc( n+icoff, descc( nb_ ), mycol, iccol, npcol )
296 IF( myrow.EQ.icrow )
297 $ mp = mp - iroff
298 IF( mycol.EQ.iccol )
299 $ nq = nq - icoff
300*
301* Is sub( C ) only distributed over a process row ?
302*
303 crblck = ( m.LE.(descc( mb_ )-iroff) )
304*
305* Is sub( C ) only distributed over a process column ?
306*
307 ccblck = ( n.LE.(descc( nb_ )-icoff) )
308*
309 IF( lsame( side, 'L' ) ) THEN
310*
311 IF( crblck ) THEN
312 rdest = icrow
313 ELSE
314 rdest = -1
315 END IF
316*
317 IF( ccblck ) THEN
318*
319* sub( C ) is distributed over a process column
320*
321 IF( descv( m_ ).EQ.incv ) THEN
322*
323* Transpose row vector V
324*
325 ipw = mp+1
326 CALL pbctrnv( ictxt, 'Rowwise', 'Transpose', m,
327 $ descv( nb_ ), iroff, v( ioffv ), ldv, zero,
328 $ work, 1, ivrow, ivcol, icrow, iccol,
329 $ work( ipw ) )
330*
331* Perform the local computation within a process column
332*
333 IF( mycol.EQ.iccol ) THEN
334*
335 IF( myrow.EQ.ivrow ) THEN
336*
337 CALL cgebs2d( ictxt, 'Columnwise', ' ', 1, 1,
338 $ tau( iiv ), 1 )
339 tauloc( 1 ) = conjg( tau( iiv ) )
340*
341 ELSE
342*
343 CALL cgebr2d( ictxt, 'Columnwise', ' ', 1, 1,
344 $ tauloc, 1, ivrow, mycol )
345 tauloc( 1 ) = conjg( tauloc( 1 ) )
346*
347 END IF
348*
349 IF( tauloc( 1 ).NE.zero ) THEN
350*
351* w := sub( C )' * v
352*
353 IF( mp.GT.0 ) THEN
354 CALL cgemv( 'Conjugate transpose', mp, nq, one,
355 $ c( ioffc ), ldc, work, 1, zero,
356 $ work( ipw ), 1 )
357 ELSE
358 CALL claset( 'All', nq, 1, zero, zero,
359 $ work( ipw ), max( 1, nq ) )
360 END IF
361 CALL cgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
362 $ work( ipw ), max( 1, nq ), rdest,
363 $ mycol )
364*
365* sub( C ) := sub( C ) - v * w'
366*
367 CALL cgerc( mp, nq, -tauloc( 1 ), work, 1,
368 $ work( ipw ), 1, c( ioffc ), ldc )
369 END IF
370*
371 END IF
372*
373 ELSE
374*
375* V is a column vector
376*
377 IF( ivcol.EQ.iccol ) THEN
378*
379* Perform the local computation within a process column
380*
381 IF( mycol.EQ.iccol ) THEN
382*
383 tauloc( 1 ) = conjg( tau( jjv ) )
384*
385 IF( tauloc( 1 ).NE.zero ) THEN
386*
387* w := sub( C )' * v
388*
389 IF( mp.GT.0 ) THEN
390 CALL cgemv( 'Conjugate transpose', mp, nq,
391 $ one, c( ioffc ), ldc, v( ioffv ), 1,
392 $ zero, work, 1 )
393 ELSE
394 CALL claset( 'All', nq, 1, zero, zero,
395 $ work, max( 1, nq ) )
396 END IF
397 CALL cgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
398 $ work, max( 1, nq ), rdest, mycol )
399*
400* sub( C ) := sub( C ) - v * w'
401*
402 CALL cgerc( mp, nq, -tauloc( 1 ), v( ioffv ), 1,
403 $ work, 1, c( ioffc ), ldc )
404 END IF
405*
406 END IF
407*
408 ELSE
409*
410* Send V and TAU to the process column ICCOL
411*
412 IF( mycol.EQ.ivcol ) THEN
413*
414 ipw = mp+1
415 CALL ccopy( mp, v( ioffv ), 1, work, 1 )
416 work( ipw ) = tau( jjv )
417 CALL cgesd2d( ictxt, ipw, 1, work, ipw, myrow,
418 $ iccol )
419*
420 ELSE IF( mycol.EQ.iccol ) THEN
421*
422 ipw = mp+1
423 CALL cgerv2d( ictxt, ipw, 1, work, ipw, myrow,
424 $ ivcol )
425 tauloc( 1 ) = conjg( work( ipw ) )
426*
427 IF( tauloc( 1 ).NE.zero ) THEN
428*
429* w := sub( C )' * v
430*
431 IF( mp.GT.0 ) THEN
432 CALL cgemv( 'Conjugate transpose', mp, nq,
433 $ one, c( ioffc ), ldc, work, 1,
434 $ zero, work( ipw ), 1 )
435 ELSE
436 CALL claset( 'All', nq, 1, zero, zero,
437 $ work( ipw ), max( 1, nq ) )
438 END IF
439 CALL cgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
440 $ work( ipw ), max( 1, nq ), rdest,
441 $ mycol )
442*
443* sub( C ) := sub( C ) - v * w'
444*
445 CALL cgerc( mp, nq, -tauloc( 1 ), work, 1,
446 $ work( ipw ), 1, c( ioffc ), ldc )
447 END IF
448*
449 END IF
450*
451 END IF
452*
453 END IF
454*
455 ELSE
456*
457* sub( C ) is a proper distributed matrix
458*
459 IF( descv( m_ ).EQ.incv ) THEN
460*
461* Transpose and broadcast row vector V
462*
463 ipw = mp+1
464 CALL pbctrnv( ictxt, 'Rowwise', 'Transpose', m,
465 $ descv( nb_ ), iroff, v( ioffv ), ldv, zero,
466 $ work, 1, ivrow, ivcol, icrow, -1,
467 $ work( ipw ) )
468*
469* Perform the local computation within a process column
470*
471 IF( myrow.EQ.ivrow ) THEN
472*
473 CALL cgebs2d( ictxt, 'Columnwise', ' ', 1, 1,
474 $ tau( iiv ), 1 )
475 tauloc( 1 ) = conjg( tau( iiv ) )
476*
477 ELSE
478*
479 CALL cgebr2d( ictxt, 'Columnwise', ' ', 1, 1, tauloc,
480 $ 1, ivrow, mycol )
481 tauloc( 1 ) = conjg( tauloc( 1 ) )
482*
483 END IF
484*
485 IF( tauloc( 1 ).NE.zero ) THEN
486*
487* w := sub( C )' * v
488*
489 IF( mp.GT.0 ) THEN
490 CALL cgemv( 'Conjugate transpose', mp, nq, one,
491 $ c( ioffc ), ldc, work, 1, zero,
492 $ work( ipw ), 1 )
493 ELSE
494 CALL claset( 'All', nq, 1, zero, zero,
495 $ work( ipw ), max( 1, nq ) )
496 END IF
497 CALL cgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
498 $ work( ipw ), max( 1, nq ), rdest,
499 $ mycol )
500*
501* sub( C ) := sub( C ) - v * w'
502*
503 CALL cgerc( mp, nq, -tauloc( 1 ), work, 1,
504 $ work( ipw ), 1, c( ioffc ), ldc )
505 END IF
506*
507 ELSE
508*
509* Broadcast column vector V
510*
511 CALL pb_topget( ictxt, 'Broadcast', 'Rowwise', rowbtop )
512 IF( mycol.EQ.ivcol ) THEN
513*
514 ipw = mp+1
515 CALL ccopy( mp, v( ioffv ), 1, work, 1 )
516 work(ipw) = tau( jjv )
517 CALL cgebs2d( ictxt, 'Rowwise', rowbtop, ipw, 1,
518 $ work, ipw )
519 tauloc( 1 ) = conjg( tau( jjv ) )
520*
521 ELSE
522*
523 ipw = mp+1
524 CALL cgebr2d( ictxt, 'Rowwise', rowbtop, ipw, 1, work,
525 $ ipw, myrow, ivcol )
526 tauloc( 1 ) = conjg( work( ipw ) )
527*
528 END IF
529*
530 IF( tauloc( 1 ).NE.zero ) THEN
531*
532* w := sub( C )' * v
533*
534 IF( mp.GT.0 ) THEN
535 CALL cgemv( 'Conjugate transpose', mp, nq, one,
536 $ c( ioffc ), ldc, work, 1, zero,
537 $ work( ipw ), 1 )
538 ELSE
539 CALL claset( 'All', nq, 1, zero, zero,
540 $ work( ipw ), max( 1, nq ) )
541 END IF
542 CALL cgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
543 $ work( ipw ), max( 1, nq ), rdest,
544 $ mycol )
545*
546* sub( C ) := sub( C ) - v * w'
547*
548 CALL cgerc( mp, nq, -tauloc( 1 ), work, 1,
549 $ work( ipw ), 1, c( ioffc ), ldc )
550 END IF
551*
552 END IF
553*
554 END IF
555*
556 ELSE
557*
558 IF( ccblck ) THEN
559 rdest = myrow
560 ELSE
561 rdest = -1
562 END IF
563*
564 IF( crblck ) THEN
565*
566* sub( C ) is distributed over a process row
567*
568 IF( descv( m_ ).EQ.incv ) THEN
569*
570* V is a row vector
571*
572 IF( ivrow.EQ.icrow ) THEN
573*
574* Perform the local computation within a process row
575*
576 IF( myrow.EQ.icrow ) THEN
577*
578 tauloc( 1 ) = conjg( tau( iiv ) )
579*
580 IF( tauloc( 1 ).NE.zero ) THEN
581*
582* w := sub( C ) * v
583*
584 IF( nq.GT.0 ) THEN
585 CALL cgemv( 'No transpose', mp, nq, one,
586 $ c( ioffc ), ldc, v( ioffv ), ldv,
587 $ zero, work, 1 )
588 ELSE
589 CALL claset( 'All', mp, 1, zero, zero,
590 $ work, max( 1, mp ) )
591 END IF
592 CALL cgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
593 $ work, max( 1, mp ), rdest, iccol )
594*
595* sub( C ) := sub( C ) - w * v'
596*
597 CALL cgerc( mp, nq, -tauloc( 1 ), work, 1,
598 $ v( ioffv ), ldv, c( ioffc ), ldc )
599 END IF
600*
601 END IF
602*
603 ELSE
604*
605* Send V and TAU to the process row ICROW
606*
607 IF( myrow.EQ.ivrow ) THEN
608*
609 ipw = nq+1
610 CALL ccopy( nq, v( ioffv ), ldv, work, 1 )
611 work(ipw) = tau( iiv )
612 CALL cgesd2d( ictxt, ipw, 1, work, ipw, icrow,
613 $ mycol )
614*
615 ELSE IF( myrow.EQ.icrow ) THEN
616*
617 ipw = nq+1
618 CALL cgerv2d( ictxt, ipw, 1, work, ipw, ivrow,
619 $ mycol )
620 tauloc( 1 ) = conjg( work( ipw ) )
621*
622 IF( tauloc( 1 ).NE.zero ) THEN
623*
624* w := sub( C ) * v
625*
626 IF( nq.GT.0 ) THEN
627 CALL cgemv( 'No transpose', mp, nq, one,
628 $ c( ioffc ), ldc, work, 1, zero,
629 $ work( ipw ), 1 )
630 ELSE
631 CALL claset( 'All', mp, 1, zero, zero,
632 $ work( ipw ), max( 1, mp ) )
633 END IF
634 CALL cgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
635 $ work( ipw ), max( 1, mp ), rdest,
636 $ iccol )
637*
638* sub( C ) := sub( C ) - w * v'
639*
640 CALL cgerc( mp, nq, -tauloc( 1 ), work( ipw ),
641 $ 1, work, 1, c( ioffc ), ldc )
642 END IF
643*
644 END IF
645*
646 END IF
647*
648 ELSE
649*
650* Transpose column vector V
651*
652 ipw = nq+1
653 CALL pbctrnv( ictxt, 'Columnwise', 'Transpose', n,
654 $ descv( mb_ ), icoff, v( ioffv ), 1, zero,
655 $ work, 1, ivrow, ivcol, icrow, iccol,
656 $ work( ipw ) )
657*
658* Perform the local computation within a process column
659*
660 IF( myrow.EQ.icrow ) THEN
661*
662 IF( mycol.EQ.ivcol ) THEN
663*
664 CALL cgebs2d( ictxt, 'Rowwise', ' ', 1, 1,
665 $ tau( jjv ), 1 )
666 tauloc( 1 ) = conjg( tau( jjv ) )
667*
668 ELSE
669*
670 CALL cgebr2d( ictxt, 'Rowwise', ' ', 1, 1, tauloc,
671 $ 1, myrow, ivcol )
672 tauloc( 1 ) = conjg( tauloc( 1 ) )
673*
674 END IF
675*
676 IF( tauloc( 1 ).NE.zero ) THEN
677*
678* w := sub( C ) * v
679*
680 IF( nq.GT.0 ) THEN
681 CALL cgemv( 'No transpose', mp, nq, one,
682 $ c( ioffc ), ldc, work, 1, zero,
683 $ work( ipw ), 1 )
684 ELSE
685 CALL claset( 'All', mp, 1, zero, zero,
686 $ work( ipw ), max( 1, mp ) )
687 END IF
688 CALL cgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
689 $ work( ipw ), max( 1, mp ), rdest,
690 $ iccol )
691*
692* sub( C ) := sub( C ) - w * v'
693*
694 CALL cgerc( mp, nq, -tauloc( 1 ), work( ipw ), 1,
695 $ work, 1, c( ioffc ), ldc )
696 END IF
697*
698 END IF
699*
700 END IF
701*
702 ELSE
703*
704* sub( C ) is a proper distributed matrix
705*
706 IF( descv( m_ ).EQ.incv ) THEN
707*
708* Broadcast row vector V
709*
710 CALL pb_topget( ictxt, 'Broadcast', 'Columnwise',
711 $ colbtop )
712 IF( myrow.EQ.ivrow ) THEN
713*
714 ipw = nq+1
715 CALL ccopy( nq, v( ioffv ), ldv, work, 1 )
716 work(ipw) = tau( iiv )
717 CALL cgebs2d( ictxt, 'Columnwise', colbtop, ipw, 1,
718 $ work, ipw )
719 tauloc( 1 ) = conjg( tau( iiv ) )
720*
721 ELSE
722*
723 ipw = nq+1
724 CALL cgebr2d( ictxt, 'Columnwise', colbtop, ipw, 1,
725 $ work, ipw, ivrow, mycol )
726 tauloc( 1 ) = conjg( work( ipw ) )
727*
728 END IF
729*
730 IF( tauloc( 1 ).NE.zero ) THEN
731*
732* w := sub( C ) * v
733*
734 IF( nq.GT.0 ) THEN
735 CALL cgemv( 'No Transpose', mp, nq, one,
736 $ c( ioffc ), ldc, work, 1, zero,
737 $ work( ipw ), 1 )
738 ELSE
739 CALL claset( 'All', mp, 1, zero, zero,
740 $ work( ipw ), max( 1, mp ) )
741 END IF
742 CALL cgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
743 $ work( ipw ), max( 1, mp ), rdest,
744 $ iccol )
745*
746* sub( C ) := sub( C ) - w * v'
747*
748 CALL cgerc( mp, nq, -tauloc( 1 ), work( ipw ), 1,
749 $ work, 1, c( ioffc ), ldc )
750 END IF
751*
752 ELSE
753*
754* Transpose and broadcast column vector V
755*
756 ipw = nq+1
757 CALL pbctrnv( ictxt, 'Columnwise', 'Transpose', n,
758 $ descv( mb_ ), icoff, v( ioffv ), 1, zero,
759 $ work, 1, ivrow, ivcol, -1, iccol,
760 $ work( ipw ) )
761*
762* Perform the local computation within a process column
763*
764 IF( mycol.EQ.ivcol ) THEN
765*
766 CALL cgebs2d( ictxt, 'Rowwise', ' ', 1, 1, tau( jjv ),
767 $ 1 )
768 tauloc( 1 ) = conjg( tau( jjv ) )
769*
770 ELSE
771*
772 CALL cgebr2d( ictxt, 'Rowwise', ' ', 1, 1, tauloc, 1,
773 $ myrow, ivcol )
774 tauloc( 1 ) = conjg( tauloc( 1 ) )
775*
776 END IF
777*
778 IF( tauloc( 1 ).NE.zero ) THEN
779*
780* w := sub( C ) * v
781*
782 IF( nq.GT.0 ) THEN
783 CALL cgemv( 'No transpose', mp, nq, one,
784 $ c( ioffc ), ldc, work, 1, zero,
785 $ work( ipw ), 1 )
786 ELSE
787 CALL claset( 'All', mp, 1, zero, zero, work( ipw ),
788 $ max( 1, mp ) )
789 END IF
790 CALL cgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
791 $ work( ipw ), max( 1, mp ), rdest,
792 $ iccol )
793*
794* sub( C ) := sub( C ) - w * v'
795*
796 CALL cgerc( mp, nq, -tauloc( 1 ), work( ipw ), 1,
797 $ work, 1, c( ioffc ), ldc )
798 END IF
799*
800 END IF
801*
802 END IF
803*
804 END IF
805*
806 RETURN
807*
808* End of PCLARFC
809*
810 END
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
subroutine pbctrnv(icontxt, xdist, trans, n, nb, nz, x, incx, beta, y, incy, ixrow, ixcol, iyrow, iycol, work)
Definition pbctrnv.f:4
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pclarfc(side, m, n, v, iv, jv, descv, incv, tau, c, ic, jc, descc, work)
Definition pclarfc.f:3