SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pdlarf.f
Go to the documentation of this file.
1 SUBROUTINE pdlarf( SIDE, M, N, V, IV, JV, DESCV, INCV, TAU,
2 $ C, IC, JC, DESCC, WORK )
3*
4* -- ScaLAPACK auxiliary routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* May 25, 2001
8*
9* .. Scalar Arguments ..
10 CHARACTER SIDE
11 INTEGER IC, INCV, IV, JC, JV, M, N
12* ..
13* .. Array Arguments ..
14 INTEGER DESCC( * ), DESCV( * )
15 DOUBLE PRECISION C( * ), TAU( * ), V( * ), WORK( * )
16* ..
17*
18* Purpose
19* =======
20*
21* PDLARF applies a real elementary reflector Q (or Q**T) to a real
22* M-by-N distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), from
23* either the left or the right. Q is represented in the form
24*
25* Q = I - tau * v * v'
26*
27* where tau is a real scalar and v is a real vector.
28*
29* If tau = 0, then Q is taken to be the unit matrix.
30*
31* Notes
32* =====
33*
34* Each global data object is described by an associated description
35* vector. This vector stores the information required to establish
36* the mapping between an object element and its corresponding process
37* and memory location.
38*
39* Let A be a generic term for any 2D block cyclicly distributed array.
40* Such a global array has an associated description vector DESCA.
41* In the following comments, the character _ should be read as
42* "of the global array".
43*
44* NOTATION STORED IN EXPLANATION
45* --------------- -------------- --------------------------------------
46* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
47* DTYPE_A = 1.
48* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
49* the BLACS process grid A is distribu-
50* ted over. The context itself is glo-
51* bal, but the handle (the integer
52* value) may vary.
53* M_A (global) DESCA( M_ ) The number of rows in the global
54* array A.
55* N_A (global) DESCA( N_ ) The number of columns in the global
56* array A.
57* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
58* the rows of the array.
59* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
60* the columns of the array.
61* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
62* row of the array A is distributed.
63* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
64* first column of the array A is
65* distributed.
66* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
67* array. LLD_A >= MAX(1,LOCr(M_A)).
68*
69* Let K be the number of rows or columns of a distributed matrix,
70* and assume that its process grid has dimension p x q.
71* LOCr( K ) denotes the number of elements of K that a process
72* would receive if K were distributed over the p processes of its
73* process column.
74* Similarly, LOCc( K ) denotes the number of elements of K that a
75* process would receive if K were distributed over the q processes of
76* its process row.
77* The values of LOCr() and LOCc() may be determined via a call to the
78* ScaLAPACK tool function, NUMROC:
79* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
80* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
81* An upper bound for these quantities may be computed by:
82* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
83* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
84*
85* Because vectors may be viewed as a subclass of matrices, a
86* distributed vector is considered to be a distributed matrix.
87*
88* Restrictions
89* ============
90*
91* If SIDE = 'Left' and INCV = 1, then the row process having the first
92* entry V(IV,JV) must also have the first row of sub( C ). Moreover,
93* MOD(IV-1,MB_V) must be equal to MOD(IC-1,MB_C), if INCV=M_V, only
94* the last equality must be satisfied.
95*
96* If SIDE = 'Right' and INCV = M_V then the column process having the
97* first entry V(IV,JV) must also have the first column of sub( C ) and
98* MOD(JV-1,NB_V) must be equal to MOD(JC-1,NB_C), if INCV = 1 only the
99* last equality must be satisfied.
100*
101* Arguments
102* =========
103*
104* SIDE (global input) CHARACTER
105* = 'L': form Q * sub( C ),
106* = 'R': form sub( C ) * Q, Q = Q**T.
107*
108* M (global input) INTEGER
109* The number of rows to be operated on i.e the number of rows
110* of the distributed submatrix sub( C ). M >= 0.
111*
112* N (global input) INTEGER
113* The number of columns to be operated on i.e the number of
114* columns of the distributed submatrix sub( C ). N >= 0.
115*
116* V (local input) DOUBLE PRECISION pointer into the local memory
117* to an array of dimension (LLD_V,*) containing the local
118* pieces of the distributed vectors V representing the
119* Householder transformation Q,
120* V(IV:IV+M-1,JV) if SIDE = 'L' and INCV = 1,
121* V(IV,JV:JV+M-1) if SIDE = 'L' and INCV = M_V,
122* V(IV:IV+N-1,JV) if SIDE = 'R' and INCV = 1,
123* V(IV,JV:JV+N-1) if SIDE = 'R' and INCV = M_V,
124*
125* The vector v in the representation of Q. V is not used if
126* TAU = 0.
127*
128* IV (global input) INTEGER
129* The row index in the global array V indicating the first
130* row of sub( V ).
131*
132* JV (global input) INTEGER
133* The column index in the global array V indicating the
134* first column of sub( V ).
135*
136* DESCV (global and local input) INTEGER array of dimension DLEN_.
137* The array descriptor for the distributed matrix V.
138*
139* INCV (global input) INTEGER
140* The global increment for the elements of V. Only two values
141* of INCV are supported in this version, namely 1 and M_V.
142* INCV must not be zero.
143*
144* TAU (local input) DOUBLE PRECISION array, dimension LOCc(JV) if
145* INCV = 1, and LOCr(IV) otherwise. This array contains the
146* Householder scalars related to the Householder vectors.
147* TAU is tied to the distributed matrix V.
148*
149* C (local input/local output) DOUBLE PRECISION pointer into the
150* local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ),
151* containing the local pieces of sub( C ). On exit, sub( C )
152* is overwritten by the Q * sub( C ) if SIDE = 'L', or
153* sub( C ) * Q if SIDE = 'R'.
154*
155* IC (global input) INTEGER
156* The row index in the global array C indicating the first
157* row of sub( C ).
158*
159* JC (global input) INTEGER
160* The column index in the global array C indicating the
161* first column of sub( C ).
162*
163* DESCC (global and local input) INTEGER array of dimension DLEN_.
164* The array descriptor for the distributed matrix C.
165*
166* WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK)
167* If INCV = 1,
168* if SIDE = 'L',
169* if IVCOL = ICCOL,
170* LWORK >= NqC0
171* else
172* LWORK >= MpC0 + MAX( 1, NqC0 )
173* end if
174* else if SIDE = 'R',
175* LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC(
176* N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) )
177* end if
178* else if INCV = M_V,
179* if SIDE = 'L',
180* LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC(
181* M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) )
182* else if SIDE = 'R',
183* if IVROW = ICROW,
184* LWORK >= MpC0
185* else
186* LWORK >= NqC0 + MAX( 1, MpC0 )
187* end if
188* end if
189* end if
190*
191* where LCM is the least common multiple of NPROW and NPCOL and
192* LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW,
193* LCMQ = LCM / NPCOL,
194*
195* IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
196* ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
197* ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
198* MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
199* NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
200*
201* ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
202* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
203* the subroutine BLACS_GRIDINFO.
204*
205* Alignment requirements
206* ======================
207*
208* The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
209* must verify some alignment properties, namely the following
210* expressions should be true:
211*
212* MB_V = NB_V,
213*
214* If INCV = 1,
215* If SIDE = 'Left',
216* ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
217* If SIDE = 'Right',
218* ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
219* else if INCV = M_V,
220* If SIDE = 'Left',
221* ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
222* If SIDE = 'Right',
223* ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
224* end if
225*
226* =====================================================================
227*
228* .. Parameters ..
229 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
230 $ lld_, mb_, m_, nb_, n_, rsrc_
231 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
232 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
233 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
234 DOUBLE PRECISION ONE, ZERO
235 parameter( one = 1.0d+0, zero = 0.0d+0 )
236* ..
237* .. Local Scalars ..
238 LOGICAL CCBLCK, CRBLCK
239 CHARACTER COLBTOP, ROWBTOP
240 INTEGER ICCOL, ICOFF, ICROW, ICTXT, IIC, IIV, IOFFC,
241 $ ioffv, ipw, iroff, ivcol, ivrow, jjc, jjv, ldc,
242 $ ldv, mycol, myrow, mp, ncc, ncv, npcol, nprow,
243 $ nq, rdest
244 DOUBLE PRECISION TAULOC( 1 )
245* ..
246* .. External Subroutines ..
247 EXTERNAL blacs_gridinfo, dcopy, dgebr2d, dgebs2d,
248 $ dgemv, dger, dgerv2d, dgesd2d,
249 $ dgsum2d, dlaset, infog2l, pb_topget,
250 $ pbdtrnv
251* ..
252* .. External Functions ..
253 LOGICAL LSAME
254 INTEGER NUMROC
255 EXTERNAL lsame, numroc
256* ..
257* .. Intrinsic Functions ..
258 INTRINSIC min, mod
259* ..
260* .. Executable Statements ..
261*
262* Quick return if possible
263*
264 IF( m.LE.0 .OR. n.LE.0 )
265 $ RETURN
266*
267* Get grid parameters.
268*
269 ictxt = descc( ctxt_ )
270 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
271*
272* Figure local indexes
273*
274 CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol, iic, jjc,
275 $ icrow, iccol )
276 CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol, iiv, jjv,
277 $ ivrow, ivcol )
278 ncc = numroc( descc( n_ ), descc( nb_ ), mycol, descc( csrc_ ),
279 $ npcol )
280 ncv = numroc( descv( n_ ), descv( nb_ ), mycol, descv( csrc_ ),
281 $ npcol )
282 ldc = descc( lld_ )
283 ldv = descv( lld_ )
284 iic = min( iic, ldc )
285 iiv = min( iiv, ldv )
286 jjc = min( jjc, ncc )
287 jjv = min( jjv, ncv )
288 ioffc = iic+(jjc-1)*ldc
289 ioffv = iiv+(jjv-1)*ldv
290*
291 iroff = mod( ic-1, descc( mb_ ) )
292 icoff = mod( jc-1, descc( nb_ ) )
293 mp = numroc( m+iroff, descc( mb_ ), myrow, icrow, nprow )
294 nq = numroc( n+icoff, descc( nb_ ), mycol, iccol, npcol )
295 IF( myrow.EQ.icrow )
296 $ mp = mp - iroff
297 IF( mycol.EQ.iccol )
298 $ nq = nq - icoff
299*
300* Is sub( C ) only distributed over a process row ?
301*
302 crblck = ( m.LE.(descc( mb_ )-iroff) )
303*
304* Is sub( C ) only distributed over a process column ?
305*
306 ccblck = ( n.LE.(descc( nb_ )-icoff) )
307*
308 IF( lsame( side, 'L' ) ) THEN
309*
310 IF( crblck ) THEN
311 rdest = icrow
312 ELSE
313 rdest = -1
314 END IF
315*
316 IF( ccblck ) THEN
317*
318* sub( C ) is distributed over a process column
319*
320 IF( descv( m_ ).EQ.incv ) THEN
321*
322* Transpose row vector V
323*
324 ipw = mp+1
325 CALL pbdtrnv( ictxt, 'Rowwise', 'Transpose', m,
326 $ descv( nb_ ), iroff, v( ioffv ), ldv, zero,
327 $ work, 1, ivrow, ivcol, icrow, iccol,
328 $ work( ipw ) )
329*
330* Perform the local computation within a process column
331*
332 IF( mycol.EQ.iccol ) THEN
333*
334 IF( myrow.EQ.ivrow ) THEN
335*
336 CALL dgebs2d( ictxt, 'Columnwise', ' ', 1, 1,
337 $ tau( iiv ), 1 )
338 tauloc( 1 ) = tau( iiv )
339*
340 ELSE
341*
342 CALL dgebr2d( ictxt, 'Columnwise', ' ', 1, 1,
343 $ tauloc, 1, ivrow, mycol )
344*
345 END IF
346*
347 IF( tauloc( 1 ).NE.zero ) THEN
348*
349* w := sub( C )' * v
350*
351 IF( mp.GT.0 ) THEN
352 CALL dgemv( 'Transpose', mp, nq, one,
353 $ c( ioffc ), ldc, work, 1, zero,
354 $ work( ipw ), 1 )
355 ELSE
356 CALL dlaset( 'All', nq, 1, zero, zero,
357 $ work( ipw ), max( 1, nq ) )
358 END IF
359 CALL dgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
360 $ work( ipw ), max( 1, nq ), rdest,
361 $ mycol )
362*
363* sub( C ) := sub( C ) - v * w'
364*
365 CALL dger( mp, nq, -tauloc( 1 ), work, 1,
366 $ work( ipw ), 1, c( ioffc ), ldc )
367 END IF
368*
369 END IF
370*
371 ELSE
372*
373* V is a column vector
374*
375 IF( ivcol.EQ.iccol ) THEN
376*
377* Perform the local computation within a process column
378*
379 IF( mycol.EQ.iccol ) THEN
380*
381 tauloc( 1 ) = tau( jjv )
382*
383 IF( tauloc( 1 ).NE.zero ) THEN
384*
385* w := sub( C )' * v
386*
387 IF( mp.GT.0 ) THEN
388 CALL dgemv( 'Transpose', mp, nq, one,
389 $ c( ioffc ), ldc, v( ioffv ), 1,
390 $ zero, work, 1 )
391 ELSE
392 CALL dlaset( 'All', nq, 1, zero, zero,
393 $ work, max( 1, nq ) )
394 END IF
395 CALL dgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
396 $ work, max( 1, nq ), rdest, mycol )
397*
398* sub( C ) := sub( C ) - v * w'
399*
400 CALL dger( mp, nq, -tauloc( 1 ), v( ioffv ), 1,
401 $ work, 1, c( ioffc ), ldc )
402 END IF
403*
404 END IF
405*
406 ELSE
407*
408* Send V and TAU to the process column ICCOL
409*
410 IF( mycol.EQ.ivcol ) THEN
411*
412 ipw = mp+1
413 CALL dcopy( mp, v( ioffv ), 1, work, 1 )
414 work( ipw ) = tau( jjv )
415 CALL dgesd2d( ictxt, ipw, 1, work, ipw, myrow,
416 $ iccol )
417*
418 ELSE IF( mycol.EQ.iccol ) THEN
419*
420 ipw = mp+1
421 CALL dgerv2d( ictxt, ipw, 1, work, ipw, myrow,
422 $ ivcol )
423 tauloc( 1 ) = work( ipw )
424*
425 IF( tauloc( 1 ).NE.zero ) THEN
426*
427* w := sub( C )' * v
428*
429 IF( mp.GT.0 ) THEN
430 CALL dgemv( 'Transpose', mp, nq, one,
431 $ c( ioffc ), ldc, work, 1, zero,
432 $ work( ipw ), 1 )
433 ELSE
434 CALL dlaset( 'All', nq, 1, zero, zero,
435 $ work( ipw ), max( 1, nq ) )
436 END IF
437 CALL dgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
438 $ work( ipw ), max( 1, nq ), rdest,
439 $ mycol )
440*
441* sub( C ) := sub( C ) - v * w'
442*
443 CALL dger( mp, nq, -tauloc( 1 ), work, 1,
444 $ work( ipw ), 1, c( ioffc ), ldc )
445 END IF
446*
447 END IF
448*
449 END IF
450*
451 END IF
452*
453 ELSE
454*
455* sub( C ) is a proper distributed matrix
456*
457 IF( descv( m_ ).EQ.incv ) THEN
458*
459* Transpose and broadcast row vector V
460*
461 ipw = mp+1
462 CALL pbdtrnv( ictxt, 'Rowwise', 'Transpose', m,
463 $ descv( nb_ ), iroff, v( ioffv ), ldv, zero,
464 $ work, 1, ivrow, ivcol, icrow, -1,
465 $ work( ipw ) )
466*
467* Perform the local computation within a process column
468*
469 IF( myrow.EQ.ivrow ) THEN
470*
471 CALL dgebs2d( ictxt, 'Columnwise', ' ', 1, 1,
472 $ tau( iiv ), 1 )
473 tauloc( 1 ) = tau( iiv )
474*
475 ELSE
476*
477 CALL dgebr2d( ictxt, 'Columnwise', ' ', 1, 1, tauloc,
478 $ 1, ivrow, mycol )
479*
480 END IF
481*
482 IF( tauloc( 1 ).NE.zero ) THEN
483*
484* w := sub( C )' * v
485*
486 IF( mp.GT.0 ) THEN
487 IF( ioffc.GT.0 )
488 $ CALL dgemv( 'Transpose', mp, nq, one,
489 $ c( ioffc ), ldc, work, 1, zero,
490 $ work( ipw ), 1 )
491 ELSE
492 CALL dlaset( 'All', nq, 1, zero, zero,
493 $ work( ipw ), max( 1, nq ) )
494 END IF
495 CALL dgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
496 $ work( ipw ), max( 1, nq ), rdest,
497 $ mycol )
498*
499* sub( C ) := sub( C ) - v * w'
500*
501 IF( ioffc.GT.0 )
502 $ CALL dger( mp, nq, -tauloc( 1 ), work, 1,
503 $ work( ipw ), 1, c( ioffc ), ldc )
504 END IF
505*
506 ELSE
507*
508* Broadcast column vector V
509*
510 CALL pb_topget( ictxt, 'Broadcast', 'Rowwise', rowbtop )
511 IF( mycol.EQ.ivcol ) THEN
512*
513 ipw = mp+1
514 CALL dcopy( mp, v( ioffv ), 1, work, 1 )
515 work(ipw) = tau( jjv )
516 CALL dgebs2d( ictxt, 'Rowwise', rowbtop, ipw, 1,
517 $ work, ipw )
518 tauloc( 1 ) = tau( jjv )
519*
520 ELSE
521*
522 ipw = mp+1
523 CALL dgebr2d( ictxt, 'Rowwise', rowbtop, ipw, 1, work,
524 $ ipw, myrow, ivcol )
525 tauloc( 1 ) = work( ipw )
526*
527 END IF
528*
529 IF( tauloc( 1 ).NE.zero ) THEN
530*
531* w := sub( C )' * v
532*
533 IF( mp.GT.0 ) THEN
534 IF( ioffc.GT.0 )
535 $ CALL dgemv( 'Transpose', mp, nq, one,
536 $ c( ioffc ), ldc, work, 1, zero,
537 $ work( ipw ), 1 )
538 ELSE
539 CALL dlaset( 'All', nq, 1, zero, zero,
540 $ work( ipw ), max( 1, nq ) )
541 END IF
542 CALL dgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
543 $ work( ipw ), max( 1, nq ), rdest,
544 $ mycol )
545*
546* sub( C ) := sub( C ) - v * w'
547*
548 IF( ioffc.GT.0 )
549 $ CALL dger( mp, nq, -tauloc( 1 ), work, 1,
550 $ work( ipw ), 1, c( ioffc ), ldc )
551 END IF
552*
553 END IF
554*
555 END IF
556*
557 ELSE
558*
559 IF( ccblck ) THEN
560 rdest = myrow
561 ELSE
562 rdest = -1
563 END IF
564*
565 IF( crblck ) THEN
566*
567* sub( C ) is distributed over a process row
568*
569 IF( descv( m_ ).EQ.incv ) THEN
570*
571* V is a row vector
572*
573 IF( ivrow.EQ.icrow ) THEN
574*
575* Perform the local computation within a process row
576*
577 IF( myrow.EQ.icrow ) THEN
578*
579 tauloc( 1 ) = tau( iiv )
580*
581 IF( tauloc( 1 ).NE.zero ) THEN
582*
583* w := sub( C ) * v
584*
585 IF( nq.GT.0 ) THEN
586 CALL dgemv( 'No transpose', mp, nq, one,
587 $ c( ioffc ), ldc, v( ioffv ), ldv,
588 $ zero, work, 1 )
589 ELSE
590 CALL dlaset( 'All', mp, 1, zero, zero,
591 $ work, max( 1, mp ) )
592 END IF
593 CALL dgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
594 $ work, max( 1, mp ), rdest, iccol )
595*
596* sub( C ) := sub( C ) - w * v'
597*
598 IF( ioffv.GT.0 .AND. ioffc.GT.0 )
599 $ CALL dger( mp, nq, -tauloc( 1 ), work, 1,
600 $ v( ioffv ), ldv, c( ioffc ), ldc )
601 END IF
602*
603 END IF
604*
605 ELSE
606*
607* Send V and TAU to the process row ICROW
608*
609 IF( myrow.EQ.ivrow ) THEN
610*
611 ipw = nq+1
612 CALL dcopy( nq, v( ioffv ), ldv, work, 1 )
613 work(ipw) = tau( iiv )
614 CALL dgesd2d( ictxt, ipw, 1, work, ipw, icrow,
615 $ mycol )
616*
617 ELSE IF( myrow.EQ.icrow ) THEN
618*
619 ipw = nq+1
620 CALL dgerv2d( ictxt, ipw, 1, work, ipw, ivrow,
621 $ mycol )
622 tauloc( 1 ) = work( ipw )
623*
624 IF( tauloc( 1 ).NE.zero ) THEN
625*
626* w := sub( C ) * v
627*
628 IF( nq.GT.0 ) THEN
629 CALL dgemv( 'No transpose', mp, nq, one,
630 $ c( ioffc ), ldc, work, 1, zero,
631 $ work( ipw ), 1 )
632 ELSE
633 CALL dlaset( 'All', mp, 1, zero, zero,
634 $ work( ipw ), max( 1, mp ) )
635 END IF
636 CALL dgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
637 $ work( ipw ), max( 1, mp ), rdest,
638 $ iccol )
639*
640* sub( C ) := sub( C ) - w * v'
641*
642 CALL dger( mp, nq, -tauloc( 1 ), work( ipw ), 1,
643 $ work, 1, c( ioffc ), ldc )
644 END IF
645*
646 END IF
647*
648 END IF
649*
650 ELSE
651*
652* Transpose column vector V
653*
654 ipw = nq+1
655 CALL pbdtrnv( ictxt, 'Columnwise', 'Transpose', n,
656 $ descv( mb_ ), icoff, v( ioffv ), 1, zero,
657 $ work, 1, ivrow, ivcol, icrow, iccol,
658 $ work( ipw ) )
659*
660* Perform the local computation within a process column
661*
662 IF( myrow.EQ.icrow ) THEN
663*
664 IF( mycol.EQ.ivcol ) THEN
665*
666 CALL dgebs2d( ictxt, 'Rowwise', ' ', 1, 1,
667 $ tau( jjv ), 1 )
668 tauloc( 1 ) = tau( jjv )
669*
670 ELSE
671*
672 CALL dgebr2d( ictxt, 'Rowwise', ' ', 1, 1, tauloc,
673 $ 1, myrow, ivcol )
674*
675 END IF
676*
677 IF( tauloc( 1 ).NE.zero ) THEN
678*
679* w := sub( C ) * v
680*
681 IF( nq.GT.0 ) THEN
682 CALL dgemv( 'No transpose', mp, nq, one,
683 $ c( ioffc ), ldc, work, 1, zero,
684 $ work( ipw ), 1 )
685 ELSE
686 CALL dlaset( 'All', mp, 1, zero, zero,
687 $ work( ipw ), max( 1, mp ) )
688 END IF
689 CALL dgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
690 $ work( ipw ), max( 1, mp ), rdest,
691 $ iccol )
692*
693* sub( C ) := sub( C ) - w * v'
694*
695 CALL dger( mp, nq, -tauloc( 1 ), work( ipw ), 1,
696 $ work, 1, c( ioffc ), ldc )
697 END IF
698*
699 END IF
700*
701 END IF
702*
703 ELSE
704*
705* sub( C ) is a proper distributed matrix
706*
707 IF( descv( m_ ).EQ.incv ) THEN
708*
709* Broadcast row vector V
710*
711 CALL pb_topget( ictxt, 'Broadcast', 'Columnwise',
712 $ colbtop )
713 IF( myrow.EQ.ivrow ) THEN
714*
715 ipw = nq+1
716 IF( ioffv.GT.0 )
717 $ CALL dcopy( nq, v( ioffv ), ldv, work, 1 )
718 work(ipw) = tau( iiv )
719 CALL dgebs2d( ictxt, 'Columnwise', colbtop, ipw, 1,
720 $ work, ipw )
721 tauloc( 1 ) = tau( iiv )
722*
723 ELSE
724*
725 ipw = nq+1
726 CALL dgebr2d( ictxt, 'Columnwise', colbtop, ipw, 1,
727 $ work, ipw, ivrow, mycol )
728 tauloc( 1 ) = work( ipw )
729*
730 END IF
731*
732 IF( tauloc( 1 ).NE.zero ) THEN
733*
734* w := sub( C ) * v
735*
736 IF( nq.GT.0 ) THEN
737 CALL dgemv( 'No Transpose', mp, nq, one,
738 $ c( ioffc ), ldc, work, 1, zero,
739 $ work( ipw ), 1 )
740 ELSE
741 CALL dlaset( 'All', mp, 1, zero, zero,
742 $ work( ipw ), max( 1, mp ) )
743 END IF
744 CALL dgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
745 $ work( ipw ), max( 1, mp ), rdest,
746 $ iccol )
747*
748* sub( C ) := sub( C ) - w * v'
749*
750 IF( ioffc.GT.0 )
751 $ CALL dger( mp, nq, -tauloc( 1 ), work( ipw ), 1,
752 $ work, 1, c( ioffc ), ldc )
753 END IF
754*
755 ELSE
756*
757* Transpose and broadcast column vector V
758*
759 ipw = nq+1
760 CALL pbdtrnv( ictxt, 'Columnwise', 'Transpose', n,
761 $ descv( mb_ ), icoff, v( ioffv ), 1, zero,
762 $ work, 1, ivrow, ivcol, -1, iccol,
763 $ work( ipw ) )
764*
765* Perform the local computation within a process column
766*
767 IF( mycol.EQ.ivcol ) THEN
768*
769 CALL dgebs2d( ictxt, 'Rowwise', ' ', 1, 1, tau( jjv ),
770 $ 1 )
771 tauloc( 1 ) = tau( jjv )
772*
773 ELSE
774*
775 CALL dgebr2d( ictxt, 'Rowwise', ' ', 1, 1, tauloc, 1,
776 $ myrow, ivcol )
777*
778 END IF
779*
780 IF( tauloc( 1 ).NE.zero ) THEN
781*
782* w := sub( C ) * v
783*
784 IF( nq.GT.0 ) THEN
785 CALL dgemv( 'No transpose', mp, nq, one,
786 $ c( ioffc ), ldc, work, 1, zero,
787 $ work( ipw ), 1 )
788 ELSE
789 CALL dlaset( 'All', mp, 1, zero, zero, work( ipw ),
790 $ max( 1, mp ) )
791 END IF
792 CALL dgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
793 $ work( ipw ), max( 1, mp ), rdest,
794 $ iccol )
795*
796* sub( C ) := sub( C ) - w * v'
797*
798 CALL dger( mp, nq, -tauloc( 1 ), work( ipw ), 1, work,
799 $ 1, c( ioffc ), ldc )
800 END IF
801*
802 END IF
803*
804 END IF
805*
806 END IF
807*
808 RETURN
809*
810* End of PDLARF
811*
812 END
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
subroutine pbdtrnv(icontxt, xdist, trans, n, nb, nz, x, incx, beta, y, incy, ixrow, ixcol, iyrow, iycol, work)
Definition pbdtrnv.f:4
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pdlarf(side, m, n, v, iv, jv, descv, incv, tau, c, ic, jc, descc, work)
Definition pdlarf.f:3