SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pdlarz.f
Go to the documentation of this file.
1 SUBROUTINE pdlarz( SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C,
2 $ IC, JC, DESCC, WORK )
3*
4* -- ScaLAPACK auxiliary routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* May 25, 2001
8*
9* .. Scalar Arguments ..
10 CHARACTER SIDE
11 INTEGER IC, INCV, IV, JC, JV, L, M, N
12* ..
13* .. Array Arguments ..
14 INTEGER DESCC( * ), DESCV( * )
15 DOUBLE PRECISION C( * ), TAU( * ), V( * ), WORK( * )
16* ..
17*
18* Purpose
19* =======
20*
21* PDLARZ applies a real elementary reflector Q (or Q**T) to a real
22* M-by-N distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), from
23* either the left or the right. Q is represented in the form
24*
25* Q = I - tau * v * v'
26*
27* where tau is a real scalar and v is a real vector.
28*
29* If tau = 0, then Q is taken to be the unit matrix.
30*
31* Q is a product of k elementary reflectors as returned by PDTZRZF.
32*
33* Notes
34* =====
35*
36* Each global data object is described by an associated description
37* vector. This vector stores the information required to establish
38* the mapping between an object element and its corresponding process
39* and memory location.
40*
41* Let A be a generic term for any 2D block cyclicly distributed array.
42* Such a global array has an associated description vector DESCA.
43* In the following comments, the character _ should be read as
44* "of the global array".
45*
46* NOTATION STORED IN EXPLANATION
47* --------------- -------------- --------------------------------------
48* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
49* DTYPE_A = 1.
50* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
51* the BLACS process grid A is distribu-
52* ted over. The context itself is glo-
53* bal, but the handle (the integer
54* value) may vary.
55* M_A (global) DESCA( M_ ) The number of rows in the global
56* array A.
57* N_A (global) DESCA( N_ ) The number of columns in the global
58* array A.
59* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
60* the rows of the array.
61* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
62* the columns of the array.
63* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
64* row of the array A is distributed.
65* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
66* first column of the array A is
67* distributed.
68* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
69* array. LLD_A >= MAX(1,LOCr(M_A)).
70*
71* Let K be the number of rows or columns of a distributed matrix,
72* and assume that its process grid has dimension p x q.
73* LOCr( K ) denotes the number of elements of K that a process
74* would receive if K were distributed over the p processes of its
75* process column.
76* Similarly, LOCc( K ) denotes the number of elements of K that a
77* process would receive if K were distributed over the q processes of
78* its process row.
79* The values of LOCr() and LOCc() may be determined via a call to the
80* ScaLAPACK tool function, NUMROC:
81* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
82* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
83* An upper bound for these quantities may be computed by:
84* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
85* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
86*
87* Because vectors may be viewed as a subclass of matrices, a
88* distributed vector is considered to be a distributed matrix.
89*
90* Restrictions
91* ============
92*
93* If SIDE = 'Left' and INCV = 1, then the row process having the first
94* entry V(IV,JV) must also own C(IC+M-L,JC:JC+N-1). Moreover,
95* MOD(IV-1,MB_V) must be equal to MOD(IC+N-L-1,MB_C), if INCV=M_V, only
96* the last equality must be satisfied.
97*
98* If SIDE = 'Right' and INCV = M_V then the column process having the
99* first entry V(IV,JV) must also own C(IC:IC+M-1,JC+N-L) and
100* MOD(JV-1,NB_V) must be equal to MOD(JC+N-L-1,NB_C), if INCV = 1 only
101* the last equality must be satisfied.
102*
103* Arguments
104* =========
105*
106* SIDE (global input) CHARACTER
107* = 'L': form Q * sub( C ),
108* = 'R': form sub( C ) * Q, Q = Q**T.
109*
110* M (global input) INTEGER
111* The number of rows to be operated on i.e the number of rows
112* of the distributed submatrix sub( C ). M >= 0.
113*
114* N (global input) INTEGER
115* The number of columns to be operated on i.e the number of
116* columns of the distributed submatrix sub( C ). N >= 0.
117*
118* L (global input) INTEGER
119* The columns of the distributed submatrix sub( A ) containing
120* the meaningful part of the Householder reflectors.
121* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
122*
123* V (local input) DOUBLE PRECISION pointer into the local memory
124* to an array of dimension (LLD_V,*) containing the local
125* pieces of the distributed vectors V representing the
126* Householder transformation Q,
127* V(IV:IV+L-1,JV) if SIDE = 'L' and INCV = 1,
128* V(IV,JV:JV+L-1) if SIDE = 'L' and INCV = M_V,
129* V(IV:IV+L-1,JV) if SIDE = 'R' and INCV = 1,
130* V(IV,JV:JV+L-1) if SIDE = 'R' and INCV = M_V,
131*
132* The vector v in the representation of Q. V is not used if
133* TAU = 0.
134*
135* IV (global input) INTEGER
136* The row index in the global array V indicating the first
137* row of sub( V ).
138*
139* JV (global input) INTEGER
140* The column index in the global array V indicating the
141* first column of sub( V ).
142*
143* DESCV (global and local input) INTEGER array of dimension DLEN_.
144* The array descriptor for the distributed matrix V.
145*
146* INCV (global input) INTEGER
147* The global increment for the elements of V. Only two values
148* of INCV are supported in this version, namely 1 and M_V.
149* INCV must not be zero.
150*
151* TAU (local input) DOUBLE PRECISION array, dimension LOCc(JV) if
152* INCV = 1, and LOCr(IV) otherwise. This array contains the
153* Householder scalars related to the Householder vectors.
154* TAU is tied to the distributed matrix V.
155*
156* C (local input/local output) DOUBLE PRECISION pointer into the
157* local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ),
158* containing the local pieces of sub( C ). On exit, sub( C )
159* is overwritten by the Q * sub( C ) if SIDE = 'L', or
160* sub( C ) * Q if SIDE = 'R'.
161*
162* IC (global input) INTEGER
163* The row index in the global array C indicating the first
164* row of sub( C ).
165*
166* JC (global input) INTEGER
167* The column index in the global array C indicating the
168* first column of sub( C ).
169*
170* DESCC (global and local input) INTEGER array of dimension DLEN_.
171* The array descriptor for the distributed matrix C.
172*
173* WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK)
174* If INCV = 1,
175* if SIDE = 'L',
176* if IVCOL = ICCOL,
177* LWORK >= NqC0
178* else
179* LWORK >= MpC0 + MAX( 1, NqC0 )
180* end if
181* else if SIDE = 'R',
182* LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC(
183* N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) )
184* end if
185* else if INCV = M_V,
186* if SIDE = 'L',
187* LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC(
188* M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) )
189* else if SIDE = 'R',
190* if IVROW = ICROW,
191* LWORK >= MpC0
192* else
193* LWORK >= NqC0 + MAX( 1, MpC0 )
194* end if
195* end if
196* end if
197*
198* where LCM is the least common multiple of NPROW and NPCOL and
199* LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW,
200* LCMQ = LCM / NPCOL,
201*
202* IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
203* ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
204* ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
205* MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
206* NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
207*
208* ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
209* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
210* the subroutine BLACS_GRIDINFO.
211*
212* Alignment requirements
213* ======================
214*
215* The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
216* must verify some alignment properties, namely the following
217* expressions should be true:
218*
219* MB_V = NB_V,
220*
221* If INCV = 1,
222* If SIDE = 'Left',
223* ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
224* If SIDE = 'Right',
225* ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
226* else if INCV = M_V,
227* If SIDE = 'Left',
228* ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
229* If SIDE = 'Right',
230* ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
231* end if
232*
233* =====================================================================
234*
235* .. Parameters ..
236 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
237 $ lld_, mb_, m_, nb_, n_, rsrc_
238 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
239 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
240 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
241 DOUBLE PRECISION ONE, ZERO
242 parameter( one = 1.0d+0, zero = 0.0d+0 )
243* ..
244* .. Local Scalars ..
245 LOGICAL CCBLCK, CRBLCK, LEFT
246 CHARACTER COLBTOP, ROWBTOP
247 INTEGER ICCOL1, ICCOL2, ICOFFC1, ICOFFC2, ICOFFV,
248 $ icrow1, icrow2, ictxt, iic1, iic2, iiv, ioffc1,
249 $ ioffc2, ioffv, ipw, iroffc1, iroffc2, iroffv,
250 $ ivcol, ivrow, jjc1, jjc2, jjv, ldc, ldv, mpc2,
251 $ mpv, mycol, myrow, ncc, ncv, npcol, nprow,
252 $ nqc2, nqv, rdest
253 DOUBLE PRECISION TAULOC( 1 )
254* ..
255* .. External Subroutines ..
256 EXTERNAL blacs_gridinfo, daxpy, dcopy, dgebr2d,
257 $ dgebs2d, dgemv, dger, dgerv2d,
258 $ dgesd2d, dgsum2d, dlaset, infog2l,
259 $ pb_topget, pbdtrnv
260* ..
261* .. External Functions ..
262 LOGICAL LSAME
263 INTEGER NUMROC
264 EXTERNAL lsame, numroc
265* ..
266* .. Intrinsic Functions ..
267 INTRINSIC min, mod
268* ..
269* .. Executable Statements ..
270*
271* Quick return if possible
272*
273 IF( m.LE.0 .OR. n.LE.0 )
274 $ RETURN
275*
276* Get grid parameters.
277*
278 ictxt = descc( ctxt_ )
279 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
280*
281* Figure local indexes
282*
283 left = lsame( side, 'L' )
284 CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol, iiv, jjv,
285 $ ivrow, ivcol )
286 iroffv = mod( iv-1, descv( nb_ ) )
287 mpv = numroc( l+iroffv, descv( mb_ ), myrow, ivrow, nprow )
288 IF( myrow.EQ.ivrow )
289 $ mpv = mpv - iroffv
290 icoffv = mod( jv-1, descv( nb_ ) )
291 nqv = numroc( l+icoffv, descv( nb_ ), mycol, ivcol, npcol )
292 IF( mycol.EQ.ivcol )
293 $ nqv = nqv - icoffv
294 ldv = descv( lld_ )
295 ncv = numroc( descv( n_ ), descv( nb_ ), mycol, descv( csrc_ ),
296 $ npcol )
297 ldv = descv( lld_ )
298 iiv = min( iiv, ldv )
299 jjv = min( jjv, ncv )
300 ioffv = iiv+(jjv-1)*ldv
301 ncc = numroc( descc( n_ ), descc( nb_ ), mycol, descc( csrc_ ),
302 $ npcol )
303 CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol,
304 $ iic1, jjc1, icrow1, iccol1 )
305 iroffc1 = mod( ic-1, descc( mb_ ) )
306 icoffc1 = mod( jc-1, descc( nb_ ) )
307 ldc = descc( lld_ )
308 iic1 = min( iic1, ldc )
309 jjc1 = min( jjc1, max( 1, ncc ) )
310 ioffc1 = iic1 + ( jjc1-1 ) * ldc
311*
312 IF( left ) THEN
313 CALL infog2l( ic+m-l, jc, descc, nprow, npcol, myrow, mycol,
314 $ iic2, jjc2, icrow2, iccol2 )
315 iroffc2 = mod( ic+m-l-1, descc( mb_ ) )
316 icoffc2 = mod( jc-1, descc( nb_ ) )
317 nqc2 = numroc( n+icoffc2, descc( nb_ ), mycol, iccol2, npcol )
318 IF( mycol.EQ.iccol2 )
319 $ nqc2 = nqc2 - icoffc2
320 ELSE
321 CALL infog2l( ic, jc+n-l, descc, nprow, npcol, myrow, mycol,
322 $ iic2, jjc2, icrow2, iccol2 )
323 iroffc2 = mod( ic-1, descc( mb_ ) )
324 mpc2 = numroc( m+iroffc2, descc( mb_ ), myrow, icrow2, nprow )
325 IF( myrow.EQ.icrow2 )
326 $ mpc2 = mpc2 - iroffc2
327 icoffc2 = mod( jc+n-l-1, descc( nb_ ) )
328 END IF
329 iic2 = min( iic2, ldc )
330 jjc2 = min( jjc2, ncc )
331 ioffc2 = iic2 + ( jjc2-1 ) * ldc
332*
333* Is sub( C ) only distributed over a process row ?
334*
335 crblck = ( m.LE.(descc( mb_ )-iroffc1) )
336*
337* Is sub( C ) only distributed over a process column ?
338*
339 ccblck = ( n.LE.(descc( nb_ )-icoffc1) )
340*
341 IF( left ) THEN
342*
343 IF( crblck ) THEN
344 rdest = icrow2
345 ELSE
346 rdest = -1
347 END IF
348*
349 IF( ccblck ) THEN
350*
351* sub( C ) is distributed over a process column
352*
353 IF( descv( m_ ).EQ.incv ) THEN
354*
355* Transpose row vector V (ICOFFV = IROFFC2)
356*
357 ipw = mpv+1
358 CALL pbdtrnv( ictxt, 'Rowwise', 'Transpose', m,
359 $ descv( nb_ ), iroffc2, v( ioffv ), ldv,
360 $ zero,
361 $ work, 1, ivrow, ivcol, icrow2, iccol2,
362 $ work( ipw ) )
363*
364* Perform the local computation within a process column
365*
366 IF( mycol.EQ.iccol2 ) THEN
367*
368 IF( myrow.EQ.ivrow ) THEN
369*
370 CALL dgebs2d( ictxt, 'Columnwise', ' ', 1, 1,
371 $ tau( iiv ), 1 )
372 tauloc( 1 ) = tau( iiv )
373*
374 ELSE
375*
376 CALL dgebr2d( ictxt, 'Columnwise', ' ', 1, 1,
377 $ tauloc, 1, ivrow, mycol )
378*
379 END IF
380*
381 IF( tauloc( 1 ).NE.zero ) THEN
382*
383* w := sub( C )' * v
384*
385 IF( mpv.GT.0 ) THEN
386 CALL dgemv( 'Transpose', mpv, nqc2, one,
387 $ c( ioffc2 ), ldc, work, 1, zero,
388 $ work( ipw ), 1 )
389 ELSE
390 CALL dlaset( 'All', nqc2, 1, zero, zero,
391 $ work( ipw ), max( 1, nqc2 ) )
392 END IF
393 IF( myrow.EQ.icrow1 )
394 $ CALL daxpy( nqc2, one, c( ioffc1 ), ldc,
395 $ work( ipw ), max( 1, nqc2 ) )
396*
397 CALL dgsum2d( ictxt, 'Columnwise', ' ', nqc2, 1,
398 $ work( ipw ), max( 1, nqc2 ), rdest,
399 $ mycol )
400*
401* sub( C ) := sub( C ) - v * w'
402*
403 IF( myrow.EQ.icrow1 )
404 $ CALL daxpy( nqc2, -tauloc( 1 ), work( ipw ),
405 $ max( 1, nqc2 ), c( ioffc1 ), ldc )
406 CALL dger( mpv, nqc2, -tauloc( 1 ), work, 1,
407 $ work( ipw ), 1, c( ioffc2 ), ldc )
408 END IF
409*
410 END IF
411*
412 ELSE
413*
414* V is a column vector
415*
416 IF( ivcol.EQ.iccol2 ) THEN
417*
418* Perform the local computation within a process column
419*
420 IF( mycol.EQ.iccol2 ) THEN
421*
422 tauloc( 1 ) = tau( jjv )
423*
424 IF( tauloc( 1 ).NE.zero ) THEN
425*
426* w := sub( C )' * v
427*
428 IF( mpv.GT.0 ) THEN
429 CALL dgemv( 'Transpose', mpv, nqc2, one,
430 $ c( ioffc2 ), ldc, v( ioffv ), 1,
431 $ zero, work, 1 )
432 ELSE
433 CALL dlaset( 'All', nqc2, 1, zero, zero,
434 $ work, max( 1, nqc2 ) )
435 END IF
436 IF( myrow.EQ.icrow1 )
437 $ CALL daxpy( nqc2, one, c( ioffc1 ), ldc,
438 $ work, max( 1, nqc2 ) )
439*
440 CALL dgsum2d( ictxt, 'Columnwise', ' ', nqc2, 1,
441 $ work, max( 1, nqc2 ), rdest,
442 $ mycol )
443*
444* sub( C ) := sub( C ) - v * w'
445*
446 IF( myrow.EQ.icrow1 )
447 $ CALL daxpy( nqc2, -tauloc( 1 ), work,
448 $ max( 1, nqc2 ), c( ioffc1 ),
449 $ ldc )
450 CALL dger( mpv, nqc2, -tauloc( 1 ), v( ioffv ),
451 $ 1, work, 1, c( ioffc2 ), ldc )
452 END IF
453*
454 END IF
455*
456 ELSE
457*
458* Send V and TAU to the process column ICCOL2
459*
460 IF( mycol.EQ.ivcol ) THEN
461*
462 ipw = mpv+1
463 CALL dcopy( mpv, v( ioffv ), 1, work, 1 )
464 work( ipw ) = tau( jjv )
465 CALL dgesd2d( ictxt, ipw, 1, work, ipw, myrow,
466 $ iccol2 )
467*
468 ELSE IF( mycol.EQ.iccol2 ) THEN
469*
470 ipw = mpv+1
471 CALL dgerv2d( ictxt, ipw, 1, work, ipw, myrow,
472 $ ivcol )
473 tauloc( 1 ) = work( ipw )
474*
475 IF( tauloc( 1 ).NE.zero ) THEN
476*
477* w := sub( C )' * v
478*
479 IF( mpv.GT.0 ) THEN
480 CALL dgemv( 'Transpose', mpv, nqc2, one,
481 $ c( ioffc2 ), ldc, work, 1, zero,
482 $ work( ipw ), 1 )
483 ELSE
484 CALL dlaset( 'All', nqc2, 1, zero, zero,
485 $ work( ipw ), max( 1, nqc2 ) )
486 END IF
487 IF( myrow.EQ.icrow1 )
488 $ CALL daxpy( nqc2, one, c( ioffc1 ), ldc,
489 $ work( ipw ), max( 1, nqc2 ) )
490*
491 CALL dgsum2d( ictxt, 'Columnwise', ' ', nqc2, 1,
492 $ work( ipw ), max( 1, nqc2 ),
493 $ rdest, mycol )
494*
495* sub( C ) := sub( C ) - v * w'
496*
497 IF( myrow.EQ.icrow1 )
498 $ CALL daxpy( nqc2, -tauloc( 1 ), work( ipw ),
499 $ max( 1, nqc2 ), c( ioffc1 ),
500 $ ldc )
501 CALL dger( mpv, nqc2, -tauloc( 1 ), work, 1,
502 $ work( ipw ), 1, c( ioffc2 ), ldc )
503 END IF
504*
505 END IF
506*
507 END IF
508*
509 END IF
510*
511 ELSE
512*
513* sub( C ) is a proper distributed matrix
514*
515 IF( descv( m_ ).EQ.incv ) THEN
516*
517* Transpose and broadcast row vector V (ICOFFV=IROFFC2)
518*
519 ipw = mpv+1
520 CALL pbdtrnv( ictxt, 'Rowwise', 'Transpose', m,
521 $ descv( nb_ ), iroffc2, v( ioffv ), ldv,
522 $ zero,
523 $ work, 1, ivrow, ivcol, icrow2, -1,
524 $ work( ipw ) )
525*
526* Perform the local computation within a process column
527*
528 IF( myrow.EQ.ivrow ) THEN
529*
530 CALL dgebs2d( ictxt, 'Columnwise', ' ', 1, 1,
531 $ tau( iiv ), 1 )
532 tauloc( 1 ) = tau( iiv )
533*
534 ELSE
535*
536 CALL dgebr2d( ictxt, 'Columnwise', ' ', 1, 1, tauloc,
537 $ 1, ivrow, mycol )
538*
539 END IF
540*
541 IF( tauloc( 1 ).NE.zero ) THEN
542*
543* w := sub( C )' * v
544*
545 IF( mpv.GT.0 ) THEN
546 CALL dgemv( 'Transpose', mpv, nqc2, one,
547 $ c( ioffc2 ), ldc, work, 1, zero,
548 $ work( ipw ), 1 )
549 ELSE
550 CALL dlaset( 'All', nqc2, 1, zero, zero,
551 $ work( ipw ), max( 1, nqc2 ) )
552 END IF
553 IF( myrow.EQ.icrow1 )
554 $ CALL daxpy( nqc2, one, c( ioffc1 ), ldc,
555 $ work( ipw ), max( 1, nqc2 ) )
556*
557 CALL dgsum2d( ictxt, 'Columnwise', ' ', nqc2, 1,
558 $ work( ipw ), max( 1, nqc2 ), rdest,
559 $ mycol )
560*
561* sub( C ) := sub( C ) - v * w'
562*
563 IF( myrow.EQ.icrow1 )
564 $ CALL daxpy( nqc2, -tauloc( 1 ), work( ipw ),
565 $ max( 1, nqc2 ), c( ioffc1 ), ldc )
566 CALL dger( mpv, nqc2, -tauloc( 1 ), work, 1,
567 $ work( ipw ), 1, c( ioffc2 ), ldc )
568 END IF
569*
570 ELSE
571*
572* Broadcast column vector V
573*
574 CALL pb_topget( ictxt, 'Broadcast', 'Rowwise', rowbtop )
575 IF( mycol.EQ.ivcol ) THEN
576*
577 ipw = mpv+1
578 CALL dcopy( mpv, v( ioffv ), 1, work, 1 )
579 work( ipw ) = tau( jjv )
580 CALL dgebs2d( ictxt, 'Rowwise', rowbtop, ipw, 1,
581 $ work, ipw )
582 tauloc( 1 ) = tau( jjv )
583*
584 ELSE
585*
586 ipw = mpv+1
587 CALL dgebr2d( ictxt, 'Rowwise', rowbtop, ipw, 1, work,
588 $ ipw, myrow, ivcol )
589 tauloc( 1 ) = work( ipw )
590*
591 END IF
592*
593 IF( tauloc( 1 ).NE.zero ) THEN
594*
595* w := sub( C )' * v
596*
597 IF( mpv.GT.0 ) THEN
598 CALL dgemv( 'Transpose', mpv, nqc2, one,
599 $ c( ioffc2 ), ldc, work, 1, zero,
600 $ work( ipw ), 1 )
601 ELSE
602 CALL dlaset( 'All', nqc2, 1, zero, zero,
603 $ work( ipw ), max( 1, nqc2 ) )
604 END IF
605 IF( myrow.EQ.icrow1 )
606 $ CALL daxpy( nqc2, one, c( ioffc1 ), ldc,
607 $ work( ipw ), max( 1, nqc2 ) )
608*
609 CALL dgsum2d( ictxt, 'Columnwise', ' ', nqc2, 1,
610 $ work( ipw ), max( 1, nqc2 ), rdest,
611 $ mycol )
612*
613* sub( C ) := sub( C ) - v * w'
614*
615 IF( myrow.EQ.icrow1 )
616 $ CALL daxpy( nqc2, -tauloc( 1 ), work( ipw ),
617 $ max( 1, nqc2 ), c( ioffc1 ), ldc )
618 CALL dger( mpv, nqc2, -tauloc( 1 ), work, 1,
619 $ work( ipw ), 1, c( ioffc2 ), ldc )
620 END IF
621*
622 END IF
623*
624 END IF
625*
626 ELSE
627*
628 IF( ccblck ) THEN
629 rdest = myrow
630 ELSE
631 rdest = -1
632 END IF
633*
634 IF( crblck ) THEN
635*
636* sub( C ) is distributed over a process row
637*
638 IF( descv( m_ ).EQ.incv ) THEN
639*
640* V is a row vector
641*
642 IF( ivrow.EQ.icrow2 ) THEN
643*
644* Perform the local computation within a process row
645*
646 IF( myrow.EQ.icrow2 ) THEN
647*
648 tauloc( 1 ) = tau( iiv )
649*
650 IF( tauloc( 1 ).NE.zero ) THEN
651*
652* w := sub( C ) * v
653*
654 IF( nqv.GT.0 ) THEN
655 CALL dgemv( 'No transpose', mpc2, nqv, one,
656 $ c( ioffc2 ), ldc, v( ioffv ),
657 $ ldv, zero, work, 1 )
658 ELSE
659 CALL dlaset( 'All', mpc2, 1, zero, zero,
660 $ work, max( 1, mpc2 ) )
661 END IF
662 IF( mycol.EQ.iccol1 )
663 $ CALL daxpy( mpc2, one, c( ioffc1 ), 1,
664 $ work, 1 )
665*
666 CALL dgsum2d( ictxt, 'Rowwise', ' ', mpc2, 1,
667 $ work, max( 1, mpc2 ), rdest,
668 $ iccol2 )
669*
670 IF( mycol.EQ.iccol1 )
671 $ CALL daxpy( mpc2, -tauloc( 1 ), work, 1,
672 $ c( ioffc1 ), 1 )
673*
674* sub( C ) := sub( C ) - w * v'
675*
676 IF( mpc2.GT.0 .AND. nqv.GT.0 )
677 $ CALL dger( mpc2, nqv, -tauloc( 1 ), work, 1,
678 $ v( ioffv ), ldv, c( ioffc2 ),
679 $ ldc )
680 END IF
681*
682 END IF
683*
684 ELSE
685*
686* Send V and TAU to the process row ICROW2
687*
688 IF( myrow.EQ.ivrow ) THEN
689*
690 ipw = nqv+1
691 CALL dcopy( nqv, v( ioffv ), ldv, work, 1 )
692 work( ipw ) = tau( iiv )
693 CALL dgesd2d( ictxt, ipw, 1, work, ipw, icrow2,
694 $ mycol )
695*
696 ELSE IF( myrow.EQ.icrow2 ) THEN
697*
698 ipw = nqv+1
699 CALL dgerv2d( ictxt, ipw, 1, work, ipw, ivrow,
700 $ mycol )
701 tauloc( 1 ) = work( ipw )
702*
703 IF( tauloc( 1 ).NE.zero ) THEN
704*
705* w := sub( C ) * v
706*
707 IF( nqv.GT.0 ) THEN
708 CALL dgemv( 'No transpose', mpc2, nqv, one,
709 $ c( ioffc2 ), ldc, work, 1, zero,
710 $ work( ipw ), 1 )
711 ELSE
712 CALL dlaset( 'All', mpc2, 1, zero, zero,
713 $ work( ipw ), max( 1, mpc2 ) )
714 END IF
715 IF( mycol.EQ.iccol1 )
716 $ CALL daxpy( mpc2, one, c( ioffc1 ), 1,
717 $ work( ipw ), 1 )
718 CALL dgsum2d( ictxt, 'Rowwise', ' ', mpc2, 1,
719 $ work( ipw ), max( 1, mpc2 ),
720 $ rdest, iccol2 )
721 IF( mycol.EQ.iccol1 )
722 $ CALL daxpy( mpc2, -tauloc( 1 ), work( ipw ),
723 $ 1, c( ioffc1 ), 1 )
724*
725* sub( C ) := sub( C ) - w * v'
726*
727 CALL dger( mpc2, nqv, -tauloc( 1 ), work( ipw ),
728 $ 1, work, 1, c( ioffc2 ), ldc )
729 END IF
730*
731 END IF
732*
733 END IF
734*
735 ELSE
736*
737* Transpose column vector V (IROFFV = ICOFFC2)
738*
739 ipw = nqv+1
740 CALL pbdtrnv( ictxt, 'Columnwise', 'Transpose', n,
741 $ descv( mb_ ), icoffc2, v( ioffv ), 1, zero,
742 $ work, 1, ivrow, ivcol, icrow2, iccol2,
743 $ work( ipw ) )
744*
745* Perform the local computation within a process column
746*
747 IF( myrow.EQ.icrow2 ) THEN
748*
749 IF( mycol.EQ.ivcol ) THEN
750*
751 CALL dgebs2d( ictxt, 'Rowwise', ' ', 1, 1,
752 $ tau( jjv ), 1 )
753 tauloc( 1 ) = tau( jjv )
754*
755 ELSE
756*
757 CALL dgebr2d( ictxt, 'Rowwise', ' ', 1, 1, tauloc,
758 $ 1, myrow, ivcol )
759*
760 END IF
761*
762 IF( tauloc( 1 ).NE.zero ) THEN
763*
764* w := sub( C ) * v
765*
766 IF( nqv.GT.0 ) THEN
767 CALL dgemv( 'No transpose', mpc2, nqv, one,
768 $ c( ioffc2 ), ldc, work, 1, zero,
769 $ work( ipw ), 1 )
770 ELSE
771 CALL dlaset( 'All', mpc2, 1, zero, zero,
772 $ work( ipw ), max( 1, mpc2 ) )
773 END IF
774 IF( mycol.EQ.iccol1 )
775 $ CALL daxpy( mpc2, one, c( ioffc1 ), 1,
776 $ work( ipw ), 1 )
777 CALL dgsum2d( ictxt, 'Rowwise', ' ', mpc2, 1,
778 $ work( ipw ), max( 1, mpc2 ), rdest,
779 $ iccol2 )
780 IF( mycol.EQ.iccol1 )
781 $ CALL daxpy( mpc2, -tauloc( 1 ), work( ipw ), 1,
782 $ c( ioffc1 ), 1 )
783*
784* sub( C ) := sub( C ) - w * v'
785*
786 CALL dger( mpc2, nqv, -tauloc( 1 ), work( ipw ), 1,
787 $ work, 1, c( ioffc2 ), ldc )
788 END IF
789*
790 END IF
791*
792 END IF
793*
794 ELSE
795*
796* sub( C ) is a proper distributed matrix
797*
798 IF( descv( m_ ).EQ.incv ) THEN
799*
800* Broadcast row vector V
801*
802 CALL pb_topget( ictxt, 'Broadcast', 'Columnwise',
803 $ colbtop )
804 IF( myrow.EQ.ivrow ) THEN
805*
806 ipw = nqv+1
807 CALL dcopy( nqv, v( ioffv ), ldv, work, 1 )
808 work( ipw ) = tau( iiv )
809 CALL dgebs2d( ictxt, 'Columnwise', colbtop, ipw, 1,
810 $ work, ipw )
811 tauloc( 1 ) = tau( iiv )
812*
813 ELSE
814*
815 ipw = nqv+1
816 CALL dgebr2d( ictxt, 'Columnwise', colbtop, ipw, 1,
817 $ work, ipw, ivrow, mycol )
818 tauloc( 1 ) = work( ipw )
819*
820 END IF
821*
822 IF( tauloc( 1 ).NE.zero ) THEN
823*
824* w := sub( C ) * v
825*
826 IF( nqv.GT.0 ) THEN
827 CALL dgemv( 'No Transpose', mpc2, nqv, one,
828 $ c( ioffc2 ), ldc, work, 1, zero,
829 $ work( ipw ), 1 )
830 ELSE
831 CALL dlaset( 'All', mpc2, 1, zero, zero,
832 $ work( ipw ), max( 1, mpc2 ) )
833 END IF
834 IF( mycol.EQ.iccol1 )
835 $ CALL daxpy( mpc2, one, c( ioffc1 ), 1,
836 $ work( ipw ), 1 )
837*
838 CALL dgsum2d( ictxt, 'Rowwise', ' ', mpc2, 1,
839 $ work( ipw ), max( 1, mpc2 ), rdest,
840 $ iccol2 )
841 IF( mycol.EQ.iccol1 )
842 $ CALL daxpy( mpc2, -tauloc( 1 ), work( ipw ), 1,
843 $ c( ioffc1 ), 1 )
844*
845* sub( C ) := sub( C ) - w * v'
846*
847 CALL dger( mpc2, nqv, -tauloc( 1 ), work( ipw ), 1,
848 $ work, 1, c( ioffc2 ), ldc )
849 END IF
850*
851 ELSE
852*
853* Transpose and broadcast column vector V (ICOFFC2=IROFFV)
854*
855 ipw = nqv+1
856 CALL pbdtrnv( ictxt, 'Columnwise', 'Transpose', n,
857 $ descv( mb_ ), icoffc2, v( ioffv ), 1, zero,
858 $ work, 1, ivrow, ivcol, -1, iccol2,
859 $ work( ipw ) )
860*
861* Perform the local computation within a process column
862*
863 IF( mycol.EQ.ivcol ) THEN
864*
865 CALL dgebs2d( ictxt, 'Rowwise', ' ', 1, 1, tau( jjv ),
866 $ 1 )
867 tauloc( 1 ) = tau( jjv )
868*
869 ELSE
870*
871 CALL dgebr2d( ictxt, 'Rowwise', ' ', 1, 1, tauloc, 1,
872 $ myrow, ivcol )
873*
874 END IF
875*
876 IF( tauloc( 1 ).NE.zero ) THEN
877*
878* w := sub( C ) * v
879*
880 IF( nqv.GT.0 ) THEN
881 CALL dgemv( 'No transpose', mpc2, nqv, one,
882 $ c( ioffc2 ), ldc, work, 1, zero,
883 $ work( ipw ), 1 )
884 ELSE
885 CALL dlaset( 'All', mpc2, 1, zero, zero,
886 $ work( ipw ), max( 1, mpc2 ) )
887 END IF
888 IF( mycol.EQ.iccol1 )
889 $ CALL daxpy( mpc2, one, c( ioffc1 ), 1,
890 $ work( ipw ), 1 )
891 CALL dgsum2d( ictxt, 'Rowwise', ' ', mpc2, 1,
892 $ work( ipw ), max( 1, mpc2 ), rdest,
893 $ iccol2 )
894 IF( mycol.EQ.iccol1 )
895 $ CALL daxpy( mpc2, -tauloc( 1 ), work( ipw ), 1,
896 $ c( ioffc1 ), 1 )
897*
898* sub( C ) := sub( C ) - w * v'
899*
900 CALL dger( mpc2, nqv, -tauloc( 1 ), work( ipw ), 1,
901 $ work, 1, c( ioffc2 ), ldc )
902 END IF
903*
904 END IF
905*
906 END IF
907*
908 END IF
909*
910 RETURN
911*
912* End of PDLARZ
913*
914 END
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
subroutine pbdtrnv(icontxt, xdist, trans, n, nb, nz, x, incx, beta, y, incy, ixrow, ixcol, iyrow, iycol, work)
Definition pbdtrnv.f:4
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pdlarz(side, m, n, l, v, iv, jv, descv, incv, tau, c, ic, jc, descc, work)
Definition pdlarz.f:3