SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pzlarf.f
Go to the documentation of this file.
1 SUBROUTINE pzlarf( SIDE, M, N, V, IV, JV, DESCV, INCV, TAU,
2 $ C, IC, JC, DESCC, WORK )
3*
4* -- ScaLAPACK auxiliary routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* May 25, 2001
8*
9* .. Scalar Arguments ..
10 CHARACTER SIDE
11 INTEGER IC, INCV, IV, JC, JV, M, N
12* ..
13* .. Array Arguments ..
14 INTEGER DESCC( * ), DESCV( * )
15 COMPLEX*16 C( * ), TAU( * ), V( * ), WORK( * )
16* ..
17*
18* Purpose
19* =======
20*
21* PZLARF applies a complex elementary reflector Q to a complex M-by-N
22* distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), from either the
23* left or the right. Q is represented in the form
24*
25* Q = I - tau * v * v'
26*
27* where tau is a complex scalar and v is a complex vector.
28*
29* If tau = 0, then Q is taken to be the unit matrix.
30*
31* Notes
32* =====
33*
34* Each global data object is described by an associated description
35* vector. This vector stores the information required to establish
36* the mapping between an object element and its corresponding process
37* and memory location.
38*
39* Let A be a generic term for any 2D block cyclicly distributed array.
40* Such a global array has an associated description vector DESCA.
41* In the following comments, the character _ should be read as
42* "of the global array".
43*
44* NOTATION STORED IN EXPLANATION
45* --------------- -------------- --------------------------------------
46* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
47* DTYPE_A = 1.
48* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
49* the BLACS process grid A is distribu-
50* ted over. The context itself is glo-
51* bal, but the handle (the integer
52* value) may vary.
53* M_A (global) DESCA( M_ ) The number of rows in the global
54* array A.
55* N_A (global) DESCA( N_ ) The number of columns in the global
56* array A.
57* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
58* the rows of the array.
59* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
60* the columns of the array.
61* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
62* row of the array A is distributed.
63* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
64* first column of the array A is
65* distributed.
66* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
67* array. LLD_A >= MAX(1,LOCr(M_A)).
68*
69* Let K be the number of rows or columns of a distributed matrix,
70* and assume that its process grid has dimension p x q.
71* LOCr( K ) denotes the number of elements of K that a process
72* would receive if K were distributed over the p processes of its
73* process column.
74* Similarly, LOCc( K ) denotes the number of elements of K that a
75* process would receive if K were distributed over the q processes of
76* its process row.
77* The values of LOCr() and LOCc() may be determined via a call to the
78* ScaLAPACK tool function, NUMROC:
79* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
80* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
81* An upper bound for these quantities may be computed by:
82* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
83* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
84*
85* Because vectors may be viewed as a subclass of matrices, a
86* distributed vector is considered to be a distributed matrix.
87*
88* Restrictions
89* ============
90*
91* If SIDE = 'Left' and INCV = 1, then the row process having the first
92* entry V(IV,JV) must also have the first row of sub( C ). Moreover,
93* MOD(IV-1,MB_V) must be equal to MOD(IC-1,MB_C), if INCV=M_V, only
94* the last equality must be satisfied.
95*
96* If SIDE = 'Right' and INCV = M_V then the column process having the
97* first entry V(IV,JV) must also have the first column of sub( C ) and
98* MOD(JV-1,NB_V) must be equal to MOD(JC-1,NB_C), if INCV = 1 only the
99* last equality must be satisfied.
100*
101* Arguments
102* =========
103*
104* SIDE (global input) CHARACTER
105* = 'L': form Q * sub( C ),
106* = 'R': form sub( C ) * Q.
107*
108* M (global input) INTEGER
109* The number of rows to be operated on i.e the number of rows
110* of the distributed submatrix sub( C ). M >= 0.
111*
112* N (global input) INTEGER
113* The number of columns to be operated on i.e the number of
114* columns of the distributed submatrix sub( C ). N >= 0.
115*
116* V (local input) COMPLEX*16 pointer into the local memory
117* to an array of dimension (LLD_V,*) containing the local
118* pieces of the distributed vectors V representing the
119* Householder transformation Q,
120* V(IV:IV+M-1,JV) if SIDE = 'L' and INCV = 1,
121* V(IV,JV:JV+M-1) if SIDE = 'L' and INCV = M_V,
122* V(IV:IV+N-1,JV) if SIDE = 'R' and INCV = 1,
123* V(IV,JV:JV+N-1) if SIDE = 'R' and INCV = M_V,
124*
125* The vector v in the representation of Q. V is not used if
126* TAU = 0.
127*
128* IV (global input) INTEGER
129* The row index in the global array V indicating the first
130* row of sub( V ).
131*
132* JV (global input) INTEGER
133* The column index in the global array V indicating the
134* first column of sub( V ).
135*
136* DESCV (global and local input) INTEGER array of dimension DLEN_.
137* The array descriptor for the distributed matrix V.
138*
139* INCV (global input) INTEGER
140* The global increment for the elements of V. Only two values
141* of INCV are supported in this version, namely 1 and M_V.
142* INCV must not be zero.
143*
144* TAU (local input) COMPLEX*16, array, dimension LOCc(JV) if
145* INCV = 1, and LOCr(IV) otherwise. This array contains the
146* Householder scalars related to the Householder vectors.
147* TAU is tied to the distributed matrix V.
148*
149* C (local input/local output) COMPLEX*16 pointer into the
150* local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ),
151* containing the local pieces of sub( C ). On exit, sub( C )
152* is overwritten by the Q * sub( C ) if SIDE = 'L', or
153* sub( C ) * Q if SIDE = 'R'.
154*
155* IC (global input) INTEGER
156* The row index in the global array C indicating the first
157* row of sub( C ).
158*
159* JC (global input) INTEGER
160* The column index in the global array C indicating the
161* first column of sub( C ).
162*
163* DESCC (global and local input) INTEGER array of dimension DLEN_.
164* The array descriptor for the distributed matrix C.
165*
166* WORK (local workspace) COMPLEX*16 array, dimension (LWORK)
167* If INCV = 1,
168* if SIDE = 'L',
169* if IVCOL = ICCOL,
170* LWORK >= NqC0
171* else
172* LWORK >= MpC0 + MAX( 1, NqC0 )
173* end if
174* else if SIDE = 'R',
175* LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC(
176* N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) )
177* end if
178* else if INCV = M_V,
179* if SIDE = 'L',
180* LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC(
181* M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) )
182* else if SIDE = 'R',
183* if IVROW = ICROW,
184* LWORK >= MpC0
185* else
186* LWORK >= NqC0 + MAX( 1, MpC0 )
187* end if
188* end if
189* end if
190*
191* where LCM is the least common multiple of NPROW and NPCOL and
192* LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW,
193* LCMQ = LCM / NPCOL,
194*
195* IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
196* ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
197* ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
198* MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
199* NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
200*
201* ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
202* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
203* the subroutine BLACS_GRIDINFO.
204*
205* Alignment requirements
206* ======================
207*
208* The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
209* must verify some alignment properties, namely the following
210* expressions should be true:
211*
212* MB_V = NB_V,
213*
214* If INCV = 1,
215* If SIDE = 'Left',
216* ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
217* If SIDE = 'Right',
218* ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
219* else if INCV = M_V,
220* If SIDE = 'Left',
221* ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
222* If SIDE = 'Right',
223* ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
224* end if
225*
226* =====================================================================
227*
228* .. Parameters ..
229 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
230 $ lld_, mb_, m_, nb_, n_, rsrc_
231 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
232 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
233 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
234 COMPLEX*16 ONE, ZERO
235 parameter( one = ( 1.0d+0, 0.0d+0 ),
236 $ zero = ( 0.0d+0, 0.0d+0 ) )
237* ..
238* .. Local Scalars ..
239 LOGICAL CCBLCK, CRBLCK
240 CHARACTER COLBTOP, ROWBTOP
241 INTEGER ICCOL, ICOFF, ICROW, ICTXT, IIC, IIV, IOFFC,
242 $ ioffv, ipw, iroff, ivcol, ivrow, jjc, jjv, ldc,
243 $ ldv, mycol, myrow, mp, ncc, ncv, npcol, nprow,
244 $ nq, rdest
245 COMPLEX*16 TAULOC( 1 )
246* ..
247* .. External Subroutines ..
248 EXTERNAL blacs_gridinfo, infog2l, pb_topget, pbztrnv,
249 $ zcopy, zgebr2d, zgebs2d, zgemv,
250 $ zgerc, zgerv2d, zgesd2d, zgsum2d,
251 $ zlaset
252* ..
253* .. External Functions ..
254 LOGICAL LSAME
255 INTEGER NUMROC
256 EXTERNAL lsame, numroc
257* ..
258* .. Intrinsic Functions ..
259 INTRINSIC min, mod
260* ..
261* .. Executable Statements ..
262*
263* Quick return if possible
264*
265 IF( m.LE.0 .OR. n.LE.0 )
266 $ RETURN
267*
268* Get grid parameters.
269*
270 ictxt = descc( ctxt_ )
271 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
272*
273* Figure local indexes
274*
275 CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol, iic, jjc,
276 $ icrow, iccol )
277 CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol, iiv, jjv,
278 $ ivrow, ivcol )
279 ncc = numroc( descc( n_ ), descc( nb_ ), mycol, descc( csrc_ ),
280 $ npcol )
281 ncv = numroc( descv( n_ ), descv( nb_ ), mycol, descv( csrc_ ),
282 $ npcol )
283 ldc = descc( lld_ )
284 ldv = descv( lld_ )
285 iic = min( iic, ldc )
286 iiv = min( iiv, ldv )
287 jjc = min( jjc, ncc )
288 jjv = min( jjv, ncv )
289 ioffc = iic+(jjc-1)*ldc
290 ioffv = iiv+(jjv-1)*ldv
291*
292 iroff = mod( ic-1, descc( mb_ ) )
293 icoff = mod( jc-1, descc( nb_ ) )
294 mp = numroc( m+iroff, descc( mb_ ), myrow, icrow, nprow )
295 nq = numroc( n+icoff, descc( nb_ ), mycol, iccol, npcol )
296 IF( myrow.EQ.icrow )
297 $ mp = mp - iroff
298 IF( mycol.EQ.iccol )
299 $ nq = nq - icoff
300*
301* Is sub( C ) only distributed over a process row ?
302*
303 crblck = ( m.LE.(descc( mb_ )-iroff) )
304*
305* Is sub( C ) only distributed over a process column ?
306*
307 ccblck = ( n.LE.(descc( nb_ )-icoff) )
308*
309 IF( lsame( side, 'L' ) ) THEN
310*
311 IF( crblck ) THEN
312 rdest = icrow
313 ELSE
314 rdest = -1
315 END IF
316*
317 IF( ccblck ) THEN
318*
319* sub( C ) is distributed over a process column
320*
321 IF( descv( m_ ).EQ.incv ) THEN
322*
323* Transpose row vector V
324*
325 ipw = mp+1
326 CALL pbztrnv( ictxt, 'Rowwise', 'Transpose', m,
327 $ descv( nb_ ), iroff, v( ioffv ), ldv, zero,
328 $ work, 1, ivrow, ivcol, icrow, iccol,
329 $ work( ipw ) )
330*
331* Perform the local computation within a process column
332*
333 IF( mycol.EQ.iccol ) THEN
334*
335 IF( myrow.EQ.ivrow ) THEN
336*
337 CALL zgebs2d( ictxt, 'Columnwise', ' ', 1, 1,
338 $ tau( iiv ), 1 )
339 tauloc( 1 ) = tau( iiv )
340*
341 ELSE
342*
343 CALL zgebr2d( ictxt, 'Columnwise', ' ', 1, 1,
344 $ tauloc, 1, ivrow, mycol )
345*
346 END IF
347*
348 IF( tauloc( 1 ).NE.zero ) THEN
349*
350* w := sub( C )' * v
351*
352 IF( mp.GT.0 ) THEN
353 CALL zgemv( 'Conjugate transpose', mp, nq, one,
354 $ c( ioffc ), ldc, work, 1, zero,
355 $ work( ipw ), 1 )
356 ELSE
357 CALL zlaset( 'All', nq, 1, zero, zero,
358 $ work( ipw ), max( 1, nq ) )
359 END IF
360 CALL zgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
361 $ work( ipw ), max( 1, nq ), rdest,
362 $ mycol )
363*
364* sub( C ) := sub( C ) - v * w'
365*
366 CALL zgerc( mp, nq, -tauloc( 1 ), work, 1,
367 $ work( ipw ), 1, c( ioffc ), ldc )
368 END IF
369*
370 END IF
371*
372 ELSE
373*
374* V is a column vector
375*
376 IF( ivcol.EQ.iccol ) THEN
377*
378* Perform the local computation within a process column
379*
380 IF( mycol.EQ.iccol ) THEN
381*
382 tauloc( 1 ) = tau( jjv )
383*
384 IF( tauloc( 1 ).NE.zero ) THEN
385*
386* w := sub( C )' * v
387*
388 IF( mp.GT.0 ) THEN
389 CALL zgemv( 'Conjugate transpose', mp, nq,
390 $ one, c( ioffc ), ldc, v( ioffv ), 1,
391 $ zero, work, 1 )
392 ELSE
393 CALL zlaset( 'All', nq, 1, zero, zero,
394 $ work, max( 1, nq ) )
395 END IF
396 CALL zgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
397 $ work, max( 1, nq ), rdest, mycol )
398*
399* sub( C ) := sub( C ) - v * w'
400*
401 CALL zgerc( mp, nq, -tauloc( 1 ), v( ioffv ), 1,
402 $ work, 1, c( ioffc ), ldc )
403 END IF
404*
405 END IF
406*
407 ELSE
408*
409* Send V and TAU to the process column ICCOL
410*
411 IF( mycol.EQ.ivcol ) THEN
412*
413 ipw = mp+1
414 CALL zcopy( mp, v( ioffv ), 1, work, 1 )
415 work( ipw ) = tau( jjv )
416 CALL zgesd2d( ictxt, ipw, 1, work, ipw, myrow,
417 $ iccol )
418*
419 ELSE IF( mycol.EQ.iccol ) THEN
420*
421 ipw = mp+1
422 CALL zgerv2d( ictxt, ipw, 1, work, ipw, myrow,
423 $ ivcol )
424 tauloc( 1 ) = work( ipw )
425*
426 IF( tauloc( 1 ).NE.zero ) THEN
427*
428* w := sub( C )' * v
429*
430 IF( mp.GT.0 ) THEN
431 CALL zgemv( 'Conjugate transpose', mp, nq,
432 $ one, c( ioffc ), ldc, work, 1,
433 $ zero, work( ipw ), 1 )
434 ELSE
435 CALL zlaset( 'All', nq, 1, zero, zero,
436 $ work( ipw ), max( 1, nq ) )
437 END IF
438 CALL zgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
439 $ work( ipw ), max( 1, nq ), rdest,
440 $ mycol )
441*
442* sub( C ) := sub( C ) - v * w'
443*
444 CALL zgerc( mp, nq, -tauloc( 1 ), work, 1,
445 $ work( ipw ), 1, c( ioffc ), ldc )
446 END IF
447*
448 END IF
449*
450 END IF
451*
452 END IF
453*
454 ELSE
455*
456* sub( C ) is a proper distributed matrix
457*
458 IF( descv( m_ ).EQ.incv ) THEN
459*
460* Transpose and broadcast row vector V
461*
462 ipw = mp+1
463 CALL pbztrnv( ictxt, 'Rowwise', 'Transpose', m,
464 $ descv( nb_ ), iroff, v( ioffv ), ldv, zero,
465 $ work, 1, ivrow, ivcol, icrow, -1,
466 $ work( ipw ) )
467*
468* Perform the local computation within a process column
469*
470 IF( myrow.EQ.ivrow ) THEN
471*
472 CALL zgebs2d( ictxt, 'Columnwise', ' ', 1, 1,
473 $ tau( iiv ), 1 )
474 tauloc( 1 ) = tau( iiv )
475*
476 ELSE
477*
478 CALL zgebr2d( ictxt, 'Columnwise', ' ', 1, 1, tauloc,
479 $ 1, ivrow, mycol )
480*
481 END IF
482*
483 IF( tauloc( 1 ).NE.zero ) THEN
484*
485* w := sub( C )' * v
486*
487 IF( mp.GT.0 ) THEN
488 IF( ioffc.GT.0 )
489 $ CALL zgemv( 'Conjugate transpose', mp, nq, one,
490 $ c( ioffc ), ldc, work, 1, zero,
491 $ work( ipw ), 1 )
492 ELSE
493 CALL zlaset( 'All', nq, 1, zero, zero,
494 $ work( ipw ), max( 1, nq ) )
495 END IF
496 CALL zgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
497 $ work( ipw ), max( 1, nq ), rdest,
498 $ mycol )
499*
500* sub( C ) := sub( C ) - v * w'
501*
502 IF( ioffc.GT.0 )
503 $ CALL zgerc( mp, nq, -tauloc( 1 ), work, 1,
504 $ work( ipw ), 1, c( ioffc ), ldc )
505 END IF
506*
507 ELSE
508*
509* Broadcast column vector V
510*
511 CALL pb_topget( ictxt, 'Broadcast', 'Rowwise', rowbtop )
512 IF( mycol.EQ.ivcol ) THEN
513*
514 ipw = mp+1
515 CALL zcopy( mp, v( ioffv ), 1, work, 1 )
516 work(ipw) = tau( jjv )
517 CALL zgebs2d( ictxt, 'Rowwise', rowbtop, ipw, 1,
518 $ work, ipw )
519 tauloc( 1 ) = tau( jjv )
520*
521 ELSE
522*
523 ipw = mp+1
524 CALL zgebr2d( ictxt, 'Rowwise', rowbtop, ipw, 1, work,
525 $ ipw, myrow, ivcol )
526 tauloc( 1 ) = work( ipw )
527*
528 END IF
529*
530 IF( tauloc( 1 ).NE.zero ) THEN
531*
532* w := sub( C )' * v
533*
534 IF( mp.GT.0 ) THEN
535 IF( ioffc.GT.0 )
536 $ CALL zgemv( 'Conjugate transpose', mp, nq, one,
537 $ c( ioffc ), ldc, work, 1, zero,
538 $ work( ipw ), 1 )
539 ELSE
540 CALL zlaset( 'All', nq, 1, zero, zero,
541 $ work( ipw ), max( 1, nq ) )
542 END IF
543 CALL zgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
544 $ work( ipw ), max( 1, nq ), rdest,
545 $ mycol )
546*
547* sub( C ) := sub( C ) - v * w'
548*
549 IF( ioffc.GT.0 )
550 $ CALL zgerc( mp, nq, -tauloc( 1 ), work, 1,
551 $ work( ipw ), 1, c( ioffc ), ldc )
552 END IF
553*
554 END IF
555*
556 END IF
557*
558 ELSE
559*
560 IF( ccblck ) THEN
561 rdest = myrow
562 ELSE
563 rdest = -1
564 END IF
565*
566 IF( crblck ) THEN
567*
568* sub( C ) is distributed over a process row
569*
570 IF( descv( m_ ).EQ.incv ) THEN
571*
572* V is a row vector
573*
574 IF( ivrow.EQ.icrow ) THEN
575*
576* Perform the local computation within a process row
577*
578 IF( myrow.EQ.icrow ) THEN
579*
580 tauloc( 1 ) = tau( iiv )
581*
582 IF( tauloc( 1 ).NE.zero ) THEN
583*
584* w := sub( C ) * v
585*
586 IF( nq.GT.0 ) THEN
587 CALL zgemv( 'No transpose', mp, nq, one,
588 $ c( ioffc ), ldc, v( ioffv ), ldv,
589 $ zero, work, 1 )
590 ELSE
591 CALL zlaset( 'All', mp, 1, zero, zero,
592 $ work, max( 1, mp ) )
593 END IF
594 CALL zgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
595 $ work, max( 1, mp ), rdest, iccol )
596*
597* sub( C ) := sub( C ) - w * v'
598*
599 IF( ioffv.GT.0 .AND. ioffc.GT.0 )
600 $ CALL zgerc( mp, nq, -tauloc( 1 ), work, 1,
601 $ v( ioffv ), ldv, c( ioffc ),
602 $ ldc )
603 END IF
604*
605 END IF
606*
607 ELSE
608*
609* Send V and TAU to the process row ICROW
610*
611 IF( myrow.EQ.ivrow ) THEN
612*
613 ipw = nq+1
614 CALL zcopy( nq, v( ioffv ), ldv, work, 1 )
615 work(ipw) = tau( iiv )
616 CALL zgesd2d( ictxt, ipw, 1, work, ipw, icrow,
617 $ mycol )
618*
619 ELSE IF( myrow.EQ.icrow ) THEN
620*
621 ipw = nq+1
622 CALL zgerv2d( ictxt, ipw, 1, work, ipw, ivrow,
623 $ mycol )
624 tauloc( 1 ) = work( ipw )
625*
626 IF( tauloc( 1 ).NE.zero ) THEN
627*
628* w := sub( C ) * v
629*
630 IF( nq.GT.0 ) THEN
631 CALL zgemv( 'No transpose', mp, nq, one,
632 $ c( ioffc ), ldc, work, 1, zero,
633 $ work( ipw ), 1 )
634 ELSE
635 CALL zlaset( 'All', mp, 1, zero, zero,
636 $ work( ipw ), max( 1, mp ) )
637 END IF
638 CALL zgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
639 $ work( ipw ), max( 1, mp ), rdest,
640 $ iccol )
641*
642* sub( C ) := sub( C ) - w * v'
643*
644 CALL zgerc( mp, nq, -tauloc( 1 ), work( ipw ),
645 $ 1, work, 1, c( ioffc ), ldc )
646 END IF
647*
648 END IF
649*
650 END IF
651*
652 ELSE
653*
654* Transpose column vector V
655*
656 ipw = nq+1
657 CALL pbztrnv( ictxt, 'Columnwise', 'Transpose', n,
658 $ descv( mb_ ), icoff, v( ioffv ), 1, zero,
659 $ work, 1, ivrow, ivcol, icrow, iccol,
660 $ work( ipw ) )
661*
662* Perform the local computation within a process column
663*
664 IF( myrow.EQ.icrow ) THEN
665*
666 IF( mycol.EQ.ivcol ) THEN
667*
668 CALL zgebs2d( ictxt, 'Rowwise', ' ', 1, 1,
669 $ tau( jjv ), 1 )
670 tauloc( 1 ) = tau( jjv )
671*
672 ELSE
673*
674 CALL zgebr2d( ictxt, 'Rowwise', ' ', 1, 1, tauloc,
675 $ 1, myrow, ivcol )
676*
677 END IF
678*
679 IF( tauloc( 1 ).NE.zero ) THEN
680*
681* w := sub( C ) * v
682*
683 IF( nq.GT.0 ) THEN
684 CALL zgemv( 'No transpose', mp, nq, one,
685 $ c( ioffc ), ldc, work, 1, zero,
686 $ work( ipw ), 1 )
687 ELSE
688 CALL zlaset( 'All', mp, 1, zero, zero,
689 $ work( ipw ), max( 1, mp ) )
690 END IF
691 CALL zgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
692 $ work( ipw ), max( 1, mp ), rdest,
693 $ iccol )
694*
695* sub( C ) := sub( C ) - w * v'
696*
697 CALL zgerc( mp, nq, -tauloc( 1 ), work( ipw ), 1,
698 $ work, 1, c( ioffc ), ldc )
699 END IF
700*
701 END IF
702*
703 END IF
704*
705 ELSE
706*
707* sub( C ) is a proper distributed matrix
708*
709 IF( descv( m_ ).EQ.incv ) THEN
710*
711* Broadcast row vector V
712*
713 CALL pb_topget( ictxt, 'Broadcast', 'Columnwise',
714 $ colbtop )
715 IF( myrow.EQ.ivrow ) THEN
716*
717 ipw = nq+1
718 IF( ioffv.GT.0 )
719 $ CALL zcopy( nq, v( ioffv ), ldv, work, 1 )
720 work(ipw) = tau( iiv )
721 CALL zgebs2d( ictxt, 'Columnwise', colbtop, ipw, 1,
722 $ work, ipw )
723 tauloc( 1 ) = tau( iiv )
724*
725 ELSE
726*
727 ipw = nq+1
728 CALL zgebr2d( ictxt, 'Columnwise', colbtop, ipw, 1,
729 $ work, ipw, ivrow, mycol )
730 tauloc( 1 ) = work( ipw )
731*
732 END IF
733*
734 IF( tauloc( 1 ).NE.zero ) THEN
735*
736* w := sub( C ) * v
737*
738 IF( nq.GT.0 ) THEN
739 CALL zgemv( 'No Transpose', mp, nq, one,
740 $ c( ioffc ), ldc, work, 1, zero,
741 $ work( ipw ), 1 )
742 ELSE
743 CALL zlaset( 'All', mp, 1, zero, zero,
744 $ work( ipw ), max( 1, mp ) )
745 END IF
746 CALL zgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
747 $ work( ipw ), max( 1, mp ), rdest,
748 $ iccol )
749*
750* sub( C ) := sub( C ) - w * v'
751*
752 IF( ioffc.GT.0 )
753 $ CALL zgerc( mp, nq, -tauloc( 1 ), work( ipw ), 1,
754 $ work, 1, c( ioffc ), ldc )
755 END IF
756*
757 ELSE
758*
759* Transpose and broadcast column vector V
760*
761 ipw = nq+1
762 CALL pbztrnv( ictxt, 'Columnwise', 'Transpose', n,
763 $ descv( mb_ ), icoff, v( ioffv ), 1, zero,
764 $ work, 1, ivrow, ivcol, -1, iccol,
765 $ work( ipw ) )
766*
767* Perform the local computation within a process column
768*
769 IF( mycol.EQ.ivcol ) THEN
770*
771 CALL zgebs2d( ictxt, 'Rowwise', ' ', 1, 1, tau( jjv ),
772 $ 1 )
773 tauloc( 1 ) = tau( jjv )
774*
775 ELSE
776*
777 CALL zgebr2d( ictxt, 'Rowwise', ' ', 1, 1, tauloc, 1,
778 $ myrow, ivcol )
779*
780 END IF
781*
782 IF( tauloc( 1 ).NE.zero ) THEN
783*
784* w := sub( C ) * v
785*
786 IF( nq.GT.0 ) THEN
787 CALL zgemv( 'No transpose', mp, nq, one,
788 $ c( ioffc ), ldc, work, 1, zero,
789 $ work( ipw ), 1 )
790 ELSE
791 CALL zlaset( 'All', mp, 1, zero, zero, work( ipw ),
792 $ max( 1, mp ) )
793 END IF
794 CALL zgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
795 $ work( ipw ), max( 1, mp ), rdest,
796 $ iccol )
797*
798* sub( C ) := sub( C ) - w * v'
799*
800 CALL zgerc( mp, nq, -tauloc( 1 ), work( ipw ), 1,
801 $ work, 1, c( ioffc ), ldc )
802 END IF
803*
804 END IF
805*
806 END IF
807*
808 END IF
809*
810 RETURN
811*
812* End of PZLARF
813*
814 END
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
subroutine pbztrnv(icontxt, xdist, trans, n, nb, nz, x, incx, beta, y, incy, ixrow, ixcol, iyrow, iycol, work)
Definition pbztrnv.f:4
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pzlarf(side, m, n, v, iv, jv, descv, incv, tau, c, ic, jc, descc, work)
Definition pzlarf.f:3