ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdlarf.f
Go to the documentation of this file.
1  SUBROUTINE pdlarf( SIDE, M, N, V, IV, JV, DESCV, INCV, TAU,
2  $ C, IC, JC, DESCC, WORK )
3 *
4 * -- ScaLAPACK auxiliary routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 25, 2001
8 *
9 * .. Scalar Arguments ..
10  CHARACTER SIDE
11  INTEGER IC, INCV, IV, JC, JV, M, N
12 * ..
13 * .. Array Arguments ..
14  INTEGER DESCC( * ), DESCV( * )
15  DOUBLE PRECISION C( * ), TAU( * ), V( * ), WORK( * )
16 * ..
17 *
18 * Purpose
19 * =======
20 *
21 * PDLARF applies a real elementary reflector Q (or Q**T) to a real
22 * M-by-N distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), from
23 * either the left or the right. Q is represented in the form
24 *
25 * Q = I - tau * v * v'
26 *
27 * where tau is a real scalar and v is a real vector.
28 *
29 * If tau = 0, then Q is taken to be the unit matrix.
30 *
31 * Notes
32 * =====
33 *
34 * Each global data object is described by an associated description
35 * vector. This vector stores the information required to establish
36 * the mapping between an object element and its corresponding process
37 * and memory location.
38 *
39 * Let A be a generic term for any 2D block cyclicly distributed array.
40 * Such a global array has an associated description vector DESCA.
41 * In the following comments, the character _ should be read as
42 * "of the global array".
43 *
44 * NOTATION STORED IN EXPLANATION
45 * --------------- -------------- --------------------------------------
46 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
47 * DTYPE_A = 1.
48 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
49 * the BLACS process grid A is distribu-
50 * ted over. The context itself is glo-
51 * bal, but the handle (the integer
52 * value) may vary.
53 * M_A (global) DESCA( M_ ) The number of rows in the global
54 * array A.
55 * N_A (global) DESCA( N_ ) The number of columns in the global
56 * array A.
57 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
58 * the rows of the array.
59 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
60 * the columns of the array.
61 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
62 * row of the array A is distributed.
63 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
64 * first column of the array A is
65 * distributed.
66 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
67 * array. LLD_A >= MAX(1,LOCr(M_A)).
68 *
69 * Let K be the number of rows or columns of a distributed matrix,
70 * and assume that its process grid has dimension p x q.
71 * LOCr( K ) denotes the number of elements of K that a process
72 * would receive if K were distributed over the p processes of its
73 * process column.
74 * Similarly, LOCc( K ) denotes the number of elements of K that a
75 * process would receive if K were distributed over the q processes of
76 * its process row.
77 * The values of LOCr() and LOCc() may be determined via a call to the
78 * ScaLAPACK tool function, NUMROC:
79 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
80 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
81 * An upper bound for these quantities may be computed by:
82 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
83 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
84 *
85 * Because vectors may be viewed as a subclass of matrices, a
86 * distributed vector is considered to be a distributed matrix.
87 *
88 * Restrictions
89 * ============
90 *
91 * If SIDE = 'Left' and INCV = 1, then the row process having the first
92 * entry V(IV,JV) must also have the first row of sub( C ). Moreover,
93 * MOD(IV-1,MB_V) must be equal to MOD(IC-1,MB_C), if INCV=M_V, only
94 * the last equality must be satisfied.
95 *
96 * If SIDE = 'Right' and INCV = M_V then the column process having the
97 * first entry V(IV,JV) must also have the first column of sub( C ) and
98 * MOD(JV-1,NB_V) must be equal to MOD(JC-1,NB_C), if INCV = 1 only the
99 * last equality must be satisfied.
100 *
101 * Arguments
102 * =========
103 *
104 * SIDE (global input) CHARACTER
105 * = 'L': form Q * sub( C ),
106 * = 'R': form sub( C ) * Q, Q = Q**T.
107 *
108 * M (global input) INTEGER
109 * The number of rows to be operated on i.e the number of rows
110 * of the distributed submatrix sub( C ). M >= 0.
111 *
112 * N (global input) INTEGER
113 * The number of columns to be operated on i.e the number of
114 * columns of the distributed submatrix sub( C ). N >= 0.
115 *
116 * V (local input) DOUBLE PRECISION pointer into the local memory
117 * to an array of dimension (LLD_V,*) containing the local
118 * pieces of the distributed vectors V representing the
119 * Householder transformation Q,
120 * V(IV:IV+M-1,JV) if SIDE = 'L' and INCV = 1,
121 * V(IV,JV:JV+M-1) if SIDE = 'L' and INCV = M_V,
122 * V(IV:IV+N-1,JV) if SIDE = 'R' and INCV = 1,
123 * V(IV,JV:JV+N-1) if SIDE = 'R' and INCV = M_V,
124 *
125 * The vector v in the representation of Q. V is not used if
126 * TAU = 0.
127 *
128 * IV (global input) INTEGER
129 * The row index in the global array V indicating the first
130 * row of sub( V ).
131 *
132 * JV (global input) INTEGER
133 * The column index in the global array V indicating the
134 * first column of sub( V ).
135 *
136 * DESCV (global and local input) INTEGER array of dimension DLEN_.
137 * The array descriptor for the distributed matrix V.
138 *
139 * INCV (global input) INTEGER
140 * The global increment for the elements of V. Only two values
141 * of INCV are supported in this version, namely 1 and M_V.
142 * INCV must not be zero.
143 *
144 * TAU (local input) DOUBLE PRECISION array, dimension LOCc(JV) if
145 * INCV = 1, and LOCr(IV) otherwise. This array contains the
146 * Householder scalars related to the Householder vectors.
147 * TAU is tied to the distributed matrix V.
148 *
149 * C (local input/local output) DOUBLE PRECISION pointer into the
150 * local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ),
151 * containing the local pieces of sub( C ). On exit, sub( C )
152 * is overwritten by the Q * sub( C ) if SIDE = 'L', or
153 * sub( C ) * Q if SIDE = 'R'.
154 *
155 * IC (global input) INTEGER
156 * The row index in the global array C indicating the first
157 * row of sub( C ).
158 *
159 * JC (global input) INTEGER
160 * The column index in the global array C indicating the
161 * first column of sub( C ).
162 *
163 * DESCC (global and local input) INTEGER array of dimension DLEN_.
164 * The array descriptor for the distributed matrix C.
165 *
166 * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK)
167 * If INCV = 1,
168 * if SIDE = 'L',
169 * if IVCOL = ICCOL,
170 * LWORK >= NqC0
171 * else
172 * LWORK >= MpC0 + MAX( 1, NqC0 )
173 * end if
174 * else if SIDE = 'R',
175 * LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC(
176 * N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) )
177 * end if
178 * else if INCV = M_V,
179 * if SIDE = 'L',
180 * LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC(
181 * M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) )
182 * else if SIDE = 'R',
183 * if IVROW = ICROW,
184 * LWORK >= MpC0
185 * else
186 * LWORK >= NqC0 + MAX( 1, MpC0 )
187 * end if
188 * end if
189 * end if
190 *
191 * where LCM is the least common multiple of NPROW and NPCOL and
192 * LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW,
193 * LCMQ = LCM / NPCOL,
194 *
195 * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
196 * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
197 * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
198 * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
199 * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
200 *
201 * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
202 * MYROW, MYCOL, NPROW and NPCOL can be determined by calling
203 * the subroutine BLACS_GRIDINFO.
204 *
205 * Alignment requirements
206 * ======================
207 *
208 * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
209 * must verify some alignment properties, namely the following
210 * expressions should be true:
211 *
212 * MB_V = NB_V,
213 *
214 * If INCV = 1,
215 * If SIDE = 'Left',
216 * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
217 * If SIDE = 'Right',
218 * ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
219 * else if INCV = M_V,
220 * If SIDE = 'Left',
221 * ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
222 * If SIDE = 'Right',
223 * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
224 * end if
225 *
226 * =====================================================================
227 *
228 * .. Parameters ..
229  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
230  $ lld_, mb_, m_, nb_, n_, rsrc_
231  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
232  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
233  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
234  DOUBLE PRECISION ONE, ZERO
235  parameter( one = 1.0d+0, zero = 0.0d+0 )
236 * ..
237 * .. Local Scalars ..
238  LOGICAL CCBLCK, CRBLCK
239  CHARACTER COLBTOP, ROWBTOP
240  INTEGER ICCOL, ICOFF, ICROW, ICTXT, IIC, IIV, IOFFC,
241  $ ioffv, ipw, iroff, ivcol, ivrow, jjc, jjv, ldc,
242  $ ldv, mycol, myrow, mp, ncc, ncv, npcol, nprow,
243  $ nq, rdest
244  DOUBLE PRECISION TAULOC
245 * ..
246 * .. External Subroutines ..
247  EXTERNAL blacs_gridinfo, dcopy, dgebr2d, dgebs2d,
248  $ dgemv, dger, dgerv2d, dgesd2d,
249  $ dgsum2d, dlaset, infog2l, pb_topget,
250  $ pbdtrnv
251 * ..
252 * .. External Functions ..
253  LOGICAL LSAME
254  INTEGER NUMROC
255  EXTERNAL lsame, numroc
256 * ..
257 * .. Intrinsic Functions ..
258  INTRINSIC min, mod
259 * ..
260 * .. Executable Statements ..
261 *
262 * Quick return if possible
263 *
264  IF( m.LE.0 .OR. n.LE.0 )
265  $ RETURN
266 *
267 * Get grid parameters.
268 *
269  ictxt = descc( ctxt_ )
270  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
271 *
272 * Figure local indexes
273 *
274  CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol, iic, jjc,
275  $ icrow, iccol )
276  CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol, iiv, jjv,
277  $ ivrow, ivcol )
278  ncc = numroc( descc( n_ ), descc( nb_ ), mycol, descc( csrc_ ),
279  $ npcol )
280  ncv = numroc( descv( n_ ), descv( nb_ ), mycol, descv( csrc_ ),
281  $ npcol )
282  ldc = descc( lld_ )
283  ldv = descv( lld_ )
284  iic = min( iic, ldc )
285  iiv = min( iiv, ldv )
286  jjc = min( jjc, ncc )
287  jjv = min( jjv, ncv )
288  ioffc = iic+(jjc-1)*ldc
289  ioffv = iiv+(jjv-1)*ldv
290 *
291  iroff = mod( ic-1, descc( mb_ ) )
292  icoff = mod( jc-1, descc( nb_ ) )
293  mp = numroc( m+iroff, descc( mb_ ), myrow, icrow, nprow )
294  nq = numroc( n+icoff, descc( nb_ ), mycol, iccol, npcol )
295  IF( myrow.EQ.icrow )
296  $ mp = mp - iroff
297  IF( mycol.EQ.iccol )
298  $ nq = nq - icoff
299 *
300 * Is sub( C ) only distributed over a process row ?
301 *
302  crblck = ( m.LE.(descc( mb_ )-iroff) )
303 *
304 * Is sub( C ) only distributed over a process column ?
305 *
306  ccblck = ( n.LE.(descc( nb_ )-icoff) )
307 *
308  IF( lsame( side, 'L' ) ) THEN
309 *
310  IF( crblck ) THEN
311  rdest = icrow
312  ELSE
313  rdest = -1
314  END IF
315 *
316  IF( ccblck ) THEN
317 *
318 * sub( C ) is distributed over a process column
319 *
320  IF( descv( m_ ).EQ.incv ) THEN
321 *
322 * Transpose row vector V
323 *
324  ipw = mp+1
325  CALL pbdtrnv( ictxt, 'Rowwise', 'Transpose', m,
326  $ descv( nb_ ), iroff, v( ioffv ), ldv, zero,
327  $ work, 1, ivrow, ivcol, icrow, iccol,
328  $ work( ipw ) )
329 *
330 * Perform the local computation within a process column
331 *
332  IF( mycol.EQ.iccol ) THEN
333 *
334  IF( myrow.EQ.ivrow ) THEN
335 *
336  CALL dgebs2d( ictxt, 'Columnwise', ' ', 1, 1,
337  $ tau( iiv ), 1 )
338  tauloc = tau( iiv )
339 *
340  ELSE
341 *
342  CALL dgebr2d( ictxt, 'Columnwise', ' ', 1, 1,
343  $ tauloc, 1, ivrow, mycol )
344 *
345  END IF
346 *
347  IF( tauloc.NE.zero ) THEN
348 *
349 * w := sub( C )' * v
350 *
351  IF( mp.GT.0 ) THEN
352  CALL dgemv( 'Transpose', mp, nq, one,
353  $ c( ioffc ), ldc, work, 1, zero,
354  $ work( ipw ), 1 )
355  ELSE
356  CALL dlaset( 'All', nq, 1, zero, zero,
357  $ work( ipw ), max( 1, nq ) )
358  END IF
359  CALL dgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
360  $ work( ipw ), max( 1, nq ), rdest,
361  $ mycol )
362 *
363 * sub( C ) := sub( C ) - v * w'
364 *
365  CALL dger( mp, nq, -tauloc, work, 1, work( ipw ),
366  $ 1, c( ioffc ), ldc )
367  END IF
368 *
369  END IF
370 *
371  ELSE
372 *
373 * V is a column vector
374 *
375  IF( ivcol.EQ.iccol ) THEN
376 *
377 * Perform the local computation within a process column
378 *
379  IF( mycol.EQ.iccol ) THEN
380 *
381  tauloc = tau( jjv )
382 *
383  IF( tauloc.NE.zero ) THEN
384 *
385 * w := sub( C )' * v
386 *
387  IF( mp.GT.0 ) THEN
388  CALL dgemv( 'Transpose', mp, nq, one,
389  $ c( ioffc ), ldc, v( ioffv ), 1,
390  $ zero, work, 1 )
391  ELSE
392  CALL dlaset( 'All', nq, 1, zero, zero,
393  $ work, max( 1, nq ) )
394  END IF
395  CALL dgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
396  $ work, max( 1, nq ), rdest, mycol )
397 *
398 * sub( C ) := sub( C ) - v * w'
399 *
400  CALL dger( mp, nq, -tauloc, v( ioffv ), 1, work,
401  $ 1, c( ioffc ), ldc )
402  END IF
403 *
404  END IF
405 *
406  ELSE
407 *
408 * Send V and TAU to the process column ICCOL
409 *
410  IF( mycol.EQ.ivcol ) THEN
411 *
412  ipw = mp+1
413  CALL dcopy( mp, v( ioffv ), 1, work, 1 )
414  work( ipw ) = tau( jjv )
415  CALL dgesd2d( ictxt, ipw, 1, work, ipw, myrow,
416  $ iccol )
417 *
418  ELSE IF( mycol.EQ.iccol ) THEN
419 *
420  ipw = mp+1
421  CALL dgerv2d( ictxt, ipw, 1, work, ipw, myrow,
422  $ ivcol )
423  tauloc = work( ipw )
424 *
425  IF( tauloc.NE.zero ) THEN
426 *
427 * w := sub( C )' * v
428 *
429  IF( mp.GT.0 ) THEN
430  CALL dgemv( 'Transpose', mp, nq, one,
431  $ c( ioffc ), ldc, work, 1, zero,
432  $ work( ipw ), 1 )
433  ELSE
434  CALL dlaset( 'All', nq, 1, zero, zero,
435  $ work( ipw ), max( 1, nq ) )
436  END IF
437  CALL dgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
438  $ work( ipw ), max( 1, nq ), rdest,
439  $ mycol )
440 *
441 * sub( C ) := sub( C ) - v * w'
442 *
443  CALL dger( mp, nq, -tauloc, work, 1,
444  $ work( ipw ), 1, c( ioffc ), ldc )
445  END IF
446 *
447  END IF
448 *
449  END IF
450 *
451  END IF
452 *
453  ELSE
454 *
455 * sub( C ) is a proper distributed matrix
456 *
457  IF( descv( m_ ).EQ.incv ) THEN
458 *
459 * Transpose and broadcast row vector V
460 *
461  ipw = mp+1
462  CALL pbdtrnv( ictxt, 'Rowwise', 'Transpose', m,
463  $ descv( nb_ ), iroff, v( ioffv ), ldv, zero,
464  $ work, 1, ivrow, ivcol, icrow, -1,
465  $ work( ipw ) )
466 *
467 * Perform the local computation within a process column
468 *
469  IF( myrow.EQ.ivrow ) THEN
470 *
471  CALL dgebs2d( ictxt, 'Columnwise', ' ', 1, 1,
472  $ tau( iiv ), 1 )
473  tauloc = tau( iiv )
474 *
475  ELSE
476 *
477  CALL dgebr2d( ictxt, 'Columnwise', ' ', 1, 1, tauloc,
478  $ 1, ivrow, mycol )
479 *
480  END IF
481 *
482  IF( tauloc.NE.zero ) THEN
483 *
484 * w := sub( C )' * v
485 *
486  IF( mp.GT.0 ) THEN
487  IF( ioffc.GT.0 )
488  $ CALL dgemv( 'Transpose', mp, nq, one,
489  $ c( ioffc ), ldc, work, 1, zero,
490  $ work( ipw ), 1 )
491  ELSE
492  CALL dlaset( 'All', nq, 1, zero, zero,
493  $ work( ipw ), max( 1, nq ) )
494  END IF
495  CALL dgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
496  $ work( ipw ), max( 1, nq ), rdest,
497  $ mycol )
498 *
499 * sub( C ) := sub( C ) - v * w'
500 *
501  IF( ioffc.GT.0 )
502  $ CALL dger( mp, nq, -tauloc, work, 1, work( ipw ),
503  $ 1, c( ioffc ), ldc )
504  END IF
505 *
506  ELSE
507 *
508 * Broadcast column vector V
509 *
510  CALL pb_topget( ictxt, 'Broadcast', 'Rowwise', rowbtop )
511  IF( mycol.EQ.ivcol ) THEN
512 *
513  ipw = mp+1
514  CALL dcopy( mp, v( ioffv ), 1, work, 1 )
515  work(ipw) = tau( jjv )
516  CALL dgebs2d( ictxt, 'Rowwise', rowbtop, ipw, 1,
517  $ work, ipw )
518  tauloc = tau( jjv )
519 *
520  ELSE
521 *
522  ipw = mp+1
523  CALL dgebr2d( ictxt, 'Rowwise', rowbtop, ipw, 1, work,
524  $ ipw, myrow, ivcol )
525  tauloc = work( ipw )
526 *
527  END IF
528 *
529  IF( tauloc.NE.zero ) THEN
530 *
531 * w := sub( C )' * v
532 *
533  IF( mp.GT.0 ) THEN
534  IF( ioffc.GT.0 )
535  $ CALL dgemv( 'Transpose', mp, nq, one,
536  $ c( ioffc ), ldc, work, 1, zero,
537  $ work( ipw ), 1 )
538  ELSE
539  CALL dlaset( 'All', nq, 1, zero, zero,
540  $ work( ipw ), max( 1, nq ) )
541  END IF
542  CALL dgsum2d( ictxt, 'Columnwise', ' ', nq, 1,
543  $ work( ipw ), max( 1, nq ), rdest,
544  $ mycol )
545 *
546 * sub( C ) := sub( C ) - v * w'
547 *
548  IF( ioffc.GT.0 )
549  $ CALL dger( mp, nq, -tauloc, work, 1, work( ipw ),
550  $ 1, c( ioffc ), ldc )
551  END IF
552 *
553  END IF
554 *
555  END IF
556 *
557  ELSE
558 *
559  IF( ccblck ) THEN
560  rdest = myrow
561  ELSE
562  rdest = -1
563  END IF
564 *
565  IF( crblck ) THEN
566 *
567 * sub( C ) is distributed over a process row
568 *
569  IF( descv( m_ ).EQ.incv ) THEN
570 *
571 * V is a row vector
572 *
573  IF( ivrow.EQ.icrow ) THEN
574 *
575 * Perform the local computation within a process row
576 *
577  IF( myrow.EQ.icrow ) THEN
578 *
579  tauloc = tau( iiv )
580 *
581  IF( tauloc.NE.zero ) THEN
582 *
583 * w := sub( C ) * v
584 *
585  IF( nq.GT.0 ) THEN
586  CALL dgemv( 'No transpose', mp, nq, one,
587  $ c( ioffc ), ldc, v( ioffv ), ldv,
588  $ zero, work, 1 )
589  ELSE
590  CALL dlaset( 'All', mp, 1, zero, zero,
591  $ work, max( 1, mp ) )
592  END IF
593  CALL dgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
594  $ work, max( 1, mp ), rdest, iccol )
595 *
596 * sub( C ) := sub( C ) - w * v'
597 *
598  IF( ioffv.GT.0 .AND. ioffc.GT.0 )
599  $ CALL dger( mp, nq, -tauloc, work, 1,
600  $ v( ioffv ), ldv, c( ioffc ), ldc )
601  END IF
602 *
603  END IF
604 *
605  ELSE
606 *
607 * Send V and TAU to the process row ICROW
608 *
609  IF( myrow.EQ.ivrow ) THEN
610 *
611  ipw = nq+1
612  CALL dcopy( nq, v( ioffv ), ldv, work, 1 )
613  work(ipw) = tau( iiv )
614  CALL dgesd2d( ictxt, ipw, 1, work, ipw, icrow,
615  $ mycol )
616 *
617  ELSE IF( myrow.EQ.icrow ) THEN
618 *
619  ipw = nq+1
620  CALL dgerv2d( ictxt, ipw, 1, work, ipw, ivrow,
621  $ mycol )
622  tauloc = work( ipw )
623 *
624  IF( tauloc.NE.zero ) THEN
625 *
626 * w := sub( C ) * v
627 *
628  IF( nq.GT.0 ) THEN
629  CALL dgemv( 'No transpose', mp, nq, one,
630  $ c( ioffc ), ldc, work, 1, zero,
631  $ work( ipw ), 1 )
632  ELSE
633  CALL dlaset( 'All', mp, 1, zero, zero,
634  $ work( ipw ), max( 1, mp ) )
635  END IF
636  CALL dgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
637  $ work( ipw ), max( 1, mp ), rdest,
638  $ iccol )
639 *
640 * sub( C ) := sub( C ) - w * v'
641 *
642  CALL dger( mp, nq, -tauloc, work( ipw ), 1,
643  $ work, 1, c( ioffc ), ldc )
644  END IF
645 *
646  END IF
647 *
648  END IF
649 *
650  ELSE
651 *
652 * Transpose column vector V
653 *
654  ipw = nq+1
655  CALL pbdtrnv( ictxt, 'Columnwise', 'Transpose', n,
656  $ descv( mb_ ), icoff, v( ioffv ), 1, zero,
657  $ work, 1, ivrow, ivcol, icrow, iccol,
658  $ work( ipw ) )
659 *
660 * Perform the local computation within a process column
661 *
662  IF( myrow.EQ.icrow ) THEN
663 *
664  IF( mycol.EQ.ivcol ) THEN
665 *
666  CALL dgebs2d( ictxt, 'Rowwise', ' ', 1, 1,
667  $ tau( jjv ), 1 )
668  tauloc = tau( jjv )
669 *
670  ELSE
671 *
672  CALL dgebr2d( ictxt, 'Rowwise', ' ', 1, 1, tauloc,
673  $ 1, myrow, ivcol )
674 *
675  END IF
676 *
677  IF( tauloc.NE.zero ) THEN
678 *
679 * w := sub( C ) * v
680 *
681  IF( nq.GT.0 ) THEN
682  CALL dgemv( 'No transpose', mp, nq, one,
683  $ c( ioffc ), ldc, work, 1, zero,
684  $ work( ipw ), 1 )
685  ELSE
686  CALL dlaset( 'All', mp, 1, zero, zero,
687  $ work( ipw ), max( 1, mp ) )
688  END IF
689  CALL dgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
690  $ work( ipw ), max( 1, mp ), rdest,
691  $ iccol )
692 *
693 * sub( C ) := sub( C ) - w * v'
694 *
695  CALL dger( mp, nq, -tauloc, work( ipw ), 1, work,
696  $ 1, c( ioffc ), ldc )
697  END IF
698 *
699  END IF
700 *
701  END IF
702 *
703  ELSE
704 *
705 * sub( C ) is a proper distributed matrix
706 *
707  IF( descv( m_ ).EQ.incv ) THEN
708 *
709 * Broadcast row vector V
710 *
711  CALL pb_topget( ictxt, 'Broadcast', 'Columnwise',
712  $ colbtop )
713  IF( myrow.EQ.ivrow ) THEN
714 *
715  ipw = nq+1
716  IF( ioffv.GT.0 )
717  $ CALL dcopy( nq, v( ioffv ), ldv, work, 1 )
718  work(ipw) = tau( iiv )
719  CALL dgebs2d( ictxt, 'Columnwise', colbtop, ipw, 1,
720  $ work, ipw )
721  tauloc = tau( iiv )
722 *
723  ELSE
724 *
725  ipw = nq+1
726  CALL dgebr2d( ictxt, 'Columnwise', colbtop, ipw, 1,
727  $ work, ipw, ivrow, mycol )
728  tauloc = work( ipw )
729 *
730  END IF
731 *
732  IF( tauloc.NE.zero ) THEN
733 *
734 * w := sub( C ) * v
735 *
736  IF( nq.GT.0 ) THEN
737  CALL dgemv( 'No Transpose', mp, nq, one,
738  $ c( ioffc ), ldc, work, 1, zero,
739  $ work( ipw ), 1 )
740  ELSE
741  CALL dlaset( 'All', mp, 1, zero, zero,
742  $ work( ipw ), max( 1, mp ) )
743  END IF
744  CALL dgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
745  $ work( ipw ), max( 1, mp ), rdest,
746  $ iccol )
747 *
748 * sub( C ) := sub( C ) - w * v'
749 *
750  IF( ioffc.GT.0 )
751  $ CALL dger( mp, nq, -tauloc, work( ipw ), 1, work,
752  $ 1, c( ioffc ), ldc )
753  END IF
754 *
755  ELSE
756 *
757 * Transpose and broadcast column vector V
758 *
759  ipw = nq+1
760  CALL pbdtrnv( ictxt, 'Columnwise', 'Transpose', n,
761  $ descv( mb_ ), icoff, v( ioffv ), 1, zero,
762  $ work, 1, ivrow, ivcol, -1, iccol,
763  $ work( ipw ) )
764 *
765 * Perform the local computation within a process column
766 *
767  IF( mycol.EQ.ivcol ) THEN
768 *
769  CALL dgebs2d( ictxt, 'Rowwise', ' ', 1, 1, tau( jjv ),
770  $ 1 )
771  tauloc = tau( jjv )
772 *
773  ELSE
774 *
775  CALL dgebr2d( ictxt, 'Rowwise', ' ', 1, 1, tauloc, 1,
776  $ myrow, ivcol )
777 *
778  END IF
779 *
780  IF( tauloc.NE.zero ) THEN
781 *
782 * w := sub( C ) * v
783 *
784  IF( nq.GT.0 ) THEN
785  CALL dgemv( 'No transpose', mp, nq, one,
786  $ c( ioffc ), ldc, work, 1, zero,
787  $ work( ipw ), 1 )
788  ELSE
789  CALL dlaset( 'All', mp, 1, zero, zero, work( ipw ),
790  $ max( 1, mp ) )
791  END IF
792  CALL dgsum2d( ictxt, 'Rowwise', ' ', mp, 1,
793  $ work( ipw ), max( 1, mp ), rdest,
794  $ iccol )
795 *
796 * sub( C ) := sub( C ) - w * v'
797 *
798  CALL dger( mp, nq, -tauloc, work( ipw ), 1, work, 1,
799  $ c( ioffc ), ldc )
800  END IF
801 *
802  END IF
803 *
804  END IF
805 *
806  END IF
807 *
808  RETURN
809 *
810 * End of PDLARF
811 *
812  END
pdlarf
subroutine pdlarf(SIDE, M, N, V, IV, JV, DESCV, INCV, TAU, C, IC, JC, DESCC, WORK)
Definition: pdlarf.f:3
max
#define max(A, B)
Definition: pcgemr.c:180
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pbdtrnv
subroutine pbdtrnv(ICONTXT, XDIST, TRANS, N, NB, NZ, X, INCX, BETA, Y, INCY, IXROW, IXCOL, IYROW, IYCOL, WORK)
Definition: pbdtrnv.f:4
min
#define min(A, B)
Definition: pcgemr.c:181