ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdlarfb.f
Go to the documentation of this file.
1  SUBROUTINE pdlarfb( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, IV,
2  $ JV, DESCV, T, C, IC, JC, DESCC, WORK )
3 *
4 * -- ScaLAPACK auxiliary routine (version 2.0.2) --
5 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
6 * May 1 2012
7 *
8 * .. Scalar Arguments ..
9  CHARACTER SIDE, TRANS, DIRECT, STOREV
10  INTEGER IC, IV, JC, JV, K, M, N
11 * ..
12 * .. Array Arguments ..
13  INTEGER DESCC( * ), DESCV( * )
14  DOUBLE PRECISION C( * ), T( * ), V( * ), WORK( * )
15 * ..
16 *
17 * Purpose
18 * =======
19 *
20 * PDLARFB applies a real block reflector Q or its transpose Q**T to a
21 * real distributed M-by-N matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1)
22 * from the left or the right.
23 *
24 * Notes
25 * =====
26 *
27 * Each global data object is described by an associated description
28 * vector. This vector stores the information required to establish
29 * the mapping between an object element and its corresponding process
30 * and memory location.
31 *
32 * Let A be a generic term for any 2D block cyclicly distributed array.
33 * Such a global array has an associated description vector DESCA.
34 * In the following comments, the character _ should be read as
35 * "of the global array".
36 *
37 * NOTATION STORED IN EXPLANATION
38 * --------------- -------------- --------------------------------------
39 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
40 * DTYPE_A = 1.
41 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
42 * the BLACS process grid A is distribu-
43 * ted over. The context itself is glo-
44 * bal, but the handle (the integer
45 * value) may vary.
46 * M_A (global) DESCA( M_ ) The number of rows in the global
47 * array A.
48 * N_A (global) DESCA( N_ ) The number of columns in the global
49 * array A.
50 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
51 * the rows of the array.
52 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
53 * the columns of the array.
54 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
55 * row of the array A is distributed.
56 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
57 * first column of the array A is
58 * distributed.
59 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
60 * array. LLD_A >= MAX(1,LOCr(M_A)).
61 *
62 * Let K be the number of rows or columns of a distributed matrix,
63 * and assume that its process grid has dimension p x q.
64 * LOCr( K ) denotes the number of elements of K that a process
65 * would receive if K were distributed over the p processes of its
66 * process column.
67 * Similarly, LOCc( K ) denotes the number of elements of K that a
68 * process would receive if K were distributed over the q processes of
69 * its process row.
70 * The values of LOCr() and LOCc() may be determined via a call to the
71 * ScaLAPACK tool function, NUMROC:
72 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
73 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
74 * An upper bound for these quantities may be computed by:
75 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
76 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
77 *
78 * Arguments
79 * =========
80 *
81 * SIDE (global input) CHARACTER
82 * = 'L': apply Q or Q**T from the Left;
83 * = 'R': apply Q or Q**T from the Right.
84 *
85 * TRANS (global input) CHARACTER
86 * = 'N': No transpose, apply Q;
87 * = 'T': Transpose, apply Q**T.
88 *
89 * DIRECT (global input) CHARACTER
90 * Indicates how Q is formed from a product of elementary
91 * reflectors
92 * = 'F': Q = H(1) H(2) . . . H(k) (Forward)
93 * = 'B': Q = H(k) . . . H(2) H(1) (Backward)
94 *
95 * STOREV (global input) CHARACTER
96 * Indicates how the vectors which define the elementary
97 * reflectors are stored:
98 * = 'C': Columnwise
99 * = 'R': Rowwise
100 *
101 * M (global input) INTEGER
102 * The number of rows to be operated on i.e the number of rows
103 * of the distributed submatrix sub( C ). M >= 0.
104 *
105 * N (global input) INTEGER
106 * The number of columns to be operated on i.e the number of
107 * columns of the distributed submatrix sub( C ). N >= 0.
108 *
109 * K (global input) INTEGER
110 * The order of the matrix T (= the number of elementary
111 * reflectors whose product defines the block reflector).
112 *
113 * V (local input) DOUBLE PRECISION pointer into the local memory
114 * to an array of dimension ( LLD_V, LOCc(JV+K-1) ) if
115 * STOREV = 'C', ( LLD_V, LOCc(JV+M-1)) if STOREV = 'R' and
116 * SIDE = 'L', ( LLD_V, LOCc(JV+N-1) ) if STOREV = 'R' and
117 * SIDE = 'R'. It contains the local pieces of the distributed
118 * vectors V representing the Householder transformation.
119 * See further details.
120 * If STOREV = 'C' and SIDE = 'L', LLD_V >= MAX(1,LOCr(IV+M-1));
121 * if STOREV = 'C' and SIDE = 'R', LLD_V >= MAX(1,LOCr(IV+N-1));
122 * if STOREV = 'R', LLD_V >= LOCr(IV+K-1).
123 *
124 * IV (global input) INTEGER
125 * The row index in the global array V indicating the first
126 * row of sub( V ).
127 *
128 * JV (global input) INTEGER
129 * The column index in the global array V indicating the
130 * first column of sub( V ).
131 *
132 * DESCV (global and local input) INTEGER array of dimension DLEN_.
133 * The array descriptor for the distributed matrix V.
134 *
135 * T (local input) DOUBLE PRECISION array, dimension MB_V by MB_V
136 * if STOREV = 'R' and NB_V by NB_V if STOREV = 'C'. The trian-
137 * gular matrix T in the representation of the block reflector.
138 *
139 * C (local input/local output) DOUBLE PRECISION pointer into the
140 * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)).
141 * On entry, the M-by-N distributed matrix sub( C ). On exit,
142 * sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) or
143 * sub( C )*Q or sub( C )*Q'.
144 *
145 * IC (global input) INTEGER
146 * The row index in the global array C indicating the first
147 * row of sub( C ).
148 *
149 * JC (global input) INTEGER
150 * The column index in the global array C indicating the
151 * first column of sub( C ).
152 *
153 * DESCC (global and local input) INTEGER array of dimension DLEN_.
154 * The array descriptor for the distributed matrix C.
155 *
156 * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK)
157 * If STOREV = 'C',
158 * if SIDE = 'L',
159 * LWORK >= ( NqC0 + MpC0 ) * K
160 * else if SIDE = 'R',
161 * LWORK >= ( NqC0 + MAX( NpV0 + NUMROC( NUMROC( N+ICOFFC,
162 * NB_V, 0, 0, NPCOL ), NB_V, 0, 0, LCMQ ),
163 * MpC0 ) ) * K
164 * end if
165 * else if STOREV = 'R',
166 * if SIDE = 'L',
167 * LWORK >= ( MpC0 + MAX( MqV0 + NUMROC( NUMROC( M+IROFFC,
168 * MB_V, 0, 0, NPROW ), MB_V, 0, 0, LCMP ),
169 * NqC0 ) ) * K
170 * else if SIDE = 'R',
171 * LWORK >= ( MpC0 + NqC0 ) * K
172 * end if
173 * end if
174 *
175 * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ),
176 *
177 * IROFFV = MOD( IV-1, MB_V ), ICOFFV = MOD( JV-1, NB_V ),
178 * IVROW = INDXG2P( IV, MB_V, MYROW, RSRC_V, NPROW ),
179 * IVCOL = INDXG2P( JV, NB_V, MYCOL, CSRC_V, NPCOL ),
180 * MqV0 = NUMROC( M+ICOFFV, NB_V, MYCOL, IVCOL, NPCOL ),
181 * NpV0 = NUMROC( N+IROFFV, MB_V, MYROW, IVROW, NPROW ),
182 *
183 * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
184 * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
185 * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
186 * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
187 * NpC0 = NUMROC( N+ICOFFC, MB_C, MYROW, ICROW, NPROW ),
188 * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
189 *
190 * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
191 * MYROW, MYCOL, NPROW and NPCOL can be determined by calling
192 * the subroutine BLACS_GRIDINFO.
193 *
194 * Alignment requirements
195 * ======================
196 *
197 * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
198 * must verify some alignment properties, namely the following
199 * expressions should be true:
200 *
201 * If STOREV = 'Columnwise'
202 * If SIDE = 'Left',
203 * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
204 * If SIDE = 'Right',
205 * ( MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
206 * else if STOREV = 'Rowwise'
207 * If SIDE = 'Left',
208 * ( NB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
209 * If SIDE = 'Right',
210 * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
211 * end if
212 *
213 * =====================================================================
214 *
215 * .. Parameters ..
216  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
217  $ lld_, mb_, m_, nb_, n_, rsrc_
218  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
219  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
220  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
221  DOUBLE PRECISION ONE, ZERO
222  parameter( one = 1.0d+0, zero = 0.0d+0 )
223 * ..
224 * .. Local Scalars ..
225  LOGICAL FORWARD
226  CHARACTER COLBTOP, ROWBTOP, TRANST, UPLO
227  INTEGER HEIGHT, IBASE, ICCOL, ICOFFC, ICOFFV, ICROW,
228  $ ictxt, ii, iibeg, iic, iiend, iinxt, iiv,
229  $ ilastcol, ilastrow, ileft, ioff, ioffc, ioffv,
230  $ ipt, ipv, ipw, ipw1, iright, iroffc, iroffv,
231  $ itop, ivcol, ivrow, jj, jjbeg, jjc, jjend,
232  $ jjnxt, jjv, kp, kq, ldc, ldv, lv, lw, mbv, mpc,
233  $ mpc0, mqv, mqv0, mycol, mydist, myrow, nbv,
234  $ npv, npv0, npcol, nprow, nqc, nqc0, wide
235 * ..
236 * .. External Subroutines ..
237  EXTERNAL blacs_gridinfo, dgebr2d, dgebs2d,dgemm,
238  $ dgsum2d, dlamov, dlaset, dtrbr2d,
239  $ dtrbs2d, dtrmm, infog1l, infog2l, pb_topget,
240  $ pbdtran
241 * ..
242 * .. Intrinsic Functions ..
243  INTRINSIC max, min, mod
244 * ..
245 * .. External Functions ..
246  LOGICAL LSAME
247  INTEGER ICEIL, NUMROC
248  EXTERNAL iceil, lsame, numroc
249 * ..
250 * .. Executable Statements ..
251 *
252 * Quick return if possible
253 *
254  IF( m.LE.0 .OR. n.LE.0 .OR. k.LE.0 )
255  $ RETURN
256 *
257 * Get grid parameters
258 *
259  ictxt = descc( ctxt_ )
260  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
261 *
262  IF( lsame( trans, 'N' ) ) THEN
263  transt = 'T'
264  ELSE
265  transt = 'N'
266  END IF
267  forward = lsame( direct, 'F' )
268  IF( forward ) THEN
269  uplo = 'U'
270  ELSE
271  uplo = 'L'
272  END IF
273 *
274  CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol, iiv, jjv,
275  $ ivrow, ivcol )
276  CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol, iic, jjc,
277  $ icrow, iccol )
278  ldc = descc( lld_ )
279  ldv = descv( lld_ )
280  iic = min( iic, ldc )
281  iiv = min( iiv, ldv )
282  iroffc = mod( ic-1, descc( mb_ ) )
283  icoffc = mod( jc-1, descc( nb_ ) )
284  mbv = descv( mb_ )
285  nbv = descv( nb_ )
286  iroffv = mod( iv-1, mbv )
287  icoffv = mod( jv-1, nbv )
288  mpc = numroc( m+iroffc, descc( mb_ ), myrow, icrow, nprow )
289  nqc = numroc( n+icoffc, descc( nb_ ), mycol, iccol, npcol )
290  IF( mycol.EQ.iccol )
291  $ nqc = nqc - icoffc
292  IF( myrow.EQ.icrow )
293  $ mpc = mpc - iroffc
294  jjc = min( jjc, max( 1, jjc+nqc-1 ) )
295  jjv = min( jjv, max( 1, numroc( descv( n_ ), nbv, mycol,
296  $ descv( csrc_ ), npcol ) ) )
297  ioffc = iic + ( jjc-1 ) * ldc
298  ioffv = iiv + ( jjv-1 ) * ldv
299 *
300  IF( lsame( storev, 'C' ) ) THEN
301 *
302 * V is stored columnwise
303 *
304  IF( lsame( side, 'L' ) ) THEN
305 *
306 * Form Q*sub( C ) or Q'*sub( C )
307 *
308 * Locally V( IOFFV ) is MPV x K, C( IOFFC ) is MPC x NQC
309 * WORK( IPV ) is MPC x K = V( IOFFV ), MPC = MPV
310 * WORK( IPW ) is NQC x K = C( IOFFC )' * V( IOFFV )
311 *
312  ipv = 1
313  ipw = ipv + mpc * k
314  lv = max( 1, mpc )
315  lw = max( 1, nqc )
316 *
317 * Broadcast V to the other process columns.
318 *
319  CALL pb_topget( ictxt, 'Broadcast', 'Rowwise', rowbtop )
320  IF( mycol.EQ.ivcol ) THEN
321  CALL dgebs2d( ictxt, 'Rowwise', rowbtop, mpc, k,
322  $ v( ioffv ), ldv )
323  IF( myrow.EQ.ivrow )
324  $ CALL dtrbs2d( ictxt, 'Rowwise', rowbtop, uplo,
325  $ 'Non unit', k, k, t, nbv )
326  CALL dlamov( 'All', mpc, k, v( ioffv ), ldv, work( ipv ),
327  $ lv )
328  ELSE
329  CALL dgebr2d( ictxt, 'Rowwise', rowbtop, mpc, k,
330  $ work( ipv ), lv, myrow, ivcol )
331  IF( myrow.EQ.ivrow )
332  $ CALL dtrbr2d( ictxt, 'Rowwise', rowbtop, uplo,
333  $ 'Non unit', k, k, t, nbv, myrow, ivcol )
334  END IF
335 *
336  IF( forward ) THEN
337 *
338 * WORK(IPV) = ( V1 ) where V1 is unit lower triangular,
339 * ( V2 ) zeroes upper triangular part of V1
340 *
341  mydist = mod( myrow-ivrow+nprow, nprow )
342  itop = max( 0, mydist*mbv - iroffv )
343  iibeg = iiv
344  iiend = iibeg + mpc - 1
345  iinxt = min( iceil( iibeg, mbv )*mbv, iiend )
346 *
347  10 CONTINUE
348  IF( k-itop .GT.0 ) THEN
349  CALL dlaset( 'Upper', iinxt-iibeg+1, k-itop, zero,
350  $ one, work( ipv+iibeg-iiv+itop*lv ), lv )
351  mydist = mydist + nprow
352  itop = mydist * mbv - iroffv
353  iibeg = iinxt + 1
354  iinxt = min( iinxt+mbv, iiend )
355  GO TO 10
356  END IF
357 *
358  ELSE
359 *
360 * WORK(IPV) = ( V1 ) where V2 is unit upper triangular,
361 * ( V2 ) zeroes lower triangular part of V2
362 *
363  jj = jjv
364  ioff = mod( iv+m-k-1, mbv )
365  CALL infog1l( iv+m-k, mbv, nprow, myrow, descv( rsrc_ ),
366  $ ii, ilastrow )
367  kp = numroc( k+ioff, mbv, myrow, ilastrow, nprow )
368  IF( myrow.EQ.ilastrow )
369  $ kp = kp - ioff
370  mydist = mod( myrow-ilastrow+nprow, nprow )
371  itop = mydist * mbv - ioff
372  ibase = min( itop+mbv, k )
373  itop = min( max( 0, itop ), k )
374 *
375  20 CONTINUE
376  IF( jj.LE.( jjv+k-1 ) ) THEN
377  height = ibase - itop
378  CALL dlaset( 'All', kp, itop-jj+jjv, zero, zero,
379  $ work( ipv+ii-iiv+(jj-jjv)*lv ), lv )
380  CALL dlaset( 'Lower', kp, height, zero, one,
381  $ work( ipv+ii-iiv+itop*lv ), lv )
382  kp = max( 0, kp - height )
383  ii = ii + height
384  jj = jjv + ibase
385  mydist = mydist + nprow
386  itop = mydist * mbv - ioff
387  ibase = min( itop + mbv, k )
388  itop = min( itop, k )
389  GO TO 20
390  END IF
391 *
392  END IF
393 *
394 * WORK( IPW ) = C( IOFFC )' * V (NQC x MPC x K) -> NQC x K
395 *
396  IF( mpc.GT.0 ) THEN
397  CALL dgemm( 'Transpose', 'No transpose', nqc, k, mpc,
398  $ one, c( ioffc ), ldc, work( ipv ), lv, zero,
399  $ work( ipw ), lw )
400  ELSE
401  CALL dlaset( 'All', nqc, k, zero, zero, work( ipw ), lw )
402  END IF
403 *
404  CALL dgsum2d( ictxt, 'Columnwise', ' ', nqc, k, work( ipw ),
405  $ lw, ivrow, mycol )
406 *
407  IF( myrow.EQ.ivrow ) THEN
408 *
409 * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
410 *
411  CALL dtrmm( 'Right', uplo, transt, 'Non unit', nqc, k,
412  $ one, t, nbv, work( ipw ), lw )
413  CALL dgebs2d( ictxt, 'Columnwise', ' ', nqc, k,
414  $ work( ipw ), lw )
415  ELSE
416  CALL dgebr2d( ictxt, 'Columnwise', ' ', nqc, k,
417  $ work( ipw ), lw, ivrow, mycol )
418  END IF
419 *
420 * C C - V * W'
421 * C( IOFFC ) = C( IOFFC ) - WORK( IPV ) * WORK( IPW )'
422 * MPC x NQC MPC x K K x NQC
423 *
424  CALL dgemm( 'No transpose', 'Transpose', mpc, nqc, k, -one,
425  $ work( ipv ), lv, work( ipw ), lw, one,
426  $ c( ioffc ), ldc )
427 *
428  ELSE
429 *
430 * Form sub( C )*Q or sub( C )*Q'
431 *
432 * ICOFFC = IROFFV is required by the current transposition
433 * routine PBDTRAN
434 *
435  npv0 = numroc( n+iroffv, mbv, myrow, ivrow, nprow )
436  IF( myrow.EQ.ivrow ) THEN
437  npv = npv0 - iroffv
438  ELSE
439  npv = npv0
440  END IF
441  IF( mycol.EQ.iccol ) THEN
442  nqc0 = nqc + icoffc
443  ELSE
444  nqc0 = nqc
445  END IF
446 *
447 * Locally V( IOFFV ) is NPV x K C( IOFFC ) is MPC x NQC
448 * WORK( IPV ) is K x NQC0 = [ . V( IOFFV ) ]'
449 * WORK( IPW ) is NPV0 x K = [ . V( IOFFV )' ]'
450 * WORK( IPT ) is the workspace for PBDTRAN
451 *
452  ipv = 1
453  ipw = ipv + k * nqc0
454  ipt = ipw + npv0 * k
455  lv = max( 1, k )
456  lw = max( 1, npv0 )
457 *
458  IF( mycol.EQ.ivcol ) THEN
459  IF( myrow.EQ.ivrow ) THEN
460  CALL dlaset( 'All', iroffv, k, zero, zero,
461  $ work( ipw ), lw )
462  ipw1 = ipw + iroffv
463  CALL dlamov( 'All', npv, k, v( ioffv ), ldv,
464  $ work( ipw1 ), lw )
465  ELSE
466  ipw1 = ipw
467  CALL dlamov( 'All', npv, k, v( ioffv ), ldv,
468  $ work( ipw1 ), lw )
469  END IF
470 *
471  IF( forward ) THEN
472 *
473 * WORK(IPW) = ( . V1' V2' )' where V1 is unit lower
474 * triangular, zeroes upper triangular part of V1
475 *
476  mydist = mod( myrow-ivrow+nprow, nprow )
477  itop = max( 0, mydist*mbv - iroffv )
478  iibeg = iiv
479  iiend = iibeg + npv - 1
480  iinxt = min( iceil( iibeg, mbv )*mbv, iiend )
481 *
482  30 CONTINUE
483  IF( ( k-itop ).GT.0 ) THEN
484  CALL dlaset( 'Upper', iinxt-iibeg+1, k-itop, zero,
485  $ one, work( ipw1+iibeg-iiv+itop*lw ),
486  $ lw )
487  mydist = mydist + nprow
488  itop = mydist * mbv - iroffv
489  iibeg = iinxt + 1
490  iinxt = min( iinxt+mbv, iiend )
491  GO TO 30
492  END IF
493 *
494  ELSE
495 *
496 * WORK( IPW ) = ( . V1' V2' )' where V2 is unit upper
497 * triangular, zeroes lower triangular part of V2.
498 *
499  jj = jjv
500  CALL infog1l( iv+n-k, mbv, nprow, myrow,
501  $ descv( rsrc_ ), ii, ilastrow )
502  ioff = mod( iv+n-k-1, mbv )
503  kp = numroc( k+ioff, mbv, myrow, ilastrow, nprow )
504  IF( myrow.EQ.ilastrow )
505  $ kp = kp - ioff
506  mydist = mod( myrow-ilastrow+nprow, nprow )
507  itop = mydist * mbv - ioff
508  ibase = min( itop+mbv, k )
509  itop = min( max( 0, itop ), k )
510 *
511  40 CONTINUE
512  IF( jj.LE.( jjv+k-1 ) ) THEN
513  height = ibase - itop
514  CALL dlaset( 'All', kp, itop-jj+jjv, zero, zero,
515  $ work( ipw1+ii-iiv+(jj-jjv)*lw ), lw )
516  CALL dlaset( 'Lower', kp, height, zero, one,
517  $ work( ipw1+ii-iiv+itop*lw ), lw )
518  kp = max( 0, kp - height )
519  ii = ii + height
520  jj = jjv + ibase
521  mydist = mydist + nprow
522  itop = mydist * mbv - ioff
523  ibase = min( itop + mbv, k )
524  itop = min( itop, k )
525  GO TO 40
526  END IF
527  END IF
528  END IF
529 *
530  CALL pbdtran( ictxt, 'Columnwise', 'Transpose', n+iroffv, k,
531  $ mbv, work( ipw ), lw, zero, work( ipv ), lv,
532  $ ivrow, ivcol, -1, iccol, work( ipt ) )
533 *
534 * WORK( IPV ) = ( . V' ) -> WORK( IPV ) = V' is K x NQC
535 *
536  IF( mycol.EQ.iccol )
537  $ ipv = ipv + icoffc * lv
538 *
539 * WORK( IPW ) becomes MPC x K = C( IOFFC ) * V
540 * WORK( IPW ) = C( IOFFC ) * V (MPC x NQC x K) -> MPC x K
541 *
542  lw = max( 1, mpc )
543 *
544  IF( nqc.GT.0 ) THEN
545  CALL dgemm( 'No transpose', 'Transpose', mpc, k, nqc,
546  $ one, c( ioffc ), ldc, work( ipv ), lv, zero,
547  $ work( ipw ), lw )
548  ELSE
549  CALL dlaset( 'All', mpc, k, zero, zero, work( ipw ), lw )
550  END IF
551 *
552  CALL dgsum2d( ictxt, 'Rowwise', ' ', mpc, k, work( ipw ),
553  $ lw, myrow, ivcol )
554 *
555 * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
556 *
557  IF( mycol.EQ.ivcol ) THEN
558  IF( myrow.EQ.ivrow ) THEN
559 *
560 * Broadcast the block reflector to the other rows.
561 *
562  CALL dtrbs2d( ictxt, 'Columnwise', ' ', uplo,
563  $ 'Non unit', k, k, t, nbv )
564  ELSE
565  CALL dtrbr2d( ictxt, 'Columnwise', ' ', uplo,
566  $ 'Non unit', k, k, t, nbv, ivrow, mycol )
567  END IF
568  CALL dtrmm( 'Right', uplo, trans, 'Non unit', mpc, k,
569  $ one, t, nbv, work( ipw ), lw )
570 *
571  CALL dgebs2d( ictxt, 'Rowwise', ' ', mpc, k, work( ipw ),
572  $ lw )
573  ELSE
574  CALL dgebr2d( ictxt, 'Rowwise', ' ', mpc, k, work( ipw ),
575  $ lw, myrow, ivcol )
576  END IF
577 *
578 * C C - W * V'
579 * C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV )
580 * MPC x NQC MPC x K K x NQC
581 *
582  CALL dgemm( 'No transpose', 'No transpose', mpc, nqc, k,
583  $ -one, work( ipw ), lw, work( ipv ), lv, one,
584  $ c( ioffc ), ldc )
585  END IF
586 *
587  ELSE
588 *
589 * V is stored rowwise
590 *
591  IF( lsame( side, 'L' ) ) THEN
592 *
593 * Form Q*sub( C ) or Q'*sub( C )
594 *
595 * IROFFC = ICOFFV is required by the current transposition
596 * routine PBDTRAN
597 *
598  mqv0 = numroc( m+icoffv, nbv, mycol, ivcol, npcol )
599  IF( mycol.EQ.ivcol ) THEN
600  mqv = mqv0 - icoffv
601  ELSE
602  mqv = mqv0
603  END IF
604  IF( myrow.EQ.icrow ) THEN
605  mpc0 = mpc + iroffc
606  ELSE
607  mpc0 = mpc
608  END IF
609 *
610 * Locally V( IOFFV ) is K x MQV, C( IOFFC ) is MPC x NQC
611 * WORK( IPV ) is MPC0 x K = [ . V( IOFFV ) ]'
612 * WORK( IPW ) is K x MQV0 = [ . V( IOFFV ) ]
613 * WORK( IPT ) is the workspace for PBDTRAN
614 *
615  ipv = 1
616  ipw = ipv + mpc0 * k
617  ipt = ipw + k * mqv0
618  lv = max( 1, mpc0 )
619  lw = max( 1, k )
620 *
621  IF( myrow.EQ.ivrow ) THEN
622  IF( mycol.EQ.ivcol ) THEN
623  CALL dlaset( 'All', k, icoffv, zero, zero,
624  $ work( ipw ), lw )
625  ipw1 = ipw + icoffv * lw
626  CALL dlamov( 'All', k, mqv, v( ioffv ), ldv,
627  $ work( ipw1 ), lw )
628  ELSE
629  ipw1 = ipw
630  CALL dlamov( 'All', k, mqv, v( ioffv ), ldv,
631  $ work( ipw1 ), lw )
632  END IF
633 *
634  IF( forward ) THEN
635 *
636 * WORK( IPW ) = ( . V1 V2 ) where V1 is unit upper
637 * triangular, zeroes lower triangular part of V1
638 *
639  mydist = mod( mycol-ivcol+npcol, npcol )
640  ileft = max( 0, mydist * nbv - icoffv )
641  jjbeg = jjv
642  jjend = jjv + mqv - 1
643  jjnxt = min( iceil( jjbeg, nbv ) * nbv, jjend )
644 *
645  50 CONTINUE
646  IF( ( k-ileft ).GT.0 ) THEN
647  CALL dlaset( 'Lower', k-ileft, jjnxt-jjbeg+1, zero,
648  $ one,
649  $ work( ipw1+ileft+(jjbeg-jjv)*lw ),
650  $ lw )
651  mydist = mydist + npcol
652  ileft = mydist * nbv - icoffv
653  jjbeg = jjnxt + 1
654  jjnxt = min( jjnxt+nbv, jjend )
655  GO TO 50
656  END IF
657 *
658  ELSE
659 *
660 * WORK( IPW ) = ( . V1 V2 ) where V2 is unit lower
661 * triangular, zeroes upper triangular part of V2.
662 *
663  ii = iiv
664  CALL infog1l( jv+m-k, nbv, npcol, mycol,
665  $ descv( csrc_ ), jj, ilastcol )
666  ioff = mod( jv+m-k-1, nbv )
667  kq = numroc( k+ioff, nbv, mycol, ilastcol, npcol )
668  IF( mycol.EQ.ilastcol )
669  $ kq = kq - ioff
670  mydist = mod( mycol-ilastcol+npcol, npcol )
671  ileft = mydist * nbv - ioff
672  iright = min( ileft+nbv, k )
673  ileft = min( max( 0, ileft ), k )
674 *
675  60 CONTINUE
676  IF( ii.LE.( iiv+k-1 ) ) THEN
677  wide = iright - ileft
678  CALL dlaset( 'All', ileft-ii+iiv, kq, zero, zero,
679  $ work( ipw1+ii-iiv+(jj-jjv)*lw ), lw )
680  CALL dlaset( 'Upper', wide, kq, zero, one,
681  $ work( ipw1+ileft+(jj-jjv)*lw ), lw )
682  kq = max( 0, kq - wide )
683  ii = iiv + iright
684  jj = jj + wide
685  mydist = mydist + npcol
686  ileft = mydist * nbv - ioff
687  iright = min( ileft + nbv, k )
688  ileft = min( ileft, k )
689  GO TO 60
690  END IF
691  END IF
692  END IF
693 *
694 * WORK( IPV ) = WORK( IPW )' (replicated) is MPC0 x K
695 *
696  CALL pbdtran( ictxt, 'Rowwise', 'Transpose', k, m+icoffv,
697  $ nbv, work( ipw ), lw, zero, work( ipv ), lv,
698  $ ivrow, ivcol, icrow, -1, work( ipt ) )
699 *
700 * WORK( IPV ) = ( . V )' -> WORK( IPV ) = V' is MPC x K
701 *
702  IF( myrow.EQ.icrow )
703  $ ipv = ipv + iroffc
704 *
705 * WORK( IPW ) becomes NQC x K = C( IOFFC )' * V'
706 * WORK( IPW ) = C( IOFFC )' * V' (NQC x MPC x K) -> NQC x K
707 *
708  lw = max( 1, nqc )
709 *
710  IF( mpc.GT.0 ) THEN
711  CALL dgemm( 'Transpose', 'No transpose', nqc, k, mpc,
712  $ one, c( ioffc ), ldc, work( ipv ), lv, zero,
713  $ work( ipw ), lw )
714  ELSE
715  CALL dlaset( 'All', nqc, k, zero, zero, work( ipw ), lw )
716  END IF
717 *
718  CALL dgsum2d( ictxt, 'Columnwise', ' ', nqc, k, work( ipw ),
719  $ lw, ivrow, mycol )
720 *
721 * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
722 *
723  IF( myrow.EQ.ivrow ) THEN
724  IF( mycol.EQ.ivcol ) THEN
725 *
726 * Broadcast the block reflector to the other columns.
727 *
728  CALL dtrbs2d( ictxt, 'Rowwise', ' ', uplo, 'Non unit',
729  $ k, k, t, mbv )
730  ELSE
731  CALL dtrbr2d( ictxt, 'Rowwise', ' ', uplo, 'Non unit',
732  $ k, k, t, mbv, myrow, ivcol )
733  END IF
734  CALL dtrmm( 'Right', uplo, transt, 'Non unit', nqc, k,
735  $ one, t, mbv, work( ipw ), lw )
736 *
737  CALL dgebs2d( ictxt, 'Columnwise', ' ', nqc, k,
738  $ work( ipw ), lw )
739  ELSE
740  CALL dgebr2d( ictxt, 'Columnwise', ' ', nqc, k,
741  $ work( ipw ), lw, ivrow, mycol )
742  END IF
743 *
744 * C C - V' * W'
745 * C( IOFFC ) = C( IOFFC ) - WORK( IPV ) * WORK( IPW )'
746 * MPC x NQC MPC x K K x NQC
747 *
748  CALL dgemm( 'No transpose', 'Transpose', mpc, nqc, k, -one,
749  $ work( ipv ), lv, work( ipw ), lw, one,
750  $ c( ioffc ), ldc )
751 *
752  ELSE
753 *
754 * Form Q*sub( C ) or Q'*sub( C )
755 *
756 * Locally V( IOFFV ) is K x NQV, C( IOFFC ) is MPC x NQC
757 * WORK( IPV ) is K x NQV = V( IOFFV ), NQV = NQC
758 * WORK( IPW ) is MPC x K = C( IOFFC ) * V( IOFFV )'
759 *
760  ipv = 1
761  ipw = ipv + k * nqc
762  lv = max( 1, k )
763  lw = max( 1, mpc )
764 *
765 * Broadcast V to the other process rows.
766 *
767  CALL pb_topget( ictxt, 'Broadcast', 'Columnwise', colbtop )
768  IF( myrow.EQ.ivrow ) THEN
769  CALL dgebs2d( ictxt, 'Columnwise', colbtop, k, nqc,
770  $ v( ioffv ), ldv )
771  IF( mycol.EQ.ivcol )
772  $ CALL dtrbs2d( ictxt, 'Columnwise', colbtop, uplo,
773  $ 'Non unit', k, k, t, mbv )
774  CALL dlamov( 'All', k, nqc, v( ioffv ), ldv, work( ipv ),
775  $ lv )
776  ELSE
777  CALL dgebr2d( ictxt, 'Columnwise', colbtop, k, nqc,
778  $ work( ipv ), lv, ivrow, mycol )
779  IF( mycol.EQ.ivcol )
780  $ CALL dtrbr2d( ictxt, 'Columnwise', colbtop, uplo,
781  $ 'Non unit', k, k, t, mbv, ivrow, mycol )
782  END IF
783 *
784  IF( forward ) THEN
785 *
786 * WORK(IPW) = ( V1 V2 ) where V1 is unit upper
787 * triangular, zeroes lower triangular part of V1
788 *
789  mydist = mod( mycol-ivcol+npcol, npcol )
790  ileft = max( 0, mydist * nbv - icoffv )
791  jjbeg = jjv
792  jjend = jjv + nqc - 1
793  jjnxt = min( iceil( jjbeg, nbv ) * nbv, jjend )
794 *
795  70 CONTINUE
796  IF( ( k-ileft ).GT.0 ) THEN
797  CALL dlaset( 'Lower', k-ileft, jjnxt-jjbeg+1, zero,
798  $ one, work( ipv+ileft+(jjbeg-jjv)*lv ),
799  $ lv )
800  mydist = mydist + npcol
801  ileft = mydist * nbv - icoffv
802  jjbeg = jjnxt + 1
803  jjnxt = min( jjnxt+nbv, jjend )
804  GO TO 70
805  END IF
806 *
807  ELSE
808 *
809 * WORK( IPW ) = ( . V1 V2 ) where V2 is unit lower
810 * triangular, zeroes upper triangular part of V2.
811 *
812  ii = iiv
813  CALL infog1l( jv+n-k, nbv, npcol, mycol, descv( csrc_ ),
814  $ jj, ilastcol )
815  ioff = mod( jv+n-k-1, nbv )
816  kq = numroc( k+ioff, nbv, mycol, ilastcol, npcol )
817  IF( mycol.EQ.ilastcol )
818  $ kq = kq - ioff
819  mydist = mod( mycol-ilastcol+npcol, npcol )
820  ileft = mydist * nbv - ioff
821  iright = min( ileft+nbv, k )
822  ileft = min( max( 0, ileft ), k )
823 *
824  80 CONTINUE
825  IF( ii.LE.( iiv+k-1 ) ) THEN
826  wide = iright - ileft
827  CALL dlaset( 'All', ileft-ii+iiv, kq, zero, zero,
828  $ work( ipv+ii-iiv+(jj-jjv)*lv ), lv )
829  CALL dlaset( 'Upper', wide, kq, zero, one,
830  $ work( ipv+ileft+(jj-jjv)*lv ), lv )
831  kq = max( 0, kq - wide )
832  ii = iiv + iright
833  jj = jj + wide
834  mydist = mydist + npcol
835  ileft = mydist * nbv - ioff
836  iright = min( ileft + nbv, k )
837  ileft = min( ileft, k )
838  GO TO 80
839  END IF
840 *
841  END IF
842 *
843 * WORK( IPV ) is K x NQC = V = V( IOFFV )
844 * WORK( IPW ) = C( IOFFC ) * V' (MPC x NQC x K) -> MPC x K
845 *
846  IF( nqc.GT.0 ) THEN
847  CALL dgemm( 'No Transpose', 'Transpose', mpc, k, nqc,
848  $ one, c( ioffc ), ldc, work( ipv ), lv, zero,
849  $ work( ipw ), lw )
850  ELSE
851  CALL dlaset( 'All', mpc, k, zero, zero, work( ipw ), lw )
852  END IF
853 *
854  CALL dgsum2d( ictxt, 'Rowwise', ' ', mpc, k, work( ipw ),
855  $ lw, myrow, ivcol )
856 *
857 * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
858 *
859  IF( mycol.EQ.ivcol ) THEN
860  CALL dtrmm( 'Right', uplo, trans, 'Non unit', mpc, k,
861  $ one, t, mbv, work( ipw ), lw )
862  CALL dgebs2d( ictxt, 'Rowwise', ' ', mpc, k, work( ipw ),
863  $ lw )
864  ELSE
865  CALL dgebr2d( ictxt, 'Rowwise', ' ', mpc, k, work( ipw ),
866  $ lw, myrow, ivcol )
867  END IF
868 *
869 * C C - W * V
870 * C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV )
871 * MPC x NQC MPC x K K x NQC
872 *
873  CALL dgemm( 'No transpose', 'No transpose', mpc, nqc, k,
874  $ -one, work( ipw ), lw, work( ipv ), lv, one,
875  $ c( ioffc ), ldc )
876 *
877  END IF
878 *
879  END IF
880 *
881  RETURN
882 *
883 * End of PDLARFB
884 *
885  END
max
#define max(A, B)
Definition: pcgemr.c:180
infog1l
subroutine infog1l(GINDX, NB, NPROCS, MYROC, ISRCPROC, LINDX, ROCSRC)
Definition: infog1l.f:3
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pbdtran
subroutine pbdtran(ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, BETA, C, LDC, IAROW, IACOL, ICROW, ICCOL, WORK)
Definition: pbdtran.f:3
pdlarfb
subroutine pdlarfb(SIDE, TRANS, DIRECT, STOREV, M, N, K, V, IV, JV, DESCV, T, C, IC, JC, DESCC, WORK)
Definition: pdlarfb.f:3
min
#define min(A, B)
Definition: pcgemr.c:181