ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pzlarfb.f
Go to the documentation of this file.
1  SUBROUTINE pzlarfb( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, IV,
2  $ JV, DESCV, T, C, IC, JC, DESCC, WORK )
3 *
4 * -- ScaLAPACK routine (version 2.0.2) --
5 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
6 * May 1 2012
7 *
8 * .. Scalar Arguments ..
9  CHARACTER SIDE, TRANS, DIRECT, STOREV
10  INTEGER IC, IV, JC, JV, K, M, N
11 * ..
12 * .. Array Arguments ..
13  INTEGER DESCC( * ), DESCV( * )
14  COMPLEX*16 C( * ), T( * ), V( * ), WORK( * )
15 * ..
16 *
17 * Purpose
18 * =======
19 *
20 * PZLARFB applies a complex block reflector Q or its conjugate
21 * transpose Q**H to a complex M-by-N distributed matrix sub( C )
22 * denoting C(IC:IC+M-1,JC:JC+N-1), from the left or the right.
23 *
24 * Notes
25 * =====
26 *
27 * Each global data object is described by an associated description
28 * vector. This vector stores the information required to establish
29 * the mapping between an object element and its corresponding process
30 * and memory location.
31 *
32 * Let A be a generic term for any 2D block cyclicly distributed array.
33 * Such a global array has an associated description vector DESCA.
34 * In the following comments, the character _ should be read as
35 * "of the global array".
36 *
37 * NOTATION STORED IN EXPLANATION
38 * --------------- -------------- --------------------------------------
39 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
40 * DTYPE_A = 1.
41 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
42 * the BLACS process grid A is distribu-
43 * ted over. The context itself is glo-
44 * bal, but the handle (the integer
45 * value) may vary.
46 * M_A (global) DESCA( M_ ) The number of rows in the global
47 * array A.
48 * N_A (global) DESCA( N_ ) The number of columns in the global
49 * array A.
50 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
51 * the rows of the array.
52 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
53 * the columns of the array.
54 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
55 * row of the array A is distributed.
56 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
57 * first column of the array A is
58 * distributed.
59 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
60 * array. LLD_A >= MAX(1,LOCr(M_A)).
61 *
62 * Let K be the number of rows or columns of a distributed matrix,
63 * and assume that its process grid has dimension p x q.
64 * LOCr( K ) denotes the number of elements of K that a process
65 * would receive if K were distributed over the p processes of its
66 * process column.
67 * Similarly, LOCc( K ) denotes the number of elements of K that a
68 * process would receive if K were distributed over the q processes of
69 * its process row.
70 * The values of LOCr() and LOCc() may be determined via a call to the
71 * ScaLAPACK tool function, NUMROC:
72 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
73 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
74 * An upper bound for these quantities may be computed by:
75 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
76 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
77 *
78 * Arguments
79 * =========
80 *
81 * SIDE (global input) CHARACTER
82 * = 'L': apply Q or Q**H from the Left;
83 * = 'R': apply Q or Q**H from the Right.
84 *
85 * TRANS (global input) CHARACTER
86 * = 'N': No transpose, apply Q;
87 * = 'C': Conjugate transpose, apply Q**H.
88 *
89 * DIRECT (global input) CHARACTER
90 * Indicates how Q is formed from a product of elementary
91 * reflectors
92 * = 'F': Q = H(1) H(2) . . . H(k) (Forward)
93 * = 'B': Q = H(k) . . . H(2) H(1) (Backward)
94 *
95 * STOREV (global input) CHARACTER
96 * Indicates how the vectors which define the elementary
97 * reflectors are stored:
98 * = 'C': Columnwise
99 * = 'R': Rowwise
100 *
101 * M (global input) INTEGER
102 * The number of rows to be operated on i.e the number of rows
103 * of the distributed submatrix sub( C ). M >= 0.
104 *
105 * N (global input) INTEGER
106 * The number of columns to be operated on i.e the number of
107 * columns of the distributed submatrix sub( C ). N >= 0.
108 *
109 * K (global input) INTEGER
110 * The order of the matrix T (= the number of elementary
111 * reflectors whose product defines the block reflector).
112 *
113 * V (local input) COMPLEX*16 pointer into the local memory
114 * to an array of dimension ( LLD_V, LOCc(JV+K-1) ) if
115 * STOREV = 'C', ( LLD_V, LOCc(JV+M-1)) if STOREV = 'R' and
116 * SIDE = 'L', ( LLD_V, LOCc(JV+N-1) ) if STOREV = 'R' and
117 * SIDE = 'R'. It contains the local pieces of the distributed
118 * vectors V representing the Householder transformation.
119 * See further details.
120 * If STOREV = 'C' and SIDE = 'L', LLD_V >= MAX(1,LOCr(IV+M-1));
121 * if STOREV = 'C' and SIDE = 'R', LLD_V >= MAX(1,LOCr(IV+N-1));
122 * if STOREV = 'R', LLD_V >= LOCr(IV+K-1).
123 *
124 * IV (global input) INTEGER
125 * The row index in the global array V indicating the first
126 * row of sub( V ).
127 *
128 * JV (global input) INTEGER
129 * The column index in the global array V indicating the
130 * first column of sub( V ).
131 *
132 * DESCV (global and local input) INTEGER array of dimension DLEN_.
133 * The array descriptor for the distributed matrix V.
134 *
135 * T (local input) COMPLEX*16 array, dimension MB_V by MB_V
136 * if STOREV = 'R' and NB_V by NB_V if STOREV = 'C'. The trian-
137 * gular matrix T in the representation of the block reflector.
138 *
139 * C (local input/local output) COMPLEX*16 pointer into the
140 * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)).
141 * On entry, the M-by-N distributed matrix sub( C ). On exit,
142 * sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) or
143 * sub( C )*Q or sub( C )*Q'.
144 *
145 * IC (global input) INTEGER
146 * The row index in the global array C indicating the first
147 * row of sub( C ).
148 *
149 * JC (global input) INTEGER
150 * The column index in the global array C indicating the
151 * first column of sub( C ).
152 *
153 * DESCC (global and local input) INTEGER array of dimension DLEN_.
154 * The array descriptor for the distributed matrix C.
155 *
156 * WORK (local workspace) COMPLEX*16 array, dimension (LWORK)
157 * If STOREV = 'C',
158 * if SIDE = 'L',
159 * LWORK >= ( NqC0 + MpC0 ) * K
160 * else if SIDE = 'R',
161 * LWORK >= ( NqC0 + MAX( NpV0 + NUMROC( NUMROC( N+ICOFFC,
162 * NB_V, 0, 0, NPCOL ), NB_V, 0, 0, LCMQ ),
163 * MpC0 ) ) * K
164 * end if
165 * else if STOREV = 'R',
166 * if SIDE = 'L',
167 * LWORK >= ( MpC0 + MAX( MqV0 + NUMROC( NUMROC( M+IROFFC,
168 * MB_V, 0, 0, NPROW ), MB_V, 0, 0, LCMP ),
169 * NqC0 ) ) * K
170 * else if SIDE = 'R',
171 * LWORK >= ( MpC0 + NqC0 ) * K
172 * end if
173 * end if
174 *
175 * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ),
176 *
177 * IROFFV = MOD( IV-1, MB_V ), ICOFFV = MOD( JV-1, NB_V ),
178 * IVROW = INDXG2P( IV, MB_V, MYROW, RSRC_V, NPROW ),
179 * IVCOL = INDXG2P( JV, NB_V, MYCOL, CSRC_V, NPCOL ),
180 * MqV0 = NUMROC( M+ICOFFV, NB_V, MYCOL, IVCOL, NPCOL ),
181 * NpV0 = NUMROC( N+IROFFV, MB_V, MYROW, IVROW, NPROW ),
182 *
183 * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
184 * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
185 * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
186 * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
187 * NpC0 = NUMROC( N+ICOFFC, MB_C, MYROW, ICROW, NPROW ),
188 * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
189 *
190 * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
191 * MYROW, MYCOL, NPROW and NPCOL can be determined by calling
192 * the subroutine BLACS_GRIDINFO.
193 *
194 * Alignment requirements
195 * ======================
196 *
197 * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
198 * must verify some alignment properties, namely the following
199 * expressions should be true:
200 *
201 * If STOREV = 'Columnwise'
202 * If SIDE = 'Left',
203 * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
204 * If SIDE = 'Right',
205 * ( MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
206 * else if STOREV = 'Rowwise'
207 * If SIDE = 'Left',
208 * ( NB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
209 * If SIDE = 'Right',
210 * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
211 * end if
212 *
213 * =====================================================================
214 *
215 * .. Parameters ..
216  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
217  $ lld_, mb_, m_, nb_, n_, rsrc_
218  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
219  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
220  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
221  COMPLEX*16 ONE, ZERO
222  parameter( one = ( 1.0d+0, 0.0d+0 ),
223  $ zero = ( 0.0d+0, 0.0d+0 ) )
224 * ..
225 * .. Local Scalars ..
226  LOGICAL FORWARD
227  CHARACTER COLBTOP, ROWBTOP, TRANST, UPLO
228  INTEGER HEIGHT, IBASE, ICCOL, ICOFFC, ICOFFV, ICROW,
229  $ ictxt, ii, iibeg, iic, iiend, iinxt, iiv,
230  $ ilastcol, ilastrow, ileft, ioff, ioffc, ioffv,
231  $ ipt, ipv, ipw, ipw1, iright, iroffc, iroffv,
232  $ itop, ivcol, ivrow, jj, jjbeg, jjc, jjend,
233  $ jjnxt, jjv, kp, kq, ldc, ldv, lv, lw, mbv, mpc,
234  $ mpc0, mqv, mqv0, mycol, mydist, myrow, nbv,
235  $ npv, npv0, npcol, nprow, nqc, nqc0, wide
236 * ..
237 * .. External Subroutines ..
238  EXTERNAL blacs_gridinfo, infog1l, infog2l, pb_topget,
239  $ pbztran, zgebr2d, zgebs2d, zgemm,
240  $ zgsum2d, zlamov, zlaset, ztrbr2d,
241  $ ztrbs2d, ztrmm
242 * ..
243 * .. Intrinsic Functions ..
244  INTRINSIC max, min, mod
245 * ..
246 * .. External Functions ..
247  LOGICAL LSAME
248  INTEGER ICEIL, NUMROC
249  EXTERNAL iceil, lsame, numroc
250 * ..
251 * .. Executable Statements ..
252 *
253 * Quick return if possible
254 *
255  IF( m.LE.0 .OR. n.LE.0 .OR. k.LE.0 )
256  $ RETURN
257 *
258 * Get grid parameters
259 *
260  ictxt = descc( ctxt_ )
261  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
262 *
263  IF( lsame( trans, 'N' ) ) THEN
264  transt = 'C'
265  ELSE
266  transt = 'N'
267  END IF
268  forward = lsame( direct, 'F' )
269  IF( forward ) THEN
270  uplo = 'U'
271  ELSE
272  uplo = 'L'
273  END IF
274 *
275  CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol, iiv, jjv,
276  $ ivrow, ivcol )
277  CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol, iic, jjc,
278  $ icrow, iccol )
279  ldc = descc( lld_ )
280  ldv = descv( lld_ )
281  iic = min( iic, ldc )
282  iiv = min( iiv, ldv )
283  iroffc = mod( ic-1, descc( mb_ ) )
284  icoffc = mod( jc-1, descc( nb_ ) )
285  mbv = descv( mb_ )
286  nbv = descv( nb_ )
287  iroffv = mod( iv-1, mbv )
288  icoffv = mod( jv-1, nbv )
289  mpc = numroc( m+iroffc, descc( mb_ ), myrow, icrow, nprow )
290  nqc = numroc( n+icoffc, descc( nb_ ), mycol, iccol, npcol )
291  IF( mycol.EQ.iccol )
292  $ nqc = nqc - icoffc
293  IF( myrow.EQ.icrow )
294  $ mpc = mpc - iroffc
295  jjc = min( jjc, max( 1, jjc+nqc-1 ) )
296  jjv = min( jjv, max( 1, numroc( descv( n_ ), nbv, mycol,
297  $ descv( csrc_ ), npcol ) ) )
298  ioffc = iic + ( jjc-1 ) * ldc
299  ioffv = iiv + ( jjv-1 ) * ldv
300 *
301  IF( lsame( storev, 'C' ) ) THEN
302 *
303 * V is stored columnwise
304 *
305  IF( lsame( side, 'L' ) ) THEN
306 *
307 * Form Q*sub( C ) or Q'*sub( C )
308 *
309 * Locally V( IOFFV ) is MPV x K, C( IOFFC ) is MPC x NQC
310 * WORK( IPV ) is MPC x K = V( IOFFV ), MPC = MPV
311 * WORK( IPW ) is NQC x K = C( IOFFC )' * V( IOFFV )
312 *
313  ipv = 1
314  ipw = ipv + mpc * k
315  lv = max( 1, mpc )
316  lw = max( 1, nqc )
317 *
318 * Broadcast V to the other process columns.
319 *
320  CALL pb_topget( ictxt, 'Broadcast', 'Rowwise', rowbtop )
321  IF( mycol.EQ.ivcol ) THEN
322  CALL zgebs2d( ictxt, 'Rowwise', rowbtop, mpc, k,
323  $ v( ioffv ), ldv )
324  IF( myrow.EQ.ivrow )
325  $ CALL ztrbs2d( ictxt, 'Rowwise', rowbtop, uplo,
326  $ 'Non unit', k, k, t, nbv )
327  CALL zlamov( 'All', mpc, k, v( ioffv ), ldv, work( ipv ),
328  $ lv )
329  ELSE
330  CALL zgebr2d( ictxt, 'Rowwise', rowbtop, mpc, k,
331  $ work( ipv ), lv, myrow, ivcol )
332  IF( myrow.EQ.ivrow )
333  $ CALL ztrbr2d( ictxt, 'Rowwise', rowbtop, uplo,
334  $ 'Non unit', k, k, t, nbv, myrow, ivcol )
335  END IF
336 *
337  IF( forward ) THEN
338 *
339 * WORK(IPV) = ( V1 ) where V1 is unit lower triangular,
340 * ( V2 ) zeroes upper triangular part of V1
341 *
342  mydist = mod( myrow-ivrow+nprow, nprow )
343  itop = max( 0, mydist*mbv - iroffv )
344  iibeg = iiv
345  iiend = iibeg + mpc - 1
346  iinxt = min( iceil( iibeg, mbv )*mbv, iiend )
347 *
348  10 CONTINUE
349  IF( k-itop .GT.0 ) THEN
350  CALL zlaset( 'Upper', iinxt-iibeg+1, k-itop, zero,
351  $ one, work( ipv+iibeg-iiv+itop*lv ), lv )
352  mydist = mydist + nprow
353  itop = mydist * mbv - iroffv
354  iibeg = iinxt + 1
355  iinxt = min( iinxt+mbv, iiend )
356  GO TO 10
357  END IF
358 *
359  ELSE
360 *
361 * WORK(IPV) = ( V1 ) where V2 is unit upper triangular,
362 * ( V2 ) zeroes lower triangular part of V2
363 *
364  jj = jjv
365  ioff = mod( iv+m-k-1, mbv )
366  CALL infog1l( iv+m-k, mbv, nprow, myrow, descv( rsrc_ ),
367  $ ii, ilastrow )
368  kp = numroc( k+ioff, mbv, myrow, ilastrow, nprow )
369  IF( myrow.EQ.ilastrow )
370  $ kp = kp - ioff
371  mydist = mod( myrow-ilastrow+nprow, nprow )
372  itop = mydist * mbv - ioff
373  ibase = min( itop+mbv, k )
374  itop = min( max( 0, itop ), k )
375 *
376  20 CONTINUE
377  IF( jj.LE.( jjv+k-1 ) ) THEN
378  height = ibase - itop
379  CALL zlaset( 'All', kp, itop-jj+jjv, zero, zero,
380  $ work( ipv+ii-iiv+(jj-jjv)*lv ), lv )
381  CALL zlaset( 'Lower', kp, height, zero, one,
382  $ work( ipv+ii-iiv+itop*lv ), lv )
383  kp = max( 0, kp - height )
384  ii = ii + height
385  jj = jjv + ibase
386  mydist = mydist + nprow
387  itop = mydist * mbv - ioff
388  ibase = min( itop + mbv, k )
389  itop = min( itop, k )
390  GO TO 20
391  END IF
392 *
393  END IF
394 *
395 * WORK( IPW ) = C( IOFFC )' * V (NQC x MPC x K) -> NQC x K
396 *
397  IF( mpc.GT.0 ) THEN
398  CALL zgemm( 'Conjugate transpose', 'No transpose', nqc,
399  $ k, mpc, one, c( ioffc ), ldc, work( ipv ), lv,
400  $ zero, work( ipw ), lw )
401  ELSE
402  CALL zlaset( 'All', nqc, k, zero, zero, work( ipw ), lw )
403  END IF
404 *
405  CALL zgsum2d( ictxt, 'Columnwise', ' ', nqc, k, work( ipw ),
406  $ lw, ivrow, mycol )
407 *
408  IF( myrow.EQ.ivrow ) THEN
409 *
410 * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
411 *
412  CALL ztrmm( 'Right', uplo, transt, 'Non unit', nqc, k,
413  $ one, t, nbv, work( ipw ), lw )
414  CALL zgebs2d( ictxt, 'Columnwise', ' ', nqc, k,
415  $ work( ipw ), lw )
416  ELSE
417  CALL zgebr2d( ictxt, 'Columnwise', ' ', nqc, k,
418  $ work( ipw ), lw, ivrow, mycol )
419  END IF
420 *
421 * C C - V * W'
422 * C( IOFFC ) = C( IOFFC ) - WORK( IPV ) * WORK( IPW )'
423 * MPC x NQC MPC x K K x NQC
424 *
425  CALL zgemm( 'No transpose', 'Conjugate transpose', mpc, nqc,
426  $ k, -one, work( ipv ), lv, work( ipw ), lw, one,
427  $ c( ioffc ), ldc )
428 *
429  ELSE
430 *
431 * Form sub( C )*Q or sub( C )*Q'
432 *
433 * ICOFFC = IROFFV is required by the current transposition
434 * routine PBZTRAN
435 *
436  npv0 = numroc( n+iroffv, mbv, myrow, ivrow, nprow )
437  IF( myrow.EQ.ivrow ) THEN
438  npv = npv0 - iroffv
439  ELSE
440  npv = npv0
441  END IF
442  IF( mycol.EQ.iccol ) THEN
443  nqc0 = nqc + icoffc
444  ELSE
445  nqc0 = nqc
446  END IF
447 *
448 * Locally V( IOFFV ) is NPV x K C( IOFFC ) is MPC x NQC
449 * WORK( IPV ) is K x NQC0 = [ . V( IOFFV ) ]'
450 * WORK( IPW ) is NPV0 x K = [ . V( IOFFV )' ]'
451 * WORK( IPT ) is the workspace for PBZTRAN
452 *
453  ipv = 1
454  ipw = ipv + k * nqc0
455  ipt = ipw + npv0 * k
456  lv = max( 1, k )
457  lw = max( 1, npv0 )
458 *
459  IF( mycol.EQ.ivcol ) THEN
460  IF( myrow.EQ.ivrow ) THEN
461  CALL zlaset( 'All', iroffv, k, zero, zero,
462  $ work( ipw ), lw )
463  ipw1 = ipw + iroffv
464  CALL zlamov( 'All', npv, k, v( ioffv ), ldv,
465  $ work( ipw1 ), lw )
466  ELSE
467  ipw1 = ipw
468  CALL zlamov( 'All', npv, k, v( ioffv ), ldv,
469  $ work( ipw1 ), lw )
470  END IF
471 *
472  IF( forward ) THEN
473 *
474 * WORK(IPW) = ( . V1' V2' )' where V1 is unit lower
475 * triangular, zeroes upper triangular part of V1
476 *
477  mydist = mod( myrow-ivrow+nprow, nprow )
478  itop = max( 0, mydist*mbv - iroffv )
479  iibeg = iiv
480  iiend = iibeg + npv - 1
481  iinxt = min( iceil( iibeg, mbv )*mbv, iiend )
482 *
483  30 CONTINUE
484  IF( ( k-itop ).GT.0 ) THEN
485  CALL zlaset( 'Upper', iinxt-iibeg+1, k-itop, zero,
486  $ one, work( ipw1+iibeg-iiv+itop*lw ),
487  $ lw )
488  mydist = mydist + nprow
489  itop = mydist * mbv - iroffv
490  iibeg = iinxt + 1
491  iinxt = min( iinxt+mbv, iiend )
492  GO TO 30
493  END IF
494 *
495  ELSE
496 *
497 * WORK( IPW ) = ( . V1' V2' )' where V2 is unit upper
498 * triangular, zeroes lower triangular part of V2.
499 *
500  jj = jjv
501  CALL infog1l( iv+n-k, mbv, nprow, myrow,
502  $ descv( rsrc_ ), ii, ilastrow )
503  ioff = mod( iv+n-k-1, mbv )
504  kp = numroc( k+ioff, mbv, myrow, ilastrow, nprow )
505  IF( myrow.EQ.ilastrow )
506  $ kp = kp - ioff
507  mydist = mod( myrow-ilastrow+nprow, nprow )
508  itop = mydist * mbv - ioff
509  ibase = min( itop+mbv, k )
510  itop = min( max( 0, itop ), k )
511 *
512  40 CONTINUE
513  IF( jj.LE.( jjv+k-1 ) ) THEN
514  height = ibase - itop
515  CALL zlaset( 'All', kp, itop-jj+jjv, zero, zero,
516  $ work( ipw1+ii-iiv+(jj-jjv)*lw ), lw )
517  CALL zlaset( 'Lower', kp, height, zero, one,
518  $ work( ipw1+ii-iiv+itop*lw ), lw )
519  kp = max( 0, kp - height )
520  ii = ii + height
521  jj = jjv + ibase
522  mydist = mydist + nprow
523  itop = mydist * mbv - ioff
524  ibase = min( itop + mbv, k )
525  itop = min( itop, k )
526  GO TO 40
527  END IF
528  END IF
529  END IF
530 *
531  CALL pbztran( ictxt, 'Columnwise', 'Conjugate transpose',
532  $ n+iroffv, k, mbv, work( ipw ), lw, zero,
533  $ work( ipv ), lv, ivrow, ivcol, -1, iccol,
534  $ work( ipt ) )
535 *
536 * WORK( IPV ) = ( . V' ) -> WORK( IPV ) = V' is K x NQC
537 *
538  IF( mycol.EQ.iccol )
539  $ ipv = ipv + icoffc * lv
540 *
541 * WORK( IPW ) becomes MPC x K = C( IOFFC ) * V
542 * WORK( IPW ) = C( IOFFC ) * V (MPC x NQC x K) -> MPC x K
543 *
544  lw = max( 1, mpc )
545 *
546  IF( nqc.GT.0 ) THEN
547  CALL zgemm( 'No transpose', 'Conjugate transpose', mpc,
548  $ k, nqc, one, c( ioffc ), ldc, work( ipv ),
549  $ lv, zero, work( ipw ), lw )
550  ELSE
551  CALL zlaset( 'All', mpc, k, zero, zero, work( ipw ), lw )
552  END IF
553 *
554  CALL zgsum2d( ictxt, 'Rowwise', ' ', mpc, k, work( ipw ),
555  $ lw, myrow, ivcol )
556 *
557 * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
558 *
559  IF( mycol.EQ.ivcol ) THEN
560  IF( myrow.EQ.ivrow ) THEN
561 *
562 * Broadcast the block reflector to the other rows.
563 *
564  CALL ztrbs2d( ictxt, 'Columnwise', ' ', uplo,
565  $ 'Non unit', k, k, t, nbv )
566  ELSE
567  CALL ztrbr2d( ictxt, 'Columnwise', ' ', uplo,
568  $ 'Non unit', k, k, t, nbv, ivrow, mycol )
569  END IF
570  CALL ztrmm( 'Right', uplo, trans, 'Non unit', mpc, k,
571  $ one, t, nbv, work( ipw ), lw )
572 *
573  CALL zgebs2d( ictxt, 'Rowwise', ' ', mpc, k, work( ipw ),
574  $ lw )
575  ELSE
576  CALL zgebr2d( ictxt, 'Rowwise', ' ', mpc, k, work( ipw ),
577  $ lw, myrow, ivcol )
578  END IF
579 *
580 * C C - W * V'
581 * C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV )
582 * MPC x NQC MPC x K K x NQC
583 *
584  CALL zgemm( 'No transpose', 'No transpose', mpc, nqc, k,
585  $ -one, work( ipw ), lw, work( ipv ), lv, one,
586  $ c( ioffc ), ldc )
587  END IF
588 *
589  ELSE
590 *
591 * V is stored rowwise
592 *
593  IF( lsame( side, 'L' ) ) THEN
594 *
595 * Form Q*sub( C ) or Q'*sub( C )
596 *
597 * IROFFC = ICOFFV is required by the current transposition
598 * routine PBZTRAN
599 *
600  mqv0 = numroc( m+icoffv, nbv, mycol, ivcol, npcol )
601  IF( mycol.EQ.ivcol ) THEN
602  mqv = mqv0 - icoffv
603  ELSE
604  mqv = mqv0
605  END IF
606  IF( myrow.EQ.icrow ) THEN
607  mpc0 = mpc + iroffc
608  ELSE
609  mpc0 = mpc
610  END IF
611 *
612 * Locally V( IOFFV ) is K x MQV, C( IOFFC ) is MPC x NQC
613 * WORK( IPV ) is MPC0 x K = [ . V( IOFFV ) ]'
614 * WORK( IPW ) is K x MQV0 = [ . V( IOFFV ) ]
615 * WORK( IPT ) is the workspace for PBZTRAN
616 *
617  ipv = 1
618  ipw = ipv + mpc0 * k
619  ipt = ipw + k * mqv0
620  lv = max( 1, mpc0 )
621  lw = max( 1, k )
622 *
623  IF( myrow.EQ.ivrow ) THEN
624  IF( mycol.EQ.ivcol ) THEN
625  CALL zlaset( 'All', k, icoffv, zero, zero,
626  $ work( ipw ), lw )
627  ipw1 = ipw + icoffv * lw
628  CALL zlamov( 'All', k, mqv, v( ioffv ), ldv,
629  $ work( ipw1 ), lw )
630  ELSE
631  ipw1 = ipw
632  CALL zlamov( 'All', k, mqv, v( ioffv ), ldv,
633  $ work( ipw1 ), lw )
634  END IF
635 *
636  IF( forward ) THEN
637 *
638 * WORK( IPW ) = ( . V1 V2 ) where V1 is unit upper
639 * triangular, zeroes lower triangular part of V1
640 *
641  mydist = mod( mycol-ivcol+npcol, npcol )
642  ileft = max( 0, mydist * nbv - icoffv )
643  jjbeg = jjv
644  jjend = jjv + mqv - 1
645  jjnxt = min( iceil( jjbeg, nbv ) * nbv, jjend )
646 *
647  50 CONTINUE
648  IF( ( k-ileft ).GT.0 ) THEN
649  CALL zlaset( 'Lower', k-ileft, jjnxt-jjbeg+1, zero,
650  $ one,
651  $ work( ipw1+ileft+(jjbeg-jjv)*lw ),
652  $ lw )
653  mydist = mydist + npcol
654  ileft = mydist * nbv - icoffv
655  jjbeg = jjnxt + 1
656  jjnxt = min( jjnxt+nbv, jjend )
657  GO TO 50
658  END IF
659 *
660  ELSE
661 *
662 * WORK( IPW ) = ( . V1 V2 ) where V2 is unit lower
663 * triangular, zeroes upper triangular part of V2.
664 *
665  ii = iiv
666  CALL infog1l( jv+m-k, nbv, npcol, mycol,
667  $ descv( csrc_ ), jj, ilastcol )
668  ioff = mod( jv+m-k-1, nbv )
669  kq = numroc( k+ioff, nbv, mycol, ilastcol, npcol )
670  IF( mycol.EQ.ilastcol )
671  $ kq = kq - ioff
672  mydist = mod( mycol-ilastcol+npcol, npcol )
673  ileft = mydist * nbv - ioff
674  iright = min( ileft+nbv, k )
675  ileft = min( max( 0, ileft ), k )
676 *
677  60 CONTINUE
678  IF( ii.LE.( iiv+k-1 ) ) THEN
679  wide = iright - ileft
680  CALL zlaset( 'All', ileft-ii+iiv, kq, zero, zero,
681  $ work( ipw1+ii-iiv+(jj-jjv)*lw ), lw )
682  CALL zlaset( 'Upper', wide, kq, zero, one,
683  $ work( ipw1+ileft+(jj-jjv)*lw ), lw )
684  kq = max( 0, kq - wide )
685  ii = iiv + iright
686  jj = jj + wide
687  mydist = mydist + npcol
688  ileft = mydist * nbv - ioff
689  iright = min( ileft + nbv, k )
690  ileft = min( ileft, k )
691  GO TO 60
692  END IF
693  END IF
694  END IF
695 *
696 * WORK( IPV ) = WORK( IPW )' (replicated) is MPC0 x K
697 *
698  CALL pbztran( ictxt, 'Rowwise', 'Conjugate transpose', k,
699  $ m+icoffv, nbv, work( ipw ), lw, zero,
700  $ work( ipv ), lv, ivrow, ivcol, icrow, -1,
701  $ work( ipt ) )
702 *
703 * WORK( IPV ) = ( . V )' -> WORK( IPV ) = V' is MPC x K
704 *
705  IF( myrow.EQ.icrow )
706  $ ipv = ipv + iroffc
707 *
708 * WORK( IPW ) becomes NQC x K = C( IOFFC )' * V'
709 * WORK( IPW ) = C( IOFFC )' * V' (NQC x MPC x K) -> NQC x K
710 *
711  lw = max( 1, nqc )
712 *
713  IF( mpc.GT.0 ) THEN
714  CALL zgemm( 'Conjugate transpose', 'No transpose', nqc,
715  $ k, mpc, one, c( ioffc ), ldc, work( ipv ),
716  $ lv, zero, work( ipw ), lw )
717  ELSE
718  CALL zlaset( 'All', nqc, k, zero, zero, work( ipw ), lw )
719  END IF
720 *
721  CALL zgsum2d( ictxt, 'Columnwise', ' ', nqc, k, work( ipw ),
722  $ lw, ivrow, mycol )
723 *
724 * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
725 *
726  IF( myrow.EQ.ivrow ) THEN
727  IF( mycol.EQ.ivcol ) THEN
728 *
729 * Broadcast the block reflector to the other columns.
730 *
731  CALL ztrbs2d( ictxt, 'Rowwise', ' ', uplo, 'Non unit',
732  $ k, k, t, mbv )
733  ELSE
734  CALL ztrbr2d( ictxt, 'Rowwise', ' ', uplo, 'Non unit',
735  $ k, k, t, mbv, myrow, ivcol )
736  END IF
737  CALL ztrmm( 'Right', uplo, transt, 'Non unit', nqc, k,
738  $ one, t, mbv, work( ipw ), lw )
739 *
740  CALL zgebs2d( ictxt, 'Columnwise', ' ', nqc, k,
741  $ work( ipw ), lw )
742  ELSE
743  CALL zgebr2d( ictxt, 'Columnwise', ' ', nqc, k,
744  $ work( ipw ), lw, ivrow, mycol )
745  END IF
746 *
747 * C C - V' * W'
748 * C( IOFFC ) = C( IOFFC ) - WORK( IPV ) * WORK( IPW )'
749 * MPC x NQC MPC x K K x NQC
750 *
751  CALL zgemm( 'No transpose', 'Conjugate transpose', mpc, nqc,
752  $ k, -one, work( ipv ), lv, work( ipw ), lw, one,
753  $ c( ioffc ), ldc )
754 *
755  ELSE
756 *
757 * Form Q*sub( C ) or Q'*sub( C )
758 *
759 * Locally V( IOFFV ) is K x NQV, C( IOFFC ) is MPC x NQC
760 * WORK( IPV ) is K x NQV = V( IOFFV ), NQV = NQC
761 * WORK( IPW ) is MPC x K = C( IOFFC ) * V( IOFFV )'
762 *
763  ipv = 1
764  ipw = ipv + k * nqc
765  lv = max( 1, k )
766  lw = max( 1, mpc )
767 *
768 * Broadcast V to the other process rows.
769 *
770  CALL pb_topget( ictxt, 'Broadcast', 'Columnwise', colbtop )
771  IF( myrow.EQ.ivrow ) THEN
772  CALL zgebs2d( ictxt, 'Columnwise', colbtop, k, nqc,
773  $ v( ioffv ), ldv )
774  IF( mycol.EQ.ivcol )
775  $ CALL ztrbs2d( ictxt, 'Columnwise', colbtop, uplo,
776  $ 'Non unit', k, k, t, mbv )
777  CALL zlamov( 'All', k, nqc, v( ioffv ), ldv, work( ipv ),
778  $ lv )
779  ELSE
780  CALL zgebr2d( ictxt, 'Columnwise', colbtop, k, nqc,
781  $ work( ipv ), lv, ivrow, mycol )
782  IF( mycol.EQ.ivcol )
783  $ CALL ztrbr2d( ictxt, 'Columnwise', colbtop, uplo,
784  $ 'Non unit', k, k, t, mbv, ivrow, mycol )
785  END IF
786 *
787  IF( forward ) THEN
788 *
789 * WORK(IPW) = ( V1 V2 ) where V1 is unit upper
790 * triangular, zeroes lower triangular part of V1
791 *
792  mydist = mod( mycol-ivcol+npcol, npcol )
793  ileft = max( 0, mydist * nbv - icoffv )
794  jjbeg = jjv
795  jjend = jjv + nqc - 1
796  jjnxt = min( iceil( jjbeg, nbv ) * nbv, jjend )
797 *
798  70 CONTINUE
799  IF( ( k-ileft ).GT.0 ) THEN
800  CALL zlaset( 'Lower', k-ileft, jjnxt-jjbeg+1, zero,
801  $ one, work( ipv+ileft+(jjbeg-jjv)*lv ),
802  $ lv )
803  mydist = mydist + npcol
804  ileft = mydist * nbv - icoffv
805  jjbeg = jjnxt + 1
806  jjnxt = min( jjnxt+nbv, jjend )
807  GO TO 70
808  END IF
809 *
810  ELSE
811 *
812 * WORK( IPW ) = ( . V1 V2 ) where V2 is unit lower
813 * triangular, zeroes upper triangular part of V2.
814 *
815  ii = iiv
816  CALL infog1l( jv+n-k, nbv, npcol, mycol, descv( csrc_ ),
817  $ jj, ilastcol )
818  ioff = mod( jv+n-k-1, nbv )
819  kq = numroc( k+ioff, nbv, mycol, ilastcol, npcol )
820  IF( mycol.EQ.ilastcol )
821  $ kq = kq - ioff
822  mydist = mod( mycol-ilastcol+npcol, npcol )
823  ileft = mydist * nbv - ioff
824  iright = min( ileft+nbv, k )
825  ileft = min( max( 0, ileft ), k )
826 *
827  80 CONTINUE
828  IF( ii.LE.( iiv+k-1 ) ) THEN
829  wide = iright - ileft
830  CALL zlaset( 'All', ileft-ii+iiv, kq, zero, zero,
831  $ work( ipv+ii-iiv+(jj-jjv)*lv ), lv )
832  CALL zlaset( 'Upper', wide, kq, zero, one,
833  $ work( ipv+ileft+(jj-jjv)*lv ), lv )
834  kq = max( 0, kq - wide )
835  ii = iiv + iright
836  jj = jj + wide
837  mydist = mydist + npcol
838  ileft = mydist * nbv - ioff
839  iright = min( ileft + nbv, k )
840  ileft = min( ileft, k )
841  GO TO 80
842  END IF
843 *
844  END IF
845 *
846 * WORK( IPV ) is K x NQC = V = V( IOFFV )
847 * WORK( IPW ) = C( IOFFC ) * V' (MPC x NQC x K) -> MPC x K
848 *
849  IF( nqc.GT.0 ) THEN
850  CALL zgemm( 'No transpose', 'Conjugate transpose', mpc,
851  $ k, nqc, one, c( ioffc ), ldc, work( ipv ),
852  $ lv, zero, work( ipw ), lw )
853  ELSE
854  CALL zlaset( 'All', mpc, k, zero, zero, work( ipw ), lw )
855  END IF
856 *
857  CALL zgsum2d( ictxt, 'Rowwise', ' ', mpc, k, work( ipw ),
858  $ lw, myrow, ivcol )
859 *
860 * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
861 *
862  IF( mycol.EQ.ivcol ) THEN
863  CALL ztrmm( 'Right', uplo, trans, 'Non unit', mpc, k,
864  $ one, t, mbv, work( ipw ), lw )
865  CALL zgebs2d( ictxt, 'Rowwise', ' ', mpc, k, work( ipw ),
866  $ lw )
867  ELSE
868  CALL zgebr2d( ictxt, 'Rowwise', ' ', mpc, k, work( ipw ),
869  $ lw, myrow, ivcol )
870  END IF
871 *
872 * C C - W * V
873 * C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV )
874 * MPC x NQC MPC x K K x NQC
875 *
876  CALL zgemm( 'No transpose', 'No transpose', mpc, nqc, k,
877  $ -one, work( ipw ), lw, work( ipv ), lv, one,
878  $ c( ioffc ), ldc )
879 *
880  END IF
881 *
882  END IF
883 *
884  RETURN
885 *
886 * End of PZLARFB
887 *
888  END
pzlarfb
subroutine pzlarfb(SIDE, TRANS, DIRECT, STOREV, M, N, K, V, IV, JV, DESCV, T, C, IC, JC, DESCC, WORK)
Definition: pzlarfb.f:3
pbztran
subroutine pbztran(ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, BETA, C, LDC, IAROW, IACOL, ICROW, ICCOL, WORK)
Definition: pbztran.f:3
max
#define max(A, B)
Definition: pcgemr.c:180
infog1l
subroutine infog1l(GINDX, NB, NPROCS, MYROC, ISRCPROC, LINDX, ROCSRC)
Definition: infog1l.f:3
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
min
#define min(A, B)
Definition: pcgemr.c:181