SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pclarfb.f
Go to the documentation of this file.
1 SUBROUTINE pclarfb( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, IV,
2 $ JV, DESCV, T, C, IC, JC, DESCC, WORK )
3*
4* -- ScaLAPACK auxiliary routine (version 2.0.2) --
5* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
6* May 1 2012
7*
8* .. Scalar Arguments ..
9 CHARACTER SIDE, TRANS, DIRECT, STOREV
10 INTEGER IC, IV, JC, JV, K, M, N
11* ..
12* .. Array Arguments ..
13 INTEGER DESCC( * ), DESCV( * )
14 COMPLEX C( * ), T( * ), V( * ), WORK( * )
15* ..
16*
17* Purpose
18* =======
19*
20* PCLARFB applies a complex block reflector Q or its conjugate
21* transpose Q**H to a complex M-by-N distributed matrix sub( C )
22* denoting C(IC:IC+M-1,JC:JC+N-1), from the left or the right.
23*
24* Notes
25* =====
26*
27* Each global data object is described by an associated description
28* vector. This vector stores the information required to establish
29* the mapping between an object element and its corresponding process
30* and memory location.
31*
32* Let A be a generic term for any 2D block cyclicly distributed array.
33* Such a global array has an associated description vector DESCA.
34* In the following comments, the character _ should be read as
35* "of the global array".
36*
37* NOTATION STORED IN EXPLANATION
38* --------------- -------------- --------------------------------------
39* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
40* DTYPE_A = 1.
41* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
42* the BLACS process grid A is distribu-
43* ted over. The context itself is glo-
44* bal, but the handle (the integer
45* value) may vary.
46* M_A (global) DESCA( M_ ) The number of rows in the global
47* array A.
48* N_A (global) DESCA( N_ ) The number of columns in the global
49* array A.
50* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
51* the rows of the array.
52* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
53* the columns of the array.
54* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
55* row of the array A is distributed.
56* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
57* first column of the array A is
58* distributed.
59* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
60* array. LLD_A >= MAX(1,LOCr(M_A)).
61*
62* Let K be the number of rows or columns of a distributed matrix,
63* and assume that its process grid has dimension p x q.
64* LOCr( K ) denotes the number of elements of K that a process
65* would receive if K were distributed over the p processes of its
66* process column.
67* Similarly, LOCc( K ) denotes the number of elements of K that a
68* process would receive if K were distributed over the q processes of
69* its process row.
70* The values of LOCr() and LOCc() may be determined via a call to the
71* ScaLAPACK tool function, NUMROC:
72* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
73* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
74* An upper bound for these quantities may be computed by:
75* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
76* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
77*
78* Arguments
79* =========
80*
81* SIDE (global input) CHARACTER
82* = 'L': apply Q or Q**H from the Left;
83* = 'R': apply Q or Q**H from the Right.
84*
85* TRANS (global input) CHARACTER
86* = 'N': No transpose, apply Q;
87* = 'C': Conjugate transpose, apply Q**H.
88*
89* DIRECT (global input) CHARACTER
90* Indicates how Q is formed from a product of elementary
91* reflectors
92* = 'F': Q = H(1) H(2) . . . H(k) (Forward)
93* = 'B': Q = H(k) . . . H(2) H(1) (Backward)
94*
95* STOREV (global input) CHARACTER
96* Indicates how the vectors which define the elementary
97* reflectors are stored:
98* = 'C': Columnwise
99* = 'R': Rowwise
100*
101* M (global input) INTEGER
102* The number of rows to be operated on i.e the number of rows
103* of the distributed submatrix sub( C ). M >= 0.
104*
105* N (global input) INTEGER
106* The number of columns to be operated on i.e the number of
107* columns of the distributed submatrix sub( C ). N >= 0.
108*
109* K (global input) INTEGER
110* The order of the matrix T (= the number of elementary
111* reflectors whose product defines the block reflector).
112*
113* V (local input) COMPLEX pointer into the local memory
114* to an array of dimension ( LLD_V, LOCc(JV+K-1) ) if
115* STOREV = 'C', ( LLD_V, LOCc(JV+M-1)) if STOREV = 'R' and
116* SIDE = 'L', ( LLD_V, LOCc(JV+N-1) ) if STOREV = 'R' and
117* SIDE = 'R'. It contains the local pieces of the distributed
118* vectors V representing the Householder transformation.
119* See further details.
120* If STOREV = 'C' and SIDE = 'L', LLD_V >= MAX(1,LOCr(IV+M-1));
121* if STOREV = 'C' and SIDE = 'R', LLD_V >= MAX(1,LOCr(IV+N-1));
122* if STOREV = 'R', LLD_V >= LOCr(IV+K-1).
123*
124* IV (global input) INTEGER
125* The row index in the global array V indicating the first
126* row of sub( V ).
127*
128* JV (global input) INTEGER
129* The column index in the global array V indicating the
130* first column of sub( V ).
131*
132* DESCV (global and local input) INTEGER array of dimension DLEN_.
133* The array descriptor for the distributed matrix V.
134*
135* T (local input) COMPLEX array, dimension MB_V by MB_V
136* if STOREV = 'R' and NB_V by NB_V if STOREV = 'C'. The trian-
137* gular matrix T in the representation of the block reflector.
138*
139* C (local input/local output) COMPLEX pointer into the
140* local memory to an array of dimension (LLD_C,LOCc(JC+N-1)).
141* On entry, the M-by-N distributed matrix sub( C ). On exit,
142* sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) or
143* sub( C )*Q or sub( C )*Q'.
144*
145* IC (global input) INTEGER
146* The row index in the global array C indicating the first
147* row of sub( C ).
148*
149* JC (global input) INTEGER
150* The column index in the global array C indicating the
151* first column of sub( C ).
152*
153* DESCC (global and local input) INTEGER array of dimension DLEN_.
154* The array descriptor for the distributed matrix C.
155*
156* WORK (local workspace) COMPLEX array, dimension (LWORK)
157* If STOREV = 'C',
158* if SIDE = 'L',
159* LWORK >= ( NqC0 + MpC0 ) * K
160* else if SIDE = 'R',
161* LWORK >= ( NqC0 + MAX( NpV0 + NUMROC( NUMROC( N+ICOFFC,
162* NB_V, 0, 0, NPCOL ), NB_V, 0, 0, LCMQ ),
163* MpC0 ) ) * K
164* end if
165* else if STOREV = 'R',
166* if SIDE = 'L',
167* LWORK >= ( MpC0 + MAX( MqV0 + NUMROC( NUMROC( M+IROFFC,
168* MB_V, 0, 0, NPROW ), MB_V, 0, 0, LCMP ),
169* NqC0 ) ) * K
170* else if SIDE = 'R',
171* LWORK >= ( MpC0 + NqC0 ) * K
172* end if
173* end if
174*
175* where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ),
176*
177* IROFFV = MOD( IV-1, MB_V ), ICOFFV = MOD( JV-1, NB_V ),
178* IVROW = INDXG2P( IV, MB_V, MYROW, RSRC_V, NPROW ),
179* IVCOL = INDXG2P( JV, NB_V, MYCOL, CSRC_V, NPCOL ),
180* MqV0 = NUMROC( M+ICOFFV, NB_V, MYCOL, IVCOL, NPCOL ),
181* NpV0 = NUMROC( N+IROFFV, MB_V, MYROW, IVROW, NPROW ),
182*
183* IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
184* ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
185* ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
186* MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
187* NpC0 = NUMROC( N+ICOFFC, MB_C, MYROW, ICROW, NPROW ),
188* NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
189*
190* ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
191* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
192* the subroutine BLACS_GRIDINFO.
193*
194* Alignment requirements
195* ======================
196*
197* The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
198* must verify some alignment properties, namely the following
199* expressions should be true:
200*
201* If STOREV = 'Columnwise'
202* If SIDE = 'Left',
203* ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
204* If SIDE = 'Right',
205* ( MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
206* else if STOREV = 'Rowwise'
207* If SIDE = 'Left',
208* ( NB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
209* If SIDE = 'Right',
210* ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
211* end if
212*
213* =====================================================================
214*
215* .. Parameters ..
216 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
217 $ lld_, mb_, m_, nb_, n_, rsrc_
218 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
219 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
220 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
221 COMPLEX ONE, ZERO
222 parameter( one = ( 1.0e+0, 0.0e+0 ),
223 $ zero = ( 0.0e+0, 0.0e+0 ) )
224* ..
225* .. Local Scalars ..
226 LOGICAL FORWARD
227 CHARACTER COLBTOP, ROWBTOP, TRANST, UPLO
228 INTEGER HEIGHT, IBASE, ICCOL, ICOFFC, ICOFFV, ICROW,
229 $ ictxt, ii, iibeg, iic, iiend, iinxt, iiv,
230 $ ilastcol, ilastrow, ileft, ioff, ioffc, ioffv,
231 $ ipt, ipv, ipw, ipw1, iright, iroffc, iroffv,
232 $ itop, ivcol, ivrow, jj, jjbeg, jjc, jjend,
233 $ jjnxt, jjv, kp, kq, ldc, ldv, lv, lw, mbv, mpc,
234 $ mpc0, mqv, mqv0, mycol, mydist, myrow, nbv,
235 $ npv, npv0, npcol, nprow, nqc, nqc0, wide
236* ..
237* .. External Subroutines ..
238 EXTERNAL blacs_gridinfo, cgebr2d, cgebs2d,cgemm,
239 $ cgsum2d, clamov, claset, ctrbr2d,
240 $ ctrbs2d, ctrmm, infog1l, infog2l, pb_topget,
241 $ pbctran
242* ..
243* .. Intrinsic Functions ..
244 INTRINSIC max, min, mod
245* ..
246* .. External Functions ..
247 LOGICAL LSAME
248 INTEGER ICEIL, NUMROC
249 EXTERNAL iceil, lsame, numroc
250* ..
251* .. Executable Statements ..
252*
253* Quick return if possible
254*
255 IF( m.LE.0 .OR. n.LE.0 .OR. k.LE.0 )
256 $ RETURN
257*
258* Get grid parameters
259*
260 ictxt = descc( ctxt_ )
261 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
262*
263 IF( lsame( trans, 'N' ) ) THEN
264 transt = 'C'
265 ELSE
266 transt = 'N'
267 END IF
268 forward = lsame( direct, 'F' )
269 IF( forward ) THEN
270 uplo = 'U'
271 ELSE
272 uplo = 'L'
273 END IF
274*
275 CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol, iiv, jjv,
276 $ ivrow, ivcol )
277 CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol, iic, jjc,
278 $ icrow, iccol )
279 ldc = descc( lld_ )
280 ldv = descv( lld_ )
281 iic = min( iic, ldc )
282 iiv = min( iiv, ldv )
283 iroffc = mod( ic-1, descc( mb_ ) )
284 icoffc = mod( jc-1, descc( nb_ ) )
285 mbv = descv( mb_ )
286 nbv = descv( nb_ )
287 iroffv = mod( iv-1, mbv )
288 icoffv = mod( jv-1, nbv )
289 mpc = numroc( m+iroffc, descc( mb_ ), myrow, icrow, nprow )
290 nqc = numroc( n+icoffc, descc( nb_ ), mycol, iccol, npcol )
291 IF( mycol.EQ.iccol )
292 $ nqc = nqc - icoffc
293 IF( myrow.EQ.icrow )
294 $ mpc = mpc - iroffc
295 jjc = min( jjc, max( 1, jjc+nqc-1 ) )
296 jjv = min( jjv, max( 1, numroc( descv( n_ ), nbv, mycol,
297 $ descv( csrc_ ), npcol ) ) )
298 ioffc = iic + ( jjc-1 ) * ldc
299 ioffv = iiv + ( jjv-1 ) * ldv
300*
301 IF( lsame( storev, 'C' ) ) THEN
302*
303* V is stored columnwise
304*
305 IF( lsame( side, 'L' ) ) THEN
306*
307* Form Q*sub( C ) or Q'*sub( C )
308*
309* Locally V( IOFFV ) is MPV x K, C( IOFFC ) is MPC x NQC
310* WORK( IPV ) is MPC x K = V( IOFFV ), MPC = MPV
311* WORK( IPW ) is NQC x K = C( IOFFC )' * V( IOFFV )
312*
313 ipv = 1
314 ipw = ipv + mpc * k
315 lv = max( 1, mpc )
316 lw = max( 1, nqc )
317*
318* Broadcast V to the other process columns.
319*
320 CALL pb_topget( ictxt, 'Broadcast', 'Rowwise', rowbtop )
321 IF( mycol.EQ.ivcol ) THEN
322 CALL cgebs2d( ictxt, 'Rowwise', rowbtop, mpc, k,
323 $ v( ioffv ), ldv )
324 IF( myrow.EQ.ivrow )
325 $ CALL ctrbs2d( ictxt, 'Rowwise', rowbtop, uplo,
326 $ 'Non unit', k, k, t, nbv )
327 CALL clamov( 'All', mpc, k, v( ioffv ), ldv, work( ipv ),
328 $ lv )
329 ELSE
330 CALL cgebr2d( ictxt, 'Rowwise', rowbtop, mpc, k,
331 $ work( ipv ), lv, myrow, ivcol )
332 IF( myrow.EQ.ivrow )
333 $ CALL ctrbr2d( ictxt, 'Rowwise', rowbtop, uplo,
334 $ 'Non unit', k, k, t, nbv, myrow, ivcol )
335 END IF
336*
337 IF( forward ) THEN
338*
339* WORK(IPV) = ( V1 ) where V1 is unit lower triangular,
340* ( V2 ) zeroes upper triangular part of V1
341*
342 mydist = mod( myrow-ivrow+nprow, nprow )
343 itop = max( 0, mydist*mbv - iroffv )
344 iibeg = iiv
345 iiend = iibeg + mpc - 1
346 iinxt = min( iceil( iibeg, mbv )*mbv, iiend )
347*
348 10 CONTINUE
349 IF( k-itop .GT.0 ) THEN
350 CALL claset( 'Upper', iinxt-iibeg+1, k-itop, zero,
351 $ one, work( ipv+iibeg-iiv+itop*lv ), lv )
352 mydist = mydist + nprow
353 itop = mydist * mbv - iroffv
354 iibeg = iinxt + 1
355 iinxt = min( iinxt+mbv, iiend )
356 GO TO 10
357 END IF
358*
359 ELSE
360*
361* WORK(IPV) = ( V1 ) where V2 is unit upper triangular,
362* ( V2 ) zeroes lower triangular part of V2
363*
364 jj = jjv
365 ioff = mod( iv+m-k-1, mbv )
366 CALL infog1l( iv+m-k, mbv, nprow, myrow, descv( rsrc_ ),
367 $ ii, ilastrow )
368 kp = numroc( k+ioff, mbv, myrow, ilastrow, nprow )
369 IF( myrow.EQ.ilastrow )
370 $ kp = kp - ioff
371 mydist = mod( myrow-ilastrow+nprow, nprow )
372 itop = mydist * mbv - ioff
373 ibase = min( itop+mbv, k )
374 itop = min( max( 0, itop ), k )
375*
376 20 CONTINUE
377 IF( jj.LE.( jjv+k-1 ) ) THEN
378 height = ibase - itop
379 CALL claset( 'All', kp, itop-jj+jjv, zero, zero,
380 $ work( ipv+ii-iiv+(jj-jjv)*lv ), lv )
381 CALL claset( 'Lower', kp, height, zero, one,
382 $ work( ipv+ii-iiv+itop*lv ), lv )
383 kp = max( 0, kp - height )
384 ii = ii + height
385 jj = jjv + ibase
386 mydist = mydist + nprow
387 itop = mydist * mbv - ioff
388 ibase = min( itop + mbv, k )
389 itop = min( itop, k )
390 GO TO 20
391 END IF
392*
393 END IF
394*
395* WORK( IPW ) = C( IOFFC )' * V (NQC x MPC x K) -> NQC x K
396*
397 IF( mpc.GT.0 ) THEN
398 CALL cgemm( 'Conjugate transpose', 'No transpose', nqc,
399 $ k, mpc, one, c( ioffc ), ldc, work( ipv ), lv,
400 $ zero, work( ipw ), lw )
401 ELSE
402 CALL claset( 'All', nqc, k, zero, zero, work( ipw ), lw )
403 END IF
404*
405 CALL cgsum2d( ictxt, 'Columnwise', ' ', nqc, k, work( ipw ),
406 $ lw, ivrow, mycol )
407*
408 IF( myrow.EQ.ivrow ) THEN
409*
410* WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
411*
412 CALL ctrmm( 'Right', uplo, transt, 'Non unit', nqc, k,
413 $ one, t, nbv, work( ipw ), lw )
414 CALL cgebs2d( ictxt, 'Columnwise', ' ', nqc, k,
415 $ work( ipw ), lw )
416 ELSE
417 CALL cgebr2d( ictxt, 'Columnwise', ' ', nqc, k,
418 $ work( ipw ), lw, ivrow, mycol )
419 END IF
420*
421* C C - V * W'
422* C( IOFFC ) = C( IOFFC ) - WORK( IPV ) * WORK( IPW )'
423* MPC x NQC MPC x K K x NQC
424*
425 CALL cgemm( 'No transpose', 'Conjugate transpose', mpc, nqc,
426 $ k, -one, work( ipv ), lv, work( ipw ), lw, one,
427 $ c( ioffc ), ldc )
428*
429 ELSE
430*
431* Form sub( C )*Q or sub( C )*Q'
432*
433* ICOFFC = IROFFV is required by the current transposition
434* routine PBCTRAN
435*
436 npv0 = numroc( n+iroffv, mbv, myrow, ivrow, nprow )
437 IF( myrow.EQ.ivrow ) THEN
438 npv = npv0 - iroffv
439 ELSE
440 npv = npv0
441 END IF
442 IF( mycol.EQ.iccol ) THEN
443 nqc0 = nqc + icoffc
444 ELSE
445 nqc0 = nqc
446 END IF
447*
448* Locally V( IOFFV ) is NPV x K C( IOFFC ) is MPC x NQC
449* WORK( IPV ) is K x NQC0 = [ . V( IOFFV ) ]'
450* WORK( IPW ) is NPV0 x K = [ . V( IOFFV )' ]'
451* WORK( IPT ) is the workspace for PBCTRAN
452*
453 ipv = 1
454 ipw = ipv + k * nqc0
455 ipt = ipw + npv0 * k
456 lv = max( 1, k )
457 lw = max( 1, npv0 )
458*
459 IF( mycol.EQ.ivcol ) THEN
460 IF( myrow.EQ.ivrow ) THEN
461 CALL claset( 'All', iroffv, k, zero, zero,
462 $ work( ipw ), lw )
463 ipw1 = ipw + iroffv
464 CALL clamov( 'All', npv, k, v( ioffv ), ldv,
465 $ work( ipw1 ), lw )
466 ELSE
467 ipw1 = ipw
468 CALL clamov( 'All', npv, k, v( ioffv ), ldv,
469 $ work( ipw1 ), lw )
470 END IF
471*
472 IF( forward ) THEN
473*
474* WORK(IPW) = ( . V1' V2' )' where V1 is unit lower
475* triangular, zeroes upper triangular part of V1
476*
477 mydist = mod( myrow-ivrow+nprow, nprow )
478 itop = max( 0, mydist*mbv - iroffv )
479 iibeg = iiv
480 iiend = iibeg + npv - 1
481 iinxt = min( iceil( iibeg, mbv )*mbv, iiend )
482*
483 30 CONTINUE
484 IF( ( k-itop ).GT.0 ) THEN
485 CALL claset( 'Upper', iinxt-iibeg+1, k-itop, zero,
486 $ one, work( ipw1+iibeg-iiv+itop*lw ),
487 $ lw )
488 mydist = mydist + nprow
489 itop = mydist * mbv - iroffv
490 iibeg = iinxt + 1
491 iinxt = min( iinxt+mbv, iiend )
492 GO TO 30
493 END IF
494*
495 ELSE
496*
497* WORK( IPW ) = ( . V1' V2' )' where V2 is unit upper
498* triangular, zeroes lower triangular part of V2.
499*
500 jj = jjv
501 CALL infog1l( iv+n-k, mbv, nprow, myrow,
502 $ descv( rsrc_ ), ii, ilastrow )
503 ioff = mod( iv+n-k-1, mbv )
504 kp = numroc( k+ioff, mbv, myrow, ilastrow, nprow )
505 IF( myrow.EQ.ilastrow )
506 $ kp = kp - ioff
507 mydist = mod( myrow-ilastrow+nprow, nprow )
508 itop = mydist * mbv - ioff
509 ibase = min( itop+mbv, k )
510 itop = min( max( 0, itop ), k )
511*
512 40 CONTINUE
513 IF( jj.LE.( jjv+k-1 ) ) THEN
514 height = ibase - itop
515 CALL claset( 'All', kp, itop-jj+jjv, zero, zero,
516 $ work( ipw1+ii-iiv+(jj-jjv)*lw ), lw )
517 CALL claset( 'Lower', kp, height, zero, one,
518 $ work( ipw1+ii-iiv+itop*lw ), lw )
519 kp = max( 0, kp - height )
520 ii = ii + height
521 jj = jjv + ibase
522 mydist = mydist + nprow
523 itop = mydist * mbv - ioff
524 ibase = min( itop + mbv, k )
525 itop = min( itop, k )
526 GO TO 40
527 END IF
528 END IF
529 END IF
530*
531 CALL pbctran( ictxt, 'Columnwise', 'Conjugate transpose',
532 $ n+iroffv, k, mbv, work( ipw ), lw, zero,
533 $ work( ipv ), lv, ivrow, ivcol, -1, iccol,
534 $ work( ipt ) )
535*
536* WORK( IPV ) = ( . V' ) -> WORK( IPV ) = V' is K x NQC
537*
538 IF( mycol.EQ.iccol )
539 $ ipv = ipv + icoffc * lv
540*
541* WORK( IPW ) becomes MPC x K = C( IOFFC ) * V
542* WORK( IPW ) = C( IOFFC ) * V (MPC x NQC x K) -> MPC x K
543*
544 lw = max( 1, mpc )
545*
546 IF( nqc.GT.0 ) THEN
547 CALL cgemm( 'No transpose', 'Conjugate transpose', mpc,
548 $ k, nqc, one, c( ioffc ), ldc, work( ipv ),
549 $ lv, zero, work( ipw ), lw )
550 ELSE
551 CALL claset( 'All', mpc, k, zero, zero, work( ipw ), lw )
552 END IF
553*
554 CALL cgsum2d( ictxt, 'Rowwise', ' ', mpc, k, work( ipw ),
555 $ lw, myrow, ivcol )
556*
557* WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
558*
559 IF( mycol.EQ.ivcol ) THEN
560 IF( myrow.EQ.ivrow ) THEN
561*
562* Broadcast the block reflector to the other rows.
563*
564 CALL ctrbs2d( ictxt, 'Columnwise', ' ', uplo,
565 $ 'Non unit', k, k, t, nbv )
566 ELSE
567 CALL ctrbr2d( ictxt, 'Columnwise', ' ', uplo,
568 $ 'Non unit', k, k, t, nbv, ivrow, mycol )
569 END IF
570 CALL ctrmm( 'Right', uplo, trans, 'Non unit', mpc, k,
571 $ one, t, nbv, work( ipw ), lw )
572*
573 CALL cgebs2d( ictxt, 'Rowwise', ' ', mpc, k, work( ipw ),
574 $ lw )
575 ELSE
576 CALL cgebr2d( ictxt, 'Rowwise', ' ', mpc, k, work( ipw ),
577 $ lw, myrow, ivcol )
578 END IF
579*
580* C C - W * V'
581* C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV )
582* MPC x NQC MPC x K K x NQC
583*
584 CALL cgemm( 'No transpose', 'No transpose', mpc, nqc, k,
585 $ -one, work( ipw ), lw, work( ipv ), lv, one,
586 $ c( ioffc ), ldc )
587 END IF
588*
589 ELSE
590*
591* V is stored rowwise
592*
593 IF( lsame( side, 'L' ) ) THEN
594*
595* Form Q*sub( C ) or Q'*sub( C )
596*
597* IROFFC = ICOFFV is required by the current transposition
598* routine PBCTRAN
599*
600 mqv0 = numroc( m+icoffv, nbv, mycol, ivcol, npcol )
601 IF( mycol.EQ.ivcol ) THEN
602 mqv = mqv0 - icoffv
603 ELSE
604 mqv = mqv0
605 END IF
606 IF( myrow.EQ.icrow ) THEN
607 mpc0 = mpc + iroffc
608 ELSE
609 mpc0 = mpc
610 END IF
611*
612* Locally V( IOFFV ) is K x MQV, C( IOFFC ) is MPC x NQC
613* WORK( IPV ) is MPC0 x K = [ . V( IOFFV ) ]'
614* WORK( IPW ) is K x MQV0 = [ . V( IOFFV ) ]
615* WORK( IPT ) is the workspace for PBCTRAN
616*
617 ipv = 1
618 ipw = ipv + mpc0 * k
619 ipt = ipw + k * mqv0
620 lv = max( 1, mpc0 )
621 lw = max( 1, k )
622*
623 IF( myrow.EQ.ivrow ) THEN
624 IF( mycol.EQ.ivcol ) THEN
625 CALL claset( 'All', k, icoffv, zero, zero,
626 $ work( ipw ), lw )
627 ipw1 = ipw + icoffv * lw
628 CALL clamov( 'All', k, mqv, v( ioffv ), ldv,
629 $ work( ipw1 ), lw )
630 ELSE
631 ipw1 = ipw
632 CALL clamov( 'All', k, mqv, v( ioffv ), ldv,
633 $ work( ipw1 ), lw )
634 END IF
635*
636 IF( forward ) THEN
637*
638* WORK( IPW ) = ( . V1 V2 ) where V1 is unit upper
639* triangular, zeroes lower triangular part of V1
640*
641 mydist = mod( mycol-ivcol+npcol, npcol )
642 ileft = max( 0, mydist * nbv - icoffv )
643 jjbeg = jjv
644 jjend = jjv + mqv - 1
645 jjnxt = min( iceil( jjbeg, nbv ) * nbv, jjend )
646*
647 50 CONTINUE
648 IF( ( k-ileft ).GT.0 ) THEN
649 CALL claset( 'Lower', k-ileft, jjnxt-jjbeg+1, zero,
650 $ one,
651 $ work( ipw1+ileft+(jjbeg-jjv)*lw ),
652 $ lw )
653 mydist = mydist + npcol
654 ileft = mydist * nbv - icoffv
655 jjbeg = jjnxt + 1
656 jjnxt = min( jjnxt+nbv, jjend )
657 GO TO 50
658 END IF
659*
660 ELSE
661*
662* WORK( IPW ) = ( . V1 V2 ) where V2 is unit lower
663* triangular, zeroes upper triangular part of V2.
664*
665 ii = iiv
666 CALL infog1l( jv+m-k, nbv, npcol, mycol,
667 $ descv( csrc_ ), jj, ilastcol )
668 ioff = mod( jv+m-k-1, nbv )
669 kq = numroc( k+ioff, nbv, mycol, ilastcol, npcol )
670 IF( mycol.EQ.ilastcol )
671 $ kq = kq - ioff
672 mydist = mod( mycol-ilastcol+npcol, npcol )
673 ileft = mydist * nbv - ioff
674 iright = min( ileft+nbv, k )
675 ileft = min( max( 0, ileft ), k )
676*
677 60 CONTINUE
678 IF( ii.LE.( iiv+k-1 ) ) THEN
679 wide = iright - ileft
680 CALL claset( 'All', ileft-ii+iiv, kq, zero, zero,
681 $ work( ipw1+ii-iiv+(jj-jjv)*lw ), lw )
682 CALL claset( 'Upper', wide, kq, zero, one,
683 $ work( ipw1+ileft+(jj-jjv)*lw ), lw )
684 kq = max( 0, kq - wide )
685 ii = iiv + iright
686 jj = jj + wide
687 mydist = mydist + npcol
688 ileft = mydist * nbv - ioff
689 iright = min( ileft + nbv, k )
690 ileft = min( ileft, k )
691 GO TO 60
692 END IF
693 END IF
694 END IF
695*
696* WORK( IPV ) = WORK( IPW )' (replicated) is MPC0 x K
697*
698 CALL pbctran( ictxt, 'Rowwise', 'Conjugate transpose', k,
699 $ m+icoffv, nbv, work( ipw ), lw, zero,
700 $ work( ipv ), lv, ivrow, ivcol, icrow, -1,
701 $ work( ipt ) )
702*
703* WORK( IPV ) = ( . V )' -> WORK( IPV ) = V' is MPC x K
704*
705 IF( myrow.EQ.icrow )
706 $ ipv = ipv + iroffc
707*
708* WORK( IPW ) becomes NQC x K = C( IOFFC )' * V'
709* WORK( IPW ) = C( IOFFC )' * V' (NQC x MPC x K) -> NQC x K
710*
711 lw = max( 1, nqc )
712*
713 IF( mpc.GT.0 ) THEN
714 CALL cgemm( 'Conjugate transpose', 'No transpose', nqc,
715 $ k, mpc, one, c( ioffc ), ldc, work( ipv ),
716 $ lv, zero, work( ipw ), lw )
717 ELSE
718 CALL claset( 'All', nqc, k, zero, zero, work( ipw ), lw )
719 END IF
720*
721 CALL cgsum2d( ictxt, 'Columnwise', ' ', nqc, k, work( ipw ),
722 $ lw, ivrow, mycol )
723*
724* WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
725*
726 IF( myrow.EQ.ivrow ) THEN
727 IF( mycol.EQ.ivcol ) THEN
728*
729* Broadcast the block reflector to the other columns.
730*
731 CALL ctrbs2d( ictxt, 'Rowwise', ' ', uplo, 'Non unit',
732 $ k, k, t, mbv )
733 ELSE
734 CALL ctrbr2d( ictxt, 'Rowwise', ' ', uplo, 'Non unit',
735 $ k, k, t, mbv, myrow, ivcol )
736 END IF
737 CALL ctrmm( 'Right', uplo, transt, 'Non unit', nqc, k,
738 $ one, t, mbv, work( ipw ), lw )
739*
740 CALL cgebs2d( ictxt, 'Columnwise', ' ', nqc, k,
741 $ work( ipw ), lw )
742 ELSE
743 CALL cgebr2d( ictxt, 'Columnwise', ' ', nqc, k,
744 $ work( ipw ), lw, ivrow, mycol )
745 END IF
746*
747* C C - V' * W'
748* C( IOFFC ) = C( IOFFC ) - WORK( IPV ) * WORK( IPW )'
749* MPC x NQC MPC x K K x NQC
750*
751 CALL cgemm( 'No transpose', 'Conjugate transpose', mpc, nqc,
752 $ k, -one, work( ipv ), lv, work( ipw ), lw, one,
753 $ c( ioffc ), ldc )
754*
755 ELSE
756*
757* Form Q*sub( C ) or Q'*sub( C )
758*
759* Locally V( IOFFV ) is K x NQV, C( IOFFC ) is MPC x NQC
760* WORK( IPV ) is K x NQV = V( IOFFV ), NQV = NQC
761* WORK( IPW ) is MPC x K = C( IOFFC ) * V( IOFFV )'
762*
763 ipv = 1
764 ipw = ipv + k * nqc
765 lv = max( 1, k )
766 lw = max( 1, mpc )
767*
768* Broadcast V to the other process rows.
769*
770 CALL pb_topget( ictxt, 'Broadcast', 'Columnwise', colbtop )
771 IF( myrow.EQ.ivrow ) THEN
772 CALL cgebs2d( ictxt, 'Columnwise', colbtop, k, nqc,
773 $ v( ioffv ), ldv )
774 IF( mycol.EQ.ivcol )
775 $ CALL ctrbs2d( ictxt, 'Columnwise', colbtop, uplo,
776 $ 'Non unit', k, k, t, mbv )
777 CALL clamov( 'All', k, nqc, v( ioffv ), ldv, work( ipv ),
778 $ lv )
779 ELSE
780 CALL cgebr2d( ictxt, 'Columnwise', colbtop, k, nqc,
781 $ work( ipv ), lv, ivrow, mycol )
782 IF( mycol.EQ.ivcol )
783 $ CALL ctrbr2d( ictxt, 'Columnwise', colbtop, uplo,
784 $ 'Non unit', k, k, t, mbv, ivrow, mycol )
785 END IF
786*
787 IF( forward ) THEN
788*
789* WORK(IPW) = ( V1 V2 ) where V1 is unit upper
790* triangular, zeroes lower triangular part of V1
791*
792 mydist = mod( mycol-ivcol+npcol, npcol )
793 ileft = max( 0, mydist * nbv - icoffv )
794 jjbeg = jjv
795 jjend = jjv + nqc - 1
796 jjnxt = min( iceil( jjbeg, nbv ) * nbv, jjend )
797*
798 70 CONTINUE
799 IF( ( k-ileft ).GT.0 ) THEN
800 CALL claset( 'Lower', k-ileft, jjnxt-jjbeg+1, zero,
801 $ one, work( ipv+ileft+(jjbeg-jjv)*lv ),
802 $ lv )
803 mydist = mydist + npcol
804 ileft = mydist * nbv - icoffv
805 jjbeg = jjnxt + 1
806 jjnxt = min( jjnxt+nbv, jjend )
807 GO TO 70
808 END IF
809*
810 ELSE
811*
812* WORK( IPW ) = ( . V1 V2 ) where V2 is unit lower
813* triangular, zeroes upper triangular part of V2.
814*
815 ii = iiv
816 CALL infog1l( jv+n-k, nbv, npcol, mycol, descv( csrc_ ),
817 $ jj, ilastcol )
818 ioff = mod( jv+n-k-1, nbv )
819 kq = numroc( k+ioff, nbv, mycol, ilastcol, npcol )
820 IF( mycol.EQ.ilastcol )
821 $ kq = kq - ioff
822 mydist = mod( mycol-ilastcol+npcol, npcol )
823 ileft = mydist * nbv - ioff
824 iright = min( ileft+nbv, k )
825 ileft = min( max( 0, ileft ), k )
826*
827 80 CONTINUE
828 IF( ii.LE.( iiv+k-1 ) ) THEN
829 wide = iright - ileft
830 CALL claset( 'All', ileft-ii+iiv, kq, zero, zero,
831 $ work( ipv+ii-iiv+(jj-jjv)*lv ), lv )
832 CALL claset( 'Upper', wide, kq, zero, one,
833 $ work( ipv+ileft+(jj-jjv)*lv ), lv )
834 kq = max( 0, kq - wide )
835 ii = iiv + iright
836 jj = jj + wide
837 mydist = mydist + npcol
838 ileft = mydist * nbv - ioff
839 iright = min( ileft + nbv, k )
840 ileft = min( ileft, k )
841 GO TO 80
842 END IF
843*
844 END IF
845*
846* WORK( IPV ) is K x NQC = V = V( IOFFV )
847* WORK( IPW ) = C( IOFFC ) * V' (MPC x NQC x K) -> MPC x K
848*
849 IF( nqc.GT.0 ) THEN
850 CALL cgemm( 'No transpose', 'Conjugate transpose', mpc,
851 $ k, nqc, one, c( ioffc ), ldc, work( ipv ),
852 $ lv, zero, work( ipw ), lw )
853 ELSE
854 CALL claset( 'All', mpc, k, zero, zero, work( ipw ), lw )
855 END IF
856*
857 CALL cgsum2d( ictxt, 'Rowwise', ' ', mpc, k, work( ipw ),
858 $ lw, myrow, ivcol )
859*
860* WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
861*
862 IF( mycol.EQ.ivcol ) THEN
863 CALL ctrmm( 'Right', uplo, trans, 'Non unit', mpc, k,
864 $ one, t, mbv, work( ipw ), lw )
865 CALL cgebs2d( ictxt, 'Rowwise', ' ', mpc, k, work( ipw ),
866 $ lw )
867 ELSE
868 CALL cgebr2d( ictxt, 'Rowwise', ' ', mpc, k, work( ipw ),
869 $ lw, myrow, ivcol )
870 END IF
871*
872* C C - W * V
873* C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV )
874* MPC x NQC MPC x K K x NQC
875*
876 CALL cgemm( 'No transpose', 'No transpose', mpc, nqc, k,
877 $ -one, work( ipw ), lw, work( ipv ), lv, one,
878 $ c( ioffc ), ldc )
879*
880 END IF
881*
882 END IF
883*
884 RETURN
885*
886* End of PCLARFB
887*
888 END
subroutine infog1l(gindx, nb, nprocs, myroc, isrcproc, lindx, rocsrc)
Definition infog1l.f:3
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
subroutine pbctran(icontxt, adist, trans, m, n, nb, a, lda, beta, c, ldc, iarow, iacol, icrow, iccol, work)
Definition pbctran.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pclarfb(side, trans, direct, storev, m, n, k, v, iv, jv, descv, t, c, ic, jc, descc, work)
Definition pclarfb.f:3