SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pdlarfb.f
Go to the documentation of this file.
1 SUBROUTINE pdlarfb( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, IV,
2 $ JV, DESCV, T, C, IC, JC, DESCC, WORK )
3*
4* -- ScaLAPACK auxiliary routine (version 2.0.2) --
5* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
6* May 1 2012
7*
8* .. Scalar Arguments ..
9 CHARACTER SIDE, TRANS, DIRECT, STOREV
10 INTEGER IC, IV, JC, JV, K, M, N
11* ..
12* .. Array Arguments ..
13 INTEGER DESCC( * ), DESCV( * )
14 DOUBLE PRECISION C( * ), T( * ), V( * ), WORK( * )
15* ..
16*
17* Purpose
18* =======
19*
20* PDLARFB applies a real block reflector Q or its transpose Q**T to a
21* real distributed M-by-N matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1)
22* from the left or the right.
23*
24* Notes
25* =====
26*
27* Each global data object is described by an associated description
28* vector. This vector stores the information required to establish
29* the mapping between an object element and its corresponding process
30* and memory location.
31*
32* Let A be a generic term for any 2D block cyclicly distributed array.
33* Such a global array has an associated description vector DESCA.
34* In the following comments, the character _ should be read as
35* "of the global array".
36*
37* NOTATION STORED IN EXPLANATION
38* --------------- -------------- --------------------------------------
39* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
40* DTYPE_A = 1.
41* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
42* the BLACS process grid A is distribu-
43* ted over. The context itself is glo-
44* bal, but the handle (the integer
45* value) may vary.
46* M_A (global) DESCA( M_ ) The number of rows in the global
47* array A.
48* N_A (global) DESCA( N_ ) The number of columns in the global
49* array A.
50* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
51* the rows of the array.
52* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
53* the columns of the array.
54* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
55* row of the array A is distributed.
56* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
57* first column of the array A is
58* distributed.
59* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
60* array. LLD_A >= MAX(1,LOCr(M_A)).
61*
62* Let K be the number of rows or columns of a distributed matrix,
63* and assume that its process grid has dimension p x q.
64* LOCr( K ) denotes the number of elements of K that a process
65* would receive if K were distributed over the p processes of its
66* process column.
67* Similarly, LOCc( K ) denotes the number of elements of K that a
68* process would receive if K were distributed over the q processes of
69* its process row.
70* The values of LOCr() and LOCc() may be determined via a call to the
71* ScaLAPACK tool function, NUMROC:
72* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
73* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
74* An upper bound for these quantities may be computed by:
75* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
76* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
77*
78* Arguments
79* =========
80*
81* SIDE (global input) CHARACTER
82* = 'L': apply Q or Q**T from the Left;
83* = 'R': apply Q or Q**T from the Right.
84*
85* TRANS (global input) CHARACTER
86* = 'N': No transpose, apply Q;
87* = 'T': Transpose, apply Q**T.
88*
89* DIRECT (global input) CHARACTER
90* Indicates how Q is formed from a product of elementary
91* reflectors
92* = 'F': Q = H(1) H(2) . . . H(k) (Forward)
93* = 'B': Q = H(k) . . . H(2) H(1) (Backward)
94*
95* STOREV (global input) CHARACTER
96* Indicates how the vectors which define the elementary
97* reflectors are stored:
98* = 'C': Columnwise
99* = 'R': Rowwise
100*
101* M (global input) INTEGER
102* The number of rows to be operated on i.e the number of rows
103* of the distributed submatrix sub( C ). M >= 0.
104*
105* N (global input) INTEGER
106* The number of columns to be operated on i.e the number of
107* columns of the distributed submatrix sub( C ). N >= 0.
108*
109* K (global input) INTEGER
110* The order of the matrix T (= the number of elementary
111* reflectors whose product defines the block reflector).
112*
113* V (local input) DOUBLE PRECISION pointer into the local memory
114* to an array of dimension ( LLD_V, LOCc(JV+K-1) ) if
115* STOREV = 'C', ( LLD_V, LOCc(JV+M-1)) if STOREV = 'R' and
116* SIDE = 'L', ( LLD_V, LOCc(JV+N-1) ) if STOREV = 'R' and
117* SIDE = 'R'. It contains the local pieces of the distributed
118* vectors V representing the Householder transformation.
119* See further details.
120* If STOREV = 'C' and SIDE = 'L', LLD_V >= MAX(1,LOCr(IV+M-1));
121* if STOREV = 'C' and SIDE = 'R', LLD_V >= MAX(1,LOCr(IV+N-1));
122* if STOREV = 'R', LLD_V >= LOCr(IV+K-1).
123*
124* IV (global input) INTEGER
125* The row index in the global array V indicating the first
126* row of sub( V ).
127*
128* JV (global input) INTEGER
129* The column index in the global array V indicating the
130* first column of sub( V ).
131*
132* DESCV (global and local input) INTEGER array of dimension DLEN_.
133* The array descriptor for the distributed matrix V.
134*
135* T (local input) DOUBLE PRECISION array, dimension MB_V by MB_V
136* if STOREV = 'R' and NB_V by NB_V if STOREV = 'C'. The trian-
137* gular matrix T in the representation of the block reflector.
138*
139* C (local input/local output) DOUBLE PRECISION pointer into the
140* local memory to an array of dimension (LLD_C,LOCc(JC+N-1)).
141* On entry, the M-by-N distributed matrix sub( C ). On exit,
142* sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) or
143* sub( C )*Q or sub( C )*Q'.
144*
145* IC (global input) INTEGER
146* The row index in the global array C indicating the first
147* row of sub( C ).
148*
149* JC (global input) INTEGER
150* The column index in the global array C indicating the
151* first column of sub( C ).
152*
153* DESCC (global and local input) INTEGER array of dimension DLEN_.
154* The array descriptor for the distributed matrix C.
155*
156* WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK)
157* If STOREV = 'C',
158* if SIDE = 'L',
159* LWORK >= ( NqC0 + MpC0 ) * K
160* else if SIDE = 'R',
161* LWORK >= ( NqC0 + MAX( NpV0 + NUMROC( NUMROC( N+ICOFFC,
162* NB_V, 0, 0, NPCOL ), NB_V, 0, 0, LCMQ ),
163* MpC0 ) ) * K
164* end if
165* else if STOREV = 'R',
166* if SIDE = 'L',
167* LWORK >= ( MpC0 + MAX( MqV0 + NUMROC( NUMROC( M+IROFFC,
168* MB_V, 0, 0, NPROW ), MB_V, 0, 0, LCMP ),
169* NqC0 ) ) * K
170* else if SIDE = 'R',
171* LWORK >= ( MpC0 + NqC0 ) * K
172* end if
173* end if
174*
175* where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ),
176*
177* IROFFV = MOD( IV-1, MB_V ), ICOFFV = MOD( JV-1, NB_V ),
178* IVROW = INDXG2P( IV, MB_V, MYROW, RSRC_V, NPROW ),
179* IVCOL = INDXG2P( JV, NB_V, MYCOL, CSRC_V, NPCOL ),
180* MqV0 = NUMROC( M+ICOFFV, NB_V, MYCOL, IVCOL, NPCOL ),
181* NpV0 = NUMROC( N+IROFFV, MB_V, MYROW, IVROW, NPROW ),
182*
183* IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
184* ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
185* ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
186* MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
187* NpC0 = NUMROC( N+ICOFFC, MB_C, MYROW, ICROW, NPROW ),
188* NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
189*
190* ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
191* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
192* the subroutine BLACS_GRIDINFO.
193*
194* Alignment requirements
195* ======================
196*
197* The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
198* must verify some alignment properties, namely the following
199* expressions should be true:
200*
201* If STOREV = 'Columnwise'
202* If SIDE = 'Left',
203* ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
204* If SIDE = 'Right',
205* ( MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
206* else if STOREV = 'Rowwise'
207* If SIDE = 'Left',
208* ( NB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
209* If SIDE = 'Right',
210* ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
211* end if
212*
213* =====================================================================
214*
215* .. Parameters ..
216 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
217 $ lld_, mb_, m_, nb_, n_, rsrc_
218 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
219 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
220 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
221 DOUBLE PRECISION ONE, ZERO
222 parameter( one = 1.0d+0, zero = 0.0d+0 )
223* ..
224* .. Local Scalars ..
225 LOGICAL FORWARD
226 CHARACTER COLBTOP, ROWBTOP, TRANST, UPLO
227 INTEGER HEIGHT, IBASE, ICCOL, ICOFFC, ICOFFV, ICROW,
228 $ ictxt, ii, iibeg, iic, iiend, iinxt, iiv,
229 $ ilastcol, ilastrow, ileft, ioff, ioffc, ioffv,
230 $ ipt, ipv, ipw, ipw1, iright, iroffc, iroffv,
231 $ itop, ivcol, ivrow, jj, jjbeg, jjc, jjend,
232 $ jjnxt, jjv, kp, kq, ldc, ldv, lv, lw, mbv, mpc,
233 $ mpc0, mqv, mqv0, mycol, mydist, myrow, nbv,
234 $ npv, npv0, npcol, nprow, nqc, nqc0, wide
235* ..
236* .. External Subroutines ..
237 EXTERNAL blacs_gridinfo, dgebr2d, dgebs2d,dgemm,
238 $ dgsum2d, dlamov, dlaset, dtrbr2d,
239 $ dtrbs2d, dtrmm, infog1l, infog2l, pb_topget,
240 $ pbdtran
241* ..
242* .. Intrinsic Functions ..
243 INTRINSIC max, min, mod
244* ..
245* .. External Functions ..
246 LOGICAL LSAME
247 INTEGER ICEIL, NUMROC
248 EXTERNAL iceil, lsame, numroc
249* ..
250* .. Executable Statements ..
251*
252* Quick return if possible
253*
254 IF( m.LE.0 .OR. n.LE.0 .OR. k.LE.0 )
255 $ RETURN
256*
257* Get grid parameters
258*
259 ictxt = descc( ctxt_ )
260 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
261*
262 IF( lsame( trans, 'N' ) ) THEN
263 transt = 'T'
264 ELSE
265 transt = 'N'
266 END IF
267 forward = lsame( direct, 'F' )
268 IF( forward ) THEN
269 uplo = 'U'
270 ELSE
271 uplo = 'L'
272 END IF
273*
274 CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol, iiv, jjv,
275 $ ivrow, ivcol )
276 CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol, iic, jjc,
277 $ icrow, iccol )
278 ldc = descc( lld_ )
279 ldv = descv( lld_ )
280 iic = min( iic, ldc )
281 iiv = min( iiv, ldv )
282 iroffc = mod( ic-1, descc( mb_ ) )
283 icoffc = mod( jc-1, descc( nb_ ) )
284 mbv = descv( mb_ )
285 nbv = descv( nb_ )
286 iroffv = mod( iv-1, mbv )
287 icoffv = mod( jv-1, nbv )
288 mpc = numroc( m+iroffc, descc( mb_ ), myrow, icrow, nprow )
289 nqc = numroc( n+icoffc, descc( nb_ ), mycol, iccol, npcol )
290 IF( mycol.EQ.iccol )
291 $ nqc = nqc - icoffc
292 IF( myrow.EQ.icrow )
293 $ mpc = mpc - iroffc
294 jjc = min( jjc, max( 1, jjc+nqc-1 ) )
295 jjv = min( jjv, max( 1, numroc( descv( n_ ), nbv, mycol,
296 $ descv( csrc_ ), npcol ) ) )
297 ioffc = iic + ( jjc-1 ) * ldc
298 ioffv = iiv + ( jjv-1 ) * ldv
299*
300 IF( lsame( storev, 'C' ) ) THEN
301*
302* V is stored columnwise
303*
304 IF( lsame( side, 'L' ) ) THEN
305*
306* Form Q*sub( C ) or Q'*sub( C )
307*
308* Locally V( IOFFV ) is MPV x K, C( IOFFC ) is MPC x NQC
309* WORK( IPV ) is MPC x K = V( IOFFV ), MPC = MPV
310* WORK( IPW ) is NQC x K = C( IOFFC )' * V( IOFFV )
311*
312 ipv = 1
313 ipw = ipv + mpc * k
314 lv = max( 1, mpc )
315 lw = max( 1, nqc )
316*
317* Broadcast V to the other process columns.
318*
319 CALL pb_topget( ictxt, 'Broadcast', 'Rowwise', rowbtop )
320 IF( mycol.EQ.ivcol ) THEN
321 CALL dgebs2d( ictxt, 'Rowwise', rowbtop, mpc, k,
322 $ v( ioffv ), ldv )
323 IF( myrow.EQ.ivrow )
324 $ CALL dtrbs2d( ictxt, 'Rowwise', rowbtop, uplo,
325 $ 'Non unit', k, k, t, nbv )
326 CALL dlamov( 'All', mpc, k, v( ioffv ), ldv, work( ipv ),
327 $ lv )
328 ELSE
329 CALL dgebr2d( ictxt, 'Rowwise', rowbtop, mpc, k,
330 $ work( ipv ), lv, myrow, ivcol )
331 IF( myrow.EQ.ivrow )
332 $ CALL dtrbr2d( ictxt, 'Rowwise', rowbtop, uplo,
333 $ 'Non unit', k, k, t, nbv, myrow, ivcol )
334 END IF
335*
336 IF( forward ) THEN
337*
338* WORK(IPV) = ( V1 ) where V1 is unit lower triangular,
339* ( V2 ) zeroes upper triangular part of V1
340*
341 mydist = mod( myrow-ivrow+nprow, nprow )
342 itop = max( 0, mydist*mbv - iroffv )
343 iibeg = iiv
344 iiend = iibeg + mpc - 1
345 iinxt = min( iceil( iibeg, mbv )*mbv, iiend )
346*
347 10 CONTINUE
348 IF( k-itop .GT.0 ) THEN
349 CALL dlaset( 'Upper', iinxt-iibeg+1, k-itop, zero,
350 $ one, work( ipv+iibeg-iiv+itop*lv ), lv )
351 mydist = mydist + nprow
352 itop = mydist * mbv - iroffv
353 iibeg = iinxt + 1
354 iinxt = min( iinxt+mbv, iiend )
355 GO TO 10
356 END IF
357*
358 ELSE
359*
360* WORK(IPV) = ( V1 ) where V2 is unit upper triangular,
361* ( V2 ) zeroes lower triangular part of V2
362*
363 jj = jjv
364 ioff = mod( iv+m-k-1, mbv )
365 CALL infog1l( iv+m-k, mbv, nprow, myrow, descv( rsrc_ ),
366 $ ii, ilastrow )
367 kp = numroc( k+ioff, mbv, myrow, ilastrow, nprow )
368 IF( myrow.EQ.ilastrow )
369 $ kp = kp - ioff
370 mydist = mod( myrow-ilastrow+nprow, nprow )
371 itop = mydist * mbv - ioff
372 ibase = min( itop+mbv, k )
373 itop = min( max( 0, itop ), k )
374*
375 20 CONTINUE
376 IF( jj.LE.( jjv+k-1 ) ) THEN
377 height = ibase - itop
378 CALL dlaset( 'All', kp, itop-jj+jjv, zero, zero,
379 $ work( ipv+ii-iiv+(jj-jjv)*lv ), lv )
380 CALL dlaset( 'Lower', kp, height, zero, one,
381 $ work( ipv+ii-iiv+itop*lv ), lv )
382 kp = max( 0, kp - height )
383 ii = ii + height
384 jj = jjv + ibase
385 mydist = mydist + nprow
386 itop = mydist * mbv - ioff
387 ibase = min( itop + mbv, k )
388 itop = min( itop, k )
389 GO TO 20
390 END IF
391*
392 END IF
393*
394* WORK( IPW ) = C( IOFFC )' * V (NQC x MPC x K) -> NQC x K
395*
396 IF( mpc.GT.0 ) THEN
397 CALL dgemm( 'Transpose', 'No transpose', nqc, k, mpc,
398 $ one, c( ioffc ), ldc, work( ipv ), lv, zero,
399 $ work( ipw ), lw )
400 ELSE
401 CALL dlaset( 'All', nqc, k, zero, zero, work( ipw ), lw )
402 END IF
403*
404 CALL dgsum2d( ictxt, 'Columnwise', ' ', nqc, k, work( ipw ),
405 $ lw, ivrow, mycol )
406*
407 IF( myrow.EQ.ivrow ) THEN
408*
409* WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
410*
411 CALL dtrmm( 'Right', uplo, transt, 'Non unit', nqc, k,
412 $ one, t, nbv, work( ipw ), lw )
413 CALL dgebs2d( ictxt, 'Columnwise', ' ', nqc, k,
414 $ work( ipw ), lw )
415 ELSE
416 CALL dgebr2d( ictxt, 'Columnwise', ' ', nqc, k,
417 $ work( ipw ), lw, ivrow, mycol )
418 END IF
419*
420* C C - V * W'
421* C( IOFFC ) = C( IOFFC ) - WORK( IPV ) * WORK( IPW )'
422* MPC x NQC MPC x K K x NQC
423*
424 CALL dgemm( 'No transpose', 'Transpose', mpc, nqc, k, -one,
425 $ work( ipv ), lv, work( ipw ), lw, one,
426 $ c( ioffc ), ldc )
427*
428 ELSE
429*
430* Form sub( C )*Q or sub( C )*Q'
431*
432* ICOFFC = IROFFV is required by the current transposition
433* routine PBDTRAN
434*
435 npv0 = numroc( n+iroffv, mbv, myrow, ivrow, nprow )
436 IF( myrow.EQ.ivrow ) THEN
437 npv = npv0 - iroffv
438 ELSE
439 npv = npv0
440 END IF
441 IF( mycol.EQ.iccol ) THEN
442 nqc0 = nqc + icoffc
443 ELSE
444 nqc0 = nqc
445 END IF
446*
447* Locally V( IOFFV ) is NPV x K C( IOFFC ) is MPC x NQC
448* WORK( IPV ) is K x NQC0 = [ . V( IOFFV ) ]'
449* WORK( IPW ) is NPV0 x K = [ . V( IOFFV )' ]'
450* WORK( IPT ) is the workspace for PBDTRAN
451*
452 ipv = 1
453 ipw = ipv + k * nqc0
454 ipt = ipw + npv0 * k
455 lv = max( 1, k )
456 lw = max( 1, npv0 )
457*
458 IF( mycol.EQ.ivcol ) THEN
459 IF( myrow.EQ.ivrow ) THEN
460 CALL dlaset( 'All', iroffv, k, zero, zero,
461 $ work( ipw ), lw )
462 ipw1 = ipw + iroffv
463 CALL dlamov( 'All', npv, k, v( ioffv ), ldv,
464 $ work( ipw1 ), lw )
465 ELSE
466 ipw1 = ipw
467 CALL dlamov( 'All', npv, k, v( ioffv ), ldv,
468 $ work( ipw1 ), lw )
469 END IF
470*
471 IF( forward ) THEN
472*
473* WORK(IPW) = ( . V1' V2' )' where V1 is unit lower
474* triangular, zeroes upper triangular part of V1
475*
476 mydist = mod( myrow-ivrow+nprow, nprow )
477 itop = max( 0, mydist*mbv - iroffv )
478 iibeg = iiv
479 iiend = iibeg + npv - 1
480 iinxt = min( iceil( iibeg, mbv )*mbv, iiend )
481*
482 30 CONTINUE
483 IF( ( k-itop ).GT.0 ) THEN
484 CALL dlaset( 'Upper', iinxt-iibeg+1, k-itop, zero,
485 $ one, work( ipw1+iibeg-iiv+itop*lw ),
486 $ lw )
487 mydist = mydist + nprow
488 itop = mydist * mbv - iroffv
489 iibeg = iinxt + 1
490 iinxt = min( iinxt+mbv, iiend )
491 GO TO 30
492 END IF
493*
494 ELSE
495*
496* WORK( IPW ) = ( . V1' V2' )' where V2 is unit upper
497* triangular, zeroes lower triangular part of V2.
498*
499 jj = jjv
500 CALL infog1l( iv+n-k, mbv, nprow, myrow,
501 $ descv( rsrc_ ), ii, ilastrow )
502 ioff = mod( iv+n-k-1, mbv )
503 kp = numroc( k+ioff, mbv, myrow, ilastrow, nprow )
504 IF( myrow.EQ.ilastrow )
505 $ kp = kp - ioff
506 mydist = mod( myrow-ilastrow+nprow, nprow )
507 itop = mydist * mbv - ioff
508 ibase = min( itop+mbv, k )
509 itop = min( max( 0, itop ), k )
510*
511 40 CONTINUE
512 IF( jj.LE.( jjv+k-1 ) ) THEN
513 height = ibase - itop
514 CALL dlaset( 'All', kp, itop-jj+jjv, zero, zero,
515 $ work( ipw1+ii-iiv+(jj-jjv)*lw ), lw )
516 CALL dlaset( 'Lower', kp, height, zero, one,
517 $ work( ipw1+ii-iiv+itop*lw ), lw )
518 kp = max( 0, kp - height )
519 ii = ii + height
520 jj = jjv + ibase
521 mydist = mydist + nprow
522 itop = mydist * mbv - ioff
523 ibase = min( itop + mbv, k )
524 itop = min( itop, k )
525 GO TO 40
526 END IF
527 END IF
528 END IF
529*
530 CALL pbdtran( ictxt, 'Columnwise', 'Transpose', n+iroffv, k,
531 $ mbv, work( ipw ), lw, zero, work( ipv ), lv,
532 $ ivrow, ivcol, -1, iccol, work( ipt ) )
533*
534* WORK( IPV ) = ( . V' ) -> WORK( IPV ) = V' is K x NQC
535*
536 IF( mycol.EQ.iccol )
537 $ ipv = ipv + icoffc * lv
538*
539* WORK( IPW ) becomes MPC x K = C( IOFFC ) * V
540* WORK( IPW ) = C( IOFFC ) * V (MPC x NQC x K) -> MPC x K
541*
542 lw = max( 1, mpc )
543*
544 IF( nqc.GT.0 ) THEN
545 CALL dgemm( 'No transpose', 'Transpose', mpc, k, nqc,
546 $ one, c( ioffc ), ldc, work( ipv ), lv, zero,
547 $ work( ipw ), lw )
548 ELSE
549 CALL dlaset( 'All', mpc, k, zero, zero, work( ipw ), lw )
550 END IF
551*
552 CALL dgsum2d( ictxt, 'Rowwise', ' ', mpc, k, work( ipw ),
553 $ lw, myrow, ivcol )
554*
555* WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
556*
557 IF( mycol.EQ.ivcol ) THEN
558 IF( myrow.EQ.ivrow ) THEN
559*
560* Broadcast the block reflector to the other rows.
561*
562 CALL dtrbs2d( ictxt, 'Columnwise', ' ', uplo,
563 $ 'Non unit', k, k, t, nbv )
564 ELSE
565 CALL dtrbr2d( ictxt, 'Columnwise', ' ', uplo,
566 $ 'Non unit', k, k, t, nbv, ivrow, mycol )
567 END IF
568 CALL dtrmm( 'Right', uplo, trans, 'Non unit', mpc, k,
569 $ one, t, nbv, work( ipw ), lw )
570*
571 CALL dgebs2d( ictxt, 'Rowwise', ' ', mpc, k, work( ipw ),
572 $ lw )
573 ELSE
574 CALL dgebr2d( ictxt, 'Rowwise', ' ', mpc, k, work( ipw ),
575 $ lw, myrow, ivcol )
576 END IF
577*
578* C C - W * V'
579* C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV )
580* MPC x NQC MPC x K K x NQC
581*
582 CALL dgemm( 'No transpose', 'No transpose', mpc, nqc, k,
583 $ -one, work( ipw ), lw, work( ipv ), lv, one,
584 $ c( ioffc ), ldc )
585 END IF
586*
587 ELSE
588*
589* V is stored rowwise
590*
591 IF( lsame( side, 'L' ) ) THEN
592*
593* Form Q*sub( C ) or Q'*sub( C )
594*
595* IROFFC = ICOFFV is required by the current transposition
596* routine PBDTRAN
597*
598 mqv0 = numroc( m+icoffv, nbv, mycol, ivcol, npcol )
599 IF( mycol.EQ.ivcol ) THEN
600 mqv = mqv0 - icoffv
601 ELSE
602 mqv = mqv0
603 END IF
604 IF( myrow.EQ.icrow ) THEN
605 mpc0 = mpc + iroffc
606 ELSE
607 mpc0 = mpc
608 END IF
609*
610* Locally V( IOFFV ) is K x MQV, C( IOFFC ) is MPC x NQC
611* WORK( IPV ) is MPC0 x K = [ . V( IOFFV ) ]'
612* WORK( IPW ) is K x MQV0 = [ . V( IOFFV ) ]
613* WORK( IPT ) is the workspace for PBDTRAN
614*
615 ipv = 1
616 ipw = ipv + mpc0 * k
617 ipt = ipw + k * mqv0
618 lv = max( 1, mpc0 )
619 lw = max( 1, k )
620*
621 IF( myrow.EQ.ivrow ) THEN
622 IF( mycol.EQ.ivcol ) THEN
623 CALL dlaset( 'All', k, icoffv, zero, zero,
624 $ work( ipw ), lw )
625 ipw1 = ipw + icoffv * lw
626 CALL dlamov( 'All', k, mqv, v( ioffv ), ldv,
627 $ work( ipw1 ), lw )
628 ELSE
629 ipw1 = ipw
630 CALL dlamov( 'All', k, mqv, v( ioffv ), ldv,
631 $ work( ipw1 ), lw )
632 END IF
633*
634 IF( forward ) THEN
635*
636* WORK( IPW ) = ( . V1 V2 ) where V1 is unit upper
637* triangular, zeroes lower triangular part of V1
638*
639 mydist = mod( mycol-ivcol+npcol, npcol )
640 ileft = max( 0, mydist * nbv - icoffv )
641 jjbeg = jjv
642 jjend = jjv + mqv - 1
643 jjnxt = min( iceil( jjbeg, nbv ) * nbv, jjend )
644*
645 50 CONTINUE
646 IF( ( k-ileft ).GT.0 ) THEN
647 CALL dlaset( 'Lower', k-ileft, jjnxt-jjbeg+1, zero,
648 $ one,
649 $ work( ipw1+ileft+(jjbeg-jjv)*lw ),
650 $ lw )
651 mydist = mydist + npcol
652 ileft = mydist * nbv - icoffv
653 jjbeg = jjnxt + 1
654 jjnxt = min( jjnxt+nbv, jjend )
655 GO TO 50
656 END IF
657*
658 ELSE
659*
660* WORK( IPW ) = ( . V1 V2 ) where V2 is unit lower
661* triangular, zeroes upper triangular part of V2.
662*
663 ii = iiv
664 CALL infog1l( jv+m-k, nbv, npcol, mycol,
665 $ descv( csrc_ ), jj, ilastcol )
666 ioff = mod( jv+m-k-1, nbv )
667 kq = numroc( k+ioff, nbv, mycol, ilastcol, npcol )
668 IF( mycol.EQ.ilastcol )
669 $ kq = kq - ioff
670 mydist = mod( mycol-ilastcol+npcol, npcol )
671 ileft = mydist * nbv - ioff
672 iright = min( ileft+nbv, k )
673 ileft = min( max( 0, ileft ), k )
674*
675 60 CONTINUE
676 IF( ii.LE.( iiv+k-1 ) ) THEN
677 wide = iright - ileft
678 CALL dlaset( 'All', ileft-ii+iiv, kq, zero, zero,
679 $ work( ipw1+ii-iiv+(jj-jjv)*lw ), lw )
680 CALL dlaset( 'Upper', wide, kq, zero, one,
681 $ work( ipw1+ileft+(jj-jjv)*lw ), lw )
682 kq = max( 0, kq - wide )
683 ii = iiv + iright
684 jj = jj + wide
685 mydist = mydist + npcol
686 ileft = mydist * nbv - ioff
687 iright = min( ileft + nbv, k )
688 ileft = min( ileft, k )
689 GO TO 60
690 END IF
691 END IF
692 END IF
693*
694* WORK( IPV ) = WORK( IPW )' (replicated) is MPC0 x K
695*
696 CALL pbdtran( ictxt, 'Rowwise', 'Transpose', k, m+icoffv,
697 $ nbv, work( ipw ), lw, zero, work( ipv ), lv,
698 $ ivrow, ivcol, icrow, -1, work( ipt ) )
699*
700* WORK( IPV ) = ( . V )' -> WORK( IPV ) = V' is MPC x K
701*
702 IF( myrow.EQ.icrow )
703 $ ipv = ipv + iroffc
704*
705* WORK( IPW ) becomes NQC x K = C( IOFFC )' * V'
706* WORK( IPW ) = C( IOFFC )' * V' (NQC x MPC x K) -> NQC x K
707*
708 lw = max( 1, nqc )
709*
710 IF( mpc.GT.0 ) THEN
711 CALL dgemm( 'Transpose', 'No transpose', nqc, k, mpc,
712 $ one, c( ioffc ), ldc, work( ipv ), lv, zero,
713 $ work( ipw ), lw )
714 ELSE
715 CALL dlaset( 'All', nqc, k, zero, zero, work( ipw ), lw )
716 END IF
717*
718 CALL dgsum2d( ictxt, 'Columnwise', ' ', nqc, k, work( ipw ),
719 $ lw, ivrow, mycol )
720*
721* WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
722*
723 IF( myrow.EQ.ivrow ) THEN
724 IF( mycol.EQ.ivcol ) THEN
725*
726* Broadcast the block reflector to the other columns.
727*
728 CALL dtrbs2d( ictxt, 'Rowwise', ' ', uplo, 'Non unit',
729 $ k, k, t, mbv )
730 ELSE
731 CALL dtrbr2d( ictxt, 'Rowwise', ' ', uplo, 'Non unit',
732 $ k, k, t, mbv, myrow, ivcol )
733 END IF
734 CALL dtrmm( 'Right', uplo, transt, 'Non unit', nqc, k,
735 $ one, t, mbv, work( ipw ), lw )
736*
737 CALL dgebs2d( ictxt, 'Columnwise', ' ', nqc, k,
738 $ work( ipw ), lw )
739 ELSE
740 CALL dgebr2d( ictxt, 'Columnwise', ' ', nqc, k,
741 $ work( ipw ), lw, ivrow, mycol )
742 END IF
743*
744* C C - V' * W'
745* C( IOFFC ) = C( IOFFC ) - WORK( IPV ) * WORK( IPW )'
746* MPC x NQC MPC x K K x NQC
747*
748 CALL dgemm( 'No transpose', 'Transpose', mpc, nqc, k, -one,
749 $ work( ipv ), lv, work( ipw ), lw, one,
750 $ c( ioffc ), ldc )
751*
752 ELSE
753*
754* Form Q*sub( C ) or Q'*sub( C )
755*
756* Locally V( IOFFV ) is K x NQV, C( IOFFC ) is MPC x NQC
757* WORK( IPV ) is K x NQV = V( IOFFV ), NQV = NQC
758* WORK( IPW ) is MPC x K = C( IOFFC ) * V( IOFFV )'
759*
760 ipv = 1
761 ipw = ipv + k * nqc
762 lv = max( 1, k )
763 lw = max( 1, mpc )
764*
765* Broadcast V to the other process rows.
766*
767 CALL pb_topget( ictxt, 'Broadcast', 'Columnwise', colbtop )
768 IF( myrow.EQ.ivrow ) THEN
769 CALL dgebs2d( ictxt, 'Columnwise', colbtop, k, nqc,
770 $ v( ioffv ), ldv )
771 IF( mycol.EQ.ivcol )
772 $ CALL dtrbs2d( ictxt, 'Columnwise', colbtop, uplo,
773 $ 'Non unit', k, k, t, mbv )
774 CALL dlamov( 'All', k, nqc, v( ioffv ), ldv, work( ipv ),
775 $ lv )
776 ELSE
777 CALL dgebr2d( ictxt, 'Columnwise', colbtop, k, nqc,
778 $ work( ipv ), lv, ivrow, mycol )
779 IF( mycol.EQ.ivcol )
780 $ CALL dtrbr2d( ictxt, 'Columnwise', colbtop, uplo,
781 $ 'Non unit', k, k, t, mbv, ivrow, mycol )
782 END IF
783*
784 IF( forward ) THEN
785*
786* WORK(IPW) = ( V1 V2 ) where V1 is unit upper
787* triangular, zeroes lower triangular part of V1
788*
789 mydist = mod( mycol-ivcol+npcol, npcol )
790 ileft = max( 0, mydist * nbv - icoffv )
791 jjbeg = jjv
792 jjend = jjv + nqc - 1
793 jjnxt = min( iceil( jjbeg, nbv ) * nbv, jjend )
794*
795 70 CONTINUE
796 IF( ( k-ileft ).GT.0 ) THEN
797 CALL dlaset( 'Lower', k-ileft, jjnxt-jjbeg+1, zero,
798 $ one, work( ipv+ileft+(jjbeg-jjv)*lv ),
799 $ lv )
800 mydist = mydist + npcol
801 ileft = mydist * nbv - icoffv
802 jjbeg = jjnxt + 1
803 jjnxt = min( jjnxt+nbv, jjend )
804 GO TO 70
805 END IF
806*
807 ELSE
808*
809* WORK( IPW ) = ( . V1 V2 ) where V2 is unit lower
810* triangular, zeroes upper triangular part of V2.
811*
812 ii = iiv
813 CALL infog1l( jv+n-k, nbv, npcol, mycol, descv( csrc_ ),
814 $ jj, ilastcol )
815 ioff = mod( jv+n-k-1, nbv )
816 kq = numroc( k+ioff, nbv, mycol, ilastcol, npcol )
817 IF( mycol.EQ.ilastcol )
818 $ kq = kq - ioff
819 mydist = mod( mycol-ilastcol+npcol, npcol )
820 ileft = mydist * nbv - ioff
821 iright = min( ileft+nbv, k )
822 ileft = min( max( 0, ileft ), k )
823*
824 80 CONTINUE
825 IF( ii.LE.( iiv+k-1 ) ) THEN
826 wide = iright - ileft
827 CALL dlaset( 'All', ileft-ii+iiv, kq, zero, zero,
828 $ work( ipv+ii-iiv+(jj-jjv)*lv ), lv )
829 CALL dlaset( 'Upper', wide, kq, zero, one,
830 $ work( ipv+ileft+(jj-jjv)*lv ), lv )
831 kq = max( 0, kq - wide )
832 ii = iiv + iright
833 jj = jj + wide
834 mydist = mydist + npcol
835 ileft = mydist * nbv - ioff
836 iright = min( ileft + nbv, k )
837 ileft = min( ileft, k )
838 GO TO 80
839 END IF
840*
841 END IF
842*
843* WORK( IPV ) is K x NQC = V = V( IOFFV )
844* WORK( IPW ) = C( IOFFC ) * V' (MPC x NQC x K) -> MPC x K
845*
846 IF( nqc.GT.0 ) THEN
847 CALL dgemm( 'No Transpose', 'Transpose', mpc, k, nqc,
848 $ one, c( ioffc ), ldc, work( ipv ), lv, zero,
849 $ work( ipw ), lw )
850 ELSE
851 CALL dlaset( 'All', mpc, k, zero, zero, work( ipw ), lw )
852 END IF
853*
854 CALL dgsum2d( ictxt, 'Rowwise', ' ', mpc, k, work( ipw ),
855 $ lw, myrow, ivcol )
856*
857* WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
858*
859 IF( mycol.EQ.ivcol ) THEN
860 CALL dtrmm( 'Right', uplo, trans, 'Non unit', mpc, k,
861 $ one, t, mbv, work( ipw ), lw )
862 CALL dgebs2d( ictxt, 'Rowwise', ' ', mpc, k, work( ipw ),
863 $ lw )
864 ELSE
865 CALL dgebr2d( ictxt, 'Rowwise', ' ', mpc, k, work( ipw ),
866 $ lw, myrow, ivcol )
867 END IF
868*
869* C C - W * V
870* C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV )
871* MPC x NQC MPC x K K x NQC
872*
873 CALL dgemm( 'No transpose', 'No transpose', mpc, nqc, k,
874 $ -one, work( ipw ), lw, work( ipv ), lv, one,
875 $ c( ioffc ), ldc )
876*
877 END IF
878*
879 END IF
880*
881 RETURN
882*
883* End of PDLARFB
884*
885 END
subroutine infog1l(gindx, nb, nprocs, myroc, isrcproc, lindx, rocsrc)
Definition infog1l.f:3
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
subroutine pbdtran(icontxt, adist, trans, m, n, nb, a, lda, beta, c, ldc, iarow, iacol, icrow, iccol, work)
Definition pbdtran.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pdlarfb(side, trans, direct, storev, m, n, k, v, iv, jv, descv, t, c, ic, jc, descc, work)
Definition pdlarfb.f:3