SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
psrot.f
Go to the documentation of this file.
1 SUBROUTINE psrot( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY,
2 $ INCY, CS, SN, WORK, LWORK, INFO )
3*
4* Contribution from the Department of Computing Science and HPC2N,
5* Umea University, Sweden
6*
7* -- ScaLAPACK auxiliary routine (version 2.0.1) --
8* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9* Univ. of Colorado Denver and University of California, Berkeley.
10* January, 2012
11*
12 IMPLICIT NONE
13*
14* .. Scalar Arguments ..
15 INTEGER N, IX, JX, INCX, IY, JY, INCY, LWORK, INFO
16 REAL CS, SN
17* ..
18* .. Array Arguments ..
19 INTEGER DESCX( * ), DESCY( * )
20 REAL X( * ), Y( * ), WORK( * )
21* ..
22*
23* Purpose
24* =======
25* PSROT applies a planar rotation defined by CS and SN to the
26* two distributed vectors sub(X) and sub(Y).
27*
28* Notes
29* =====
30*
31* Each global data object is described by an associated description
32* vector. This vector stores the information required to establish
33* the mapping between an object element and its corresponding process
34* and memory location.
35*
36* Let A be a generic term for any 2D block cyclicly distributed array.
37* Such a global array has an associated description vector DESCA.
38* In the following comments, the character _ should be read as
39* "of the global array".
40*
41* NOTATION STORED IN EXPLANATION
42* --------------- -------------- --------------------------------------
43* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
44* DTYPE_A = 1.
45* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
46* the BLACS process grid A is distribu-
47* ted over. The context itself is glo-
48* bal, but the handle (the integer
49* value) may vary.
50* M_A (global) DESCA( M_ ) The number of rows in the global
51* array A.
52* N_A (global) DESCA( N_ ) The number of columns in the global
53* array A.
54* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
55* the rows of the array.
56* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
57* the columns of the array.
58* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
59* row of the array A is distributed.
60* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
61* first column of the array A is
62* distributed.
63* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
64* array. LLD_A >= MAX(1,LOCr(M_A)).
65*
66* Let K be the number of rows or columns of a distributed matrix,
67* and assume that its process grid has dimension p x q.
68* LOCr( K ) denotes the number of elements of K that a process
69* would receive if K were distributed over the p processes of its
70* process column.
71* Similarly, LOCc( K ) denotes the number of elements of K that a
72* process would receive if K were distributed over the q processes of
73* its process row.
74* The values of LOCr() and LOCc() may be determined via a call to the
75* ScaLAPACK tool function, NUMROC:
76* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
77* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
78* An upper bound for these quantities may be computed by:
79* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
80* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
81*
82* Arguments
83* =========
84*
85* N (global input) INTEGER
86* The number of elements to operate on when applying the planar
87* rotation to X and Y. N>=0.
88*
89* X (local input/local output) DOUBLE PRECSION array of dimension
90* ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) )
91* This array contains the entries of the distributed vector
92* sub( X ).
93*
94* IX (global input) INTEGER
95* The global row index of the submatrix of the distributed
96* matrix X to operate on. If INCX = 1, then it is required
97* that IX = IY. 1 <= IX <= M_X.
98*
99* JX (global input) INTEGER
100* The global column index of the submatrix of the distributed
101* matrix X to operate on. If INCX = M_X, then it is required
102* that JX = JY. 1 <= IX <= N_X.
103*
104* DESCX (global and local input) INTEGER array of dimension 9
105* The array descriptor of the distributed matrix X.
106*
107* INCX (global input) INTEGER
108* The global increment for the elements of X. Only two values
109* of INCX are supported in this version, namely 1 and M_X.
110* Moreover, it must hold that INCX = M_X if INCY = M_Y and
111* that INCX = 1 if INCY = 1.
112*
113* Y (local input/local output) DOUBLE PRECSION array of dimension
114* ( (JY-1)*M_Y + IY + ( N - 1 )*abs( INCY ) )
115* This array contains the entries of the distributed vector
116* sub( Y ).
117*
118* IY (global input) INTEGER
119* The global row index of the submatrix of the distributed
120* matrix Y to operate on. If INCY = 1, then it is required
121* that IY = IX. 1 <= IY <= M_Y.
122*
123* JY (global input) INTEGER
124* The global column index of the submatrix of the distributed
125* matrix Y to operate on. If INCY = M_X, then it is required
126* that JY = JX. 1 <= JY <= N_Y.
127*
128* DESCY (global and local input) INTEGER array of dimension 9
129* The array descriptor of the distributed matrix Y.
130*
131* INCY (global input) INTEGER
132* The global increment for the elements of Y. Only two values
133* of INCY are supported in this version, namely 1 and M_Y.
134* Moreover, it must hold that INCY = M_Y if INCX = M_X and
135* that INCY = 1 if INCX = 1.
136*
137* CS (global input) REAL
138* SN (global input) REAL
139* The parameters defining the properties of the planar
140* rotation. It must hold that 0 <= CS,SN <= 1 and that
141* SN**2 + CS**2 = 1. The latter is hardly checked in
142* finite precision arithmetics.
143*
144* WORK (local input) REAL array of dimension LWORK
145* Local workspace area.
146*
147* LWORK (local input) INTEGER
148* The length of the workspace array WORK.
149* If INCX = 1 and INCY = 1, then LWORK = 2*MB_X
150*
151* If LWORK = -1, then a workspace query is assumed; the
152* routine only calculates the optimal size of the WORK array,
153* returns this value as the first entry of the IWORK array, and
154* no error message related to LIWORK is issued by PXERBLA.
155*
156* INFO (global output) INTEGER
157* = 0: successful exit
158* < 0: if INFO = -i, the i-th argument had an illegal value.
159* If the i-th argument is an array and the j-entry had
160* an illegal value, then INFO = -(i*100+j), if the i-th
161* argument is a scalar and had an illegal value, then INFO = -i.
162*
163* Additional requirements
164* =======================
165*
166* The following alignment requirements must hold:
167* (a) DESCX( MB_ ) = DESCY( MB_ ) and DESCX( NB_ ) = DESCY( NB_ )
168* (b) DESCX( RSRC_ ) = DESCY( RSRC_ )
169* (c) DESCX( CSRC_ ) = DESCY( CSRC_ )
170*
171* =====================================================================
172*
173* Written by Robert Granat, May 15, 2007.
174*
175* .. Parameters ..
176 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
177 $ lld_, mb_, m_, nb_, n_, rsrc_
178 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
179 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
180 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
181* ..
182* .. Local Scalars ..
183 LOGICAL LQUERY, LEFT, RIGHT
184 INTEGER ICTXT, NPROW, NPCOL, MYROW, MYCOL, NPROCS,
185 $ mb, nb, xyrows, xycols, rsrc1, rsrc2, csrc1,
186 $ csrc2, icoffxy, iroffxy, mnwrk, lldx, lldy,
187 $ indx, jxx, xloc1, xloc2, rsrc, csrc, yloc1,
188 $ yloc2, jyy, ixx, iyy
189* ..
190* .. External Functions ..
191 INTEGER NUMROC, INDXG2P, INDXG2L
192 EXTERNAL numroc, indxg2p, indxg2l
193* ..
194* .. External Subroutines ..
195 EXTERNAL srot
196* ..
197* .. Intrinsic Functions ..
198 INTRINSIC max, min
199* ..
200* .. Local Functions ..
201 INTEGER ICEIL
202* ..
203* .. Executable Statements ..
204*
205* Get grid parameters
206*
207 ictxt = descx( ctxt_ )
208 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
209 nprocs = nprow*npcol
210*
211* Test and decode parameters
212*
213 lquery = lwork.EQ.-1
214 info = 0
215 IF( n.LT.0 ) THEN
216 info = -1
217 ELSEIF( ix.LT.1 .OR. ix.GT.descx(m_) ) THEN
218 info = -3
219 ELSEIF( jx.LT.1 .OR. jx.GT.descx(n_) ) THEN
220 info = -4
221 ELSEIF( incx.NE.1 .AND. incx.NE.descx(m_) ) THEN
222 info = -6
223 ELSEIF( iy.LT.1 .OR. iy.GT.descy(m_) ) THEN
224 info = -8
225 ELSEIF( jy.LT.1 .OR. jy.GT.descy(n_) ) THEN
226 info = -9
227 ELSEIF( incy.NE.1 .AND. incy.NE.descy(m_) ) THEN
228 info = -11
229 ELSEIF( (incx.EQ.descx(m_) .AND. incy.NE.descy(m_)) .OR.
230 $ (incx.EQ.1 .AND. incy.NE.1 ) ) THEN
231 info = -11
232 ELSEIF( (incx.EQ.1 .AND. incy.EQ.1) .AND.
233 $ ix.NE.iy ) THEN
234 info = -8
235 ELSEIF( (incx.EQ.descx(m_) .AND. incy.EQ.descy(m_)) .AND.
236 $ jx.NE.jy ) THEN
237 info = -9
238 END IF
239*
240* Compute the direction of the planar rotation
241*
242 left = incx.EQ.descx(m_) .AND. incy.EQ.descy(m_)
243 right = incx.EQ.1 .AND. incy.EQ.1
244*
245* Check blocking factors and root processor
246*
247 IF( info.EQ.0 ) THEN
248 IF( left .AND. descx(nb_).NE.descy(nb_) ) THEN
249 info = -(100*5 + nb_)
250 END IF
251 IF( right .AND. descx(mb_).NE.descy(nb_) ) THEN
252 info = -(100*10 + mb_)
253 END IF
254 END IF
255 IF( info.EQ.0 ) THEN
256 IF( left .AND. descx(csrc_).NE.descy(csrc_) ) THEN
257 info = -(100*5 + csrc_)
258 END IF
259 IF( right .AND. descx(rsrc_).NE.descy(rsrc_) ) THEN
260 info = -(100*10 + rsrc_)
261 END IF
262 END IF
263*
264* Compute workspace
265*
266 mb = descx( mb_ )
267 nb = descx( nb_ )
268 IF( left ) THEN
269 rsrc1 = indxg2p( ix, mb, myrow, descx(rsrc_), nprow )
270 rsrc2 = indxg2p( iy, mb, myrow, descy(rsrc_), nprow )
271 csrc = indxg2p( jx, nb, mycol, descx(csrc_), npcol )
272 icoffxy = mod( jx - 1, nb )
273 xycols = numroc( n+icoffxy, nb, mycol, csrc, npcol )
274 IF( ( myrow.EQ.rsrc1 .OR. myrow.EQ.rsrc2 ) .AND.
275 $ mycol.EQ.csrc ) xycols = xycols - icoffxy
276 IF( rsrc1.NE.rsrc2 ) THEN
277 mnwrk = xycols
278 ELSE
279 mnwrk = 0
280 END IF
281 ELSEIF( right ) THEN
282 csrc1 = indxg2p( jx, nb, mycol, descx(csrc_), npcol )
283 csrc2 = indxg2p( jy, nb, mycol, descy(csrc_), npcol )
284 rsrc = indxg2p( ix, mb, myrow, descx(rsrc_), nprow )
285 iroffxy = mod( ix - 1, mb )
286 xyrows = numroc( n+iroffxy, mb, myrow, rsrc, nprow )
287 IF( ( mycol.EQ.csrc1 .OR. mycol.EQ.csrc2 ) .AND.
288 $ myrow.EQ.rsrc ) xyrows = xyrows - iroffxy
289 IF( csrc1.NE.csrc2 ) THEN
290 mnwrk = xyrows
291 ELSE
292 mnwrk = 0
293 END IF
294 END IF
295 IF( info.EQ.0 ) THEN
296 IF( .NOT.lquery . and. lwork.LT.mnwrk ) info = -15
297 END IF
298*
299* Return if some argument is incorrect
300*
301 IF( info.NE.0 ) THEN
302 CALL pxerbla( ictxt, 'PSROT', -info )
303 RETURN
304 ELSEIF( lquery ) THEN
305 work( 1 ) = float(mnwrk)
306 RETURN
307 END IF
308*
309* Quick return if possible
310*
311 IF( n.EQ.0 ) THEN
312 RETURN
313 END IF
314*
315* Extract local leading dimensions
316*
317 lldx = descx( lld_ )
318 lldy = descy( lld_ )
319*
320* If we have only one process, use the corresponding LAPACK
321* routine and return
322*
323 IF( nprocs.EQ.1 ) THEN
324 IF( left ) THEN
325 CALL srot( n, x((jx-1)*lldx+ix), lldx, y((jy-1)*lldy+iy),
326 $ lldy, cs, sn )
327 ELSEIF( right ) THEN
328 CALL srot( n, x((jx-1)*lldx+ix), 1, y((jy-1)*lldy+iy),
329 $ 1, cs, sn )
330 END IF
331 RETURN
332 END IF
333*
334* Exchange data between processors if necessary and perform planar
335* rotation
336*
337 IF( left ) THEN
338 DO 10 indx = 1, npcol
339 IF( myrow.EQ.rsrc1 .AND. xycols.GT.0 ) THEN
340 IF( indx.EQ.1 ) THEN
341 jxx = jx
342 ELSE
343 jxx = jx-icoffxy + (indx-1)*nb
344 END IF
345 CALL infog2l( ix, jxx, descx, nprow, npcol, myrow,
346 $ mycol, xloc1, xloc2, rsrc, csrc )
347 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc ) THEN
348 IF( rsrc1.NE.rsrc2 ) THEN
349 CALL sgesd2d( ictxt, 1, xycols,
350 $ x((xloc2-1)*lldx+xloc1), lldx,
351 $ rsrc2, csrc )
352 CALL sgerv2d( ictxt, 1, xycols, work, 1,
353 $ rsrc2, csrc )
354 CALL srot( xycols, x((xloc2-1)*lldx+xloc1),
355 $ lldx, work, 1, cs, sn )
356 ELSE
357 CALL infog2l( iy, jxx, descy, nprow, npcol,
358 $ myrow, mycol, yloc1, yloc2, rsrc,
359 $ csrc )
360 CALL srot( xycols, x((xloc2-1)*lldx+xloc1),
361 $ lldx, y((yloc2-1)*lldy+yloc1), lldy, cs,
362 $ sn )
363 END IF
364 END IF
365 END IF
366 IF( myrow.EQ.rsrc2 .AND. rsrc1.NE.rsrc2 ) THEN
367 IF( indx.EQ.1 ) THEN
368 jyy = jy
369 ELSE
370 jyy = jy-icoffxy + (indx-1)*nb
371 END IF
372 CALL infog2l( iy, jyy, descy, nprow, npcol, myrow,
373 $ mycol, yloc1, yloc2, rsrc, csrc )
374 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc ) THEN
375 CALL sgesd2d( ictxt, 1, xycols,
376 $ y((yloc2-1)*lldy+yloc1), lldy,
377 $ rsrc1, csrc )
378 CALL sgerv2d( ictxt, 1, xycols, work, 1,
379 $ rsrc1, csrc )
380 CALL srot( xycols, work, 1, y((yloc2-1)*lldy+yloc1),
381 $ lldy, cs, sn )
382 END IF
383 END IF
384 10 CONTINUE
385 ELSEIF( right ) THEN
386 DO 20 indx = 1, nprow
387 IF( mycol.EQ.csrc1 .AND. xyrows.GT.0 ) THEN
388 IF( indx.EQ.1 ) THEN
389 ixx = ix
390 ELSE
391 ixx = ix-iroffxy + (indx-1)*mb
392 END IF
393 CALL infog2l( ixx, jx, descx, nprow, npcol, myrow,
394 $ mycol, xloc1, xloc2, rsrc, csrc )
395 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc ) THEN
396 IF( csrc1.NE.csrc2 ) THEN
397 CALL sgesd2d( ictxt, xyrows, 1,
398 $ x((xloc2-1)*lldx+xloc1), lldx,
399 $ rsrc, csrc2 )
400 CALL sgerv2d( ictxt, xyrows, 1, work, xyrows,
401 $ rsrc, csrc2 )
402 CALL srot( xyrows, x((xloc2-1)*lldx+xloc1),
403 $ 1, work, 1, cs, sn )
404 ELSE
405 CALL infog2l( ixx, jy, descy, nprow, npcol,
406 $ myrow, mycol, yloc1, yloc2, rsrc,
407 $ csrc )
408 CALL srot( xyrows, x((xloc2-1)*lldx+xloc1),
409 $ 1, y((yloc2-1)*lldy+yloc1), 1, cs,
410 $ sn )
411 END IF
412 END IF
413 END IF
414 IF( mycol.EQ.csrc2 .AND. csrc1.NE.csrc2 ) THEN
415 IF( indx.EQ.1 ) THEN
416 iyy = iy
417 ELSE
418 iyy = iy-iroffxy + (indx-1)*mb
419 END IF
420 CALL infog2l( iyy, jy, descy, nprow, npcol, myrow,
421 $ mycol, yloc1, yloc2, rsrc, csrc )
422 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc ) THEN
423 CALL sgesd2d( ictxt, xyrows, 1,
424 $ y((yloc2-1)*lldy+yloc1), lldy,
425 $ rsrc, csrc1 )
426 CALL sgerv2d( ictxt, xyrows, 1, work, xyrows,
427 $ rsrc, csrc1 )
428 CALL srot( xyrows, work, 1, y((yloc2-1)*lldy+yloc1),
429 $ 1, cs, sn )
430 END IF
431 END IF
432 20 CONTINUE
433 END IF
434*
435* Store minimum workspace requirements in WORK-array and return
436*
437 work( 1 ) = float(mnwrk)
438 RETURN
439*
440* End of PSROT
441*
442 END
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine psrot(n, x, ix, jx, descx, incx, y, iy, jy, descy, incy, cs, sn, work, lwork, info)
Definition psrot.f:3
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2