ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
psrot.f
Go to the documentation of this file.
1  SUBROUTINE psrot( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY,
2  $ INCY, CS, SN, WORK, LWORK, INFO )
3 *
4 * Contribution from the Department of Computing Science and HPC2N,
5 * Umea University, Sweden
6 *
7 * -- ScaLAPACK auxiliary routine (version 2.0.1) --
8 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9 * Univ. of Colorado Denver and University of California, Berkeley.
10 * January, 2012
11 *
12  IMPLICIT NONE
13 *
14 * .. Scalar Arguments ..
15  INTEGER N, IX, JX, INCX, IY, JY, INCY, LWORK, INFO
16  REAL CS, SN
17 * ..
18 * .. Array Arguments ..
19  INTEGER DESCX( * ), DESCY( * )
20  REAL X( * ), Y( * ), WORK( * )
21 * ..
22 *
23 * Purpose
24 * =======
25 * PSROT applies a planar rotation defined by CS and SN to the
26 * two distributed vectors sub(X) and sub(Y).
27 *
28 * Notes
29 * =====
30 *
31 * Each global data object is described by an associated description
32 * vector. This vector stores the information required to establish
33 * the mapping between an object element and its corresponding process
34 * and memory location.
35 *
36 * Let A be a generic term for any 2D block cyclicly distributed array.
37 * Such a global array has an associated description vector DESCA.
38 * In the following comments, the character _ should be read as
39 * "of the global array".
40 *
41 * NOTATION STORED IN EXPLANATION
42 * --------------- -------------- --------------------------------------
43 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
44 * DTYPE_A = 1.
45 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
46 * the BLACS process grid A is distribu-
47 * ted over. The context itself is glo-
48 * bal, but the handle (the integer
49 * value) may vary.
50 * M_A (global) DESCA( M_ ) The number of rows in the global
51 * array A.
52 * N_A (global) DESCA( N_ ) The number of columns in the global
53 * array A.
54 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
55 * the rows of the array.
56 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
57 * the columns of the array.
58 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
59 * row of the array A is distributed.
60 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
61 * first column of the array A is
62 * distributed.
63 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
64 * array. LLD_A >= MAX(1,LOCr(M_A)).
65 *
66 * Let K be the number of rows or columns of a distributed matrix,
67 * and assume that its process grid has dimension p x q.
68 * LOCr( K ) denotes the number of elements of K that a process
69 * would receive if K were distributed over the p processes of its
70 * process column.
71 * Similarly, LOCc( K ) denotes the number of elements of K that a
72 * process would receive if K were distributed over the q processes of
73 * its process row.
74 * The values of LOCr() and LOCc() may be determined via a call to the
75 * ScaLAPACK tool function, NUMROC:
76 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
77 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
78 * An upper bound for these quantities may be computed by:
79 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
80 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
81 *
82 * Arguments
83 * =========
84 *
85 * N (global input) INTEGER
86 * The number of elements to operate on when applying the planar
87 * rotation to X and Y. N>=0.
88 *
89 * X (local input/local output) DOUBLE PRECSION array of dimension
90 * ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) )
91 * This array contains the entries of the distributed vector
92 * sub( X ).
93 *
94 * IX (global input) INTEGER
95 * The global row index of the submatrix of the distributed
96 * matrix X to operate on. If INCX = 1, then it is required
97 * that IX = IY. 1 <= IX <= M_X.
98 *
99 * JX (global input) INTEGER
100 * The global column index of the submatrix of the distributed
101 * matrix X to operate on. If INCX = M_X, then it is required
102 * that JX = JY. 1 <= IX <= N_X.
103 *
104 * DESCX (global and local input) INTEGER array of dimension 9
105 * The array descriptor of the distributed matrix X.
106 *
107 * INCX (global input) INTEGER
108 * The global increment for the elements of X. Only two values
109 * of INCX are supported in this version, namely 1 and M_X.
110 * Moreover, it must hold that INCX = M_X if INCY = M_Y and
111 * that INCX = 1 if INCY = 1.
112 *
113 * Y (local input/local output) DOUBLE PRECSION array of dimension
114 * ( (JY-1)*M_Y + IY + ( N - 1 )*abs( INCY ) )
115 * This array contains the entries of the distributed vector
116 * sub( Y ).
117 *
118 * IY (global input) INTEGER
119 * The global row index of the submatrix of the distributed
120 * matrix Y to operate on. If INCY = 1, then it is required
121 * that IY = IX. 1 <= IY <= M_Y.
122 *
123 * JY (global input) INTEGER
124 * The global column index of the submatrix of the distributed
125 * matrix Y to operate on. If INCY = M_X, then it is required
126 * that JY = JX. 1 <= JY <= N_Y.
127 *
128 * DESCY (global and local input) INTEGER array of dimension 9
129 * The array descriptor of the distributed matrix Y.
130 *
131 * INCY (global input) INTEGER
132 * The global increment for the elements of Y. Only two values
133 * of INCY are supported in this version, namely 1 and M_Y.
134 * Moreover, it must hold that INCY = M_Y if INCX = M_X and
135 * that INCY = 1 if INCX = 1.
136 *
137 * CS (global input) REAL
138 * SN (global input) REAL
139 * The parameters defining the properties of the planar
140 * rotation. It must hold that 0 <= CS,SN <= 1 and that
141 * SN**2 + CS**2 = 1. The latter is hardly checked in
142 * finite precision arithmetics.
143 *
144 * WORK (local input) REAL array of dimension LWORK
145 * Local workspace area.
146 *
147 * LWORK (local input) INTEGER
148 * The length of the workspace array WORK.
149 * If INCX = 1 and INCY = 1, then LWORK = 2*MB_X
150 *
151 * If LWORK = -1, then a workspace query is assumed; the
152 * routine only calculates the optimal size of the WORK array,
153 * returns this value as the first entry of the IWORK array, and
154 * no error message related to LIWORK is issued by PXERBLA.
155 *
156 * INFO (global output) INTEGER
157 * = 0: successful exit
158 * < 0: if INFO = -i, the i-th argument had an illegal value.
159 * If the i-th argument is an array and the j-entry had
160 * an illegal value, then INFO = -(i*100+j), if the i-th
161 * argument is a scalar and had an illegal value, then INFO = -i.
162 *
163 * Additional requirements
164 * =======================
165 *
166 * The following alignment requirements must hold:
167 * (a) DESCX( MB_ ) = DESCY( MB_ ) and DESCX( NB_ ) = DESCY( NB_ )
168 * (b) DESCX( RSRC_ ) = DESCY( RSRC_ )
169 * (c) DESCX( CSRC_ ) = DESCY( CSRC_ )
170 *
171 * =====================================================================
172 *
173 * Written by Robert Granat, May 15, 2007.
174 *
175 * .. Parameters ..
176  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
177  $ lld_, mb_, m_, nb_, n_, rsrc_
178  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
179  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
180  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
181 * ..
182 * .. Local Scalars ..
183  LOGICAL LQUERY, LEFT, RIGHT
184  INTEGER ICTXT, NPROW, NPCOL, MYROW, MYCOL, NPROCS,
185  $ mb, nb, xyrows, xycols, rsrc1, rsrc2, csrc1,
186  $ csrc2, icoffxy, iroffxy, mnwrk, lldx, lldy,
187  $ indx, jxx, xloc1, xloc2, rsrc, csrc, yloc1,
188  $ yloc2, jyy, ixx, iyy
189 * ..
190 * .. External Functions ..
191  INTEGER NUMROC, INDXG2P, INDXG2L
192  EXTERNAL numroc, indxg2p, indxg2l
193 * ..
194 * .. External Subroutines ..
195  EXTERNAL srot
196 * ..
197 * .. Intrinsic Functions ..
198  INTRINSIC max, min
199 * ..
200 * .. Local Functions ..
201  INTEGER ICEIL
202 * ..
203 * .. Executable Statements ..
204 *
205 * Get grid parameters
206 *
207  ictxt = descx( ctxt_ )
208  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
209  nprocs = nprow*npcol
210 *
211 * Test and decode parameters
212 *
213  lquery = lwork.EQ.-1
214  info = 0
215  IF( n.LT.0 ) THEN
216  info = -1
217  ELSEIF( ix.LT.1 .OR. ix.GT.descx(m_) ) THEN
218  info = -3
219  ELSEIF( jx.LT.1 .OR. jx.GT.descx(n_) ) THEN
220  info = -4
221  ELSEIF( incx.NE.1 .AND. incx.NE.descx(m_) ) THEN
222  info = -6
223  ELSEIF( iy.LT.1 .OR. iy.GT.descy(m_) ) THEN
224  info = -8
225  ELSEIF( jy.LT.1 .OR. jy.GT.descy(n_) ) THEN
226  info = -9
227  ELSEIF( incy.NE.1 .AND. incy.NE.descy(m_) ) THEN
228  info = -11
229  ELSEIF( (incx.EQ.descx(m_) .AND. incy.NE.descy(m_)) .OR.
230  $ (incx.EQ.1 .AND. incy.NE.1 ) ) THEN
231  info = -11
232  ELSEIF( (incx.EQ.1 .AND. incy.EQ.1) .AND.
233  $ ix.NE.iy ) THEN
234  info = -8
235  ELSEIF( (incx.EQ.descx(m_) .AND. incy.EQ.descy(m_)) .AND.
236  $ jx.NE.jy ) THEN
237  info = -9
238  END IF
239 *
240 * Compute the direction of the planar rotation
241 *
242  left = incx.EQ.descx(m_) .AND. incy.EQ.descy(m_)
243  right = incx.EQ.1 .AND. incy.EQ.1
244 *
245 * Check blocking factors and root processor
246 *
247  IF( info.EQ.0 ) THEN
248  IF( left .AND. descx(nb_).NE.descy(nb_) ) THEN
249  info = -(100*5 + nb_)
250  END IF
251  IF( right .AND. descx(mb_).NE.descy(nb_) ) THEN
252  info = -(100*10 + mb_)
253  END IF
254  END IF
255  IF( info.EQ.0 ) THEN
256  IF( left .AND. descx(csrc_).NE.descy(csrc_) ) THEN
257  info = -(100*5 + csrc_)
258  END IF
259  IF( right .AND. descx(rsrc_).NE.descy(rsrc_) ) THEN
260  info = -(100*10 + rsrc_)
261  END IF
262  END IF
263 *
264 * Compute workspace
265 *
266  mb = descx( mb_ )
267  nb = descx( nb_ )
268  IF( left ) THEN
269  rsrc1 = indxg2p( ix, mb, myrow, descx(rsrc_), nprow )
270  rsrc2 = indxg2p( iy, mb, myrow, descy(rsrc_), nprow )
271  csrc = indxg2p( jx, nb, mycol, descx(csrc_), npcol )
272  icoffxy = mod( jx - 1, nb )
273  xycols = numroc( n+icoffxy, nb, mycol, csrc, npcol )
274  IF( ( myrow.EQ.rsrc1 .OR. myrow.EQ.rsrc2 ) .AND.
275  $ mycol.EQ.csrc ) xycols = xycols - icoffxy
276  IF( rsrc1.NE.rsrc2 ) THEN
277  mnwrk = xycols
278  ELSE
279  mnwrk = 0
280  END IF
281  ELSEIF( right ) THEN
282  csrc1 = indxg2p( jx, nb, mycol, descx(csrc_), npcol )
283  csrc2 = indxg2p( jy, nb, mycol, descy(csrc_), npcol )
284  rsrc = indxg2p( ix, mb, myrow, descx(rsrc_), nprow )
285  iroffxy = mod( ix - 1, mb )
286  xyrows = numroc( n+iroffxy, mb, myrow, rsrc, nprow )
287  IF( ( mycol.EQ.csrc1 .OR. mycol.EQ.csrc2 ) .AND.
288  $ myrow.EQ.rsrc ) xyrows = xyrows - iroffxy
289  IF( csrc1.NE.csrc2 ) THEN
290  mnwrk = xyrows
291  ELSE
292  mnwrk = 0
293  END IF
294  END IF
295  IF( info.EQ.0 ) THEN
296  IF( .NOT.lquery . and. lwork.LT.mnwrk ) info = -15
297  END IF
298 *
299 * Return if some argument is incorrect
300 *
301  IF( info.NE.0 ) THEN
302  CALL pxerbla( ictxt, 'PSROT', -info )
303  RETURN
304  ELSEIF( lquery ) THEN
305  work( 1 ) = float(mnwrk)
306  RETURN
307  END IF
308 *
309 * Quick return if possible
310 *
311  IF( n.EQ.0 ) THEN
312  RETURN
313  END IF
314 *
315 * Extract local leading dimensions
316 *
317  lldx = descx( lld_ )
318  lldy = descy( lld_ )
319 *
320 * If we have only one process, use the corresponding LAPACK
321 * routine and return
322 *
323  IF( nprocs.EQ.1 ) THEN
324  IF( left ) THEN
325  CALL srot( n, x((jx-1)*lldx+ix), lldx, y((jy-1)*lldy+iy),
326  $ lldy, cs, sn )
327  ELSEIF( right ) THEN
328  CALL srot( n, x((jx-1)*lldx+ix), 1, y((jy-1)*lldy+iy),
329  $ 1, cs, sn )
330  END IF
331  RETURN
332  END IF
333 *
334 * Exchange data between processors if necessary and perform planar
335 * rotation
336 *
337  IF( left ) THEN
338  DO 10 indx = 1, npcol
339  IF( myrow.EQ.rsrc1 .AND. xycols.GT.0 ) THEN
340  IF( indx.EQ.1 ) THEN
341  jxx = jx
342  ELSE
343  jxx = jx-icoffxy + (indx-1)*nb
344  END IF
345  CALL infog2l( ix, jxx, descx, nprow, npcol, myrow,
346  $ mycol, xloc1, xloc2, rsrc, csrc )
347  IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc ) THEN
348  IF( rsrc1.NE.rsrc2 ) THEN
349  CALL sgesd2d( ictxt, 1, xycols,
350  $ x((xloc2-1)*lldx+xloc1), lldx,
351  $ rsrc2, csrc )
352  CALL sgerv2d( ictxt, 1, xycols, work, 1,
353  $ rsrc2, csrc )
354  CALL srot( xycols, x((xloc2-1)*lldx+xloc1),
355  $ lldx, work, 1, cs, sn )
356  ELSE
357  CALL infog2l( iy, jxx, descy, nprow, npcol,
358  $ myrow, mycol, yloc1, yloc2, rsrc,
359  $ csrc )
360  CALL srot( xycols, x((xloc2-1)*lldx+xloc1),
361  $ lldx, y((yloc2-1)*lldy+yloc1), lldy, cs,
362  $ sn )
363  END IF
364  END IF
365  END IF
366  IF( myrow.EQ.rsrc2 .AND. rsrc1.NE.rsrc2 ) THEN
367  IF( indx.EQ.1 ) THEN
368  jyy = jy
369  ELSE
370  jyy = jy-icoffxy + (indx-1)*nb
371  END IF
372  CALL infog2l( iy, jyy, descy, nprow, npcol, myrow,
373  $ mycol, yloc1, yloc2, rsrc, csrc )
374  IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc ) THEN
375  CALL sgesd2d( ictxt, 1, xycols,
376  $ y((yloc2-1)*lldy+yloc1), lldy,
377  $ rsrc1, csrc )
378  CALL sgerv2d( ictxt, 1, xycols, work, 1,
379  $ rsrc1, csrc )
380  CALL srot( xycols, work, 1, y((yloc2-1)*lldy+yloc1),
381  $ lldy, cs, sn )
382  END IF
383  END IF
384  10 CONTINUE
385  ELSEIF( right ) THEN
386  DO 20 indx = 1, nprow
387  IF( mycol.EQ.csrc1 .AND. xyrows.GT.0 ) THEN
388  IF( indx.EQ.1 ) THEN
389  ixx = ix
390  ELSE
391  ixx = ix-iroffxy + (indx-1)*mb
392  END IF
393  CALL infog2l( ixx, jx, descx, nprow, npcol, myrow,
394  $ mycol, xloc1, xloc2, rsrc, csrc )
395  IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc ) THEN
396  IF( csrc1.NE.csrc2 ) THEN
397  CALL sgesd2d( ictxt, xyrows, 1,
398  $ x((xloc2-1)*lldx+xloc1), lldx,
399  $ rsrc, csrc2 )
400  CALL sgerv2d( ictxt, xyrows, 1, work, xyrows,
401  $ rsrc, csrc2 )
402  CALL srot( xyrows, x((xloc2-1)*lldx+xloc1),
403  $ 1, work, 1, cs, sn )
404  ELSE
405  CALL infog2l( ixx, jy, descy, nprow, npcol,
406  $ myrow, mycol, yloc1, yloc2, rsrc,
407  $ csrc )
408  CALL srot( xyrows, x((xloc2-1)*lldx+xloc1),
409  $ 1, y((yloc2-1)*lldy+yloc1), 1, cs,
410  $ sn )
411  END IF
412  END IF
413  END IF
414  IF( mycol.EQ.csrc2 .AND. csrc1.NE.csrc2 ) THEN
415  IF( indx.EQ.1 ) THEN
416  iyy = iy
417  ELSE
418  iyy = iy-iroffxy + (indx-1)*mb
419  END IF
420  CALL infog2l( iyy, jy, descy, nprow, npcol, myrow,
421  $ mycol, yloc1, yloc2, rsrc, csrc )
422  IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc ) THEN
423  CALL sgesd2d( ictxt, xyrows, 1,
424  $ y((yloc2-1)*lldy+yloc1), lldy,
425  $ rsrc, csrc1 )
426  CALL sgerv2d( ictxt, xyrows, 1, work, xyrows,
427  $ rsrc, csrc1 )
428  CALL srot( xyrows, work, 1, y((yloc2-1)*lldy+yloc1),
429  $ 1, cs, sn )
430  END IF
431  END IF
432  20 CONTINUE
433  END IF
434 *
435 * Store minimum workspace requirements in WORK-array and return
436 *
437  work( 1 ) = float(mnwrk)
438  RETURN
439 *
440 * End of PSROT
441 *
442  END
max
#define max(A, B)
Definition: pcgemr.c:180
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
psrot
subroutine psrot(N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY, CS, SN, WORK, LWORK, INFO)
Definition: psrot.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
min
#define min(A, B)
Definition: pcgemr.c:181