SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pslatran.f
Go to the documentation of this file.
1 SUBROUTINE pslatran( N, NB, A, IA, JA, DESCA, WORK )
2*
3* -- ScaLAPACK auxiliary routine (version 1.7) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* October 15, 1999
7*
8* .. Scalar Arguments ..
9 INTEGER IA, JA, N, NB
10* ..
11* .. Array Arguments ..
12 INTEGER DESCA( * )
13 REAL A( * ), WORK( * )
14* ..
15*
16* Purpose
17*
18* =======
19*
20* PSLATRAN transpose a lower triangular matrix on to the upper
21* triangular portion of the same matrix.
22*
23* This is an auxiliary routine called by PSSYTRD.
24*
25* Notes
26* =====
27*
28* IA must equal 1
29* JA must equal 1
30* DESCA( MB_ ) must equal 1
31* DESCA( NB_ ) must equal 1
32* DESCA( RSRC_ ) must equal 1
33* DESCA( CSRC_ ) must equal 1
34*
35*
36* Arguments
37* =========
38*
39* N (global input) INTEGER
40* The size of the matrix to be transposed.
41*
42* NB (global input) INTEGER
43* The number of rows and columns to be transposed with each
44* message sent. NB has no impact on the result, it is striclty
45* a performance tuning parameter.
46*
47* A (local input/local output) COMPLEX*16 pointer into the
48* local memory to an array of dimension (LLD_A,LOCc(JA+N-1)).
49* On entry, this array contains the local pieces of the
50* Hermitian distributed matrix sub( A ). On entry, the
51* leading N-by-N upper triangular part of sub( A ) contains
52* the upper triangular part of the matrix. On exit, the
53* leading N-by-N lower triangular part of sub( A ) contains the
54* lower triangular part of the matrix, and its strictly upper
55* triangular part is undefined (and may have been modified).
56*
57* IA (global input) INTEGER
58* A's global row index, which points to the beginning of the
59* submatrix which is to be operated on.
60* Must be equal to 1.
61*
62* JA (global input) INTEGER
63* A's global column index, which points to the beginning of
64* the submatrix which is to be operated on.
65* Must be equal to 1.
66*
67* DESCA (global and local input) INTEGER array of dimension DLEN_.
68* The array descriptor for the distributed matrix A.
69* DESCA( MB_ ) must equal 1
70* DESCA( NB_ ) must equal 1
71* DESCA( ICTXT_ ) must point to a square process grid
72* i.e. one where NPROW is equal to NPCOL
73*
74* WORK (local workspace) COMPLEX*16 array, dimension ( LWORK )
75*
76* Where:
77* LWORK >= NB * NUMROC( N, 1, 0, 0, NPROW )
78*
79* =====================================================================
80*
81* .. Parameters ..
82 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
83 $ MB_, NB_, RSRC_, CSRC_, LLD_
84 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
85 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
86 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
87* ..
88* .. Local Scalars ..
89 INTEGER I, ICTXT, IRECV, ISEND, J, JJ, JRECV, JSEND,
90 $ LDA, MAXIRECV, MAXISEND, MAXJRECV, MAXJSEND,
91 $ MINIRECV, MINISEND, MINJRECV, MINJSEND, MYCOL,
92 $ MYROW, NP, NPCOL, NPROW, NQ, RECVNB, SENDNB,
93 $ STARTCOL, STARTROW
94* ..
95* .. External Subroutines ..
96 EXTERNAL blacs_gridinfo, strrv2d, strsd2d
97* ..
98* .. External Functions ..
99 INTEGER NUMROC
100 EXTERNAL numroc
101* ..
102* .. Intrinsic Functions ..
103 INTRINSIC max, min
104* ..
105* .. Executable Statements ..
106* This is just to keep ftnchek and toolpack/1 happy
107 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
108 $ rsrc_.LT.0 )RETURN
109*
110* Further details
111*
112* Because the processor grid is square each process needs only send
113* data to its transpose process. (Likewsie it need only receive
114* data from its transpose process.) Because the data decomposition
115* is cyclic, the local portion of the array is triangular.
116*
117* This routine requires that the data be buffered (i.e. copied)
118* on the sending process (because of the triangular shape) and
119* unbuffered on the receiving process. Hence, two local memory to
120* memory copies are performed within the communications routines
121* followed by a memory to memory copy outside of the communications
122* routines. It would be nice to avoid having back to back memory
123* to memory copies (as we do presently on the receiving processor).
124* This could be done by packaging the data ourselves in the sender
125* and then unpacking it directly into the matrix. However, this
126* code seems cleaner and so since this routine is not a significant
127* performance bottleneck we have left it this way.
128*
129*
130*
131*
132* Quick return if possible
133*
134 IF( n.LE.0 )
135 $ RETURN
136*
137 ictxt = desca( ctxt_ )
138 lda = desca( lld_ )
139 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
140*
141*
142 np = numroc( n, 1, myrow, 0, nprow )
143 nq = numroc( n, 1, mycol, 0, npcol )
144*
145*
146 IF( myrow.EQ.mycol ) THEN
147*
148 DO 20 j = 1, np
149 DO 10 i = j + 1, nq
150 a( j+( i-1 )*lda ) = a( i+( j-1 )*lda )
151 10 CONTINUE
152 20 CONTINUE
153*
154 ELSE
155 IF( myrow.GT.mycol ) THEN
156 startrow = 1
157 startcol = 2
158 ELSE
159 IF( myrow.EQ.mycol ) THEN
160 startrow = 2
161 startcol = 2
162 ELSE
163 startrow = 2
164 startcol = 1
165 END IF
166 END IF
167*
168 DO 50 jj = 1, max( np, nq ), nb
169 minjsend = startcol + jj - 1
170 minjrecv = startrow + jj - 1
171 maxjsend = min( minjsend+nb-1, nq )
172 maxjrecv = min( minjrecv+nb-1, np )
173*
174 sendnb = maxjsend - minjsend + 1
175 recvnb = maxjrecv - minjrecv + 1
176*
177 minisend = 1
178 minirecv = 1
179 maxisend = min( np, jj+sendnb-1 )
180 maxirecv = min( nq, jj+recvnb-1 )
181*
182 isend = maxisend - minisend + 1
183 irecv = maxirecv - minirecv + 1
184 jsend = maxjsend - minjsend + 1
185 jrecv = maxjrecv - minjrecv + 1
186*
187*
188*
189 DO 40 j = minjrecv, maxjrecv
190 DO 30 i = minirecv, maxirecv + j - maxjrecv
191 work( i+( j-minjrecv )*irecv ) = a( j+( i-1 )*lda )
192 30 CONTINUE
193 40 CONTINUE
194*
195 IF( irecv.GT.0 .AND. jrecv.GT.0 )
196 $ CALL strsd2d( ictxt, 'U', 'N', irecv, jrecv, work, irecv,
197 $ mycol, myrow )
198*
199 IF( isend.GT.0 .AND. jsend.GT.0 )
200 $ CALL strrv2d( ictxt, 'U', 'N', isend, jsend,
201 $ a( minisend+( minjsend-1 )*lda ), lda,
202 $ mycol, myrow )
203*
204*
205 50 CONTINUE
206*
207 END IF
208*
209 RETURN
210*
211* End of PSLATRD
212*
213 END
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pslatran(n, nb, a, ia, ja, desca, work)
Definition pslatran.f:2