ScaLAPACK 2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdlatran.f
      SUBROUTINE PDLATRAN( N, NB, A, IA, JA, DESCA, WORK )
*
*  -- ScaLAPACK auxiliary routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     October 15, 1999
*
*     .. Scalar Arguments ..
      INTEGER            IA, JA, N, NB
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * )
      DOUBLE PRECISION   A( * ), WORK( * )
*     ..
*
*  Purpose
*  =======
*
*  PDLATRAN transposes the lower triangular part of a matrix onto the
*  upper triangular portion of the same matrix.
*
*  This is an auxiliary routine called by PDSYTRD.
*
*  Notes
*  =====
*
*  IA must equal 1
*  JA must equal 1
*  DESCA( MB_ ) must equal 1
*  DESCA( NB_ ) must equal 1
*  DESCA( RSRC_ ) must equal 0
*  DESCA( CSRC_ ) must equal 0
*
*
*  Arguments
*  =========
*
*  N       (global input) INTEGER
*          The size of the matrix to be transposed.
*
*  NB      (global input) INTEGER
*          The number of rows and columns to be transposed with each
*          message sent.  NB has no impact on the result; it is
*          strictly a performance tuning parameter.
*
*  A       (local input/local output) DOUBLE PRECISION pointer into
*          the local memory to an array of dimension
*          (LLD_A,LOCc(JA+N-1)).  On entry, this array contains the
*          local pieces of the symmetric distributed matrix sub( A ),
*          whose leading N-by-N lower triangular part holds the lower
*          triangular part of the matrix.  On exit, the strictly upper
*          triangular part of sub( A ) is overwritten with the
*          transpose of the strictly lower triangular part; the lower
*          triangular part is unchanged.
*
*  IA      (global input) INTEGER
*          A's global row index, which points to the beginning of the
*          submatrix which is to be operated on.
*          Must be equal to 1.
*
*  JA      (global input) INTEGER
*          A's global column index, which points to the beginning of
*          the submatrix which is to be operated on.
*          Must be equal to 1.
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*          DESCA( MB_ ) must equal 1
*          DESCA( NB_ ) must equal 1
*          DESCA( CTXT_ ) must point to a square process grid,
*          i.e. one where NPROW is equal to NPCOL.
*
*  WORK    (local workspace) DOUBLE PRECISION array, dimension ( LWORK )
*
*          Where:
*          LWORK >= NB * NUMROC( N, 1, 0, 0, NPROW )
*
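*          As a worked example (the numbers are illustrative only):
*          with N = 1000 on a 4 x 4 process grid and NB = 16,
*          NUMROC( 1000, 1, 0, 0, 4 ) = 250, so LWORK must be at
*          least 16 * 250 = 4000 on every process.
*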
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
     $                   MB_, NB_, RSRC_, CSRC_, LLD_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
*     ..
*     .. Local Scalars ..
      INTEGER            I, ICTXT, IRECV, ISEND, J, JJ, JRECV, JSEND,
     $                   LDA, MAXIRECV, MAXISEND, MAXJRECV, MAXJSEND,
     $                   MINIRECV, MINISEND, MINJRECV, MINJSEND, MYCOL,
     $                   MYROW, NP, NPCOL, NPROW, NQ, RECVNB, SENDNB,
     $                   STARTCOL, STARTROW
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, DTRRV2D, DTRSD2D
*     ..
*     .. External Functions ..
      INTEGER            NUMROC
      EXTERNAL           NUMROC
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN
*     ..
*     .. Executable Statements ..
*     This is just to keep ftnchek and toolpack/1 happy
      IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_*
     $    RSRC_.LT.0 )RETURN
*
*     Further details
*
*     Because the process grid is square, each process need only send
*     data to its transpose process.  (Likewise, it need only receive
*     data from its transpose process.)  Because the data decomposition
*     is cyclic, the local portion of the array is triangular.
*
*     This routine requires that the data be buffered (i.e. copied)
*     on the sending process (because of the triangular shape) and
*     unbuffered on the receiving process.  Hence, two local memory-to-
*     memory copies are performed within the communication routines,
*     followed by a memory-to-memory copy outside of the communication
*     routines.  It would be nice to avoid the back-to-back memory-to-
*     memory copies (as we do presently on the receiving process).
*     This could be done by packing the data ourselves in the sender
*     and then unpacking it directly into the matrix.  However, this
*     code seems cleaner, and since this routine is not a significant
*     performance bottleneck we have left it this way.
*
*
*     Quick return if possible
*
      IF( N.LE.0 )
     $   RETURN
*
      ICTXT = DESCA( CTXT_ )
      LDA = DESCA( LLD_ )
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
      NP = NUMROC( N, 1, MYROW, 0, NPROW )
      NQ = NUMROC( N, 1, MYCOL, 0, NPCOL )
*
      IF( MYROW.EQ.MYCOL ) THEN
*
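*        Diagonal process: the transpose partner is this process
*        itself, so copy the local strictly lower triangle onto the
*        local strictly upper triangle in place; no communication
*        is needed.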
         DO 20 J = 1, NP
            DO 10 I = J + 1, NQ
               A( J+( I-1 )*LDA ) = A( I+( J-1 )*LDA )
   10       CONTINUE
   20    CONTINUE
*
      ELSE
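*
*        Off-diagonal process: exchange data with the transpose
*        process ( MYCOL, MYROW ).  The starting offsets chosen below
*        differ by one depending on which side of the diagonal this
*        process lies, so that the local diagonal entries are treated
*        as part of the global lower or upper triangle as appropriate.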
         IF( MYROW.GT.MYCOL ) THEN
            STARTROW = 1
            STARTCOL = 2
         ELSE
            IF( MYROW.EQ.MYCOL ) THEN
               STARTROW = 2
               STARTCOL = 2
            ELSE
               STARTROW = 2
               STARTCOL = 1
            END IF
         END IF
*
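*        Loop over the local columns in panels of at most NB columns;
*        ISEND x JSEND and IRECV x JRECV are the dimensions of the
*        trapezoids sent and received in each pass.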
         DO 50 JJ = 1, MAX( NP, NQ ), NB
            MINJSEND = STARTCOL + JJ - 1
            MINJRECV = STARTROW + JJ - 1
            MAXJSEND = MIN( MINJSEND+NB-1, NQ )
            MAXJRECV = MIN( MINJRECV+NB-1, NP )
*
            SENDNB = MAXJSEND - MINJSEND + 1
            RECVNB = MAXJRECV - MINJRECV + 1
*
            MINISEND = 1
            MINIRECV = 1
            MAXISEND = MIN( NP, JJ+SENDNB-1 )
            MAXIRECV = MIN( NQ, JJ+RECVNB-1 )
*
            ISEND = MAXISEND - MINISEND + 1
            IRECV = MAXIRECV - MINIRECV + 1
            JSEND = MAXJSEND - MINJSEND + 1
            JRECV = MAXJRECV - MINJRECV + 1
*
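*           Pack the current trapezoid of the local lower part,
*           transposed, into WORK; the matching receive below unpacks
*           the incoming trapezoid directly into A.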
            DO 40 J = MINJRECV, MAXJRECV
               DO 30 I = MINIRECV, MAXIRECV + J - MAXJRECV
                  WORK( I+( J-MINJRECV )*IRECV ) = A( J+( I-1 )*LDA )
   30          CONTINUE
   40       CONTINUE
*
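*           Send the packed trapezoid to the transpose process
*           ( MYCOL, MYROW ) and receive the corresponding trapezoid
*           from it directly into the local part of the upper
*           triangle of A.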
            IF( IRECV.GT.0 .AND. JRECV.GT.0 )
     $         CALL DTRSD2D( ICTXT, 'U', 'N', IRECV, JRECV, WORK,
     $                       IRECV, MYCOL, MYROW )
*
            IF( ISEND.GT.0 .AND. JSEND.GT.0 )
     $         CALL DTRRV2D( ICTXT, 'U', 'N', ISEND, JSEND,
     $                       A( MINISEND+( MINJSEND-1 )*LDA ), LDA,
     $                       MYCOL, MYROW )
*
   50    CONTINUE
*
      END IF
*
      RETURN
*
*     End of PDLATRAN
*
      END
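
A minimal calling sketch follows. It is illustrative only: the problem size N, the tuning parameter NB, and the 2 x 2 process grid (run on exactly four processes) are arbitrary choices made for this sketch, and the code that fills the local pieces of A is omitted. It sets up the square process grid and the 1-by-1 block-cyclic descriptor with RSRC = CSRC = 0 required by the Notes above, and sizes WORK from the LWORK formula.

      PROGRAM PDLATRANEX
*     Illustrative driver for PDLATRAN; the values below are choices
*     made for this sketch, not requirements of the routine.
      INTEGER            N, NB, NPR, MAXLOC
      PARAMETER          ( N = 1000, NB = 16, NPR = 2,
     $                     MAXLOC = ( N+NPR-1 ) / NPR )
      INTEGER            DESCA( 9 ), ICTXT, INFO, MYCOL, MYROW, NP,
     $                   NPCOL, NPROW, NQ
      DOUBLE PRECISION   A( MAXLOC*MAXLOC ), WORK( NB*MAXLOC )
      INTEGER            NUMROC
      EXTERNAL           NUMROC
      EXTERNAL           BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT,
     $                   BLACS_GRIDINFO, BLACS_GRIDINIT, DESCINIT,
     $                   PDLATRAN
      INTRINSIC          MAX
*
*     Square NPR x NPR process grid (PDLATRAN needs NPROW = NPCOL).
      CALL BLACS_GET( -1, 0, ICTXT )
      CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPR, NPR )
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*     1 x 1 block-cyclic descriptor with RSRC = CSRC = 0.
      NP = NUMROC( N, 1, MYROW, 0, NPROW )
      NQ = NUMROC( N, 1, MYCOL, 0, NPCOL )
      CALL DESCINIT( DESCA, N, N, 1, 1, 0, 0, ICTXT, MAX( 1, NP ),
     $               INFO )
*
*     ... fill the local pieces of the lower triangle of A here ...
*
*     Copy the lower triangle of A onto its upper triangle.
      CALL PDLATRAN( N, NB, A, 1, 1, DESCA, WORK )
*
      CALL BLACS_GRIDEXIT( ICTXT )
      CALL BLACS_EXIT( 0 )
      END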