ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdlauu2.f
Go to the documentation of this file.
*  PDLAUU2: unblocked, communication-free product of a triangular
*  factor with its transpose ( U * U' or L' * L ) on a single process.
      SUBROUTINE PDLAUU2( UPLO, N, A, IA, JA, DESCA )
*
*  -- ScaLAPACK auxiliary routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     May 1, 1997
*
      IMPLICIT NONE
*
*     .. Scalar Arguments ..
      CHARACTER          UPLO
      INTEGER            IA, JA, N
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * )
      DOUBLE PRECISION   A( * )
*     ..
*
*  Purpose
*  =======
*
*  PDLAUU2 forms the product U * U' or L' * L, where the triangular
*  factor U or L occupies the upper or lower triangle of the matrix
*  sub( A ) = A(IA:IA+N-1,JA:JA+N-1).
*
*  With UPLO = 'U' or 'u' the upper triangle of the result is kept
*  and overwrites the factor U in sub( A ).
*  With UPLO = 'L' or 'l' the lower triangle of the result is kept
*  and overwrites the factor L in sub( A ).
*
*  This is the unblocked variant of the algorithm, built on Level 2
*  BLAS.  The routine performs no communication: the matrix it works
*  on should be strictly local to one process.  A process that does
*  not own the entry (IA,JA) of the global array returns without
*  touching A.
*
*  Notes
*  =====
*
*  Every global data object comes with a description vector that
*  stores the information needed to map an element of the object to
*  the process holding it and to its location in that process' memory.
*
*  Below, A denotes any 2D block cyclicly distributed array and DESCA
*  its description vector.  The character _ in a name is to be read as
*  "of the global array".
*
*  NOTATION        STORED IN       EXPLANATION
*  --------------- --------------- ------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ ) Descriptor type; here DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ )  BLACS context handle identifying
*                                  the process grid A is distributed
*                                  over.  The context is global, but
*                                  the integer handle may vary.
*  M_A    (global) DESCA( M_ )     Number of rows of the global array.
*  N_A    (global) DESCA( N_ )     Number of columns of the global
*                                  array.
*  MB_A   (global) DESCA( MB_ )    Blocking factor used to distribute
*                                  the rows of the array.
*  NB_A   (global) DESCA( NB_ )    Blocking factor used to distribute
*                                  the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ )  Process row over which the first
*                                  row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ )  Process column over which the first
*                                  column of the array A is
*                                  distributed.
*  LLD_A  (local)  DESCA( LLD_ )   Leading dimension of the local
*                                  array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix
*  whose process grid has dimension p x q.
*  LOCr( K ) is the number of elements of K a process would receive
*  if K were distributed over the p processes of its process column.
*  LOCc( K ) is the number of elements of K a process would receive
*  if K were distributed over the q processes of its process row.
*  Both can be obtained from the ScaLAPACK tool function NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  Upper bounds for these quantities are:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Arguments
*  =========
*
*  UPLO    (global input) CHARACTER*1
*          Tells whether the triangular factor stored in sub( A ) is
*          upper or lower triangular:
*          = 'U':  Upper triangular,
*          = 'L':  Lower triangular.
*          UPLO is not validated here: every value for which
*          LSAME( UPLO, 'U' ) is false takes the lower branch.
*
*  N       (global input) INTEGER
*          The number of rows and columns to be operated on, i.e. the
*          order of the triangular factor U or L.  N >= 0.
*          Only N = 0 triggers the quick return.
*
*  A       (local input/local output) DOUBLE PRECISION pointer into
*          the local memory to an array of dimension
*          (LLD_A, LOCc(JA+N-1)).
*          On entry, the local pieces of the triangular factor L or U.
*          On exit, if UPLO = 'U', the upper triangle of sub( A )
*          holds the upper triangle of the product U * U';
*          if UPLO = 'L', the lower triangle of sub( A ) holds the
*          lower triangle of the product L' * L.
*
*  IA      (global input) INTEGER
*          Row index in the global array A of the first row of
*          sub( A ).
*
*  JA      (global input) INTEGER
*          Column index in the global array A of the first column of
*          sub( A ).
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, DLEN_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9 )
      INTEGER            DTYPE_, CTXT_, M_, N_, MB_, NB_, RSRC_,
     $                   CSRC_, LLD_
      PARAMETER          ( DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4,
     $                     MB_ = 5, NB_ = 6, RSRC_ = 7, CSRC_ = 8,
     $                     LLD_ = 9 )
      DOUBLE PRECISION   ONE
      PARAMETER          ( ONE = 1.0D+0 )
*     ..
*     .. Local Scalars ..
*     KDIAG, KVEC and KTAIL are positions in the one-dimensional
*     local array A; LDLOC is the stride between two consecutive
*     columns of the local matrix.
      INTEGER            I, KDIAG, KTAIL, KVEC, LCOL, LDLOC, LROW,
     $                   MYCOL, MYROW, NPCOL, NPROW, NTAIL, OWNCOL,
     $                   OWNROW
      DOUBLE PRECISION   DII
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, INFOG2L, DGEMV, DSCAL
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      DOUBLE PRECISION   DDOT
      EXTERNAL           DDOT, LSAME
*     ..
*     .. Executable Statements ..
*
*     Nothing to do for an empty matrix.
*
      IF( N.EQ.0 )
     $   RETURN
*
*     Query the process grid, then translate the global position
*     (IA,JA) into local indexes and the coordinates of its owner.
*
      CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL )
      CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, LROW,
     $              LCOL, OWNROW, OWNCOL )
*
*     Only the process owning (IA,JA) does any work.
*
      IF( MYROW.NE.OWNROW .OR. MYCOL.NE.OWNCOL )
     $   RETURN
*
*     KDIAG walks down the diagonal of sub( A ).  KVEC marks the
*     start of the vector updated in the current step: the top of the
*     current column for 'U', the left end of the current row for 'L'.
*
      LDLOC = DESCA( LLD_ )
      KDIAG = LROW + ( LCOL - 1 )*LDLOC
      KVEC = KDIAG
*
      IF( LSAME( UPLO, 'U' ) ) THEN
*
*        Compute the product U * U'.
*
         DO I = 1, N - 1
*
*           NTAIL entries lie to the right of the diagonal in row I;
*           KTAIL points at the first of them.
*
            NTAIL = N - I
            DII = A( KDIAG )
            KTAIL = KDIAG + LDLOC
*
*           New diagonal entry: squared diagonal plus the squared
*           norm of the row tail.
*
            A( KDIAG ) = DII*DII + DDOT( NTAIL, A( KTAIL ), LDLOC,
     $                                   A( KTAIL ), LDLOC )
*
*           Entries above the diagonal in column I: scale by the old
*           diagonal value and add the (I-1)-by-NTAIL block to their
*           right times the row tail.
*
            CALL DGEMV( 'No transpose', I - 1, NTAIL, ONE,
     $                  A( KVEC+LDLOC ), LDLOC, A( KTAIL ), LDLOC,
     $                  DII, A( KVEC ), 1 )
            KDIAG = KDIAG + LDLOC + 1
            KVEC = KVEC + LDLOC
         END DO
*
*        Last column: nothing lies to its right, so it is merely
*        scaled by its own diagonal entry.
*
         DII = A( KDIAG )
         CALL DSCAL( N, DII, A( KVEC ), 1 )
*
      ELSE
*
*        Compute the product L' * L.
*
         DO I = 1, N - 1
*
*           NTAIL entries lie below the diagonal in column I; KTAIL
*           points at the first of them.
*
            NTAIL = N - I
            DII = A( KDIAG )
            KTAIL = KDIAG + 1
*
*           New diagonal entry: squared diagonal plus the squared
*           norm of the column tail.
*
            A( KDIAG ) = DII*DII + DDOT( NTAIL, A( KTAIL ), 1,
     $                                   A( KTAIL ), 1 )
*
*           Entries left of the diagonal in row I: scale by the old
*           diagonal value and add the transposed NTAIL-by-(I-1)
*           block below them times the column tail.
*
            CALL DGEMV( 'Transpose', NTAIL, I - 1, ONE, A( KVEC+1 ),
     $                  LDLOC, A( KTAIL ), 1, DII, A( KVEC ), LDLOC )
            KDIAG = KDIAG + LDLOC + 1
            KVEC = KVEC + 1
         END DO
*
*        Last row: nothing lies below it, so it is merely scaled by
*        its own diagonal entry.
*
         DII = A( KDIAG )
         CALL DSCAL( N, DII, A( KVEC ), LDLOC )
*
      END IF
*
      RETURN
*
*     End of PDLAUU2
*
      END
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pdlauu2
subroutine pdlauu2(UPLO, N, A, IA, JA, DESCA)
Definition: pdlauu2.f:2