ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pzsepqtq.f
Go to the documentation of this file.
1 *
2 *
3  SUBROUTINE pzsepqtq( MS, NV, THRESH, Q, IQ, JQ, DESCQ, C, IC, JC,
4  $ DESCC, PROCDIST, ICLUSTR, GAP, WORK, LWORK,
5  $ QTQNRM, INFO, RES )
6 *
7 * -- ScaLAPACK routine (version 1.7) --
8 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9 * and University of California, Berkeley.
10 * May 1, 1997
11 *
12 * .. Scalar Arguments ..
13  INTEGER IC, INFO, IQ, JC, JQ, LWORK, MS, NV, RES
14  DOUBLE PRECISION QTQNRM, THRESH
15 * ..
16 * .. Array Arguments ..
17 *
18  INTEGER DESCC( * ), DESCQ( * ), ICLUSTR( * ),
19  $ PROCDIST( * )
20  DOUBLE PRECISION GAP( * ), WORK( * )
21  COMPLEX*16 C( * ), Q( * )
22 * ..
23 *
24 * Notes
25 * =====
26 *
27 * Each global data object is described by an associated description
28 * vector. This vector stores the information required to establish
29 * the mapping between an object element and its corresponding process
30 * and memory location.
31 *
32 * Let A be a generic term for any 2D block cyclicly distributed array.
33 * Such a global array has an associated description vector DESCA.
34 * In the following comments, the character _ should be read as
35 * "of the global array".
36 *
37 * NOTATION STORED IN EXPLANATION
38 * --------------- -------------- --------------------------------------
39 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
40 * DTYPE_A = 1.
41 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
42 * the BLACS process grid A is distribu-
43 * ted over. The context itself is glo-
44 * bal, but the handle (the integer
45 * value) may vary.
46 * M_A (global) DESCA( M_ ) The number of rows in the global
47 * array A.
48 * N_A (global) DESCA( N_ ) The number of columns in the global
49 * array A.
50 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
51 * the rows of the array.
52 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
53 * the columns of the array.
54 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
55 * row of the array A is distributed.
56 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
57 * first column of the array A is
58 * distributed.
59 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
60 * array. LLD_A >= MAX(1,LOCr(M_A)).
61 *
62 * Let K be the number of rows or columns of a distributed matrix,
63 * and assume that its process grid has dimension p x q.
64 * LOCr( K ) denotes the number of elements of K that a process
65 * would receive if K were distributed over the p processes of its
66 * process column.
67 * Similarly, LOCc( K ) denotes the number of elements of K that a
68 * process would receive if K were distributed over the q processes of
69 * its process row.
70 * The values of LOCr() and LOCc() may be determined via a call to the
71 * ScaLAPACK tool function, NUMROC:
72 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
73 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
74 * An upper bound for these quantities may be computed by:
75 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
76 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
77 *
78 * Purpose
79 * =======
80 *
81 * Compute |I - QT * Q| / (ulp * n)
82 *
83 * Arguments
84 * =========
85 *
86 * NP = number of local rows in C
87 * NQ = number of local columns in C and Q
88 *
89 * MS (global input) INTEGER
90 * Matrix size.
91 * The number of global rows in Q
92 *
93 * NV (global input) INTEGER
94 * Number of eigenvectors
95 * The number of global columns in C and Q
96 *
97 * THRESH (global input) DOUBLE PRECISION
98 * A test will count as "failed" if the "error", computed as
99 * described below, exceeds THRESH. Note that the error
100 * is scaled to be O(1), so THRESH should be a reasonably
101 * small multiple of 1, e.g., 10 or 100. In particular,
102 * it should not depend on the precision (single vs. double)
103 * or the size of the matrix. It must be at least zero.
104 *
105 * Q (local input) COMPLEX*16 array,
106 * global dimension (MS, NV), local dimension (LDQ, NQ)
107 *
108 * Contains the eigenvectors as computed by PZSTEIN
109 *
110 * IQ (global input) INTEGER
111 * Q's global row index, which points to the beginning of the
112 * submatrix which is to be operated on.
113 *
114 * JQ (global input) INTEGER
115 * Q's global column index, which points to the beginning of
116 * the submatrix which is to be operated on.
117 *
118 * DESCQ (global and local input) INTEGER array of dimension DLEN_.
119 * The array descriptor for the distributed matrix Q.
120 *
121 * C (local workspace) COMPLEX*16 array,
122 * global dimension (NV, NV), local dimension (DESCC(DLEN_), NQ)
123 *
124 * Accumulator for computing I - QT * Q
125 *
126 * IC (global input) INTEGER
127 * C's global row index, which points to the beginning of the
128 * submatrix which is to be operated on.
129 *
130 * JC (global input) INTEGER
131 * C's global column index, which points to the beginning of
132 * the submatrix which is to be operated on.
133 *
134 * DESCC (global and local input) INTEGER array of dimension DLEN_.
135 * The array descriptor for the distributed matrix C.
136 *
137 * W (input) DOUBLE PRECISION array, dimension (NV)
138 * All procesors have an identical copy of W()
139 *
140 * Contains the computed eigenvalues
141 *
142 * PROCDIST (global input) INTEGER array dimension (NPROW*NPCOL+1)
143 * Identifies which eigenvectors are the last to be computed
144 * by a given process
145 *
146 * ICLUSTR (global input) INTEGER array dimension (2*P)
147 * This input array contains indices of eigenvectors
148 * corresponding to a cluster of eigenvalues that could not be
149 * orthogonalized due to insufficient workspace.
150 * This should be the output of PZSTEIN.
151 *
152 * GAP (global input) DOUBLE PRECISION array, dimension (P)
153 * This input array contains the gap between eigenvalues whose
154 * eigenvectors could not be orthogonalized.
155 *
156 * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK)
157 *
158 * LWORK (local input) INTEGER
159 * The length of the array WORK.
160 * LWORK >= 2 + MAX( DESCC( MB_ ), 2 )*( 2*NP0+MQ0 )
161 * Where:
162 * NP0 = NUMROC( NV, DESCC( MB_ ), 0, 0, NPROW )
163 * MQ0 = NUMROC( NV, DESCC( NB_ ), 0, 0, NPCOL )
164 *
165 * QTQNRM (global output) DOUBLE PRECISION
166 * |QTQ -I| / EPS
167 *
168 * RES (global output) INTEGER
169 * 0 if the test passes i.e. |I - QT * Q| / (ulp * n) <= THRESH
170 * 1 if the test fails i.e. |I - QT * Q| / (ulp * n) > THRESH
171 *
172 *
173 * .. Parameters ..
174 *
175  INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
176  $ MB_, NB_, RSRC_, CSRC_, LLD_
177  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
178  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
179  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
180  COMPLEX*16 ZERO, ONE, NEGONE
181  PARAMETER ( ZERO = 0.0d+0, one = 1.0d+0,
182  $ negone = -1.0d+0 )
183 * ..
184 * .. Intrinsic Functions ..
185 *
186  INTRINSIC dble, dcmplx, max
187 * ..
188 * .. Local Scalars ..
189  INTEGER CLUSTER, FIRSTP, IMAX, IMIN, JMAX, JMIN, LWMIN,
190  $ MQ0, MYCOL, MYROW, NEXTP, NP0, NPCOL, NPROW
191  DOUBLE PRECISION NORM, QTQNRM2, ULP
192 * ..
193 * .. External Functions ..
194  INTEGER NUMROC
195  DOUBLE PRECISION PDLAMCH, PZLANGE
196  EXTERNAL numroc, pdlamch, pzlange
197 * ..
198 * .. External Subroutines ..
199  EXTERNAL blacs_gridinfo, chk1mat, pxerbla, pzgemm,
200  $ pzlaset, pzmatadd
201 * ..
202 * .. Executable Statements ..
203 * This is just to keep ftnchek happy
204  IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
205  $ rsrc_.LT.0 )RETURN
206 *
207 *
208  res = 0
209  ulp = pdlamch( descc( ctxt_ ), 'P' )
210 *
211  CALL blacs_gridinfo( descc( ctxt_ ), nprow, npcol, myrow, mycol )
212  info = 0
213  CALL chk1mat( ms, 1, ms, 2, iq, jq, descq, 7, info )
214  CALL chk1mat( nv, 1, ms, 2, ic, jc, descc, 11, info )
215 *
216  IF( info.EQ.0 ) THEN
217  np0 = numroc( nv, descc( mb_ ), 0, 0, nprow )
218  mq0 = numroc( nv, descc( nb_ ), 0, 0, npcol )
219 *
220  lwmin = 2 + max( descc( mb_ ), 2 )*( 2*np0+mq0 )
221 *
222  IF( iq.NE.1 ) THEN
223  info = -5
224  ELSE IF( jq.NE.1 ) THEN
225  info = -6
226  ELSE IF( ic.NE.1 ) THEN
227  info = -9
228  ELSE IF( jc.NE.1 ) THEN
229  info = -10
230  ELSE IF( lwork.LT.lwmin ) THEN
231  info = -16
232  END IF
233  END IF
234 *
235  IF( info.NE.0 ) THEN
236  CALL pxerbla( descc( ctxt_ ), 'PZSEPQTQ', -info )
237  RETURN
238  END IF
239 *
240 * C = Identity matrix
241 *
242  CALL pzlaset( 'A', nv, nv, zero, one, c, ic, jc, descc )
243 *
244 * C = C - QT * Q
245 *
246  IF( nv*ms.GT.0 ) THEN
247  CALL pzgemm( 'Conjugate transpose', 'N', nv, nv, ms, negone, q,
248  $ 1, 1, descq, q, 1, 1, descq, one, c, 1, 1, descc )
249  END IF
250 *
251 * Allow for poorly orthogonalized eigenvectors for large clusters
252 *
253  norm = pzlange( '1', nv, nv, c, 1, 1, descc, work )
254  qtqnrm = norm / ( dble( max( ms, 1 ) )*ulp )
255 *
256  cluster = 1
257  10 CONTINUE
258  DO 20 firstp = 1, nprow*npcol
259  IF( procdist( firstp ).GE.iclustr( 2*( cluster-1 )+1 ) )
260  $ GO TO 30
261  20 CONTINUE
262  30 CONTINUE
263 *
264  imin = iclustr( 2*cluster-1 )
265  jmax = iclustr( 2*cluster )
266 *
267 *
268  IF( imin.EQ.0 )
269  $ GO TO 60
270 *
271  DO 40 nextp = firstp, nprow*npcol
272  imax = procdist( nextp )
273  jmin = imax + 1
274 *
275 *
276  CALL pzmatadd( imax-imin+1, jmax-jmin+1, zero, c, imin, jmin,
277  $ descc, dcmplx( gap( cluster ) / 0.01d+0 ), c,
278  $ imin, jmin, descc )
279  CALL pzmatadd( jmax-jmin+1, imax-imin+1, zero, c, jmin, imin,
280  $ descc, dcmplx( gap( cluster ) / 0.01d+0 ), c,
281  $ jmin, imin, descc )
282  imin = imax
283 *
284  IF( iclustr( 2*cluster ).LT.procdist( nextp+1 ) )
285  $ GO TO 50
286  40 CONTINUE
287  50 CONTINUE
288 *
289  cluster = cluster + 1
290  GO TO 10
291  60 CONTINUE
292 *
293 * Compute the norm of C
294 *
295  norm = pzlange( '1', nv, nv, c, 1, 1, descc, work )
296 *
297  qtqnrm2 = norm / ( dble( max( ms, 1 ) )*ulp )
298 *
299  IF( qtqnrm2.GT.thresh ) THEN
300  res = 1
301  qtqnrm = qtqnrm2
302  END IF
303  RETURN
304 *
305 * End of PZSEPQTQ
306 *
307  END
max
#define max(A, B)
Definition: pcgemr.c:180
pzlaset
subroutine pzlaset(UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA)
Definition: pzblastst.f:7509
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
pzmatadd
subroutine pzmatadd(M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC)
Definition: pzmatadd.f:3
pzsepqtq
subroutine pzsepqtq(MS, NV, THRESH, Q, IQ, JQ, DESCQ, C, IC, JC, DESCC, PROCDIST, ICLUSTR, GAP, WORK, LWORK, QTQNRM, INFO, RES)
Definition: pzsepqtq.f:6