ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pssepqtq.f
Go to the documentation of this file.
1 *
2 *
3  SUBROUTINE pssepqtq( MS, NV, THRESH, Q, IQ, JQ, DESCQ, C, IC, JC,
4  $ DESCC, PROCDIST, ICLUSTR, GAP, WORK, LWORK,
5  $ QTQNRM, INFO, RES )
6 *
7 * -- ScaLAPACK routine (version 1.7) --
8 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9 * and University of California, Berkeley.
10 * May 1, 1997
11 *
12 * .. Scalar Arguments ..
13  INTEGER IC, INFO, IQ, JC, JQ, LWORK, MS, NV, RES
14  REAL QTQNRM, THRESH
15 * ..
16 * .. Array Arguments ..
17 *
18  INTEGER DESCC( * ), DESCQ( * ), ICLUSTR( * ),
19  $ PROCDIST( * )
20  REAL C( * ), GAP( * ), Q( * ), WORK( * )
21 * ..
22 *
23 * Notes
24 * =====
25 *
26 * Each global data object is described by an associated description
27 * vector. This vector stores the information required to establish
28 * the mapping between an object element and its corresponding process
29 * and memory location.
30 *
31 * Let A be a generic term for any 2D block cyclicly distributed array.
32 * Such a global array has an associated description vector DESCA.
33 * In the following comments, the character _ should be read as
34 * "of the global array".
35 *
36 * NOTATION STORED IN EXPLANATION
37 * --------------- -------------- --------------------------------------
38 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
39 * DTYPE_A = 1.
40 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
41 * the BLACS process grid A is distribu-
42 * ted over. The context itself is glo-
43 * bal, but the handle (the integer
44 * value) may vary.
45 * M_A (global) DESCA( M_ ) The number of rows in the global
46 * array A.
47 * N_A (global) DESCA( N_ ) The number of columns in the global
48 * array A.
49 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
50 * the rows of the array.
51 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
52 * the columns of the array.
53 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
54 * row of the array A is distributed.
55 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
56 * first column of the array A is
57 * distributed.
58 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
59 * array. LLD_A >= MAX(1,LOCr(M_A)).
60 *
61 * Let K be the number of rows or columns of a distributed matrix,
62 * and assume that its process grid has dimension p x q.
63 * LOCr( K ) denotes the number of elements of K that a process
64 * would receive if K were distributed over the p processes of its
65 * process column.
66 * Similarly, LOCc( K ) denotes the number of elements of K that a
67 * process would receive if K were distributed over the q processes of
68 * its process row.
69 * The values of LOCr() and LOCc() may be determined via a call to the
70 * ScaLAPACK tool function, NUMROC:
71 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
72 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
73 * An upper bound for these quantities may be computed by:
74 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
75 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
76 *
77 * Purpose
78 * =======
79 *
80 * Compute |I - QT * Q| / (ulp * n)
81 *
82 * Arguments
83 * =========
84 *
85 * NP = number of local rows in C
86 * NQ = number of local columns in C and Q
87 *
88 * MS (global input) INTEGER
89 * Matrix size.
90 * The number of global rows in Q
91 *
92 * NV (global input) INTEGER
93 * Number of eigenvectors
94 * The number of global columns in C and Q
95 *
96 * THRESH (global input) REAL
97 * A test will count as "failed" if the "error", computed as
98 * described below, exceeds THRESH. Note that the error
99 * is scaled to be O(1), so THRESH should be a reasonably
100 * small multiple of 1, e.g., 10 or 100. In particular,
101 * it should not depend on the precision (single vs. double)
102 * or the size of the matrix. It must be at least zero.
103 *
104 * Q (local input) REAL array,
105 * global dimension (MS, NV), local dimension (LDQ, NQ)
106 *
107 * Contains the eigenvectors as computed by PSSTEIN
108 *
109 * IQ (global input) INTEGER
110 * Q's global row index, which points to the beginning of the
111 * submatrix which is to be operated on.
112 *
113 * JQ (global input) INTEGER
114 * Q's global column index, which points to the beginning of
115 * the submatrix which is to be operated on.
116 *
117 * DESCQ (global and local input) INTEGER array of dimension DLEN_.
118 * The array descriptor for the distributed matrix Q.
119 *
120 * C (local workspace) REAL array,
121 * global dimension (NV, NV), local dimension (DESCC(DLEN_), NQ)
122 *
123 * Accumulator for computing I - QT * Q
124 *
125 * IC (global input) INTEGER
126 * C's global row index, which points to the beginning of the
127 * submatrix which is to be operated on.
128 *
129 * JC (global input) INTEGER
130 * C's global column index, which points to the beginning of
131 * the submatrix which is to be operated on.
132 *
133 * DESCC (global and local input) INTEGER array of dimension DLEN_.
134 * The array descriptor for the distributed matrix C.
135 *
136 * W (input) REAL array, dimension (NV)
137 * All procesors have an identical copy of W()
138 *
139 * Contains the computed eigenvalues
140 *
141 * PROCDIST (global input) INTEGER array dimension (NPROW*NPCOL+1)
142 * Identifies which eigenvectors are the last to be computed
143 * by a given process
144 *
145 * ICLUSTR (global input) INTEGER array dimension (2*P)
146 * This input array contains indices of eigenvectors
147 * corresponding to a cluster of eigenvalues that could not be
148 * orthogonalized due to insufficient workspace.
149 * This should be the output of PSSTEIN.
150 *
151 * GAP (global input) REAL array, dimension (P)
152 * This input array contains the gap between eigenvalues whose
153 * eigenvectors could not be orthogonalized.
154 *
155 * WORK (local workspace) REAL array, dimension (LWORK)
156 *
157 * LWORK (local input) INTEGER
158 * The length of the array WORK.
159 * LWORK >= 2 + MAX( DESCC( MB_ ), 2 )*( 2*NP0+MQ0 )
160 * Where:
161 * NP0 = NUMROC( NV, DESCC( MB_ ), 0, 0, NPROW )
162 * MQ0 = NUMROC( NV, DESCC( NB_ ), 0, 0, NPCOL )
163 *
164 * QTQNRM (global output) REAL
165 * |QTQ -I| / EPS
166 *
167 * RES (global output) INTEGER
168 * 0 if the test passes i.e. |I - QT * Q| / (ulp * n) <= THRESH
169 * 1 if the test fails i.e. |I - QT * Q| / (ulp * n) > THRESH
170 *
171 *
172 * .. Parameters ..
173 *
174  INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
175  $ MB_, NB_, RSRC_, CSRC_, LLD_
176  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
177  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
178  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
179  REAL ZERO, ONE, NEGONE
180  PARAMETER ( ZERO = 0.0e+0, one = 1.0e+0,
181  $ negone = -1.0e+0 )
182 * ..
183 * .. Intrinsic Functions ..
184 *
185  INTRINSIC max, real
186 * ..
187 * .. Local Scalars ..
188  INTEGER CLUSTER, FIRSTP, IMAX, IMIN, JMAX, JMIN, LWMIN,
189  $ MQ0, MYCOL, MYROW, NEXTP, NP0, NPCOL, NPROW
190  REAL NORM, QTQNRM2, ULP
191 * ..
192 * .. External Functions ..
193  INTEGER NUMROC
194  REAL PSLAMCH, PSLANGE
195  EXTERNAL numroc, pslamch, pslange
196 * ..
197 * .. External Subroutines ..
198  EXTERNAL blacs_gridinfo, chk1mat, psgemm, pslaset,
199  $ psmatadd, pxerbla
200 * ..
201 * .. Executable Statements ..
202 * This is just to keep ftnchek happy
203  IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
204  $ rsrc_.LT.0 )RETURN
205 *
206 *
207  res = 0
208  ulp = pslamch( descc( ctxt_ ), 'P' )
209 *
210  CALL blacs_gridinfo( descc( ctxt_ ), nprow, npcol, myrow, mycol )
211  info = 0
212  CALL chk1mat( ms, 1, ms, 2, iq, jq, descq, 7, info )
213  CALL chk1mat( nv, 1, ms, 2, ic, jc, descc, 11, info )
214 *
215  IF( info.EQ.0 ) THEN
216  np0 = numroc( nv, descc( mb_ ), 0, 0, nprow )
217  mq0 = numroc( nv, descc( nb_ ), 0, 0, npcol )
218 *
219  lwmin = 2 + max( descc( mb_ ), 2 )*( 2*np0+mq0 )
220 *
221  IF( iq.NE.1 ) THEN
222  info = -5
223  ELSE IF( jq.NE.1 ) THEN
224  info = -6
225  ELSE IF( ic.NE.1 ) THEN
226  info = -9
227  ELSE IF( jc.NE.1 ) THEN
228  info = -10
229  ELSE IF( lwork.LT.lwmin ) THEN
230  info = -16
231  END IF
232  END IF
233 *
234  IF( info.NE.0 ) THEN
235  CALL pxerbla( descc( ctxt_ ), 'PSSEPQTQ', -info )
236  RETURN
237  END IF
238 *
239 * C = Identity matrix
240 *
241  CALL pslaset( 'A', nv, nv, zero, one, c, ic, jc, descc )
242 *
243 * C = C - QT * Q
244 *
245  IF( nv*ms.GT.0 ) THEN
246  CALL psgemm( 'Transpose', 'N', nv, nv, ms, negone, q, 1, 1,
247  $ descq, q, 1, 1, descq, one, c, 1, 1, descc )
248  END IF
249 *
250 * Allow for poorly orthogonalized eigenvectors for large clusters
251 *
252  norm = pslange( '1', nv, nv, c, 1, 1, descc, work )
253  qtqnrm = norm / ( real( max( ms, 1 ) )*ulp )
254 *
255  cluster = 1
256  10 CONTINUE
257  DO 20 firstp = 1, nprow*npcol
258  IF( procdist( firstp ).GE.iclustr( 2*( cluster-1 )+1 ) )
259  $ GO TO 30
260  20 CONTINUE
261  30 CONTINUE
262 *
263  imin = iclustr( 2*cluster-1 )
264  jmax = iclustr( 2*cluster )
265 *
266 *
267  IF( imin.EQ.0 )
268  $ GO TO 60
269 *
270  DO 40 nextp = firstp, nprow*npcol
271  imax = procdist( nextp )
272  jmin = imax + 1
273 *
274 *
275  CALL psmatadd( imax-imin+1, jmax-jmin+1, zero, c, imin, jmin,
276  $ descc, gap( cluster ) / 0.01e+0, c, imin, jmin,
277  $ descc )
278  CALL psmatadd( jmax-jmin+1, imax-imin+1, zero, c, jmin, imin,
279  $ descc, gap( cluster ) / 0.01e+0, c, jmin, imin,
280  $ descc )
281  imin = imax
282 *
283  IF( iclustr( 2*cluster ).LT.procdist( nextp+1 ) )
284  $ GO TO 50
285  40 CONTINUE
286  50 CONTINUE
287 *
288  cluster = cluster + 1
289  GO TO 10
290  60 CONTINUE
291 *
292 * Compute the norm of C
293 *
294  norm = pslange( '1', nv, nv, c, 1, 1, descc, work )
295 *
296  qtqnrm2 = norm / ( real( max( ms, 1 ) )*ulp )
297 *
298  IF( qtqnrm2.GT.thresh ) THEN
299  res = 1
300  qtqnrm = qtqnrm2
301  END IF
302  RETURN
303 *
304 * End of PSSEPQTQ
305 *
306  END
max
#define max(A, B)
Definition: pcgemr.c:180
psmatadd
subroutine psmatadd(M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC)
Definition: psmatadd.f:3
pssepqtq
subroutine pssepqtq(MS, NV, THRESH, Q, IQ, JQ, DESCQ, C, IC, JC, DESCC, PROCDIST, ICLUSTR, GAP, WORK, LWORK, QTQNRM, INFO, RES)
Definition: pssepqtq.f:6
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
pslaset
subroutine pslaset(UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA)
Definition: psblastst.f:6863