ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pcdtinfo.f
Go to the documentation of this file.
1  SUBROUTINE pcdtinfo( SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW,
2  $ BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL,
3  $ NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL,
4  $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH,
5  $ WORK, IAM, NPROCS )
6 *
7 *
8 *
9 * -- ScaLAPACK routine (version 1.7) --
10 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
11 * and University of California, Berkeley.
12 * November 15, 1997
13 *
14 * .. Scalar Arguments ..
15  CHARACTER TRANS
16  CHARACTER*(*) SUMMRY
17  INTEGER IAM,
18  $ LDBWVAL, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL,
19  $ ldpval, ldqval, ngrids, nmat, nnb, nnbr, nbw,
20  $ nprocs, nnr, nout
21  REAL THRESH
22 * ..
23 * .. Array Arguments ..
24  INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ),
25  $ nrval( ldnrval ), nval( ldnval ),
26  $ bwlval( ldbwval),bwuval( ldbwval),
27  $ pval( ldpval ), qval(ldqval), work( * )
28 * ..
29 *
30 * Purpose
31 * =======
32 *
33 * PCDTINFO get needed startup information for band factorization
34 * and transmits it to all processes.
35 *
36 * Arguments
37 * =========
38 *
39 * SUMMRY (global output) CHARACTER*(*)
40 * Name of output (summary) file (if any). Only defined for
41 * process 0.
42 *
43 * NOUT (global output) INTEGER
44 * The unit number for output file. NOUT = 6, ouput to screen,
45 * NOUT = 0, output to stderr. Only defined for process 0.
46 *
47 *
48 * NMAT (global output) INTEGER
49 * The number of different values that can be used for N.
50 *
51 * NVAL (global output) INTEGER array, dimension (LDNVAL)
52 * The values of N (number of columns in matrix) to run the
53 * code with.
54 *
55 * NBW (global output) INTEGER
56 * The number of different values that can be used for @bw@.
57 * BWLVAL (global output) INTEGER array, dimension (LDNVAL)
58 * The values of BWL (number of subdiagonals in matrix) to run
59 * the code with.
60 * BWUVAL (global output) INTEGER array, dimension (LDNVAL)
61 * The values of BW (number of supdiagonals in matrix) to run
62 * the code with.
63 *
64 * LDNVAL (global input) INTEGER
65 * The maximum number of different values that can be used for
66 * N, LDNVAL > = NMAT.
67 *
68 * NNB (global output) INTEGER
69 * The number of different values that can be used for NB.
70 *
71 * NBVAL (global output) INTEGER array, dimension (LDNBVAL)
72 * The values of NB (blocksize) to run the code with.
73 *
74 * LDNBVAL (global input) INTEGER
75 * The maximum number of different values that can be used for
76 * NB, LDNBVAL >= NNB.
77 *
78 * NNR (global output) INTEGER
79 * The number of different values that can be used for NRHS.
80 *
81 * NRVAL (global output) INTEGER array, dimension(LDNRVAL)
82 * The values of NRHS (# of Right Hand Sides) to run the code
83 * with.
84 *
85 * LDNRVAL (global input) INTEGER
86 * The maximum number of different values that can be used for
87 * NRHS, LDNRVAL >= NNR.
88 *
89 * NNBR (global output) INTEGER
90 * The number of different values that can be used for NBRHS.
91 *
92 * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL)
93 * The values of NBRHS (RHS blocksize) to run the code with.
94 *
95 * LDNBRVAL (global input) INTEGER
96 * The maximum number of different values that can be used for
97 * NBRHS, LDNBRVAL >= NBRVAL.
98 *
99 * NGRIDS (global output) INTEGER
100 * The number of different values that can be used for P & Q.
101 *
102 * PVAL (global output) INTEGER array, dimension (LDPVAL)
103 * Not used (will be returned as all 1s) since proc grid is 1D
104 *
105 * LDPVAL (global input) INTEGER
106 * The maximum number of different values that can be used for
107 * P, LDPVAL >= NGRIDS.
108 *
109 * QVAL (global output) INTEGER array, dimension (LDQVAL)
110 * The values of Q (number of process columns) to run the code
111 * with.
112 *
113 * LDQVAL (global input) INTEGER
114 * The maximum number of different values that can be used for
115 * Q, LDQVAL >= NGRIDS.
116 *
117 * THRESH (global output) REAL
118 * Indicates what error checks shall be run and printed out:
119 * = 0 : Perform no error checking
120 * > 0 : report all residuals greater than THRESH, perform
121 * factor check only if solve check fails
122 *
123 * WORK (local workspace) INTEGER array of dimension >=
124 * MAX( 8, LDNVAL+2*LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL
125 * $ +3*LDNVAL)
126 * Used to pack input arrays in order to send info in one
127 * message.
128 *
129 * IAM (local input) INTEGER
130 * My process number.
131 *
132 * NPROCS (global input) INTEGER
133 * The total number of processes.
134 *
135 * ======================================================================
136 *
137 * Note: For packing the information we assumed that the length in bytes
138 * ===== of an integer is equal to the length in bytes of a real single
139 * precision.
140 *
141 * =====================================================================
142 *
143 * Code Developer: Andrew J. Cleary, University of Tennessee.
144 * Current address: Lawrence Livermore National Labs.
145 * This version released: August, 2001.
146 *
147 * ======================================================================
148 *
149 * .. Parameters ..
150  INTEGER NIN
151  PARAMETER ( NIN = 11 )
152 * ..
153 * .. Local Scalars ..
154  INTEGER I, ICTXT
155  CHARACTER*79 USRINFO
156  REAL EPS
157 * ..
158 * .. External Subroutines ..
159  EXTERNAL blacs_abort, blacs_get, blacs_gridexit,
160  $ blacs_gridinit, blacs_setup, icopy, igebr2d,
161  $ igebs2d, sgebr2d, sgebs2d
162 * ..
163 * .. External Functions ..
164  LOGICAL LSAME
165  REAL PSLAMCH
166  EXTERNAL LSAME, PSLAMCH
167 * ..
168 * .. Intrinsic Functions ..
169  INTRINSIC max, min
170 * ..
171 * .. Executable Statements ..
172 *
173 * Process 0 reads the input data, broadcasts to other processes and
174 * writes needed information to NOUT
175 *
176  IF( iam.EQ.0 ) THEN
177 *
178 * Open file and skip data file header
179 *
180  OPEN( nin, file = 'BLU.dat', status = 'OLD' )
181  READ( nin, fmt = * ) summry
182  summry = ' '
183 *
184 * Read in user-supplied info about machine type, compiler, etc.
185 *
186  READ( nin, fmt = 9999 ) usrinfo
187 *
188 * Read name and unit number for summary output file
189 *
190  READ( nin, fmt = * ) summry
191  READ( nin, fmt = * ) nout
192  IF( nout.NE.0 .AND. nout.NE.6 )
193  $ OPEN( nout, file = summry, status = 'UNKNOWN' )
194 *
195 * Read and check the parameter values for the tests.
196 *
197 * Get TRANS
198 *
199  READ( nin, fmt = * ) trans
200 *
201 *
202 * Get number of matrices and their dimensions
203 *
204  READ( nin, fmt = * ) nmat
205  IF( nmat.LT.1 .OR. nmat.GT.ldnval ) THEN
206  WRITE( nout, fmt = 9994 ) 'N', ldnval
207  GO TO 20
208  END IF
209  READ( nin, fmt = * ) ( nval( i ), i = 1, nmat )
210 *
211 * Get bandwidths
212 *
213  READ( nin, fmt = * ) nbw
214  nbw = 1
215  IF( nbw.LT.1 .OR. nbw.GT.ldbwval ) THEN
216  WRITE( nout, fmt = 9994 ) 'BW', ldbwval
217  GO TO 20
218  END IF
219  READ( nin, fmt = * ) ( bwlval( i ), i = 1, nbw )
220  READ( nin, fmt = * ) ( bwuval( i ), i = 1, nbw )
221 *
222 * Get values of NB
223 *
224  READ( nin, fmt = * ) nnb
225  IF( nnb.LT.1 .OR. nnb.GT.ldnbval ) THEN
226  WRITE( nout, fmt = 9994 ) 'NB', ldnbval
227  GO TO 20
228  END IF
229  READ( nin, fmt = * ) ( nbval( i ), i = 1, nnb )
230 *
231 * Get values of NRHS
232 *
233  READ( nin, fmt = * ) nnr
234  IF( nnr.LT.1 .OR. nnr.GT.ldnrval ) THEN
235  WRITE( nout, fmt = 9994 ) 'NRHS', ldnrval
236  GO TO 20
237  END IF
238  READ( nin, fmt = * ) ( nrval( i ), i = 1, nnr )
239 *
240 * Get values of NBRHS
241 *
242  READ( nin, fmt = * ) nnbr
243  IF( nnbr.LT.1 .OR. nnbr.GT.ldnbrval ) THEN
244  WRITE( nout, fmt = 9994 ) 'NBRHS', ldnbrval
245  GO TO 20
246  END IF
247  READ( nin, fmt = * ) ( nbrval( i ), i = 1, nnbr )
248 *
249 * Get number of grids
250 *
251  READ( nin, fmt = * ) ngrids
252  IF( ngrids.LT.1 .OR. ngrids.GT.ldpval ) THEN
253  WRITE( nout, fmt = 9994 ) 'Grids', ldpval
254  GO TO 20
255  ELSE IF( ngrids.GT.ldqval ) THEN
256  WRITE( nout, fmt = 9994 ) 'Grids', ldqval
257  GO TO 20
258  END IF
259 *
260 * Processor grid must be 1D so set PVAL to 1
261  DO 8738 i = 1, ngrids
262  pval( i ) = 1
263  8738 CONTINUE
264 *
265 * Get values of Q
266 *
267  READ( nin, fmt = * ) ( qval( i ), i = 1, ngrids )
268 *
269 * Get level of checking
270 *
271  READ( nin, fmt = * ) thresh
272 *
273 * Close input file
274 *
275  CLOSE( nin )
276 *
277 * For pvm only: if virtual machine not set up, allocate it and
278 * spawn the correct number of processes.
279 *
280  IF( nprocs.LT.1 ) THEN
281  nprocs = 0
282  DO 10 i = 1, ngrids
283  nprocs = max( nprocs, pval( i )*qval( i ) )
284  10 CONTINUE
285  CALL blacs_setup( iam, nprocs )
286  END IF
287 *
288 * Temporarily define blacs grid to include all processes so
289 * information can be broadcast to all processes.
290 *
291  CALL blacs_get( -1, 0, ictxt )
292  CALL blacs_gridinit( ictxt, 'Row-major', 1, nprocs )
293 *
294 * Compute machine epsilon
295 *
296  eps = pslamch( ictxt, 'eps' )
297 *
298 * Pack information arrays and broadcast
299 *
300  CALL sgebs2d( ictxt, 'All', ' ', 1, 1, thresh, 1 )
301  i = 1
302  work( i ) = nmat
303  i = i+1
304  work( i ) = nbw
305  i = i+1
306  work( i ) = nnb
307  i = i+1
308  work( i ) = nnr
309  i = i+1
310  work( i ) = nnbr
311  i = i+1
312  work( i ) = ngrids
313  i = i+1
314  IF( lsame( trans, 'N' ) ) THEN
315  work( i ) = 1
316  ELSE
317  trans = 'C'
318  work( i ) = 2
319  END IF
320  i = i+1
321 * Send number of elements to be sent
322  CALL igebs2d( ictxt, 'All', ' ', 1, 1, i-1, 1 )
323 * Send elements
324  CALL igebs2d( ictxt, 'All', ' ', i-1, 1, work, i-1 )
325 *
326  i = 1
327  CALL icopy( nmat, nval, 1, work( i ), 1 )
328  i = i + nmat
329  CALL icopy( nbw, bwlval, 1, work( i ), 1 )
330  i = i + nbw
331  CALL icopy( nbw, bwuval, 1, work( i ), 1 )
332  i = i + nbw
333  CALL icopy( nnb, nbval, 1, work( i ), 1 )
334  i = i + nnb
335  CALL icopy( nnr, nrval, 1, work( i ), 1 )
336  i = i + nnr
337  CALL icopy( nnbr, nbrval, 1, work( i ), 1 )
338  i = i + nnbr
339  CALL icopy( ngrids, pval, 1, work( i ), 1 )
340  i = i + ngrids
341  CALL icopy( ngrids, qval, 1, work( i ), 1 )
342  i = i + ngrids
343  CALL igebs2d( ictxt, 'All', ' ', i-1, 1, work, i-1 )
344 *
345 * regurgitate input
346 *
347  WRITE( nout, fmt = 9999 )
348  $ 'SCALAPACK banded linear systems.'
349  WRITE( nout, fmt = 9999 ) usrinfo
350  WRITE( nout, fmt = * )
351  WRITE( nout, fmt = 9999 )
352  $ 'Tests of the parallel '//
353  $ 'complex single precision band matrix solve '
354  WRITE( nout, fmt = 9999 )
355  $ 'The following scaled residual '//
356  $ 'checks will be computed:'
357  WRITE( nout, fmt = 9999 )
358  $ ' Solve residual = ||Ax - b|| / '//
359  $ '(||x|| * ||A|| * eps * N)'
360  WRITE( nout, fmt = 9999 )
361  $ ' Factorization residual = ||A - LU|| /'//
362  $ ' (||A|| * eps * N)'
363  WRITE( nout, fmt = 9999 )
364  $ 'The matrix A is randomly '//
365  $ 'generated for each test.'
366  WRITE( nout, fmt = * )
367  WRITE( nout, fmt = 9999 )
368  $ 'An explanation of the input/output '//
369  $ 'parameters follows:'
370  WRITE( nout, fmt = 9999 )
371  $ 'TIME : Indicates whether WALL or '//
372  $ 'CPU time was used.'
373 *
374  WRITE( nout, fmt = 9999 )
375  $ 'N : The number of rows and columns '//
376  $ 'in the matrix A.'
377  WRITE( nout, fmt = 9999 )
378  $ 'bwl, bwu : The number of diagonals '//
379  $ 'in the matrix A.'
380  WRITE( nout, fmt = 9999 )
381  $ 'NB : The size of the column panels the'//
382  $ ' matrix A is split into. [-1 for default]'
383  WRITE( nout, fmt = 9999 )
384  $ 'NRHS : The total number of RHS to solve'//
385  $ ' for.'
386  WRITE( nout, fmt = 9999 )
387  $ 'NBRHS : The number of RHS to be put on '//
388  $ 'a column of processes before going'
389  WRITE( nout, fmt = 9999 )
390  $ ' on to the next column of processes.'
391  WRITE( nout, fmt = 9999 )
392  $ 'P : The number of process rows.'
393  WRITE( nout, fmt = 9999 )
394  $ 'Q : The number of process columns.'
395  WRITE( nout, fmt = 9999 )
396  $ 'THRESH : If a residual value is less than'//
397  $ ' THRESH, CHECK is flagged as PASSED'
398  WRITE( nout, fmt = 9999 )
399  $ 'Fact time: Time in seconds to factor the'//
400  $ ' matrix'
401  WRITE( nout, fmt = 9999 )
402  $ 'Sol Time: Time in seconds to solve the'//
403  $ ' system.'
404  WRITE( nout, fmt = 9999 )
405  $ 'MFLOPS : Rate of execution for factor '//
406  $ 'and solve using sequential operation count.'
407  WRITE( nout, fmt = 9999 )
408  $ 'MFLOP2 : Rough estimate of speed '//
409  $ 'using actual op count (accurate big P,N).'
410  WRITE( nout, fmt = * )
411  WRITE( nout, fmt = 9999 )
412  $ 'The following parameter values will be used:'
413  WRITE( nout, fmt = 9996 )
414  $ 'N ', ( nval(i), i = 1, min(nmat, 10) )
415  IF( nmat.GT.10 )
416  $ WRITE( nout, fmt = 9997 ) ( nval(i), i = 11, nmat )
417  WRITE( nout, fmt = 9996 )
418  $ 'bwl ', ( bwlval(i), i = 1, min(nbw, 10) )
419  IF( nbw.GT.10 )
420  $ WRITE( nout, fmt = 9997 ) ( bwlval(i), i = 11, nbw )
421  WRITE( nout, fmt = 9996 )
422  $ 'bwu ', ( bwuval(i), i = 1, min(nbw, 10) )
423  IF( nbw.GT.10 )
424  $ WRITE( nout, fmt = 9997 ) ( bwuval(i), i = 11, nbw )
425  WRITE( nout, fmt = 9996 )
426  $ 'NB ', ( nbval(i), i = 1, min(nnb, 10) )
427  IF( nnb.GT.10 )
428  $ WRITE( nout, fmt = 9997 ) ( nbval(i), i = 11, nnb )
429  WRITE( nout, fmt = 9996 )
430  $ 'NRHS ', ( nrval(i), i = 1, min(nnr, 10) )
431  IF( nnr.GT.10 )
432  $ WRITE( nout, fmt = 9997 ) ( nrval(i), i = 11, nnr )
433  WRITE( nout, fmt = 9996 )
434  $ 'NBRHS', ( nbrval(i), i = 1, min(nnbr, 10) )
435  IF( nnbr.GT.10 )
436  $ WRITE( nout, fmt = 9997 ) ( nbrval(i), i = 11, nnbr )
437  WRITE( nout, fmt = 9996 )
438  $ 'P ', ( pval(i), i = 1, min(ngrids, 10) )
439  IF( ngrids.GT.10 )
440  $ WRITE( nout, fmt = 9997) ( pval(i), i = 11, ngrids )
441  WRITE( nout, fmt = 9996 )
442  $ 'Q ', ( qval(i), i = 1, min(ngrids, 10) )
443  IF( ngrids.GT.10 )
444  $ WRITE( nout, fmt = 9997 ) ( qval(i), i = 11, ngrids )
445  WRITE( nout, fmt = * )
446  WRITE( nout, fmt = 9995 ) eps
447  WRITE( nout, fmt = 9998 ) thresh
448 *
449  ELSE
450 *
451 * If in pvm, must participate setting up virtual machine
452 *
453  IF( nprocs.LT.1 )
454  $ CALL blacs_setup( iam, nprocs )
455 *
456 * Temporarily define blacs grid to include all processes so
457 * all processes have needed startup information
458 *
459  CALL blacs_get( -1, 0, ictxt )
460  CALL blacs_gridinit( ictxt, 'Row-major', 1, nprocs )
461 *
462 * Compute machine epsilon
463 *
464  eps = pslamch( ictxt, 'eps' )
465 *
466  CALL sgebr2d( ictxt, 'All', ' ', 1, 1, thresh, 1, 0, 0 )
467  CALL igebr2d( ictxt, 'All', ' ', 1, 1, i, 1, 0, 0 )
468  CALL igebr2d( ictxt, 'All', ' ', i, 1, work, i, 0, 0 )
469  i = 1
470  nmat = work( i )
471  i = i+1
472  nbw = work( i )
473  i = i+1
474  nnb = work( i )
475  i = i+1
476  nnr = work( i )
477  i = i+1
478  nnbr = work( i )
479  i = i+1
480  ngrids = work( i )
481  i = i+1
482  IF( work( i ) .EQ. 1 ) THEN
483  trans = 'N'
484  ELSE
485  trans = 'C'
486  END IF
487  i = i+1
488 *
489  i = nmat + nbw + nnb + nnr + nnbr + 2*ngrids
490  i = i + nbw
491 *
492  CALL igebr2d( ictxt, 'All', ' ', 1, i, work, 1, 0, 0 )
493  i = 1
494  CALL icopy( nmat, work( i ), 1, nval, 1 )
495  i = i + nmat
496  CALL icopy( nbw, work( i ), 1, bwlval, 1 )
497  i = i + nbw
498  CALL icopy( nbw, work( i ), 1, bwuval, 1 )
499  i = i + nbw
500  CALL icopy( nnb, work( i ), 1, nbval, 1 )
501  i = i + nnb
502  CALL icopy( nnr, work( i ), 1, nrval, 1 )
503  i = i + nnr
504  CALL icopy( nnbr, work( i ), 1, nbrval, 1 )
505  i = i + nnbr
506  CALL icopy( ngrids, work( i ), 1, pval, 1 )
507  i = i + ngrids
508  CALL icopy( ngrids, work( i ), 1, qval, 1 )
509 *
510  END IF
511 *
512  CALL blacs_gridexit( ictxt )
513 *
514  RETURN
515 *
516  20 WRITE( nout, fmt = 9993 )
517  CLOSE( nin )
518  IF( nout.NE.6 .AND. nout.NE.0 )
519  $ CLOSE( nout )
520 *
521  CALL blacs_abort( ictxt, 1 )
522  stop
523 *
524  9999 FORMAT( a )
525  9998 FORMAT( 'Routines pass computational tests if scaled residual ',
526  $ 'is less than ', g12.5 )
527  9997 FORMAT( ' ', 10i6 )
528  9996 FORMAT( 2x, a5, ': ', 10i6 )
529  9995 FORMAT( 'Relative machine precision (eps) is taken to be ',
530  $ e18.6 )
531  9994 FORMAT( ' Number of values of ',5a, ' is less than 1 or greater ',
532  $ 'than ', i2 )
533  9993 FORMAT( ' Illegal input in file ',40a,'. Aborting run.' )
534 *
535 * End of PCDTINFO
536 *
537  END
max
#define max(A, B)
Definition: pcgemr.c:180
pcdtinfo
subroutine pcdtinfo(SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW, BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, NPROCS)
Definition: pcdtinfo.f:6
icopy
subroutine icopy(N, SX, INCX, SY, INCY)
Definition: pblastst.f:1525
min
#define min(A, B)
Definition: pcgemr.c:181