ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdgbinfo.f
Go to the documentation of this file.
1  SUBROUTINE pdgbinfo( SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW,
2  $ BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL,
3  $ NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL,
4  $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH,
5  $ WORK, IAM, NPROCS )
6 *
7 *
8 *
9 * -- ScaLAPACK routine (version 1.7) --
10 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
11 * and University of California, Berkeley.
12 * November 15, 1997
13 *
14 * .. Scalar Arguments ..
15  CHARACTER TRANS
16  CHARACTER*(*) SUMMRY
17  INTEGER IAM,
18  $ LDBWVAL, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL,
19  $ ldpval, ldqval, ngrids, nmat, nnb, nnbr, nbw,
20  $ nprocs, nnr, nout
21  REAL THRESH
22 * ..
23 * .. Array Arguments ..
24  INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ),
25  $ nrval( ldnrval ), nval( ldnval ),
26  $ bwlval( ldbwval),bwuval( ldbwval),
27  $ pval( ldpval ), qval(ldqval), work( * )
28 * ..
29 *
30 * Purpose
31 * =======
32 *
33 * PDGBINFO get needed startup information for band factorization
34 * and transmits it to all processes.
35 *
36 * Arguments
37 * =========
38 *
39 * SUMMRY (global output) CHARACTER*(*)
40 * Name of output (summary) file (if any). Only defined for
41 * process 0.
42 *
43 * NOUT (global output) INTEGER
44 * The unit number for output file. NOUT = 6, ouput to screen,
45 * NOUT = 0, output to stderr. Only defined for process 0.
46 *
47 *
48 * NMAT (global output) INTEGER
49 * The number of different values that can be used for N.
50 *
51 * NVAL (global output) INTEGER array, dimension (LDNVAL)
52 * The values of N (number of columns in matrix) to run the
53 * code with.
54 *
55 * NBW (global output) INTEGER
56 * The number of different values that can be used for @bw@.
57 * BWLVAL (global output) INTEGER array, dimension (LDNVAL)
58 * The values of BWL (number of subdiagonals in matrix) to run
59 * the code with.
60 * BWUVAL (global output) INTEGER array, dimension (LDNVAL)
61 * The values of BW (number of supdiagonals in matrix) to run
62 * the code with.
63 *
64 * LDNVAL (global input) INTEGER
65 * The maximum number of different values that can be used for
66 * N, LDNVAL > = NMAT.
67 *
68 * NNB (global output) INTEGER
69 * The number of different values that can be used for NB.
70 *
71 * NBVAL (global output) INTEGER array, dimension (LDNBVAL)
72 * The values of NB (blocksize) to run the code with.
73 *
74 * LDNBVAL (global input) INTEGER
75 * The maximum number of different values that can be used for
76 * NB, LDNBVAL >= NNB.
77 *
78 * NNR (global output) INTEGER
79 * The number of different values that can be used for NRHS.
80 *
81 * NRVAL (global output) INTEGER array, dimension(LDNRVAL)
82 * The values of NRHS (# of Right Hand Sides) to run the code
83 * with.
84 *
85 * LDNRVAL (global input) INTEGER
86 * The maximum number of different values that can be used for
87 * NRHS, LDNRVAL >= NNR.
88 *
89 * NNBR (global output) INTEGER
90 * The number of different values that can be used for NBRHS.
91 *
92 * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL)
93 * The values of NBRHS (RHS blocksize) to run the code with.
94 *
95 * LDNBRVAL (global input) INTEGER
96 * The maximum number of different values that can be used for
97 * NBRHS, LDNBRVAL >= NBRVAL.
98 *
99 * NGRIDS (global output) INTEGER
100 * The number of different values that can be used for P & Q.
101 *
102 * PVAL (global output) INTEGER array, dimension (LDPVAL)
103 * Not used (will be returned as all 1s) since proc grid is 1D
104 *
105 * LDPVAL (global input) INTEGER
106 * The maximum number of different values that can be used for
107 * P, LDPVAL >= NGRIDS.
108 *
109 * QVAL (global output) INTEGER array, dimension (LDQVAL)
110 * The values of Q (number of process columns) to run the code
111 * with.
112 *
113 * LDQVAL (global input) INTEGER
114 * The maximum number of different values that can be used for
115 * Q, LDQVAL >= NGRIDS.
116 *
117 * THRESH (global output) REAL
118 * Indicates what error checks shall be run and printed out:
119 * = 0 : Perform no error checking
120 * > 0 : report all residuals greater than THRESH, perform
121 * factor check only if solve check fails
122 *
123 * WORK (local workspace) INTEGER array of dimension >=
124 * MAX( 8, LDNVAL+2*LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL
125 * $ +3*LDNVAL)
126 * Used to pack input arrays in order to send info in one
127 * message.
128 *
129 * IAM (local input) INTEGER
130 * My process number.
131 *
132 * NPROCS (global input) INTEGER
133 * The total number of processes.
134 *
135 * ======================================================================
136 *
137 * Note: For packing the information we assumed that the length in bytes
138 * ===== of an integer is equal to the length in bytes of a real single
139 * precision.
140 *
141 * =====================================================================
142 *
143 * Code Developer: Andrew J. Cleary, University of Tennessee.
144 * Current address: Lawrence Livermore National Labs.
145 * This version released: August, 2001.
146 *
147 * ======================================================================
148 *
149 * .. Parameters ..
150  INTEGER NIN
151  PARAMETER ( NIN = 11 )
152 * ..
153 * .. Local Scalars ..
154  INTEGER I, ICTXT
155  CHARACTER*79 USRINFO
156  DOUBLE PRECISION EPS
157 * ..
158 * .. External Subroutines ..
159  EXTERNAL blacs_abort, blacs_get, blacs_gridexit,
160  $ blacs_gridinit, blacs_setup, icopy, igebr2d,
161  $ igebs2d, sgebr2d, sgebs2d
162 * ..
163 * .. External Functions ..
164  LOGICAL LSAME
165  DOUBLE PRECISION PDLAMCH
166  EXTERNAL LSAME, PDLAMCH
167 * ..
168 * .. Intrinsic Functions ..
169  INTRINSIC max, min
170 * ..
171 * .. Executable Statements ..
172 *
173 * Process 0 reads the input data, broadcasts to other processes and
174 * writes needed information to NOUT
175 *
176  IF( iam.EQ.0 ) THEN
177 *
178 * Open file and skip data file header
179 *
180  OPEN( nin, file = 'BLU.dat', status = 'OLD' )
181  READ( nin, fmt = * ) summry
182  summry = ' '
183 *
184 * Read in user-supplied info about machine type, compiler, etc.
185 *
186  READ( nin, fmt = 9999 ) usrinfo
187 *
188 * Read name and unit number for summary output file
189 *
190  READ( nin, fmt = * ) summry
191  READ( nin, fmt = * ) nout
192  IF( nout.NE.0 .AND. nout.NE.6 )
193  $ OPEN( nout, file = summry, status = 'UNKNOWN' )
194 *
195 * Read and check the parameter values for the tests.
196 *
197 * Get TRANS
198 *
199  READ( nin, fmt = * ) trans
200 *
201 *
202 * Get number of matrices and their dimensions
203 *
204  READ( nin, fmt = * ) nmat
205  IF( nmat.LT.1 .OR. nmat.GT.ldnval ) THEN
206  WRITE( nout, fmt = 9994 ) 'N', ldnval
207  GO TO 20
208  END IF
209  READ( nin, fmt = * ) ( nval( i ), i = 1, nmat )
210 *
211 * Get bandwidths
212 *
213  READ( nin, fmt = * ) nbw
214  IF( nbw.LT.1 .OR. nbw.GT.ldbwval ) THEN
215  WRITE( nout, fmt = 9994 ) 'BW', ldbwval
216  GO TO 20
217  END IF
218  READ( nin, fmt = * ) ( bwlval( i ), i = 1, nbw )
219  READ( nin, fmt = * ) ( bwuval( i ), i = 1, nbw )
220 *
221 * Get values of NB
222 *
223  READ( nin, fmt = * ) nnb
224  IF( nnb.LT.1 .OR. nnb.GT.ldnbval ) THEN
225  WRITE( nout, fmt = 9994 ) 'NB', ldnbval
226  GO TO 20
227  END IF
228  READ( nin, fmt = * ) ( nbval( i ), i = 1, nnb )
229 *
230 * Get values of NRHS
231 *
232  READ( nin, fmt = * ) nnr
233  IF( nnr.LT.1 .OR. nnr.GT.ldnrval ) THEN
234  WRITE( nout, fmt = 9994 ) 'NRHS', ldnrval
235  GO TO 20
236  END IF
237  READ( nin, fmt = * ) ( nrval( i ), i = 1, nnr )
238 *
239 * Get values of NBRHS
240 *
241  READ( nin, fmt = * ) nnbr
242  IF( nnbr.LT.1 .OR. nnbr.GT.ldnbrval ) THEN
243  WRITE( nout, fmt = 9994 ) 'NBRHS', ldnbrval
244  GO TO 20
245  END IF
246  READ( nin, fmt = * ) ( nbrval( i ), i = 1, nnbr )
247 *
248 * Get number of grids
249 *
250  READ( nin, fmt = * ) ngrids
251  IF( ngrids.LT.1 .OR. ngrids.GT.ldpval ) THEN
252  WRITE( nout, fmt = 9994 ) 'Grids', ldpval
253  GO TO 20
254  ELSE IF( ngrids.GT.ldqval ) THEN
255  WRITE( nout, fmt = 9994 ) 'Grids', ldqval
256  GO TO 20
257  END IF
258 *
259 * Processor grid must be 1D so set PVAL to 1
260  DO 8738 i = 1, ngrids
261  pval( i ) = 1
262  8738 CONTINUE
263 *
264 * Get values of Q
265 *
266  READ( nin, fmt = * ) ( qval( i ), i = 1, ngrids )
267 *
268 * Get level of checking
269 *
270  READ( nin, fmt = * ) thresh
271 *
272 * Close input file
273 *
274  CLOSE( nin )
275 *
276 * For pvm only: if virtual machine not set up, allocate it and
277 * spawn the correct number of processes.
278 *
279  IF( nprocs.LT.1 ) THEN
280  nprocs = 0
281  DO 10 i = 1, ngrids
282  nprocs = max( nprocs, pval( i )*qval( i ) )
283  10 CONTINUE
284  CALL blacs_setup( iam, nprocs )
285  END IF
286 *
287 * Temporarily define blacs grid to include all processes so
288 * information can be broadcast to all processes.
289 *
290  CALL blacs_get( -1, 0, ictxt )
291  CALL blacs_gridinit( ictxt, 'Row-major', 1, nprocs )
292 *
293 * Compute machine epsilon
294 *
295  eps = pdlamch( ictxt, 'eps' )
296 *
297 * Pack information arrays and broadcast
298 *
299  CALL sgebs2d( ictxt, 'All', ' ', 1, 1, thresh, 1 )
300  i = 1
301  work( i ) = nmat
302  i = i+1
303  work( i ) = nbw
304  i = i+1
305  work( i ) = nnb
306  i = i+1
307  work( i ) = nnr
308  i = i+1
309  work( i ) = nnbr
310  i = i+1
311  work( i ) = ngrids
312  i = i+1
313  IF( lsame( trans, 'N' ) ) THEN
314  work( i ) = 1
315  ELSE
316  trans = 'T'
317  work( i ) = 2
318  END IF
319  i = i+1
320 * Send number of elements to be sent
321  CALL igebs2d( ictxt, 'All', ' ', 1, 1, i-1, 1 )
322 * Send elements
323  CALL igebs2d( ictxt, 'All', ' ', i-1, 1, work, i-1 )
324 *
325  i = 1
326  CALL icopy( nmat, nval, 1, work( i ), 1 )
327  i = i + nmat
328  CALL icopy( nbw, bwlval, 1, work( i ), 1 )
329  i = i + nbw
330  CALL icopy( nbw, bwuval, 1, work( i ), 1 )
331  i = i + nbw
332  CALL icopy( nnb, nbval, 1, work( i ), 1 )
333  i = i + nnb
334  CALL icopy( nnr, nrval, 1, work( i ), 1 )
335  i = i + nnr
336  CALL icopy( nnbr, nbrval, 1, work( i ), 1 )
337  i = i + nnbr
338  CALL icopy( ngrids, pval, 1, work( i ), 1 )
339  i = i + ngrids
340  CALL icopy( ngrids, qval, 1, work( i ), 1 )
341  i = i + ngrids
342  CALL igebs2d( ictxt, 'All', ' ', i-1, 1, work, i-1 )
343 *
344 * regurgitate input
345 *
346  WRITE( nout, fmt = 9999 )
347  $ 'SCALAPACK banded linear systems.'
348  WRITE( nout, fmt = 9999 ) usrinfo
349  WRITE( nout, fmt = * )
350  WRITE( nout, fmt = 9999 )
351  $ 'Tests of the parallel '//
352  $ 'real double precision band matrix solve '
353  WRITE( nout, fmt = 9999 )
354  $ 'The following scaled residual '//
355  $ 'checks will be computed:'
356  WRITE( nout, fmt = 9999 )
357  $ ' Solve residual = ||Ax - b|| / '//
358  $ '(||x|| * ||A|| * eps * N)'
359  WRITE( nout, fmt = 9999 )
360  $ ' Factorization residual = ||A - LU|| /'//
361  $ ' (||A|| * eps * N)'
362  WRITE( nout, fmt = 9999 )
363  $ 'The matrix A is randomly '//
364  $ 'generated for each test.'
365  WRITE( nout, fmt = * )
366  WRITE( nout, fmt = 9999 )
367  $ 'An explanation of the input/output '//
368  $ 'parameters follows:'
369  WRITE( nout, fmt = 9999 )
370  $ 'TIME : Indicates whether WALL or '//
371  $ 'CPU time was used.'
372 *
373  WRITE( nout, fmt = 9999 )
374  $ 'N : The number of rows and columns '//
375  $ 'in the matrix A.'
376  WRITE( nout, fmt = 9999 )
377  $ 'bwl, bwu : The number of diagonals '//
378  $ 'in the matrix A.'
379  WRITE( nout, fmt = 9999 )
380  $ 'NB : The size of the column panels the'//
381  $ ' matrix A is split into. [-1 for default]'
382  WRITE( nout, fmt = 9999 )
383  $ 'NRHS : The total number of RHS to solve'//
384  $ ' for.'
385  WRITE( nout, fmt = 9999 )
386  $ 'NBRHS : The number of RHS to be put on '//
387  $ 'a column of processes before going'
388  WRITE( nout, fmt = 9999 )
389  $ ' on to the next column of processes.'
390  WRITE( nout, fmt = 9999 )
391  $ 'P : The number of process rows.'
392  WRITE( nout, fmt = 9999 )
393  $ 'Q : The number of process columns.'
394  WRITE( nout, fmt = 9999 )
395  $ 'THRESH : If a residual value is less than'//
396  $ ' THRESH, CHECK is flagged as PASSED'
397  WRITE( nout, fmt = 9999 )
398  $ 'Fact time: Time in seconds to factor the'//
399  $ ' matrix'
400  WRITE( nout, fmt = 9999 )
401  $ 'Sol Time: Time in seconds to solve the'//
402  $ ' system.'
403  WRITE( nout, fmt = 9999 )
404  $ 'MFLOPS : Rate of execution for factor '//
405  $ 'and solve using sequential operation count.'
406  WRITE( nout, fmt = 9999 )
407  $ 'MFLOP2 : Rough estimate of speed '//
408  $ 'using actual op count (accurate big P,N).'
409  WRITE( nout, fmt = * )
410  WRITE( nout, fmt = 9999 )
411  $ 'The following parameter values will be used:'
412  WRITE( nout, fmt = 9996 )
413  $ 'N ', ( nval(i), i = 1, min(nmat, 10) )
414  IF( nmat.GT.10 )
415  $ WRITE( nout, fmt = 9997 ) ( nval(i), i = 11, nmat )
416  WRITE( nout, fmt = 9996 )
417  $ 'bwl ', ( bwlval(i), i = 1, min(nbw, 10) )
418  IF( nbw.GT.10 )
419  $ WRITE( nout, fmt = 9997 ) ( bwlval(i), i = 11, nbw )
420  WRITE( nout, fmt = 9996 )
421  $ 'bwu ', ( bwuval(i), i = 1, min(nbw, 10) )
422  IF( nbw.GT.10 )
423  $ WRITE( nout, fmt = 9997 ) ( bwuval(i), i = 11, nbw )
424  WRITE( nout, fmt = 9996 )
425  $ 'NB ', ( nbval(i), i = 1, min(nnb, 10) )
426  IF( nnb.GT.10 )
427  $ WRITE( nout, fmt = 9997 ) ( nbval(i), i = 11, nnb )
428  WRITE( nout, fmt = 9996 )
429  $ 'NRHS ', ( nrval(i), i = 1, min(nnr, 10) )
430  IF( nnr.GT.10 )
431  $ WRITE( nout, fmt = 9997 ) ( nrval(i), i = 11, nnr )
432  WRITE( nout, fmt = 9996 )
433  $ 'NBRHS', ( nbrval(i), i = 1, min(nnbr, 10) )
434  IF( nnbr.GT.10 )
435  $ WRITE( nout, fmt = 9997 ) ( nbrval(i), i = 11, nnbr )
436  WRITE( nout, fmt = 9996 )
437  $ 'P ', ( pval(i), i = 1, min(ngrids, 10) )
438  IF( ngrids.GT.10 )
439  $ WRITE( nout, fmt = 9997) ( pval(i), i = 11, ngrids )
440  WRITE( nout, fmt = 9996 )
441  $ 'Q ', ( qval(i), i = 1, min(ngrids, 10) )
442  IF( ngrids.GT.10 )
443  $ WRITE( nout, fmt = 9997 ) ( qval(i), i = 11, ngrids )
444  WRITE( nout, fmt = * )
445  WRITE( nout, fmt = 9995 ) eps
446  WRITE( nout, fmt = 9998 ) thresh
447 *
448  ELSE
449 *
450 * If in pvm, must participate setting up virtual machine
451 *
452  IF( nprocs.LT.1 )
453  $ CALL blacs_setup( iam, nprocs )
454 *
455 * Temporarily define blacs grid to include all processes so
456 * all processes have needed startup information
457 *
458  CALL blacs_get( -1, 0, ictxt )
459  CALL blacs_gridinit( ictxt, 'Row-major', 1, nprocs )
460 *
461 * Compute machine epsilon
462 *
463  eps = pdlamch( ictxt, 'eps' )
464 *
465  CALL sgebr2d( ictxt, 'All', ' ', 1, 1, thresh, 1, 0, 0 )
466  CALL igebr2d( ictxt, 'All', ' ', 1, 1, i, 1, 0, 0 )
467  CALL igebr2d( ictxt, 'All', ' ', i, 1, work, i, 0, 0 )
468  i = 1
469  nmat = work( i )
470  i = i+1
471  nbw = work( i )
472  i = i+1
473  nnb = work( i )
474  i = i+1
475  nnr = work( i )
476  i = i+1
477  nnbr = work( i )
478  i = i+1
479  ngrids = work( i )
480  i = i+1
481  IF( work( i ) .EQ. 1 ) THEN
482  trans = 'N'
483  ELSE
484  trans = 'T'
485  END IF
486  i = i+1
487 *
488  i = nmat + nbw + nnb + nnr + nnbr + 2*ngrids
489  i = i + nbw
490 *
491  CALL igebr2d( ictxt, 'All', ' ', 1, i, work, 1, 0, 0 )
492  i = 1
493  CALL icopy( nmat, work( i ), 1, nval, 1 )
494  i = i + nmat
495  CALL icopy( nbw, work( i ), 1, bwlval, 1 )
496  i = i + nbw
497  CALL icopy( nbw, work( i ), 1, bwuval, 1 )
498  i = i + nbw
499  CALL icopy( nnb, work( i ), 1, nbval, 1 )
500  i = i + nnb
501  CALL icopy( nnr, work( i ), 1, nrval, 1 )
502  i = i + nnr
503  CALL icopy( nnbr, work( i ), 1, nbrval, 1 )
504  i = i + nnbr
505  CALL icopy( ngrids, work( i ), 1, pval, 1 )
506  i = i + ngrids
507  CALL icopy( ngrids, work( i ), 1, qval, 1 )
508 *
509  END IF
510 *
511  CALL blacs_gridexit( ictxt )
512 *
513  RETURN
514 *
515  20 WRITE( nout, fmt = 9993 )
516  CLOSE( nin )
517  IF( nout.NE.6 .AND. nout.NE.0 )
518  $ CLOSE( nout )
519 *
520  CALL blacs_abort( ictxt, 1 )
521  stop
522 *
523  9999 FORMAT( a )
524  9998 FORMAT( 'Routines pass computational tests if scaled residual ',
525  $ 'is less than ', g12.5 )
526  9997 FORMAT( ' ', 10i6 )
527  9996 FORMAT( 2x, a5, ': ', 10i6 )
528  9995 FORMAT( 'Relative machine precision (eps) is taken to be ',
529  $ e18.6 )
530  9994 FORMAT( ' Number of values of ',5a, ' is less than 1 or greater ',
531  $ 'than ', i2 )
532  9993 FORMAT( ' Illegal input in file ',40a,'. Aborting run.' )
533 *
534 * End of PDGBINFO
535 *
536  END
max
#define max(A, B)
Definition: pcgemr.c:180
pdgbinfo
subroutine pdgbinfo(SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW, BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, NPROCS)
Definition: pdgbinfo.f:6
icopy
subroutine icopy(N, SX, INCX, SY, INCY)
Definition: pblastst.f:1525
min
#define min(A, B)
Definition: pcgemr.c:181