ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pclsdriver.f
Go to the documentation of this file.
1  PROGRAM pclsdriver
2 *
3 * -- ScaLAPACK routine (version 1.7) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * August 14, 2001
7 *
8 * Purpose
9 * =======
10 *
11 * PCLSDRIVER is the main test program for the COMPLEX
12 * SCALAPACK (full rank) Least Squares routines. This test driver solves
13 * full-rank least square problems.
14 *
15 * The program must be driven by a short data file. An annotated
16 * example of a data file can be obtained by deleting the first 3
17 * characters from the following 17 lines:
18 * 'ScaLapack LS solve input file'
19 * 'Intel iPSC/860 hypercube, gamma model.'
20 * 'LS.out' output file name (if any)
21 * 6 device out
22 * 4 number of problems sizes
23 * 55 17 31 201 values of M
24 * 5 71 31 201 values of N
25 * 3 number of NB's
26 * 2 3 5 values of NB
27 * 3 number of NRHS's
28 * 2 3 5 values of NRHS
29 * 2 number of NBRHS's
30 * 1 2 values of NBRHS
31 * 7 number of process grids (ordered P & Q)
32 * 1 2 1 4 2 3 8 values of P
33 * 7 2 4 1 3 2 1 values of Q
34 * 3.0 threshold
35 *
36 * Internal Parameters
37 * ===================
38 *
39 * TOTMEM INTEGER, default = 6200000.
40 * TOTMEM is a machine-specific parameter indicating the
41 * maximum amount of available memory in bytes.
42 * The user should customize TOTMEM to his platform. Remember
43 * to leave room in memory for the operating system, the BLACS
44 * buffer, etc. For example, on a system with 8 MB of memory
45 * per process (e.g., one processor on an Intel iPSC/860), the
46 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
47 * code, BLACS buffer, etc). However, for PVM, we usually set
48 * TOTMEM = 2000000. Some experimenting with the maximum value
49 * of TOTMEM may be required.
50 * INTGSZ INTEGER, default = 4 bytes.
51 * CPLXSZ INTEGER, default = 8 bytes.
52 * INTGSZ and CPLXSZ indicate the length in bytes on the
53 * given platform for an integer and a single precision
54 * complex.
55 * MEM COMPLEX array, dimension ( TOTMEM / CPLXSZ )
56 * All arrays used by SCALAPACK routines are allocated from
57 * this array and referenced by pointers. The integer IPA,
58 * for example, is a pointer to the starting element of MEM for
59 * the matrix A.
60 *
61 * =====================================================================
62 *
63 * .. Parameters ..
64  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
65  $ lld_, mb_, m_, nb_, n_, rsrc_
66  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
67  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
68  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
69  INTEGER cplxsz, memsiz, ntests, realsz, totmem
70  REAL rzero, rone
71  COMPLEX one, padval, zero
72  parameter( cplxsz = 8, realsz = 8, totmem = 2000000,
73  $ memsiz = totmem / cplxsz, ntests = 20,
74  $ padval = ( -9923.0e+0, -9923.0e+0 ) )
75  parameter( one = ( 1.0e+0, 0.0e+0 ), rzero = 0.0e+0,
76  $ rone = 1.0e+0, zero = ( 0.0e+0, 0.0e+0 ) )
77 * ..
78 * .. Local Scalars ..
79  LOGICAL check, tpsd
80  CHARACTER trans
81  CHARACTER*6 passed
82  CHARACTER*80 outfile
83  INTEGER hh, i, iam, iaseed, ibseed, ictxt, ii, imidpad,
84  $ info, ipa, ipb, ipostpad, iprepad, ipw, ipw2,
85  $ ipx, iscale, itran, itype, j, jj, k, kfail, kk,
86  $ kpass, kskip, ktests, lcm, lcmp, ltau, lwf,
87  $ lwork, lws, m, mnp, mnrhsp, mp, mq, mycol,
88  $ myrow, n, nb, nbrhs, ncols, ngrids, nmat, nnb,
89  $ nnbr, nnr, nnrhsq, nout, np, npcol, nprocs,
90  $ nprow, nrows, nq, nrhs, nrhsp, nrhsq, worksiz
91  REAL anorm, bnorm, sresid, thresh
92  DOUBLE PRECISION addfac, adds, mulfac, mults, nops, tmflops
93 * ..
94 * .. Local Arrays ..
95  INTEGER desca( dlen_ ), descb( dlen_ ), descw( lld_ ),
96  $ descx( dlen_ ), ierr( 2 ), mval( ntests ),
97  $ nbrval( ntests ), nbval( ntests ),
98  $ nrval( ntests ), nval( ntests ),
99  $ pval( ntests ), qval( ntests )
100  REAL result( 2 )
101  DOUBLE PRECISION ctime( 1 ), wtime( 1 )
102  COMPLEX mem( memsiz )
103 * ..
104 * .. External Subroutines ..
105  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
106  $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
107  $ blacs_pinfo, descinit, igsum2d, pcchekpad,
108  $ pcfillpad, pcgels, pcgemm, pclacpy,
109  $ pclsinfo, pcmatgen, pscnrm2,
110  $ pcsscal, pcqrt13, pcqrt16, slboot,
111  $ slcombine, sltimer
112 * ..
113 * .. External Functions ..
114  LOGICAL lsame
115  INTEGER iceil, ilcm, numroc
116  REAL pclange, pcqrt14, pcqrt17
117  EXTERNAL iceil, ilcm, lsame, numroc, pclange,
118  $ pcqrt14, pcqrt17
119 * ..
120 * .. Intrinsic Functions ..
121  INTRINSIC max, min
122 * ..
123 * .. Data Statements ..
124  DATA ktests, kpass, kfail, kskip / 4*0 /
125 * ..
126 * .. Executable Statements ..
127 *
128 * Get starting information
129 *
130  CALL blacs_pinfo( iam, nprocs )
131 *
132  iaseed = 100
133  ibseed = 200
134  CALL pclsinfo( outfile, nout, nmat, mval, ntests, nval,
135  $ ntests, nnb, nbval, ntests, nnr, nrval, ntests,
136  $ nnbr, nbrval, ntests, ngrids, pval, ntests, qval,
137  $ ntests, thresh, mem, iam, nprocs )
138  check = ( thresh.GE.0.0e+0 )
139 *
140 * Print headings
141 *
142  IF( iam.EQ.0 ) THEN
143  WRITE( nout, fmt = * )
144  WRITE( nout, fmt = 9995 )
145  WRITE( nout, fmt = 9994 )
146  WRITE( nout, fmt = * )
147  END IF
148 *
149 * Loop over different process grids
150 *
151  DO 90 i = 1, ngrids
152 *
153  nprow = pval( i )
154  npcol = qval( i )
155 *
156 * Make sure grid information is correct
157 *
158  ierr( 1 ) = 0
159  IF( nprow.LT.1 ) THEN
160  IF( iam.EQ.0 )
161  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
162  ierr( 1 ) = 1
163  ELSE IF( npcol.LT.1 ) THEN
164  IF( iam.EQ.0 )
165  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
166  ierr( 1 ) = 1
167  ELSE IF( nprow*npcol.GT.nprocs ) THEN
168  IF( iam.EQ.0 )
169  $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
170  ierr( 1 ) = 1
171  END IF
172 *
173  IF( ierr( 1 ).GT.0 ) THEN
174  IF( iam.EQ.0 )
175  $ WRITE( nout, fmt = 9997 ) 'grid'
176  kskip = kskip + 1
177  GO TO 90
178  END IF
179 *
180 * Define process grid
181 *
182  CALL blacs_get( -1, 0, ictxt )
183  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
184  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
185 *
186 * Go to bottom of loop if this case doesn't use my process
187 *
188  IF( ( myrow.GE.nprow ).OR.( mycol.GE.npcol ) )
189  $ GO TO 90
190 *
191  DO 80 j = 1, nmat
192 *
193  m = mval( j )
194  n = nval( j )
195 *
196 * Make sure matrix information is correct
197 *
198  ierr( 1 ) = 0
199  IF( m.LT.1 ) THEN
200  IF( iam.EQ.0 )
201  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'M', m
202  ierr( 1 ) = 1
203  ELSE IF( n.LT.1 ) THEN
204  IF( iam.EQ.0 )
205  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
206  ierr( 1 ) = 1
207  END IF
208 *
209 * Make sure no one had error
210 *
211  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
212 *
213  IF( ierr( 1 ).GT.0 ) THEN
214  IF( iam.EQ.0 )
215  $ WRITE( nout, fmt = 9997 ) 'matrix'
216  kskip = kskip + 1
217  GO TO 80
218  END IF
219 *
220 * Loop over different blocking sizes
221 *
222  DO 70 k = 1, nnb
223 *
224  nb = nbval( k )
225 *
226 * Make sure nb is legal
227 *
228  ierr( 1 ) = 0
229  IF( nb.LT.1 ) THEN
230  ierr( 1 ) = 1
231  IF( iam.EQ.0 )
232  $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
233  END IF
234 *
235 * Check all processes for an error
236 *
237  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
238 *
239  IF( ierr( 1 ).GT.0 ) THEN
240  IF( iam.EQ.0 )
241  $ WRITE( nout, fmt = 9997 ) 'NB'
242  kskip = kskip + 1
243  GO TO 70
244  END IF
245 *
246 * Padding constants
247 *
248  mp = numroc( m, nb, myrow, 0, nprow )
249  mq = numroc( m, nb, mycol, 0, npcol )
250  np = numroc( n, nb, myrow, 0, nprow )
251  mnp = max( mp, np )
252  nq = numroc( n, nb, mycol, 0, npcol )
253 *
254  IF( check ) THEN
255  iprepad = max( nb, mp )
256  imidpad = nb
257  ipostpad = max( nb, nq )
258  ELSE
259  iprepad = 0
260  imidpad = 0
261  ipostpad = 0
262  END IF
263 *
264 * Initialize the array descriptor for the matrix A
265 *
266  CALL descinit( desca, m, n, nb, nb, 0, 0, ictxt,
267  $ max( 1, mp ) + imidpad, ierr( 1 ) )
268 *
269 * Check all processes for an error
270 *
271  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
272 *
273  IF( ierr( 1 ).LT.0 ) THEN
274  IF( iam.EQ.0 )
275  $ WRITE( nout, fmt = 9997 ) 'descriptor'
276  kskip = kskip + 1
277  GO TO 70
278  END IF
279 *
280  DO 60 iscale = 1, 3
281 *
282  itype = iscale
283 *
284 * Assign pointers into MEM for SCALAPACK arrays, A is
285 * allocated starting at position MEM( IPREPAD+1 )
286 *
287  ipa = iprepad + 1
288  ipx = ipa + desca( lld_ )*nq + ipostpad + iprepad
289  ipw = ipx
290 *
291  worksiz = nq + ipostpad
292 *
293 * Check for adequate memory for problem size
294 *
295  ierr( 1 ) = 0
296  IF( ( ipw+worksiz ).GT.memsiz ) THEN
297  IF( iam.EQ.0 )
298  $ WRITE( nout, fmt = 9996 ) 'MEMORY',
299  $ ( ipx+worksiz )*cplxsz
300  ierr( 1 ) = 1
301  END IF
302 *
303 * Check all processes for an error
304 *
305  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
306  $ 0 )
307 *
308  IF( ierr( 1 ).GT.0 ) THEN
309  IF( iam.EQ.0 )
310  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
311  kskip = kskip + 1
312  GO TO 70
313  END IF
314 *
315  IF( check ) THEN
316  CALL pcfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
317  $ desca( lld_ ), iprepad, ipostpad,
318  $ padval )
319  CALL pcfillpad( ictxt, worksiz-ipostpad, 1,
320  $ mem( ipw-iprepad ),
321  $ worksiz-ipostpad, iprepad,
322  $ ipostpad, padval )
323  END IF
324 *
325 * Generate the matrix A and calculate its 1-norm
326 *
327  CALL pcqrt13( iscale, m, n, mem( ipa ), 1, 1,
328  $ desca, anorm, iaseed, mem( ipw ) )
329 *
330  IF( check ) THEN
331  CALL pcchekpad( ictxt, 'PCQRT13', mp, nq,
332  $ mem( ipa-iprepad ), desca( lld_ ),
333  $ iprepad, ipostpad, padval )
334  CALL pcchekpad( ictxt, 'PCQRT13',
335  $ worksiz-ipostpad, 1,
336  $ mem( ipw-iprepad ),
337  $ worksiz-ipostpad, iprepad,
338  $ ipostpad, padval )
339  END IF
340 *
341  DO 50 itran = 1, 2
342 *
343  IF( itran.EQ.1 ) THEN
344  nrows = m
345  ncols = n
346  trans = 'N'
347  tpsd = .false.
348  ELSE
349  nrows = n
350  ncols = m
351  trans = 'C'
352  tpsd = .true.
353  END IF
354 *
355 * Loop over the different values for NRHS
356 *
357  DO 40 hh = 1, nnr
358 *
359  nrhs = nrval( hh )
360 *
361  DO 30 kk = 1, nnbr
362 *
363  nbrhs = nbrval( kk )
364 *
365  nrhsp = numroc( nrhs, nbrhs, myrow, 0,
366  $ nprow )
367  nrhsq = numroc( nrhs, nbrhs, mycol, 0,
368  $ npcol )
369 *
370 * Define Array descriptor for rhs MAX(M,N)xNRHS
371 *
372  CALL descinit( descx, max( m, n ), nrhs, nb,
373  $ nbrhs, 0, 0, ictxt,
374  $ max( 1, mnp ) + imidpad,
375  $ ierr( 1 ) )
376  IF( tpsd ) THEN
377  CALL descinit( descw, m, nrhs, nb, nbrhs,
378  $ 0, 0, ictxt, max( 1, mp ) +
379  $ imidpad, ierr( 2 ) )
380  ELSE
381  CALL descinit( descw, n, nrhs, nb, nbrhs,
382  $ 0, 0, ictxt, max( 1, np ) +
383  $ imidpad, ierr( 2 ) )
384  END IF
385 *
386 * Check all processes for an error
387 *
388  CALL igsum2d( ictxt, 'All', ' ', 2, 1, ierr,
389  $ 2, -1, 0 )
390 *
391  IF( ierr( 1 ).LT.0 .OR. ierr( 2 ).LT.0 ) THEN
392  IF( iam.EQ.0 )
393  $ WRITE( nout, fmt = 9997 ) 'descriptor'
394  kskip = kskip + 1
395  GO TO 30
396  END IF
397 *
398 * Check for enough memory
399 *
400  ipx = ipa + desca( lld_ )*nq + ipostpad +
401  $ iprepad
402  ipw = ipx + descx( lld_ )*nrhsq + ipostpad +
403  $ iprepad
404  worksiz = descw( lld_ )*nrhsq + ipostpad
405 *
406  ierr( 1 ) = 0
407  IF( ipw+worksiz.GT.memsiz ) THEN
408  IF( iam.EQ.0 )
409  $ WRITE( nout, fmt = 9996 ) 'Generation',
410  $ ( ipw+worksiz )*cplxsz
411  ierr( 1 ) = 1
412  END IF
413 *
414 * Check all processes for an error
415 *
416  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
417  $ 1, -1, 0 )
418 *
419  IF( ierr( 1 ).GT.0 ) THEN
420  IF( iam.EQ.0 )
421  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
422  kskip = kskip + 1
423  GO TO 30
424  END IF
425 *
426 * Generate RHS
427 *
428  IF( tpsd ) THEN
429  CALL pcmatgen( ictxt, 'No', 'No',
430  $ descw( m_ ), descw( n_ ),
431  $ descw( mb_ ), descw( nb_ ),
432  $ mem( ipw ), descw( lld_ ),
433  $ descw( rsrc_ ),
434  $ descw( csrc_ ), ibseed, 0,
435  $ mp, 0, nrhsq, myrow, mycol,
436  $ nprow, npcol )
437  ELSE
438  CALL pcmatgen( ictxt, 'No', 'No',
439  $ descw( m_ ), descw( n_ ),
440  $ descw( mb_ ), descw( nb_ ),
441  $ mem( ipw ), descw( lld_ ),
442  $ descw( rsrc_ ),
443  $ descw( csrc_ ), ibseed, 0,
444  $ np, 0, nrhsq, myrow, mycol,
445  $ nprow, npcol )
446  END IF
447 *
448  IF( check ) THEN
449  CALL pcfillpad( ictxt, mnp, nrhsq,
450  $ mem( ipx-iprepad ),
451  $ descx( lld_ ), iprepad,
452  $ ipostpad, padval )
453  IF( tpsd ) THEN
454  CALL pcfillpad( ictxt, mp, nrhsq,
455  $ mem( ipw-iprepad ),
456  $ descw( lld_ ), iprepad,
457  $ ipostpad, padval )
458  ELSE
459  CALL pcfillpad( ictxt, np, nrhsq,
460  $ mem( ipw-iprepad ),
461  $ descw( lld_ ), iprepad,
462  $ ipostpad, padval )
463  END IF
464  END IF
465 *
466  DO 10 jj = 1, nrhs
467  CALL pscnrm2( ncols, bnorm, mem( ipw ),
468  $ 1, jj, descw, 1 )
469  IF( bnorm.GT.rzero )
470  $ CALL pcsscal( ncols, rone / bnorm,
471  $ mem( ipw ), 1, jj, descw,
472  $ 1 )
473  10 CONTINUE
474 *
475  CALL pcgemm( trans, 'N', nrows, nrhs, ncols,
476  $ one, mem( ipa ), 1, 1, desca,
477  $ mem( ipw ), 1, 1, descw, zero,
478  $ mem( ipx ), 1, 1, descx )
479 *
480  IF( check ) THEN
481 *
482 * check for memory overwrite
483 *
484  CALL pcchekpad( ictxt, 'Generation', mp,
485  $ nq, mem( ipa-iprepad ),
486  $ desca( lld_ ), iprepad,
487  $ ipostpad, padval )
488  CALL pcchekpad( ictxt, 'Generation', mnp,
489  $ nrhsq, mem( ipx-iprepad ),
490  $ descx( lld_ ), iprepad,
491  $ ipostpad, padval )
492  IF( tpsd ) THEN
493  CALL pcchekpad( ictxt, 'Generation',
494  $ mp, nrhsq,
495  $ mem( ipw-iprepad ),
496  $ descw( lld_ ), iprepad,
497  $ ipostpad, padval )
498  ELSE
499  CALL pcchekpad( ictxt, 'Generation',
500  $ np, nrhsq,
501  $ mem( ipw-iprepad ),
502  $ descw( lld_ ), iprepad,
503  $ ipostpad, padval )
504  END IF
505 *
506 * Allocate space for copy of rhs
507 *
508  ipb = ipw
509 *
510  IF( tpsd ) THEN
511  CALL descinit( descb, n, nrhs, nb,
512  $ nbrhs, 0, 0, ictxt,
513  $ max( 1, np ) + imidpad,
514  $ ierr( 1 ) )
515  ELSE
516  CALL descinit( descb, m, nrhs, nb,
517  $ nbrhs, 0, 0, ictxt,
518  $ max( 1, mp ) + imidpad,
519  $ ierr( 1 ) )
520  END IF
521 *
522 * Check all processes for an error
523 *
524  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
525  $ ierr, 1, -1, 0 )
526 *
527  IF( ierr( 1 ).LT.0 ) THEN
528  IF( iam.EQ.0 )
529  $ WRITE( nout, fmt = 9997 )
530  $ 'descriptor'
531  kskip = kskip + 1
532  GO TO 30
533  END IF
534 *
535  ipw = ipb + descb( lld_ )*nrhsq +
536  $ ipostpad + iprepad
537 *
538  END IF
539 *
540 * Calculate the amount of workspace for PCGELS
541 *
542  IF( m.GE.n ) THEN
543  ltau = numroc( min(m,n), nb, mycol, 0,
544  $ npcol )
545  lwf = nb * ( mp + nq + nb )
546  lws = max( ( nb*( nb - 1 ) ) / 2,
547  $ ( mp + nrhsq ) * nb ) + nb*nb
548  ELSE
549  lcm = ilcm( nprow, npcol )
550  lcmp = lcm / nprow
551  ltau = numroc( min(m,n), nb, myrow, 0,
552  $ nprow )
553  lwf = nb * ( mp + nq + nb )
554  lws = max( ( nb*( nb - 1 ) ) / 2, ( np +
555  $ max( nq + numroc( numroc( n, nb, 0,
556  $ 0, nprow ), nb, 0, 0, lcmp ),
557  $ nrhsq ) ) * nb ) + nb*nb
558  END IF
559 *
560  lwork = ltau + max( lwf, lws )
561  worksiz = lwork + ipostpad
562 *
563 * Check for adequate memory for problem size
564 *
565  ierr( 1 ) = 0
566  IF( ipw+worksiz.GT.memsiz ) THEN
567  IF( iam.EQ.0 )
568  $ WRITE( nout, fmt = 9996 ) 'solve',
569  $ ( ipw+worksiz )*cplxsz
570  ierr( 1 ) = 1
571  END IF
572 *
573 * Check all processes for an error
574 *
575  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
576  $ 1, -1, 0 )
577 *
578  IF( ierr( 1 ).GT.0 ) THEN
579  IF( iam.EQ.0 )
580  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
581  kskip = kskip + 1
582  GO TO 30
583  END IF
584 *
585  IF( check ) THEN
586 *
587 * Make the copy of the right hand side
588 *
589  CALL pclacpy( 'All', nrows, nrhs,
590  $ mem( ipx ), 1, 1, descx,
591  $ mem( ipb ), 1, 1, descb )
592 *
593  IF( tpsd ) THEN
594  CALL pcfillpad( ictxt, np, nrhsq,
595  $ mem( ipb-iprepad ),
596  $ descb( lld_ ), iprepad,
597  $ ipostpad, padval )
598  ELSE
599  CALL pcfillpad( ictxt, mp, nrhsq,
600  $ mem( ipb-iprepad ),
601  $ descb( lld_ ), iprepad,
602  $ ipostpad, padval )
603  END IF
604  CALL pcfillpad( ictxt, lwork, 1,
605  $ mem( ipw-iprepad ),
606  $ lwork, iprepad,
607  $ ipostpad, padval )
608  END IF
609 *
610  CALL slboot( )
611  CALL blacs_barrier( ictxt, 'All' )
612  CALL sltimer( 1 )
613 *
614 * Solve the LS or overdetermined system
615 *
616  CALL pcgels( trans, m, n, nrhs, mem( ipa ),
617  $ 1, 1, desca, mem( ipx ), 1, 1,
618  $ descx, mem( ipw ), lwork, info )
619 *
620  CALL sltimer( 1 )
621 *
622  IF( check ) THEN
623 *
624 * check for memory overwrite
625 *
626  CALL pcchekpad( ictxt, 'PCGELS', mp,
627  $ nq, mem( ipa-iprepad ),
628  $ desca( lld_ ), iprepad,
629  $ ipostpad, padval )
630  CALL pcchekpad( ictxt, 'PCGELS', mnp,
631  $ nrhsq, mem( ipx-iprepad ),
632  $ descx( lld_ ), iprepad,
633  $ ipostpad, padval )
634  CALL pcchekpad( ictxt, 'PCGELS', lwork,
635  $ 1, mem( ipw-iprepad ),
636  $ lwork, iprepad,
637  $ ipostpad, padval )
638  END IF
639 *
640 * Regenerate A in place for testing and next
641 * iteration
642 *
643  CALL pcqrt13( iscale, m, n, mem( ipa ), 1, 1,
644  $ desca, anorm, iaseed,
645  $ mem( ipw ) )
646 *
647 * check the solution to rhs
648 *
649  IF( check ) THEN
650 *
651 * Am I going to call PCQRT17 ?
652 *
653  IF( ( m.GE.n .AND. ( .NOT.tpsd ) ) .OR.
654  $ ( m.LT.n .AND. tpsd ) ) THEN
655 *
656 * Call PCQRT17 first, A, X, and B remain
657 * unchanged. Solving LS system
658 *
659 * Check amount of memory for PCQRT17
660 *
661  IF( tpsd ) THEN
662  worksiz = np*nrhsq + nrhsp*mq
663  ipw2 = ipw + worksiz
664  worksiz = worksiz +
665  $ iceil( realsz*max( nq, max(
666  $ mq, nrhsq ) ), cplxsz ) +
667  $ ipostpad
668  ELSE
669  worksiz = mp*nrhsq + nrhsp*nq
670  ipw2 = ipw + worksiz
671  worksiz = worksiz +
672  $ iceil( realsz*max( nq,
673  $ nrhsq ), cplxsz ) +
674  $ ipostpad
675  END IF
676 *
677 * Check for adequate memory for problem
678 * size
679 *
680  ierr( 1 ) = 0
681  IF( ( ipw+worksiz ).GT.memsiz ) THEN
682  IF( iam.EQ.0 )
683  $ WRITE( nout, fmt = 9996 )
684  $ 'MEMORY', ( ipw+worksiz )*cplxsz
685  ierr( 1 ) = 1
686  END IF
687 *
688 * Check all processes for an error
689 *
690  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
691  $ ierr, 1, -1, 0 )
692 *
693  IF( ierr( 1 ).GT.0 ) THEN
694  IF( iam.EQ.0 )
695  $ WRITE( nout, fmt = 9997 )
696  $ 'MEMORY'
697  kskip = kskip + 1
698  GO TO 30
699  END IF
700 *
701  CALL pcfillpad( ictxt,
702  $ worksiz-ipostpad, 1,
703  $ mem( ipw-iprepad ),
704  $ worksiz-ipostpad,
705  $ iprepad, ipostpad,
706  $ padval )
707 *
708  result( 2 ) = pcqrt17( trans, 1, m, n,
709  $ nrhs,
710  $ mem( ipa ),
711  $ 1, 1, desca,
712  $ mem( ipx ), 1,
713  $ 1, descx,
714  $ mem( ipb ),
715  $ 1, 1, descb,
716  $ mem( ipw ),
717  $ mem( ipw2 ) )
718  sresid = result( 2 )
719 *
720  CALL pcchekpad( ictxt, 'PCQRT17',
721  $ mp, nq,
722  $ mem( ipa-iprepad ),
723  $ desca( lld_ ),
724  $ iprepad, ipostpad,
725  $ padval )
726  CALL pcchekpad( ictxt, 'PCQRT17',
727  $ mnp, nrhsq,
728  $ mem( ipx-iprepad ),
729  $ descx( lld_ ), iprepad,
730  $ ipostpad, padval )
731  IF( tpsd ) THEN
732  CALL pcchekpad( ictxt, 'PCQRT17',
733  $ np, nrhsq,
734  $ mem( ipb-iprepad ),
735  $ descb( lld_ ),
736  $ iprepad, ipostpad,
737  $ padval )
738  ELSE
739  CALL pcchekpad( ictxt, 'PCQRT17',
740  $ mp, nrhsq,
741  $ mem( ipb-iprepad ),
742  $ descb( lld_ ),
743  $ iprepad, ipostpad,
744  $ padval )
745  END IF
746  CALL pcchekpad( ictxt, 'PCQRT17',
747  $ worksiz-ipostpad, 1,
748  $ mem( ipw-iprepad ),
749  $ worksiz-ipostpad,
750  $ iprepad, ipostpad,
751  $ padval )
752  END IF
753 *
754 * Call PCQRT16, B will be destroyed.
755 *
756  IF( tpsd ) THEN
757  worksiz = mp + ipostpad
758  ELSE
759  worksiz = nq + ipostpad
760  END IF
761 *
762 * Check for adequate memory for problem size
763 *
764  ierr( 1 ) = 0
765  IF( ( ipw+worksiz ).GT.memsiz ) THEN
766  IF( iam.EQ.0 )
767  $ WRITE( nout, fmt = 9996 ) 'MEMORY',
768  $ ( ipw+worksiz )*cplxsz
769  ierr( 1 ) = 1
770  END IF
771 *
772 * Check all processes for an error
773 *
774  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
775  $ ierr, 1, -1, 0 )
776 *
777  IF( ierr( 1 ).GT.0 ) THEN
778  IF( iam.EQ.0 )
779  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
780  kskip = kskip + 1
781  GO TO 30
782  END IF
783 *
784  CALL pcfillpad( ictxt,
785  $ worksiz-ipostpad, 1,
786  $ mem( ipw-iprepad ),
787  $ worksiz-ipostpad,
788  $ iprepad, ipostpad,
789  $ padval )
790 *
791  CALL pcqrt16( trans, m, n, nrhs,
792  $ mem( ipa ), 1, 1, desca,
793  $ mem( ipx ), 1, 1, descx,
794  $ mem( ipb ), 1, 1, descb,
795  $ mem( ipw ), result( 1 ) )
796 *
797  CALL pcchekpad( ictxt, 'PCQRT16',
798  $ mp, nq,
799  $ mem( ipa-iprepad ),
800  $ desca( lld_ ),
801  $ iprepad, ipostpad,
802  $ padval )
803  CALL pcchekpad( ictxt, 'PCQRT16',
804  $ mnp, nrhsq,
805  $ mem( ipx-iprepad ),
806  $ descx( lld_ ), iprepad,
807  $ ipostpad, padval )
808  IF( tpsd ) THEN
809  CALL pcchekpad( ictxt, 'PCQRT16',
810  $ np, nrhsq,
811  $ mem( ipb-iprepad ),
812  $ descb( lld_ ),
813  $ iprepad, ipostpad,
814  $ padval )
815  ELSE
816  CALL pcchekpad( ictxt, 'PCQRT16',
817  $ mp, nrhsq,
818  $ mem( ipb-iprepad ),
819  $ descb( lld_ ),
820  $ iprepad, ipostpad,
821  $ padval )
822  END IF
823  CALL pcchekpad( ictxt, 'PCQRT16',
824  $ worksiz-ipostpad, 1,
825  $ mem( ipw-iprepad ),
826  $ worksiz-ipostpad,
827  $ iprepad, ipostpad,
828  $ padval )
829 *
830 * Call PCQRT14
831 *
832  IF( ( m.GE.n .AND. tpsd ) .OR.
833  $ ( m.LT.n .AND. ( .NOT.tpsd ) ) ) THEN
834 *
835  ipw = ipb
836 *
837  IF( tpsd ) THEN
838 *
839  nnrhsq = numroc( n+nrhs, nb, mycol,
840  $ 0, npcol )
841  ltau = numroc( min( m, n+nrhs ), nb,
842  $ mycol, 0, npcol )
843  lwf = nb * ( nb + mp + nnrhsq )
844  worksiz = mp * nnrhsq + ltau + lwf +
845  $ ipostpad
846 *
847  ELSE
848 *
849  mnrhsp = numroc( m+nrhs, nb, myrow,
850  $ 0, nprow )
851  ltau = numroc( min( m+nrhs, n ), nb,
852  $ myrow, 0, nprow )
853  lwf = nb * ( nb + mnrhsp + nq )
854  worksiz = mnrhsp * nq + ltau + lwf +
855  $ ipostpad
856 *
857  END IF
858 *
859 * Check for adequate memory for problem
860 * size
861 *
862  ierr( 1 ) = 0
863  IF( ( ipw+worksiz ).GT.memsiz ) THEN
864  IF( iam.EQ.0 )
865  $ WRITE( nout, fmt = 9996 )
866  $ 'MEMORY', ( ipw+worksiz )*cplxsz
867  ierr( 1 ) = 1
868  END IF
869 *
870 * Check all processes for an error
871 *
872  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
873  $ ierr, 1, -1, 0 )
874 *
875  IF( ierr( 1 ).GT.0 ) THEN
876  IF( iam.EQ.0 )
877  $ WRITE( nout, fmt = 9997 )
878  $ 'MEMORY'
879  kskip = kskip + 1
880  GO TO 30
881  END IF
882 *
883  CALL pcfillpad( ictxt,
884  $ worksiz-ipostpad, 1,
885  $ mem( ipw-iprepad ),
886  $ worksiz-ipostpad,
887  $ iprepad, ipostpad,
888  $ padval )
889 *
890 * Solve underdetermined system
891 *
892  result( 2 ) = pcqrt14( trans, m, n,
893  $ nrhs,
894  $ mem( ipa ), 1,
895  $ 1, desca,
896  $ mem( ipx ),
897  $ 1, 1, descx,
898  $ mem( ipw ) )
899  sresid = result( 2 )
900 *
901  CALL pcchekpad( ictxt, 'PCQRT14',
902  $ mp, nq,
903  $ mem( ipa-iprepad ),
904  $ desca( lld_ ),
905  $ iprepad, ipostpad,
906  $ padval )
907  CALL pcchekpad( ictxt, 'PCQRT14',
908  $ mnp, nrhsq,
909  $ mem( ipx-iprepad ),
910  $ descx( lld_ ), iprepad,
911  $ ipostpad, padval )
912  CALL pcchekpad( ictxt, 'PCQRT14',
913  $ worksiz-ipostpad, 1,
914  $ mem( ipw-iprepad ),
915  $ worksiz-ipostpad,
916  $ iprepad, ipostpad,
917  $ padval )
918  END IF
919 *
920 * Print information about the tests that
921 * did not pass the threshold.
922 *
923  passed = 'PASSED'
924  DO 20 ii = 1, 2
925  IF( ( result( ii ).GE.thresh ) .AND.
926  $ ( result( ii )-result( ii ).EQ.0.0e+0
927  $ ) ) THEN
928  IF( iam.EQ.0 )
929  $ WRITE( nout, fmt = 9986 )trans,
930  $ m, n, nrhs, nb, itype, ii,
931  $ result( ii )
932  kfail = kfail + 1
933  passed = 'FAILED'
934  ELSE
935  kpass = kpass + 1
936  END IF
937  20 CONTINUE
938 *
939  ELSE
940 *
941 * By-pass the solve check
942 *
943  kpass = kpass + 1
944  sresid = sresid - sresid
945  passed = 'BYPASS'
946 *
947  END IF
948 *
949 * Gather maximum of all CPU and WALL clock
950 * timings
951 *
952  CALL slcombine( ictxt, 'All', '>', 'W', 1, 1,
953  $ wtime )
954  CALL slcombine( ictxt, 'All', '>', 'C', 1, 1,
955  $ ctime )
956 *
957 * Print results
958 *
959  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
960  addfac = 2
961  mulfac = 6
962  IF( m.GE.n ) THEN
963 *
964 * NOPS = SOPLA( 'CGEQRF', M, N, 0, 0,
965 * NB ) + SOPLA( 'CUNMQR', M, NRHS, N,
966 * 0, NB )
967 *
968  mults = n*( ( ( 23.d0 / 6.d0 )+m+n /
969  $ 2.d0 )+ n*( m-n / 3.d0 ) ) +
970  $ n*nrhs*( 2.d0*m+2.d0-n )
971  adds = n*( ( 5.d0 / 6.d0 )+n*
972  $ ( 1.d0 / 2.d0+( m-n / 3.d0 ) ) )
973  $ + n*nrhs*( 2.d0*m+1.d0-n )
974  ELSE
975 *
976 * NOPS = SOPLA( 'CGELQF', M, N, 0, 0,
977 * NB ) + SOPLA( 'CUNMLQ', M,
978 * NRHS, N, 0, NB )
979 *
980  mults = m*( ( ( 29.d0 / 6.d0 )+2.d0*n-m
981  $ / 2.d0 )+m*( n-m / 3.d0 ) )
982  $ + n*nrhs*( 2.d0*m+2.d0-n )
983  adds = m*( ( 5.d0 / 6.d0 )+m / 2.d0+m*
984  $ ( n-m / 3.d0 ) )
985  $ + n*nrhs*( 2.d0*m+1.d0-n )
986  END IF
987  nops = addfac*adds + mulfac*mults
988 *
989 * Calculate total megaflops, for WALL and
990 * CPU time, and print output
991 *
992 * Print WALL time if machine supports it
993 *
994  IF( wtime( 1 ).GT.0.0d+0 ) THEN
995  tmflops = nops / ( wtime( 1 )*1.0d+6 )
996  ELSE
997  tmflops = 0.0d+0
998  END IF
999 *
1000  IF( wtime( 1 ).GE.0.0d+0 )
1001  $ WRITE( nout, fmt = 9993 )
1002  $ 'WALL', trans, m, n, nb, nrhs,
1003  $ nbrhs, nprow, npcol, wtime( 1 ),
1004  $ tmflops, passed
1005 *
1006 * Print CPU time if machine supports it
1007 *
1008  IF( ctime( 1 ).GT.0.0d+0 ) THEN
1009  tmflops = nops / ( ctime( 1 )*1.0d+6 )
1010  ELSE
1011  tmflops = 0.0d+0
1012  END IF
1013 *
1014  IF( ctime( 1 ).GE.0.0d+0 )
1015  $ WRITE( nout, fmt = 9993 )
1016  $ 'CPU ', trans, m, n, nb, nrhs,
1017  $ nbrhs, nprow, npcol, ctime( 1 ),
1018  $ tmflops, passed
1019  END IF
1020  30 CONTINUE
1021  40 CONTINUE
1022  50 CONTINUE
1023  60 CONTINUE
1024  70 CONTINUE
1025  80 CONTINUE
1026  CALL blacs_gridexit( ictxt )
1027  90 CONTINUE
1028 *
1029 * Print out ending messages and close output file
1030 *
1031  IF( iam.EQ.0 ) THEN
1032  ktests = kpass + kfail + kskip
1033  WRITE( nout, fmt = * )
1034  WRITE( nout, fmt = 9992 ) ktests
1035  IF( check ) THEN
1036  WRITE( nout, fmt = 9991 ) kpass
1037  WRITE( nout, fmt = 9989 ) kfail
1038  ELSE
1039  WRITE( nout, fmt = 9990 ) kpass
1040  END IF
1041  WRITE( nout, fmt = 9988 ) kskip
1042  WRITE( nout, fmt = * )
1043  WRITE( nout, fmt = * )
1044  WRITE( nout, fmt = 9987 )
1045  IF( nout.NE.6 .AND. nout.NE.0 )
1046  $ CLOSE ( nout )
1047  END IF
1048 *
1049  CALL blacs_exit( 0 )
1050 *
1051  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
1052  $ '; It should be at least 1' )
1053  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
1054  $ i4 )
1055  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
1056  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
1057  $ i11 )
1058  9995 FORMAT( 'Time TRANS M N NB NRHS NBRHS P Q ',
1059  $ 'LS Time MFLOPS CHECK' )
1060  9994 FORMAT( '---- ----- ------ ------ --- ----- ----- ----- ----- ',
1061  $ '--------- -------- ------' )
1062  9993 FORMAT( a4, 3x, a1, 3x, i6, 1x, i6, 1x, i3, 1x, i5, 1x, i5, 1x,
1063  $ i5, 1x, i5, 1x, f9.2, 1x, f8.2, 1x, a6 )
1064  9992 FORMAT( 'Finished', i6, ' tests, with the following results:' )
1065  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
1066  9990 FORMAT( i5, ' tests completed without checking.' )
1067  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
1068  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
1069  9987 FORMAT( 'END OF TESTS.' )
1070  9986 FORMAT( ' TRANS=''', a1, ''', M=', i5, ', N=', i5, ', NRHS=', i4,
1071  $ ', NB=', i4, ', type', i2, ', test(', i2, ')=', g12.5 )
1072 *
1073  stop
1074 *
1075 * End of PCLSDRIVER
1076 *
1077  END
pcqrt13
subroutine pcqrt13(SCALE, M, N, A, IA, JA, DESCA, NORMA, ISEED, WORK)
Definition: pcqrt13.f:3
pclsdriver
program pclsdriver
Definition: pclsdriver.f:1
pcgels
subroutine pcgels(TRANS, M, N, NRHS, A, IA, JA, DESCA, B, IB, JB, DESCB, WORK, LWORK, INFO)
Definition: pcgels.f:3
max
#define max(A, B)
Definition: pcgemr.c:180
pclsinfo
subroutine pclsinfo(SUMMRY, NOUT, NMAT, MVAL, LDMVAL, NVAL, LDNVAL, NNB, NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, NPROCS)
Definition: pclsinfo.f:6
ilcm
integer function ilcm(M, N)
Definition: ilcm.f:2
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
lsame
logical function lsame(CA, CB)
Definition: tools.f:1724
pcqrt17
real function pcqrt17(TRANS, IRESID, M, N, NRHS, A, IA, JA, DESCA, X, IX, JX, DESCX, B, IB, JB, DESCB, WORK, RWORK)
Definition: pcqrt17.f:5
pcchekpad
subroutine pcchekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pcchekpad.f:3
pcmatgen
subroutine pcmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: pcmatgen.f:4
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
pclacpy
subroutine pclacpy(UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, DESCB)
Definition: pclacpy.f:3
pcqrt16
subroutine pcqrt16(TRANS, M, N, NRHS, A, IA, JA, DESCA, X, IX, JX, DESCX, B, IB, JB, DESCB, RWORK, RESID)
Definition: pcqrt16.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
pclange
real function pclange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pclange.f:3
pcfillpad
subroutine pcfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pcfillpad.f:2
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267
pcqrt14
real function pcqrt14(TRANS, M, N, NRHS, A, IA, JA, DESCA, X, IX, JX, DESCX, WORK)
Definition: pcqrt14.f:3
min
#define min(A, B)
Definition: pcgemr.c:181
iceil
integer function iceil(INUM, IDENOM)
Definition: iceil.f:2