SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pclsdriver.f
Go to the documentation of this file.
1 PROGRAM pclsdriver
2*
3* -- ScaLAPACK routine (version 1.7) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* August 14, 2001
7*
8* Purpose
9* =======
10*
11* PCLSDRIVER is the main test program for the COMPLEX
12* SCALAPACK (full rank) Least Squares routines. This test driver solves
13* full-rank least square problems.
14*
15* The program must be driven by a short data file. An annotated
16* example of a data file can be obtained by deleting the first 3
17* characters from the following 17 lines:
18* 'ScaLapack LS solve input file'
19* 'Intel iPSC/860 hypercube, gamma model.'
20* 'LS.out' output file name (if any)
21* 6 device out
22* 4 number of problems sizes
23* 55 17 31 201 values of M
24* 5 71 31 201 values of N
25* 3 number of NB's
26* 2 3 5 values of NB
27* 3 number of NRHS's
28* 2 3 5 values of NRHS
29* 2 number of NBRHS's
30* 1 2 values of NBRHS
31* 7 number of process grids (ordered P & Q)
32* 1 2 1 4 2 3 8 values of P
33* 7 2 4 1 3 2 1 values of Q
34* 3.0 threshold
35*
36* Internal Parameters
37* ===================
38*
39* TOTMEM INTEGER, default = 6200000.
40* TOTMEM is a machine-specific parameter indicating the
41* maximum amount of available memory in bytes.
42* The user should customize TOTMEM to his platform. Remember
43* to leave room in memory for the operating system, the BLACS
44* buffer, etc. For example, on a system with 8 MB of memory
45* per process (e.g., one processor on an Intel iPSC/860), the
46* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
47* code, BLACS buffer, etc). However, for PVM, we usually set
48* TOTMEM = 2000000. Some experimenting with the maximum value
49* of TOTMEM may be required.
50* INTGSZ INTEGER, default = 4 bytes.
51* CPLXSZ INTEGER, default = 8 bytes.
52* INTGSZ and CPLXSZ indicate the length in bytes on the
53* given platform for an integer and a single precision
54* complex.
55* MEM COMPLEX array, dimension ( TOTMEM / CPLXSZ )
56* All arrays used by SCALAPACK routines are allocated from
57* this array and referenced by pointers. The integer IPA,
58* for example, is a pointer to the starting element of MEM for
59* the matrix A.
60*
61* =====================================================================
62*
63* .. Parameters ..
64 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
65 $ lld_, mb_, m_, nb_, n_, rsrc_
66 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
67 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
68 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
69 INTEGER cplxsz, memsiz, ntests, realsz, totmem
70 REAL rzero, rone
71 COMPLEX one, padval, zero
72 parameter( cplxsz = 8, realsz = 8, totmem = 2000000,
73 $ memsiz = totmem / cplxsz, ntests = 20,
74 $ padval = ( -9923.0e+0, -9923.0e+0 ) )
75 parameter( one = ( 1.0e+0, 0.0e+0 ), rzero = 0.0e+0,
76 $ rone = 1.0e+0, zero = ( 0.0e+0, 0.0e+0 ) )
77* ..
78* .. Local Scalars ..
79 LOGICAL check, tpsd
80 CHARACTER trans
81 CHARACTER*6 passed
82 CHARACTER*80 outfile
83 INTEGER hh, i, iam, iaseed, ibseed, ictxt, ii, imidpad,
84 $ info, ipa, ipb, ipostpad, iprepad, ipw, ipw2,
85 $ ipx, iscale, itran, itype, j, jj, k, kfail, kk,
86 $ kpass, kskip, ktests, lcm, lcmp, ltau, lwf,
87 $ lwork, lws, m, mnp, mnrhsp, mp, mq, mycol,
88 $ myrow, n, nb, nbrhs, ncols, ngrids, nmat, nnb,
89 $ nnbr, nnr, nnrhsq, nout, np, npcol, nprocs,
90 $ nprow, nrows, nq, nrhs, nrhsp, nrhsq, worksiz
91 REAL anorm, bnorm, sresid, thresh
92 DOUBLE PRECISION addfac, adds, mulfac, mults, nops, tmflops
93* ..
94* .. Local Arrays ..
95 INTEGER desca( dlen_ ), descb( dlen_ ), descw( lld_ ),
96 $ descx( dlen_ ), ierr( 2 ), mval( ntests ),
97 $ nbrval( ntests ), nbval( ntests ),
98 $ nrval( ntests ), nval( ntests ),
99 $ pval( ntests ), qval( ntests )
100 REAL result( 2 )
101 DOUBLE PRECISION ctime( 1 ), wtime( 1 )
102 COMPLEX mem( memsiz )
103* ..
104* .. External Subroutines ..
105 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
106 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
107 $ blacs_pinfo, descinit, igsum2d, pcchekpad,
108 $ pcfillpad, pcgels, pcgemm, pclacpy,
109 $ pclsinfo, pcmatgen, pscnrm2,
110 $ pcsscal, pcqrt13, pcqrt16, slboot,
112* ..
113* .. External Functions ..
114 LOGICAL lsame
115 INTEGER iceil, ilcm, numroc
116 REAL pclange, pcqrt14, pcqrt17
117 EXTERNAL iceil, ilcm, lsame, numroc, pclange,
119* ..
120* .. Intrinsic Functions ..
121 INTRINSIC max, min
122* ..
123* .. Data Statements ..
124 DATA ktests, kpass, kfail, kskip / 4*0 /
125* ..
126* .. Executable Statements ..
127*
128* Get starting information
129*
130 CALL blacs_pinfo( iam, nprocs )
131*
132 iaseed = 100
133 ibseed = 200
134 CALL pclsinfo( outfile, nout, nmat, mval, ntests, nval,
135 $ ntests, nnb, nbval, ntests, nnr, nrval, ntests,
136 $ nnbr, nbrval, ntests, ngrids, pval, ntests, qval,
137 $ ntests, thresh, mem, iam, nprocs )
138 check = ( thresh.GE.0.0e+0 )
139*
140* Print headings
141*
142 IF( iam.EQ.0 ) THEN
143 WRITE( nout, fmt = * )
144 WRITE( nout, fmt = 9995 )
145 WRITE( nout, fmt = 9994 )
146 WRITE( nout, fmt = * )
147 END IF
148*
149* Loop over different process grids
150*
151 DO 90 i = 1, ngrids
152*
153 nprow = pval( i )
154 npcol = qval( i )
155*
156* Make sure grid information is correct
157*
158 ierr( 1 ) = 0
159 IF( nprow.LT.1 ) THEN
160 IF( iam.EQ.0 )
161 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
162 ierr( 1 ) = 1
163 ELSE IF( npcol.LT.1 ) THEN
164 IF( iam.EQ.0 )
165 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
166 ierr( 1 ) = 1
167 ELSE IF( nprow*npcol.GT.nprocs ) THEN
168 IF( iam.EQ.0 )
169 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
170 ierr( 1 ) = 1
171 END IF
172*
173 IF( ierr( 1 ).GT.0 ) THEN
174 IF( iam.EQ.0 )
175 $ WRITE( nout, fmt = 9997 ) 'grid'
176 kskip = kskip + 1
177 GO TO 90
178 END IF
179*
180* Define process grid
181*
182 CALL blacs_get( -1, 0, ictxt )
183 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
184 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
185*
186* Go to bottom of loop if this case doesn't use my process
187*
188 IF( ( myrow.GE.nprow ).OR.( mycol.GE.npcol ) )
189 $ GO TO 90
190*
191 DO 80 j = 1, nmat
192*
193 m = mval( j )
194 n = nval( j )
195*
196* Make sure matrix information is correct
197*
198 ierr( 1 ) = 0
199 IF( m.LT.1 ) THEN
200 IF( iam.EQ.0 )
201 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'M', m
202 ierr( 1 ) = 1
203 ELSE IF( n.LT.1 ) THEN
204 IF( iam.EQ.0 )
205 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
206 ierr( 1 ) = 1
207 END IF
208*
209* Make sure no one had error
210*
211 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
212*
213 IF( ierr( 1 ).GT.0 ) THEN
214 IF( iam.EQ.0 )
215 $ WRITE( nout, fmt = 9997 ) 'matrix'
216 kskip = kskip + 1
217 GO TO 80
218 END IF
219*
220* Loop over different blocking sizes
221*
222 DO 70 k = 1, nnb
223*
224 nb = nbval( k )
225*
226* Make sure nb is legal
227*
228 ierr( 1 ) = 0
229 IF( nb.LT.1 ) THEN
230 ierr( 1 ) = 1
231 IF( iam.EQ.0 )
232 $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
233 END IF
234*
235* Check all processes for an error
236*
237 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
238*
239 IF( ierr( 1 ).GT.0 ) THEN
240 IF( iam.EQ.0 )
241 $ WRITE( nout, fmt = 9997 ) 'NB'
242 kskip = kskip + 1
243 GO TO 70
244 END IF
245*
246* Padding constants
247*
248 mp = numroc( m, nb, myrow, 0, nprow )
249 mq = numroc( m, nb, mycol, 0, npcol )
250 np = numroc( n, nb, myrow, 0, nprow )
251 mnp = max( mp, np )
252 nq = numroc( n, nb, mycol, 0, npcol )
253*
254 IF( check ) THEN
255 iprepad = max( nb, mp )
256 imidpad = nb
257 ipostpad = max( nb, nq )
258 ELSE
259 iprepad = 0
260 imidpad = 0
261 ipostpad = 0
262 END IF
263*
264* Initialize the array descriptor for the matrix A
265*
266 CALL descinit( desca, m, n, nb, nb, 0, 0, ictxt,
267 $ max( 1, mp ) + imidpad, ierr( 1 ) )
268*
269* Check all processes for an error
270*
271 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
272*
273 IF( ierr( 1 ).LT.0 ) THEN
274 IF( iam.EQ.0 )
275 $ WRITE( nout, fmt = 9997 ) 'descriptor'
276 kskip = kskip + 1
277 GO TO 70
278 END IF
279*
280 DO 60 iscale = 1, 3
281*
282 itype = iscale
283*
284* Assign pointers into MEM for SCALAPACK arrays, A is
285* allocated starting at position MEM( IPREPAD+1 )
286*
287 ipa = iprepad + 1
288 ipx = ipa + desca( lld_ )*nq + ipostpad + iprepad
289 ipw = ipx
290*
291 worksiz = nq + ipostpad
292*
293* Check for adequate memory for problem size
294*
295 ierr( 1 ) = 0
296 IF( ( ipw+worksiz ).GT.memsiz ) THEN
297 IF( iam.EQ.0 )
298 $ WRITE( nout, fmt = 9996 ) 'MEMORY',
299 $ ( ipx+worksiz )*cplxsz
300 ierr( 1 ) = 1
301 END IF
302*
303* Check all processes for an error
304*
305 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
306 $ 0 )
307*
308 IF( ierr( 1 ).GT.0 ) THEN
309 IF( iam.EQ.0 )
310 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
311 kskip = kskip + 1
312 GO TO 70
313 END IF
314*
315 IF( check ) THEN
316 CALL pcfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
317 $ desca( lld_ ), iprepad, ipostpad,
318 $ padval )
319 CALL pcfillpad( ictxt, worksiz-ipostpad, 1,
320 $ mem( ipw-iprepad ),
321 $ worksiz-ipostpad, iprepad,
322 $ ipostpad, padval )
323 END IF
324*
325* Generate the matrix A and calculate its 1-norm
326*
327 CALL pcqrt13( iscale, m, n, mem( ipa ), 1, 1,
328 $ desca, anorm, iaseed, mem( ipw ) )
329*
330 IF( check ) THEN
331 CALL pcchekpad( ictxt, 'PCQRT13', mp, nq,
332 $ mem( ipa-iprepad ), desca( lld_ ),
333 $ iprepad, ipostpad, padval )
334 CALL pcchekpad( ictxt, 'PCQRT13',
335 $ worksiz-ipostpad, 1,
336 $ mem( ipw-iprepad ),
337 $ worksiz-ipostpad, iprepad,
338 $ ipostpad, padval )
339 END IF
340*
341 DO 50 itran = 1, 2
342*
343 IF( itran.EQ.1 ) THEN
344 nrows = m
345 ncols = n
346 trans = 'N'
347 tpsd = .false.
348 ELSE
349 nrows = n
350 ncols = m
351 trans = 'C'
352 tpsd = .true.
353 END IF
354*
355* Loop over the different values for NRHS
356*
357 DO 40 hh = 1, nnr
358*
359 nrhs = nrval( hh )
360*
361 DO 30 kk = 1, nnbr
362*
363 nbrhs = nbrval( kk )
364*
365 nrhsp = numroc( nrhs, nbrhs, myrow, 0,
366 $ nprow )
367 nrhsq = numroc( nrhs, nbrhs, mycol, 0,
368 $ npcol )
369*
370* Define Array descriptor for rhs MAX(M,N)xNRHS
371*
372 CALL descinit( descx, max( m, n ), nrhs, nb,
373 $ nbrhs, 0, 0, ictxt,
374 $ max( 1, mnp ) + imidpad,
375 $ ierr( 1 ) )
376 IF( tpsd ) THEN
377 CALL descinit( descw, m, nrhs, nb, nbrhs,
378 $ 0, 0, ictxt, max( 1, mp ) +
379 $ imidpad, ierr( 2 ) )
380 ELSE
381 CALL descinit( descw, n, nrhs, nb, nbrhs,
382 $ 0, 0, ictxt, max( 1, np ) +
383 $ imidpad, ierr( 2 ) )
384 END IF
385*
386* Check all processes for an error
387*
388 CALL igsum2d( ictxt, 'All', ' ', 2, 1, ierr,
389 $ 2, -1, 0 )
390*
391 IF( ierr( 1 ).LT.0 .OR. ierr( 2 ).LT.0 ) THEN
392 IF( iam.EQ.0 )
393 $ WRITE( nout, fmt = 9997 ) 'descriptor'
394 kskip = kskip + 1
395 GO TO 30
396 END IF
397*
398* Check for enough memory
399*
400 ipx = ipa + desca( lld_ )*nq + ipostpad +
401 $ iprepad
402 ipw = ipx + descx( lld_ )*nrhsq + ipostpad +
403 $ iprepad
404 worksiz = descw( lld_ )*nrhsq + ipostpad
405*
406 ierr( 1 ) = 0
407 IF( ipw+worksiz.GT.memsiz ) THEN
408 IF( iam.EQ.0 )
409 $ WRITE( nout, fmt = 9996 ) 'Generation',
410 $ ( ipw+worksiz )*cplxsz
411 ierr( 1 ) = 1
412 END IF
413*
414* Check all processes for an error
415*
416 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
417 $ 1, -1, 0 )
418*
419 IF( ierr( 1 ).GT.0 ) THEN
420 IF( iam.EQ.0 )
421 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
422 kskip = kskip + 1
423 GO TO 30
424 END IF
425*
426* Generate RHS
427*
428 IF( tpsd ) THEN
429 CALL pcmatgen( ictxt, 'No', 'No',
430 $ descw( m_ ), descw( n_ ),
431 $ descw( mb_ ), descw( nb_ ),
432 $ mem( ipw ), descw( lld_ ),
433 $ descw( rsrc_ ),
434 $ descw( csrc_ ), ibseed, 0,
435 $ mp, 0, nrhsq, myrow, mycol,
436 $ nprow, npcol )
437 ELSE
438 CALL pcmatgen( ictxt, 'No', 'No',
439 $ descw( m_ ), descw( n_ ),
440 $ descw( mb_ ), descw( nb_ ),
441 $ mem( ipw ), descw( lld_ ),
442 $ descw( rsrc_ ),
443 $ descw( csrc_ ), ibseed, 0,
444 $ np, 0, nrhsq, myrow, mycol,
445 $ nprow, npcol )
446 END IF
447*
448 IF( check ) THEN
449 CALL pcfillpad( ictxt, mnp, nrhsq,
450 $ mem( ipx-iprepad ),
451 $ descx( lld_ ), iprepad,
452 $ ipostpad, padval )
453 IF( tpsd ) THEN
454 CALL pcfillpad( ictxt, mp, nrhsq,
455 $ mem( ipw-iprepad ),
456 $ descw( lld_ ), iprepad,
457 $ ipostpad, padval )
458 ELSE
459 CALL pcfillpad( ictxt, np, nrhsq,
460 $ mem( ipw-iprepad ),
461 $ descw( lld_ ), iprepad,
462 $ ipostpad, padval )
463 END IF
464 END IF
465*
466 DO 10 jj = 1, nrhs
467 CALL pscnrm2( ncols, bnorm, mem( ipw ),
468 $ 1, jj, descw, 1 )
469 IF( bnorm.GT.rzero )
470 $ CALL pcsscal( ncols, rone / bnorm,
471 $ mem( ipw ), 1, jj, descw,
472 $ 1 )
473 10 CONTINUE
474*
475 CALL pcgemm( trans, 'N', nrows, nrhs, ncols,
476 $ one, mem( ipa ), 1, 1, desca,
477 $ mem( ipw ), 1, 1, descw, zero,
478 $ mem( ipx ), 1, 1, descx )
479*
480 IF( check ) THEN
481*
482* check for memory overwrite
483*
484 CALL pcchekpad( ictxt, 'Generation', mp,
485 $ nq, mem( ipa-iprepad ),
486 $ desca( lld_ ), iprepad,
487 $ ipostpad, padval )
488 CALL pcchekpad( ictxt, 'Generation', mnp,
489 $ nrhsq, mem( ipx-iprepad ),
490 $ descx( lld_ ), iprepad,
491 $ ipostpad, padval )
492 IF( tpsd ) THEN
493 CALL pcchekpad( ictxt, 'Generation',
494 $ mp, nrhsq,
495 $ mem( ipw-iprepad ),
496 $ descw( lld_ ), iprepad,
497 $ ipostpad, padval )
498 ELSE
499 CALL pcchekpad( ictxt, 'Generation',
500 $ np, nrhsq,
501 $ mem( ipw-iprepad ),
502 $ descw( lld_ ), iprepad,
503 $ ipostpad, padval )
504 END IF
505*
506* Allocate space for copy of rhs
507*
508 ipb = ipw
509*
510 IF( tpsd ) THEN
511 CALL descinit( descb, n, nrhs, nb,
512 $ nbrhs, 0, 0, ictxt,
513 $ max( 1, np ) + imidpad,
514 $ ierr( 1 ) )
515 ELSE
516 CALL descinit( descb, m, nrhs, nb,
517 $ nbrhs, 0, 0, ictxt,
518 $ max( 1, mp ) + imidpad,
519 $ ierr( 1 ) )
520 END IF
521*
522* Check all processes for an error
523*
524 CALL igsum2d( ictxt, 'All', ' ', 1, 1,
525 $ ierr, 1, -1, 0 )
526*
527 IF( ierr( 1 ).LT.0 ) THEN
528 IF( iam.EQ.0 )
529 $ WRITE( nout, fmt = 9997 )
530 $ 'descriptor'
531 kskip = kskip + 1
532 GO TO 30
533 END IF
534*
535 ipw = ipb + descb( lld_ )*nrhsq +
536 $ ipostpad + iprepad
537*
538 END IF
539*
540* Calculate the amount of workspace for PCGELS
541*
542 IF( m.GE.n ) THEN
543 ltau = numroc( min(m,n), nb, mycol, 0,
544 $ npcol )
545 lwf = nb * ( mp + nq + nb )
546 lws = max( ( nb*( nb - 1 ) ) / 2,
547 $ ( mp + nrhsq ) * nb ) + nb*nb
548 ELSE
549 lcm = ilcm( nprow, npcol )
550 lcmp = lcm / nprow
551 ltau = numroc( min(m,n), nb, myrow, 0,
552 $ nprow )
553 lwf = nb * ( mp + nq + nb )
554 lws = max( ( nb*( nb - 1 ) ) / 2, ( np +
555 $ max( nq + numroc( numroc( n, nb, 0,
556 $ 0, nprow ), nb, 0, 0, lcmp ),
557 $ nrhsq ) ) * nb ) + nb*nb
558 END IF
559*
560 lwork = ltau + max( lwf, lws )
561 worksiz = lwork + ipostpad
562*
563* Check for adequate memory for problem size
564*
565 ierr( 1 ) = 0
566 IF( ipw+worksiz.GT.memsiz ) THEN
567 IF( iam.EQ.0 )
568 $ WRITE( nout, fmt = 9996 ) 'solve',
569 $ ( ipw+worksiz )*cplxsz
570 ierr( 1 ) = 1
571 END IF
572*
573* Check all processes for an error
574*
575 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
576 $ 1, -1, 0 )
577*
578 IF( ierr( 1 ).GT.0 ) THEN
579 IF( iam.EQ.0 )
580 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
581 kskip = kskip + 1
582 GO TO 30
583 END IF
584*
585 IF( check ) THEN
586*
587* Make the copy of the right hand side
588*
589 CALL pclacpy( 'All', nrows, nrhs,
590 $ mem( ipx ), 1, 1, descx,
591 $ mem( ipb ), 1, 1, descb )
592*
593 IF( tpsd ) THEN
594 CALL pcfillpad( ictxt, np, nrhsq,
595 $ mem( ipb-iprepad ),
596 $ descb( lld_ ), iprepad,
597 $ ipostpad, padval )
598 ELSE
599 CALL pcfillpad( ictxt, mp, nrhsq,
600 $ mem( ipb-iprepad ),
601 $ descb( lld_ ), iprepad,
602 $ ipostpad, padval )
603 END IF
604 CALL pcfillpad( ictxt, lwork, 1,
605 $ mem( ipw-iprepad ),
606 $ lwork, iprepad,
607 $ ipostpad, padval )
608 END IF
609*
610 CALL slboot( )
611 CALL blacs_barrier( ictxt, 'All' )
612 CALL sltimer( 1 )
613*
614* Solve the LS or overdetermined system
615*
616 CALL pcgels( trans, m, n, nrhs, mem( ipa ),
617 $ 1, 1, desca, mem( ipx ), 1, 1,
618 $ descx, mem( ipw ), lwork, info )
619*
620 CALL sltimer( 1 )
621*
622 IF( check ) THEN
623*
624* check for memory overwrite
625*
626 CALL pcchekpad( ictxt, 'PCGELS', mp,
627 $ nq, mem( ipa-iprepad ),
628 $ desca( lld_ ), iprepad,
629 $ ipostpad, padval )
630 CALL pcchekpad( ictxt, 'PCGELS', mnp,
631 $ nrhsq, mem( ipx-iprepad ),
632 $ descx( lld_ ), iprepad,
633 $ ipostpad, padval )
634 CALL pcchekpad( ictxt, 'PCGELS', lwork,
635 $ 1, mem( ipw-iprepad ),
636 $ lwork, iprepad,
637 $ ipostpad, padval )
638 END IF
639*
640* Regenerate A in place for testing and next
641* iteration
642*
643 CALL pcqrt13( iscale, m, n, mem( ipa ), 1, 1,
644 $ desca, anorm, iaseed,
645 $ mem( ipw ) )
646*
647* check the solution to rhs
648*
649 IF( check ) THEN
650*
651* Am I going to call PCQRT17 ?
652*
653 IF( ( m.GE.n .AND. ( .NOT.tpsd ) ) .OR.
654 $ ( m.LT.n .AND. tpsd ) ) THEN
655*
656* Call PCQRT17 first, A, X, and B remain
657* unchanged. Solving LS system
658*
659* Check amount of memory for PCQRT17
660*
661 IF( tpsd ) THEN
662 worksiz = np*nrhsq + nrhsp*mq
663 ipw2 = ipw + worksiz
664 worksiz = worksiz +
665 $ iceil( realsz*max( nq, max(
666 $ mq, nrhsq ) ), cplxsz ) +
667 $ ipostpad
668 ELSE
669 worksiz = mp*nrhsq + nrhsp*nq
670 ipw2 = ipw + worksiz
671 worksiz = worksiz +
672 $ iceil( realsz*max( nq,
673 $ nrhsq ), cplxsz ) +
674 $ ipostpad
675 END IF
676*
677* Check for adequate memory for problem
678* size
679*
680 ierr( 1 ) = 0
681 IF( ( ipw+worksiz ).GT.memsiz ) THEN
682 IF( iam.EQ.0 )
683 $ WRITE( nout, fmt = 9996 )
684 $ 'MEMORY', ( ipw+worksiz )*cplxsz
685 ierr( 1 ) = 1
686 END IF
687*
688* Check all processes for an error
689*
690 CALL igsum2d( ictxt, 'All', ' ', 1, 1,
691 $ ierr, 1, -1, 0 )
692*
693 IF( ierr( 1 ).GT.0 ) THEN
694 IF( iam.EQ.0 )
695 $ WRITE( nout, fmt = 9997 )
696 $ 'MEMORY'
697 kskip = kskip + 1
698 GO TO 30
699 END IF
700*
701 CALL pcfillpad( ictxt,
702 $ worksiz-ipostpad, 1,
703 $ mem( ipw-iprepad ),
704 $ worksiz-ipostpad,
705 $ iprepad, ipostpad,
706 $ padval )
707*
708 result( 2 ) = pcqrt17( trans, 1, m, n,
709 $ nrhs,
710 $ mem( ipa ),
711 $ 1, 1, desca,
712 $ mem( ipx ), 1,
713 $ 1, descx,
714 $ mem( ipb ),
715 $ 1, 1, descb,
716 $ mem( ipw ),
717 $ mem( ipw2 ) )
718 sresid = result( 2 )
719*
720 CALL pcchekpad( ictxt, 'PCQRT17',
721 $ mp, nq,
722 $ mem( ipa-iprepad ),
723 $ desca( lld_ ),
724 $ iprepad, ipostpad,
725 $ padval )
726 CALL pcchekpad( ictxt, 'PCQRT17',
727 $ mnp, nrhsq,
728 $ mem( ipx-iprepad ),
729 $ descx( lld_ ), iprepad,
730 $ ipostpad, padval )
731 IF( tpsd ) THEN
732 CALL pcchekpad( ictxt, 'PCQRT17',
733 $ np, nrhsq,
734 $ mem( ipb-iprepad ),
735 $ descb( lld_ ),
736 $ iprepad, ipostpad,
737 $ padval )
738 ELSE
739 CALL pcchekpad( ictxt, 'PCQRT17',
740 $ mp, nrhsq,
741 $ mem( ipb-iprepad ),
742 $ descb( lld_ ),
743 $ iprepad, ipostpad,
744 $ padval )
745 END IF
746 CALL pcchekpad( ictxt, 'PCQRT17',
747 $ worksiz-ipostpad, 1,
748 $ mem( ipw-iprepad ),
749 $ worksiz-ipostpad,
750 $ iprepad, ipostpad,
751 $ padval )
752 END IF
753*
754* Call PCQRT16, B will be destroyed.
755*
756 IF( tpsd ) THEN
757 worksiz = mp + ipostpad
758 ELSE
759 worksiz = nq + ipostpad
760 END IF
761*
762* Check for adequate memory for problem size
763*
764 ierr( 1 ) = 0
765 IF( ( ipw+worksiz ).GT.memsiz ) THEN
766 IF( iam.EQ.0 )
767 $ WRITE( nout, fmt = 9996 ) 'MEMORY',
768 $ ( ipw+worksiz )*cplxsz
769 ierr( 1 ) = 1
770 END IF
771*
772* Check all processes for an error
773*
774 CALL igsum2d( ictxt, 'All', ' ', 1, 1,
775 $ ierr, 1, -1, 0 )
776*
777 IF( ierr( 1 ).GT.0 ) THEN
778 IF( iam.EQ.0 )
779 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
780 kskip = kskip + 1
781 GO TO 30
782 END IF
783*
784 CALL pcfillpad( ictxt,
785 $ worksiz-ipostpad, 1,
786 $ mem( ipw-iprepad ),
787 $ worksiz-ipostpad,
788 $ iprepad, ipostpad,
789 $ padval )
790*
791 CALL pcqrt16( trans, m, n, nrhs,
792 $ mem( ipa ), 1, 1, desca,
793 $ mem( ipx ), 1, 1, descx,
794 $ mem( ipb ), 1, 1, descb,
795 $ mem( ipw ), result( 1 ) )
796*
797 CALL pcchekpad( ictxt, 'PCQRT16',
798 $ mp, nq,
799 $ mem( ipa-iprepad ),
800 $ desca( lld_ ),
801 $ iprepad, ipostpad,
802 $ padval )
803 CALL pcchekpad( ictxt, 'PCQRT16',
804 $ mnp, nrhsq,
805 $ mem( ipx-iprepad ),
806 $ descx( lld_ ), iprepad,
807 $ ipostpad, padval )
808 IF( tpsd ) THEN
809 CALL pcchekpad( ictxt, 'PCQRT16',
810 $ np, nrhsq,
811 $ mem( ipb-iprepad ),
812 $ descb( lld_ ),
813 $ iprepad, ipostpad,
814 $ padval )
815 ELSE
816 CALL pcchekpad( ictxt, 'PCQRT16',
817 $ mp, nrhsq,
818 $ mem( ipb-iprepad ),
819 $ descb( lld_ ),
820 $ iprepad, ipostpad,
821 $ padval )
822 END IF
823 CALL pcchekpad( ictxt, 'PCQRT16',
824 $ worksiz-ipostpad, 1,
825 $ mem( ipw-iprepad ),
826 $ worksiz-ipostpad,
827 $ iprepad, ipostpad,
828 $ padval )
829*
830* Call PCQRT14
831*
832 IF( ( m.GE.n .AND. tpsd ) .OR.
833 $ ( m.LT.n .AND. ( .NOT.tpsd ) ) ) THEN
834*
835 ipw = ipb
836*
837 IF( tpsd ) THEN
838*
839 nnrhsq = numroc( n+nrhs, nb, mycol,
840 $ 0, npcol )
841 ltau = numroc( min( m, n+nrhs ), nb,
842 $ mycol, 0, npcol )
843 lwf = nb * ( nb + mp + nnrhsq )
844 worksiz = mp * nnrhsq + ltau + lwf +
845 $ ipostpad
846*
847 ELSE
848*
849 mnrhsp = numroc( m+nrhs, nb, myrow,
850 $ 0, nprow )
851 ltau = numroc( min( m+nrhs, n ), nb,
852 $ myrow, 0, nprow )
853 lwf = nb * ( nb + mnrhsp + nq )
854 worksiz = mnrhsp * nq + ltau + lwf +
855 $ ipostpad
856*
857 END IF
858*
859* Check for adequate memory for problem
860* size
861*
862 ierr( 1 ) = 0
863 IF( ( ipw+worksiz ).GT.memsiz ) THEN
864 IF( iam.EQ.0 )
865 $ WRITE( nout, fmt = 9996 )
866 $ 'MEMORY', ( ipw+worksiz )*cplxsz
867 ierr( 1 ) = 1
868 END IF
869*
870* Check all processes for an error
871*
872 CALL igsum2d( ictxt, 'All', ' ', 1, 1,
873 $ ierr, 1, -1, 0 )
874*
875 IF( ierr( 1 ).GT.0 ) THEN
876 IF( iam.EQ.0 )
877 $ WRITE( nout, fmt = 9997 )
878 $ 'MEMORY'
879 kskip = kskip + 1
880 GO TO 30
881 END IF
882*
883 CALL pcfillpad( ictxt,
884 $ worksiz-ipostpad, 1,
885 $ mem( ipw-iprepad ),
886 $ worksiz-ipostpad,
887 $ iprepad, ipostpad,
888 $ padval )
889*
890* Solve underdetermined system
891*
892 result( 2 ) = pcqrt14( trans, m, n,
893 $ nrhs,
894 $ mem( ipa ), 1,
895 $ 1, desca,
896 $ mem( ipx ),
897 $ 1, 1, descx,
898 $ mem( ipw ) )
899 sresid = result( 2 )
900*
901 CALL pcchekpad( ictxt, 'PCQRT14',
902 $ mp, nq,
903 $ mem( ipa-iprepad ),
904 $ desca( lld_ ),
905 $ iprepad, ipostpad,
906 $ padval )
907 CALL pcchekpad( ictxt, 'PCQRT14',
908 $ mnp, nrhsq,
909 $ mem( ipx-iprepad ),
910 $ descx( lld_ ), iprepad,
911 $ ipostpad, padval )
912 CALL pcchekpad( ictxt, 'PCQRT14',
913 $ worksiz-ipostpad, 1,
914 $ mem( ipw-iprepad ),
915 $ worksiz-ipostpad,
916 $ iprepad, ipostpad,
917 $ padval )
918 END IF
919*
920* Print information about the tests that
921* did not pass the threshold.
922*
923 passed = 'PASSED'
924 DO 20 ii = 1, 2
925 IF( ( result( ii ).GE.thresh ) .AND.
926 $ ( result( ii )-result( ii ).EQ.0.0e+0
927 $ ) ) THEN
928 IF( iam.EQ.0 )
929 $ WRITE( nout, fmt = 9986 )trans,
930 $ m, n, nrhs, nb, itype, ii,
931 $ result( ii )
932 kfail = kfail + 1
933 passed = 'FAILED'
934 ELSE
935 kpass = kpass + 1
936 END IF
937 20 CONTINUE
938*
939 ELSE
940*
941* By-pass the solve check
942*
943 kpass = kpass + 1
944 sresid = sresid - sresid
945 passed = 'BYPASS'
946*
947 END IF
948*
949* Gather maximum of all CPU and WALL clock
950* timings
951*
952 CALL slcombine( ictxt, 'All', '>', 'W', 1, 1,
953 $ wtime )
954 CALL slcombine( ictxt, 'All', '>', 'C', 1, 1,
955 $ ctime )
956*
957* Print results
958*
959 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
960 addfac = 2
961 mulfac = 6
962 IF( m.GE.n ) THEN
963*
964* NOPS = SOPLA( 'CGEQRF', M, N, 0, 0,
965* NB ) + SOPLA( 'CUNMQR', M, NRHS, N,
966* 0, NB )
967*
968 mults = n*( ( ( 23.d0 / 6.d0 )+m+n /
969 $ 2.d0 )+ n*( m-n / 3.d0 ) ) +
970 $ n*nrhs*( 2.d0*m+2.d0-n )
971 adds = n*( ( 5.d0 / 6.d0 )+n*
972 $ ( 1.d0 / 2.d0+( m-n / 3.d0 ) ) )
973 $ + n*nrhs*( 2.d0*m+1.d0-n )
974 ELSE
975*
976* NOPS = SOPLA( 'CGELQF', M, N, 0, 0,
977* NB ) + SOPLA( 'CUNMLQ', M,
978* NRHS, N, 0, NB )
979*
980 mults = m*( ( ( 29.d0 / 6.d0 )+2.d0*n-m
981 $ / 2.d0 )+m*( n-m / 3.d0 ) )
982 $ + n*nrhs*( 2.d0*m+2.d0-n )
983 adds = m*( ( 5.d0 / 6.d0 )+m / 2.d0+m*
984 $ ( n-m / 3.d0 ) )
985 $ + n*nrhs*( 2.d0*m+1.d0-n )
986 END IF
987 nops = addfac*adds + mulfac*mults
988*
989* Calculate total megaflops, for WALL and
990* CPU time, and print output
991*
992* Print WALL time if machine supports it
993*
994 IF( wtime( 1 ).GT.0.0d+0 ) THEN
995 tmflops = nops / ( wtime( 1 )*1.0d+6 )
996 ELSE
997 tmflops = 0.0d+0
998 END IF
999*
1000 IF( wtime( 1 ).GE.0.0d+0 )
1001 $ WRITE( nout, fmt = 9993 )
1002 $ 'WALL', trans, m, n, nb, nrhs,
1003 $ nbrhs, nprow, npcol, wtime( 1 ),
1004 $ tmflops, passed
1005*
1006* Print CPU time if machine supports it
1007*
1008 IF( ctime( 1 ).GT.0.0d+0 ) THEN
1009 tmflops = nops / ( ctime( 1 )*1.0d+6 )
1010 ELSE
1011 tmflops = 0.0d+0
1012 END IF
1013*
1014 IF( ctime( 1 ).GE.0.0d+0 )
1015 $ WRITE( nout, fmt = 9993 )
1016 $ 'CPU ', trans, m, n, nb, nrhs,
1017 $ nbrhs, nprow, npcol, ctime( 1 ),
1018 $ tmflops, passed
1019 END IF
1020 30 CONTINUE
1021 40 CONTINUE
1022 50 CONTINUE
1023 60 CONTINUE
1024 70 CONTINUE
1025 80 CONTINUE
1026 CALL blacs_gridexit( ictxt )
1027 90 CONTINUE
1028*
1029* Print out ending messages and close output file
1030*
1031 IF( iam.EQ.0 ) THEN
1032 ktests = kpass + kfail + kskip
1033 WRITE( nout, fmt = * )
1034 WRITE( nout, fmt = 9992 ) ktests
1035 IF( check ) THEN
1036 WRITE( nout, fmt = 9991 ) kpass
1037 WRITE( nout, fmt = 9989 ) kfail
1038 ELSE
1039 WRITE( nout, fmt = 9990 ) kpass
1040 END IF
1041 WRITE( nout, fmt = 9988 ) kskip
1042 WRITE( nout, fmt = * )
1043 WRITE( nout, fmt = * )
1044 WRITE( nout, fmt = 9987 )
1045 IF( nout.NE.6 .AND. nout.NE.0 )
1046 $ CLOSE ( nout )
1047 END IF
1048*
1049 CALL blacs_exit( 0 )
1050*
1051 9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
1052 $ '; It should be at least 1' )
1053 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
1054 $ i4 )
1055 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
1056 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
1057 $ i11 )
1058 9995 FORMAT( 'Time TRANS M N NB NRHS NBRHS P Q ',
1059 $ 'LS Time MFLOPS CHECK' )
1060 9994 FORMAT( '---- ----- ------ ------ --- ----- ----- ----- ----- ',
1061 $ '--------- -------- ------' )
1062 9993 FORMAT( a4, 3x, a1, 3x, i6, 1x, i6, 1x, i3, 1x, i5, 1x, i5, 1x,
1063 $ i5, 1x, i5, 1x, f9.2, 1x, f8.2, 1x, a6 )
1064 9992 FORMAT( 'Finished', i6, ' tests, with the following results:' )
1065 9991 FORMAT( i5, ' tests completed and passed residual checks.' )
1066 9990 FORMAT( i5, ' tests completed without checking.' )
1067 9989 FORMAT( i5, ' tests completed and failed residual checks.' )
1068 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
1069 9987 FORMAT( 'END OF TESTS.' )
1070 9986 FORMAT( ' TRANS=''', a1, ''', M=', i5, ', N=', i5, ', NRHS=', i4,
1071 $ ', NB=', i4, ', type', i2, ', test(', i2, ')=', g12.5 )
1072*
1073 stop
1074*
1075* End of PCLSDRIVER
1076*
1077 END
subroutine pcmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pcmatgen.f:4
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
integer function iceil(inum, idenom)
Definition iceil.f:2
integer function ilcm(m, n)
Definition ilcm.f:2
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
subroutine pcchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pcchekpad.f:3
subroutine pcfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pcfillpad.f:2
subroutine pcgels(trans, m, n, nrhs, a, ia, ja, desca, b, ib, jb, descb, work, lwork, info)
Definition pcgels.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pclacpy(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb)
Definition pclacpy.f:3
real function pclange(norm, m, n, a, ia, ja, desca, work)
Definition pclange.f:3
program pclsdriver
Definition pclsdriver.f:1
subroutine pclsinfo(summry, nout, nmat, mval, ldmval, nval, ldnval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
Definition pclsinfo.f:6
subroutine pcqrt13(scale, m, n, a, ia, ja, desca, norma, iseed, work)
Definition pcqrt13.f:3
real function pcqrt14(trans, m, n, nrhs, a, ia, ja, desca, x, ix, jx, descx, work)
Definition pcqrt14.f:3
subroutine pcqrt16(trans, m, n, nrhs, a, ia, ja, desca, x, ix, jx, descx, b, ib, jb, descb, rwork, resid)
Definition pcqrt16.f:3
real function pcqrt17(trans, iresid, m, n, nrhs, a, ia, ja, desca, x, ix, jx, descx, b, ib, jb, descb, work, rwork)
Definition pcqrt17.f:5
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267
logical function lsame(ca, cb)
Definition tools.f:1724