SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pslsdriver.f
Go to the documentation of this file.
1 PROGRAM pslsdriver
2*
3* -- ScaLAPACK routine (version 1.7) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* May 28, 2001
7*
8* Purpose
9* =======
10*
11* PSLSDRIVER is the main test program for the REAL
12* SCALAPACK (full rank) Least Squares routines. This test driver solves
13* full-rank least square problems.
14*
15* The program must be driven by a short data file. An annotated
16* example of a data file can be obtained by deleting the first 3
17* characters from the following 17 lines:
18* 'ScaLapack LS solve input file'
19* 'Intel iPSC/860 hypercube, gamma model.'
20* 'LS.out' output file name (if any)
21* 6 device out
22* 4 number of problems sizes
23* 55 17 31 201 values of M
24* 5 71 31 201 values of N
25* 3 number of NB's
26* 2 3 5 values of NB
27* 3 number of NRHS's
28* 2 3 5 values of NRHS
29* 2 number of NBRHS's
30* 1 2 values of NBRHS
31* 7 number of process grids (ordered P & Q)
32* 1 2 1 4 2 3 8 values of P
33* 7 2 4 1 3 2 1 values of Q
34* 3.0 threshold
35*
36* Internal Parameters
37* ===================
38*
39* TOTMEM INTEGER, default = 6200000.
40* TOTMEM is a machine-specific parameter indicating the
41* maximum amount of available memory in bytes.
42* The user should customize TOTMEM to his platform. Remember
43* to leave room in memory for the operating system, the BLACS
44* buffer, etc. For example, on a system with 8 MB of memory
45* per process (e.g., one processor on an Intel iPSC/860), the
46* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
47* code, BLACS buffer, etc). However, for PVM, we usually set
48* TOTMEM = 2000000. Some experimenting with the maximum value
49* of TOTMEM may be required.
50* INTGSZ INTEGER, default = 4 bytes.
51* REALSZ INTEGER, default = 4 bytes.
52* INTGSZ and REALSZ indicate the length in bytes on the
53* given platform for an integer and a single precision real.
54* MEM REAL array, dimension ( TOTMEM / REALSZ )
55* All arrays used by SCALAPACK routines are allocated from
56* this array and referenced by pointers. The integer IPA,
57* for example, is a pointer to the starting element of MEM for
58* the matrix A.
59*
60* =====================================================================
61*
62* .. Parameters ..
63 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
64 $ lld_, mb_, m_, nb_, n_, rsrc_
65 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
66 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
67 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
68 INTEGER memsiz, ntests, realsz, totmem
69 REAL padval
70 REAL one, zero
71 parameter( realsz = 4, totmem = 2000000,
72 $ memsiz = totmem / realsz, ntests = 20,
73 $ padval = -9923.0e+0 )
74 parameter( one = 1.0e+0, zero = 0.0e+0 )
75* ..
76* .. Local Scalars ..
77 LOGICAL check, tpsd
78 CHARACTER trans
79 CHARACTER*6 passed
80 CHARACTER*80 outfile
81 INTEGER hh, i, iam, iaseed, ibseed, ictxt, ii, imidpad,
82 $ info, ipa, ipb, ipostpad, iprepad, ipw, ipw2,
83 $ ipx, iscale, itran, itype, j, jj, k, kfail, kk,
84 $ kpass, kskip, ktests, lcm, lcmp, ltau, lwf,
85 $ lwork, lws, m, mnp, mnrhsp, mp, mq, mycol,
86 $ myrow, n, nb, nbrhs, ncols, ngrids, nmat, nnb,
87 $ nnbr, nnr, nnrhsq, nout, np, npcol, nprocs,
88 $ nprow, nrows, nq, nrhs, nrhsp, nrhsq, worksiz
89 REAL anorm, bnorm, sresid, thresh
90 DOUBLE PRECISION addfac, adds, mulfac, mults, nops, tmflops
91* ..
92* .. Local Arrays ..
93 INTEGER desca( dlen_ ), descb( dlen_ ), descw( lld_ ),
94 $ descx( dlen_ ), ierr( 2 ), mval( ntests ),
95 $ nbrval( ntests ), nbval( ntests ),
96 $ nrval( ntests ), nval( ntests ),
97 $ pval( ntests ), qval( ntests )
98 REAL mem( memsiz ), result( 2 )
99 DOUBLE PRECISION ctime( 1 ), wtime( 1 )
100* ..
101* .. External Subroutines ..
102 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
103 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
104 $ blacs_pinfo, descinit, igsum2d, pschekpad,
105 $ psfillpad, psgels, psgemm, pslacpy,
106 $ pslsinfo, psmatgen, psnrm2, psscal,
108* ..
109* .. External Functions ..
110 LOGICAL lsame
111 INTEGER iceil, ilcm, numroc
112 REAL pslange, psqrt14, psqrt17
113 EXTERNAL iceil, ilcm, lsame, numroc, pslange,
115* ..
116* .. Intrinsic Functions ..
117 INTRINSIC max, min
118* ..
119* .. Data Statements ..
120 DATA ktests, kpass, kfail, kskip / 4*0 /
121* ..
122* .. Executable Statements ..
123*
124* Get starting information
125*
126 CALL blacs_pinfo( iam, nprocs )
127*
128 iaseed = 100
129 ibseed = 200
130 CALL pslsinfo( outfile, nout, nmat, mval, ntests, nval,
131 $ ntests, nnb, nbval, ntests, nnr, nrval, ntests,
132 $ nnbr, nbrval, ntests, ngrids, pval, ntests, qval,
133 $ ntests, thresh, mem, iam, nprocs )
134 check = ( thresh.GE.0.0e+0 )
135*
136* Print headings
137*
138 IF( iam.EQ.0 ) THEN
139 WRITE( nout, fmt = * )
140 WRITE( nout, fmt = 9995 )
141 WRITE( nout, fmt = 9994 )
142 WRITE( nout, fmt = * )
143 END IF
144*
145* Loop over different process grids
146*
147 DO 90 i = 1, ngrids
148*
149 nprow = pval( i )
150 npcol = qval( i )
151*
152* Make sure grid information is correct
153*
154 ierr( 1 ) = 0
155 IF( nprow.LT.1 ) THEN
156 IF( iam.EQ.0 )
157 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
158 ierr( 1 ) = 1
159 ELSE IF( npcol.LT.1 ) THEN
160 IF( iam.EQ.0 )
161 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
162 ierr( 1 ) = 1
163 ELSE IF( nprow*npcol.GT.nprocs ) THEN
164 IF( iam.EQ.0 )
165 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
166 ierr( 1 ) = 1
167 END IF
168*
169 IF( ierr( 1 ).GT.0 ) THEN
170 IF( iam.EQ.0 )
171 $ WRITE( nout, fmt = 9997 ) 'grid'
172 kskip = kskip + 1
173 GO TO 90
174 END IF
175*
176* Define process grid
177*
178 CALL blacs_get( -1, 0, ictxt )
179 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
180 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
181*
182* Go to bottom of loop if this case doesn't use my process
183*
184 IF( ( myrow.GE.nprow ).OR.( mycol.GE.npcol ) )
185 $ GO TO 90
186*
187 DO 80 j = 1, nmat
188*
189 m = mval( j )
190 n = nval( j )
191*
192* Make sure matrix information is correct
193*
194 ierr( 1 ) = 0
195 IF( m.LT.1 ) THEN
196 IF( iam.EQ.0 )
197 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'M', m
198 ierr( 1 ) = 1
199 ELSE IF( n.LT.1 ) THEN
200 IF( iam.EQ.0 )
201 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
202 ierr( 1 ) = 1
203 END IF
204*
205* Make sure no one had error
206*
207 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
208*
209 IF( ierr( 1 ).GT.0 ) THEN
210 IF( iam.EQ.0 )
211 $ WRITE( nout, fmt = 9997 ) 'matrix'
212 kskip = kskip + 1
213 GO TO 80
214 END IF
215*
216* Loop over different blocking sizes
217*
218 DO 70 k = 1, nnb
219*
220 nb = nbval( k )
221*
222* Make sure nb is legal
223*
224 ierr( 1 ) = 0
225 IF( nb.LT.1 ) THEN
226 ierr( 1 ) = 1
227 IF( iam.EQ.0 )
228 $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
229 END IF
230*
231* Check all processes for an error
232*
233 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
234*
235 IF( ierr( 1 ).GT.0 ) THEN
236 IF( iam.EQ.0 )
237 $ WRITE( nout, fmt = 9997 ) 'NB'
238 kskip = kskip + 1
239 GO TO 70
240 END IF
241*
242* Padding constants
243*
244 mp = numroc( m, nb, myrow, 0, nprow )
245 mq = numroc( m, nb, mycol, 0, npcol )
246 np = numroc( n, nb, myrow, 0, nprow )
247 mnp = max( mp, np )
248 nq = numroc( n, nb, mycol, 0, npcol )
249*
250 IF( check ) THEN
251 iprepad = max( nb, mp )
252 imidpad = nb
253 ipostpad = max( nb, nq )
254 ELSE
255 iprepad = 0
256 imidpad = 0
257 ipostpad = 0
258 END IF
259*
260* Initialize the array descriptor for the matrix A
261*
262 CALL descinit( desca, m, n, nb, nb, 0, 0, ictxt,
263 $ max( 1, mp ) + imidpad, ierr( 1 ) )
264*
265* Check all processes for an error
266*
267 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
268*
269 IF( ierr( 1 ).LT.0 ) THEN
270 IF( iam.EQ.0 )
271 $ WRITE( nout, fmt = 9997 ) 'descriptor'
272 kskip = kskip + 1
273 GO TO 70
274 END IF
275*
276 DO 60 iscale = 1, 3
277*
278 itype = iscale
279*
280* Assign pointers into MEM for SCALAPACK arrays, A is
281* allocated starting at position MEM( IPREPAD+1 )
282*
283 ipa = iprepad + 1
284 ipx = ipa + desca( lld_ )*nq + ipostpad + iprepad
285 ipw = ipx
286*
287 worksiz = nq + ipostpad
288*
289* Check for adequate memory for problem size
290*
291 ierr( 1 ) = 0
292 IF( ( ipw+worksiz ).GT.memsiz ) THEN
293 IF( iam.EQ.0 )
294 $ WRITE( nout, fmt = 9996 ) 'MEMORY',
295 $ ( ipx+worksiz )*realsz
296 ierr( 1 ) = 1
297 END IF
298*
299* Check all processes for an error
300*
301 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
302 $ 0 )
303*
304 IF( ierr( 1 ).GT.0 ) THEN
305 IF( iam.EQ.0 )
306 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
307 kskip = kskip + 1
308 GO TO 70
309 END IF
310*
311 IF( check ) THEN
312 CALL psfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
313 $ desca( lld_ ), iprepad, ipostpad,
314 $ padval )
315 CALL psfillpad( ictxt, worksiz-ipostpad, 1,
316 $ mem( ipw-iprepad ),
317 $ worksiz-ipostpad, iprepad,
318 $ ipostpad, padval )
319 END IF
320*
321* Generate the matrix A and calculate its 1-norm
322*
323 CALL psqrt13( iscale, m, n, mem( ipa ), 1, 1,
324 $ desca, anorm, iaseed, mem( ipw ) )
325*
326 IF( check ) THEN
327 CALL pschekpad( ictxt, 'PSQRT13', mp, nq,
328 $ mem( ipa-iprepad ), desca( lld_ ),
329 $ iprepad, ipostpad, padval )
330 CALL pschekpad( ictxt, 'PSQRT13',
331 $ worksiz-ipostpad, 1,
332 $ mem( ipw-iprepad ),
333 $ worksiz-ipostpad, iprepad,
334 $ ipostpad, padval )
335 END IF
336*
337 DO 50 itran = 1, 2
338*
339 IF( itran.EQ.1 ) THEN
340 nrows = m
341 ncols = n
342 trans = 'N'
343 tpsd = .false.
344 ELSE
345 nrows = n
346 ncols = m
347 trans = 'T'
348 tpsd = .true.
349 END IF
350*
351* Loop over the different values for NRHS
352*
353 DO 40 hh = 1, nnr
354*
355 nrhs = nrval( hh )
356*
357 DO 30 kk = 1, nnbr
358*
359 nbrhs = nbrval( kk )
360*
361 nrhsp = numroc( nrhs, nbrhs, myrow, 0,
362 $ nprow )
363 nrhsq = numroc( nrhs, nbrhs, mycol, 0,
364 $ npcol )
365*
366* Define Array descriptor for rhs MAX(M,N)xNRHS
367*
368 CALL descinit( descx, max( m, n ), nrhs, nb,
369 $ nbrhs, 0, 0, ictxt,
370 $ max( 1, mnp ) + imidpad,
371 $ ierr( 1 ) )
372 IF( tpsd ) THEN
373 CALL descinit( descw, m, nrhs, nb, nbrhs,
374 $ 0, 0, ictxt, max( 1, mp ) +
375 $ imidpad, ierr( 2 ) )
376 ELSE
377 CALL descinit( descw, n, nrhs, nb, nbrhs,
378 $ 0, 0, ictxt, max( 1, np ) +
379 $ imidpad, ierr( 2 ) )
380 END IF
381*
382* Check all processes for an error
383*
384 CALL igsum2d( ictxt, 'All', ' ', 2, 1, ierr,
385 $ 2, -1, 0 )
386*
387 IF( ierr( 1 ).LT.0 .OR. ierr( 2 ).LT.0 ) THEN
388 IF( iam.EQ.0 )
389 $ WRITE( nout, fmt = 9997 ) 'descriptor'
390 kskip = kskip + 1
391 GO TO 30
392 END IF
393*
394* Check for enough memory
395*
396 ipx = ipa + desca( lld_ )*nq + ipostpad +
397 $ iprepad
398 ipw = ipx + descx( lld_ )*nrhsq + ipostpad +
399 $ iprepad
400 worksiz = descw( lld_ )*nrhsq + ipostpad
401*
402 ierr( 1 ) = 0
403 IF( ipw+worksiz.GT.memsiz ) THEN
404 IF( iam.EQ.0 )
405 $ WRITE( nout, fmt = 9996 ) 'Generation',
406 $ ( ipw+worksiz )*realsz
407 ierr( 1 ) = 1
408 END IF
409*
410* Check all processes for an error
411*
412 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
413 $ 1, -1, 0 )
414*
415 IF( ierr( 1 ).GT.0 ) THEN
416 IF( iam.EQ.0 )
417 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
418 kskip = kskip + 1
419 GO TO 30
420 END IF
421*
422* Generate RHS
423*
424 IF( tpsd ) THEN
425 CALL psmatgen( ictxt, 'No', 'No',
426 $ descw( m_ ), descw( n_ ),
427 $ descw( mb_ ), descw( nb_ ),
428 $ mem( ipw ), descw( lld_ ),
429 $ descw( rsrc_ ),
430 $ descw( csrc_ ), ibseed, 0,
431 $ mp, 0, nrhsq, myrow, mycol,
432 $ nprow, npcol )
433 ELSE
434 CALL psmatgen( ictxt, 'No', 'No',
435 $ descw( m_ ), descw( n_ ),
436 $ descw( mb_ ), descw( nb_ ),
437 $ mem( ipw ), descw( lld_ ),
438 $ descw( rsrc_ ),
439 $ descw( csrc_ ), ibseed, 0,
440 $ np, 0, nrhsq, myrow, mycol,
441 $ nprow, npcol )
442 END IF
443*
444 IF( check ) THEN
445 CALL psfillpad( ictxt, mnp, nrhsq,
446 $ mem( ipx-iprepad ),
447 $ descx( lld_ ), iprepad,
448 $ ipostpad, padval )
449 IF( tpsd ) THEN
450 CALL psfillpad( ictxt, mp, nrhsq,
451 $ mem( ipw-iprepad ),
452 $ descw( lld_ ), iprepad,
453 $ ipostpad, padval )
454 ELSE
455 CALL psfillpad( ictxt, np, nrhsq,
456 $ mem( ipw-iprepad ),
457 $ descw( lld_ ), iprepad,
458 $ ipostpad, padval )
459 END IF
460 END IF
461*
462 DO 10 jj = 1, nrhs
463 CALL psnrm2( ncols, bnorm, mem( ipw ), 1,
464 $ jj, descw, 1 )
465 IF( bnorm.GT.zero )
466 $ CALL psscal( ncols, one / bnorm,
467 $ mem( ipw ), 1, jj, descw,
468 $ 1 )
469 10 CONTINUE
470*
471 CALL psgemm( trans, 'N', nrows, nrhs, ncols,
472 $ one, mem( ipa ), 1, 1, desca,
473 $ mem( ipw ), 1, 1, descw, zero,
474 $ mem( ipx ), 1, 1, descx )
475*
476 IF( check ) THEN
477*
478* check for memory overwrite
479*
480 CALL pschekpad( ictxt, 'Generation', mp,
481 $ nq, mem( ipa-iprepad ),
482 $ desca( lld_ ), iprepad,
483 $ ipostpad, padval )
484 CALL pschekpad( ictxt, 'Generation', mnp,
485 $ nrhsq, mem( ipx-iprepad ),
486 $ descx( lld_ ), iprepad,
487 $ ipostpad, padval )
488 IF( tpsd ) THEN
489 CALL pschekpad( ictxt, 'Generation',
490 $ mp, nrhsq,
491 $ mem( ipw-iprepad ),
492 $ descw( lld_ ), iprepad,
493 $ ipostpad, padval )
494 ELSE
495 CALL pschekpad( ictxt, 'Generation',
496 $ np, nrhsq,
497 $ mem( ipw-iprepad ),
498 $ descw( lld_ ), iprepad,
499 $ ipostpad, padval )
500 END IF
501*
502* Allocate space for copy of rhs
503*
504 ipb = ipw
505*
506 IF( tpsd ) THEN
507 CALL descinit( descb, n, nrhs, nb,
508 $ nbrhs, 0, 0, ictxt,
509 $ max( 1, np ) + imidpad,
510 $ ierr( 1 ) )
511 ELSE
512 CALL descinit( descb, m, nrhs, nb,
513 $ nbrhs, 0, 0, ictxt,
514 $ max( 1, mp ) + imidpad,
515 $ ierr( 1 ) )
516 END IF
517*
518* Check all processes for an error
519*
520 CALL igsum2d( ictxt, 'All', ' ', 1, 1,
521 $ ierr, 1, -1, 0 )
522*
523 IF( ierr( 1 ).LT.0 ) THEN
524 IF( iam.EQ.0 )
525 $ WRITE( nout, fmt = 9997 )
526 $ 'descriptor'
527 kskip = kskip + 1
528 GO TO 30
529 END IF
530*
531 ipw = ipb + descb( lld_ )*nrhsq +
532 $ ipostpad + iprepad
533*
534 END IF
535*
536* Calculate the amount of workspace for PSGELS
537*
538 IF( m.GE.n ) THEN
539 ltau = numroc( min(m,n), nb, mycol, 0,
540 $ npcol )
541 lwf = nb * ( mp + nq + nb )
542 lws = max( ( nb*( nb - 1 ) ) / 2,
543 $ ( mp + nrhsq ) * nb ) + nb*nb
544 ELSE
545 lcm = ilcm( nprow, npcol )
546 lcmp = lcm / nprow
547 ltau = numroc( min(m,n), nb, myrow, 0,
548 $ nprow )
549 lwf = nb * ( mp + nq + nb )
550 lws = max( ( nb*( nb - 1 ) ) / 2, ( np +
551 $ max( nq + numroc( numroc( n, nb, 0,
552 $ 0, nprow ), nb, 0, 0, lcmp ),
553 $ nrhsq ) ) * nb ) + nb*nb
554 END IF
555*
556 lwork = ltau + max( lwf, lws )
557 worksiz = lwork + ipostpad
558*
559* Check for adequate memory for problem size
560*
561 ierr( 1 ) = 0
562 IF( ipw+worksiz.GT.memsiz ) THEN
563 IF( iam.EQ.0 )
564 $ WRITE( nout, fmt = 9996 ) 'solve',
565 $ ( ipw+worksiz )*realsz
566 ierr( 1 ) = 1
567 END IF
568*
569* Check all processes for an error
570*
571 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
572 $ 1, -1, 0 )
573*
574 IF( ierr( 1 ).GT.0 ) THEN
575 IF( iam.EQ.0 )
576 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
577 kskip = kskip + 1
578 GO TO 30
579 END IF
580*
581 IF( check ) THEN
582*
583* Make the copy of the right hand side
584*
585 CALL pslacpy( 'All', nrows, nrhs,
586 $ mem( ipx ), 1, 1, descx,
587 $ mem( ipb ), 1, 1, descb )
588*
589 IF( tpsd ) THEN
590 CALL psfillpad( ictxt, np, nrhsq,
591 $ mem( ipb-iprepad ),
592 $ descb( lld_ ), iprepad,
593 $ ipostpad, padval )
594 ELSE
595 CALL psfillpad( ictxt, mp, nrhsq,
596 $ mem( ipb-iprepad ),
597 $ descb( lld_ ), iprepad,
598 $ ipostpad, padval )
599 END IF
600 CALL psfillpad( ictxt, lwork, 1,
601 $ mem( ipw-iprepad ),
602 $ lwork, iprepad,
603 $ ipostpad, padval )
604 END IF
605*
606 CALL slboot( )
607 CALL blacs_barrier( ictxt, 'All' )
608 CALL sltimer( 1 )
609*
610* Solve the LS or overdetermined system
611*
612 CALL psgels( trans, m, n, nrhs, mem( ipa ),
613 $ 1, 1, desca, mem( ipx ), 1, 1,
614 $ descx, mem( ipw ), lwork, info )
615*
616 CALL sltimer( 1 )
617*
618 IF( check ) THEN
619*
620* check for memory overwrite
621*
622 CALL pschekpad( ictxt, 'PSGELS', mp,
623 $ nq, mem( ipa-iprepad ),
624 $ desca( lld_ ), iprepad,
625 $ ipostpad, padval )
626 CALL pschekpad( ictxt, 'PSGELS', mnp,
627 $ nrhsq, mem( ipx-iprepad ),
628 $ descx( lld_ ), iprepad,
629 $ ipostpad, padval )
630 CALL pschekpad( ictxt, 'PSGELS', lwork,
631 $ 1, mem( ipw-iprepad ),
632 $ lwork, iprepad,
633 $ ipostpad, padval )
634 END IF
635*
636* Regenerate A in place for testing and next
637* iteration
638*
639 CALL psqrt13( iscale, m, n, mem( ipa ), 1, 1,
640 $ desca, anorm, iaseed,
641 $ mem( ipw ) )
642*
643* check the solution to rhs
644*
645 IF( check ) THEN
646*
647* Am I going to call PSQRT17 ?
648*
649 IF( ( m.GE.n .AND. ( .NOT.tpsd ) ) .OR.
650 $ ( m.LT.n .AND. tpsd ) ) THEN
651*
652* Call PSQRT17 first, A, X, and B remain
653* unchanged. Solving LS system
654*
655* Check amount of memory for PSQRT17
656*
657 IF( tpsd ) THEN
658 worksiz = np*nrhsq + nrhsp*mq
659 ipw2 = ipw + worksiz
660 worksiz = worksiz +
661 $ max( nq, max( mq, nrhsq ) ) +
662 $ ipostpad
663 ELSE
664 worksiz = mp*nrhsq + nrhsp*nq
665 ipw2 = ipw + worksiz
666 worksiz = worksiz +
667 $ max( nq, nrhsq ) +
668 $ ipostpad
669 END IF
670*
671* Check for adequate memory for problem
672* size
673*
674 ierr( 1 ) = 0
675 IF( ( ipw+worksiz ).GT.memsiz ) THEN
676 IF( iam.EQ.0 )
677 $ WRITE( nout, fmt = 9996 )
678 $ 'MEMORY', ( ipw+worksiz )*realsz
679 ierr( 1 ) = 1
680 END IF
681*
682* Check all processes for an error
683*
684 CALL igsum2d( ictxt, 'All', ' ', 1, 1,
685 $ ierr, 1, -1, 0 )
686*
687 IF( ierr( 1 ).GT.0 ) THEN
688 IF( iam.EQ.0 )
689 $ WRITE( nout, fmt = 9997 )
690 $ 'MEMORY'
691 kskip = kskip + 1
692 GO TO 30
693 END IF
694*
695 CALL psfillpad( ictxt,
696 $ worksiz-ipostpad, 1,
697 $ mem( ipw-iprepad ),
698 $ worksiz-ipostpad,
699 $ iprepad, ipostpad,
700 $ padval )
701*
702 result( 2 ) = psqrt17( trans, 1, m, n,
703 $ nrhs,
704 $ mem( ipa ),
705 $ 1, 1, desca,
706 $ mem( ipx ), 1,
707 $ 1, descx,
708 $ mem( ipb ),
709 $ 1, 1, descb,
710 $ mem( ipw ),
711 $ mem( ipw2 ) )
712 sresid = result( 2 )
713*
714 CALL pschekpad( ictxt, 'PSQRT17',
715 $ mp, nq,
716 $ mem( ipa-iprepad ),
717 $ desca( lld_ ),
718 $ iprepad, ipostpad,
719 $ padval )
720 CALL pschekpad( ictxt, 'PSQRT17',
721 $ mnp, nrhsq,
722 $ mem( ipx-iprepad ),
723 $ descx( lld_ ), iprepad,
724 $ ipostpad, padval )
725 IF( tpsd ) THEN
726 CALL pschekpad( ictxt, 'PSQRT17',
727 $ np, nrhsq,
728 $ mem( ipb-iprepad ),
729 $ descb( lld_ ),
730 $ iprepad, ipostpad,
731 $ padval )
732 ELSE
733 CALL pschekpad( ictxt, 'PSQRT17',
734 $ mp, nrhsq,
735 $ mem( ipb-iprepad ),
736 $ descb( lld_ ),
737 $ iprepad, ipostpad,
738 $ padval )
739 END IF
740 CALL pschekpad( ictxt, 'PSQRT17',
741 $ worksiz-ipostpad, 1,
742 $ mem( ipw-iprepad ),
743 $ worksiz-ipostpad,
744 $ iprepad, ipostpad,
745 $ padval )
746 END IF
747*
748* Call PSQRT16, B will be destroyed.
749*
750 IF( tpsd ) THEN
751 worksiz = mp + ipostpad
752 ELSE
753 worksiz = nq + ipostpad
754 END IF
755*
756* Check for adequate memory for problem size
757*
758 ierr( 1 ) = 0
759 IF( ( ipw+worksiz ).GT.memsiz ) THEN
760 IF( iam.EQ.0 )
761 $ WRITE( nout, fmt = 9996 ) 'MEMORY',
762 $ ( ipw+worksiz )*realsz
763 ierr( 1 ) = 1
764 END IF
765*
766* Check all processes for an error
767*
768 CALL igsum2d( ictxt, 'All', ' ', 1, 1,
769 $ ierr, 1, -1, 0 )
770*
771 IF( ierr( 1 ).GT.0 ) THEN
772 IF( iam.EQ.0 )
773 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
774 kskip = kskip + 1
775 GO TO 30
776 END IF
777*
778 CALL psfillpad( ictxt,
779 $ worksiz-ipostpad, 1,
780 $ mem( ipw-iprepad ),
781 $ worksiz-ipostpad,
782 $ iprepad, ipostpad,
783 $ padval )
784*
785 CALL psqrt16( trans, m, n, nrhs,
786 $ mem( ipa ), 1, 1, desca,
787 $ mem( ipx ), 1, 1, descx,
788 $ mem( ipb ), 1, 1, descb,
789 $ mem( ipw ), result( 1 ) )
790*
791 CALL pschekpad( ictxt, 'PSQRT16',
792 $ mp, nq,
793 $ mem( ipa-iprepad ),
794 $ desca( lld_ ),
795 $ iprepad, ipostpad,
796 $ padval )
797 CALL pschekpad( ictxt, 'PSQRT16',
798 $ mnp, nrhsq,
799 $ mem( ipx-iprepad ),
800 $ descx( lld_ ), iprepad,
801 $ ipostpad, padval )
802 IF( tpsd ) THEN
803 CALL pschekpad( ictxt, 'PSQRT16',
804 $ np, nrhsq,
805 $ mem( ipb-iprepad ),
806 $ descb( lld_ ),
807 $ iprepad, ipostpad,
808 $ padval )
809 ELSE
810 CALL pschekpad( ictxt, 'PSQRT16',
811 $ mp, nrhsq,
812 $ mem( ipb-iprepad ),
813 $ descb( lld_ ),
814 $ iprepad, ipostpad,
815 $ padval )
816 END IF
817 CALL pschekpad( ictxt, 'PSQRT16',
818 $ worksiz-ipostpad, 1,
819 $ mem( ipw-iprepad ),
820 $ worksiz-ipostpad,
821 $ iprepad, ipostpad,
822 $ padval )
823*
824* Call PSQRT14
825*
826 IF( ( m.GE.n .AND. tpsd ) .OR.
827 $ ( m.LT.n .AND. ( .NOT.tpsd ) ) ) THEN
828*
829 ipw = ipb
830*
831 IF( tpsd ) THEN
832*
833 nnrhsq = numroc( n+nrhs, nb, mycol,
834 $ 0, npcol )
835 ltau = numroc( min( m, n+nrhs ), nb,
836 $ mycol, 0, npcol )
837 lwf = nb * ( nb + mp + nnrhsq )
838 worksiz = mp * nnrhsq + ltau + lwf +
839 $ ipostpad
840*
841 ELSE
842*
843 mnrhsp = numroc( m+nrhs, nb, myrow,
844 $ 0, nprow )
845 ltau = numroc( min( m+nrhs, n ), nb,
846 $ myrow, 0, nprow )
847 lwf = nb * ( nb + mnrhsp + nq )
848 worksiz = mnrhsp * nq + ltau + lwf +
849 $ ipostpad
850*
851 END IF
852*
853* Check for adequate memory for problem
854* size
855*
856 ierr( 1 ) = 0
857 IF( ( ipw+worksiz ).GT.memsiz ) THEN
858 IF( iam.EQ.0 )
859 $ WRITE( nout, fmt = 9996 )
860 $ 'MEMORY', ( ipw+worksiz )*realsz
861 ierr( 1 ) = 1
862 END IF
863*
864* Check all processes for an error
865*
866 CALL igsum2d( ictxt, 'All', ' ', 1, 1,
867 $ ierr, 1, -1, 0 )
868*
869 IF( ierr( 1 ).GT.0 ) THEN
870 IF( iam.EQ.0 )
871 $ WRITE( nout, fmt = 9997 )
872 $ 'MEMORY'
873 kskip = kskip + 1
874 GO TO 30
875 END IF
876*
877 CALL psfillpad( ictxt,
878 $ worksiz-ipostpad, 1,
879 $ mem( ipw-iprepad ),
880 $ worksiz-ipostpad,
881 $ iprepad, ipostpad,
882 $ padval )
883*
884* Solve underdetermined system
885*
886 result( 2 ) = psqrt14( trans, m, n,
887 $ nrhs,
888 $ mem( ipa ), 1,
889 $ 1, desca,
890 $ mem( ipx ),
891 $ 1, 1, descx,
892 $ mem( ipw ) )
893 sresid = result( 2 )
894*
895 CALL pschekpad( ictxt, 'PSQRT14',
896 $ mp, nq,
897 $ mem( ipa-iprepad ),
898 $ desca( lld_ ),
899 $ iprepad, ipostpad,
900 $ padval )
901 CALL pschekpad( ictxt, 'PSQRT14',
902 $ mnp, nrhsq,
903 $ mem( ipx-iprepad ),
904 $ descx( lld_ ), iprepad,
905 $ ipostpad, padval )
906 CALL pschekpad( ictxt, 'PSQRT14',
907 $ worksiz-ipostpad, 1,
908 $ mem( ipw-iprepad ),
909 $ worksiz-ipostpad,
910 $ iprepad, ipostpad,
911 $ padval )
912 END IF
913*
914* Print information about the tests that
915* did not pass the threshold.
916*
917 passed = 'PASSED'
918 DO 20 ii = 1, 2
919 IF( ( result( ii ).GE.thresh ) .AND.
920 $ ( result( ii )-result( ii ).EQ.0.0e+0
921 $ ) ) THEN
922 IF( iam.EQ.0 )
923 $ WRITE( nout, fmt = 9986 )trans,
924 $ m, n, nrhs, nb, itype, ii,
925 $ result( ii )
926 kfail = kfail + 1
927 passed = 'FAILED'
928 ELSE
929 kpass = kpass + 1
930 END IF
931 20 CONTINUE
932*
933 ELSE
934*
935* By-pass the solve check
936*
937 kpass = kpass + 1
938 sresid = sresid - sresid
939 passed = 'BYPASS'
940*
941 END IF
942*
943* Gather maximum of all CPU and WALL clock
944* timings
945*
946 CALL slcombine( ictxt, 'All', '>', 'W', 1, 1,
947 $ wtime )
948 CALL slcombine( ictxt, 'All', '>', 'C', 1, 1,
949 $ ctime )
950*
951* Print results
952*
953 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
954 addfac = 1
955 mulfac = 1
956 IF( m.GE.n ) THEN
957*
958* NOPS = SOPLA( 'SGEQRF', M, N, 0, 0,
959* NB ) + SOPLA( 'SORMQR', M, NRHS, N,
960* 0, NB )
961*
962 mults = n*( ( ( 23.d0 / 6.d0 )+m+n /
963 $ 2.d0 )+ n*( m-n / 3.d0 ) ) +
964 $ n*nrhs*( 2.d0*m+2.d0-n )
965 adds = n*( ( 5.d0 / 6.d0 )+n*
966 $ ( 1.d0 / 2.d0+( m-n / 3.d0 ) ) )
967 $ + n*nrhs*( 2.d0*m+1.d0-n )
968 ELSE
969*
970* NOPS = SOPLA( 'SGELQF', M, N, 0, 0,
971* NB ) + SOPLA( 'SORMLQ', M,
972* NRHS, N, 0, NB )
973*
974 mults = m*( ( ( 29.d0 / 6.d0 )+2.d0*n-m
975 $ / 2.d0 )+m*( n-m / 3.d0 ) )
976 $ + n*nrhs*( 2.d0*m+2.d0-n )
977 adds = m*( ( 5.d0 / 6.d0 )+m / 2.d0+m*
978 $ ( n-m / 3.d0 ) )
979 $ + n*nrhs*( 2.d0*m+1.d0-n )
980 END IF
981 nops = addfac*adds + mulfac*mults
982*
983* Calculate total megaflops, for WALL and
984* CPU time, and print output
985*
986* Print WALL time if machine supports it
987*
988 IF( wtime( 1 ).GT.0.0d+0 ) THEN
989 tmflops = nops / ( wtime( 1 )*1.0d+6 )
990 ELSE
991 tmflops = 0.0d+0
992 END IF
993*
994 IF( wtime( 1 ).GE.0.0d+0 )
995 $ WRITE( nout, fmt = 9993 )
996 $ 'WALL', trans, m, n, nb, nrhs,
997 $ nbrhs, nprow, npcol, wtime( 1 ),
998 $ tmflops, passed
999*
1000* Print CPU time if machine supports it
1001*
1002 IF( ctime( 1 ).GT.0.0d+0 ) THEN
1003 tmflops = nops / ( ctime( 1 )*1.0d+6 )
1004 ELSE
1005 tmflops = 0.0d+0
1006 END IF
1007*
1008 IF( ctime( 1 ).GE.0.0d+0 )
1009 $ WRITE( nout, fmt = 9993 )
1010 $ 'CPU ', trans, m, n, nb, nrhs,
1011 $ nbrhs, nprow, npcol, ctime( 1 ),
1012 $ tmflops, passed
1013 END IF
1014 30 CONTINUE
1015 40 CONTINUE
1016 50 CONTINUE
1017 60 CONTINUE
1018 70 CONTINUE
1019 80 CONTINUE
1020 CALL blacs_gridexit( ictxt )
1021 90 CONTINUE
1022*
1023* Print out ending messages and close output file
1024*
1025 IF( iam.EQ.0 ) THEN
1026 ktests = kpass + kfail + kskip
1027 WRITE( nout, fmt = * )
1028 WRITE( nout, fmt = 9992 ) ktests
1029 IF( check ) THEN
1030 WRITE( nout, fmt = 9991 ) kpass
1031 WRITE( nout, fmt = 9989 ) kfail
1032 ELSE
1033 WRITE( nout, fmt = 9990 ) kpass
1034 END IF
1035 WRITE( nout, fmt = 9988 ) kskip
1036 WRITE( nout, fmt = * )
1037 WRITE( nout, fmt = * )
1038 WRITE( nout, fmt = 9987 )
1039 IF( nout.NE.6 .AND. nout.NE.0 )
1040 $ CLOSE ( nout )
1041 END IF
1042*
1043 CALL blacs_exit( 0 )
1044*
1045 9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
1046 $ '; It should be at least 1' )
1047 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
1048 $ i4 )
1049 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
1050 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
1051 $ i11 )
1052 9995 FORMAT( 'Time TRANS M N NB NRHS NBRHS P Q ',
1053 $ 'LS Time MFLOPS CHECK' )
1054 9994 FORMAT( '---- ----- ------ ------ --- ----- ----- ----- ----- ',
1055 $ '--------- -------- ------' )
1056 9993 FORMAT( a4, 3x, a1, 3x, i6, 1x, i6, 1x, i3, 1x, i5, 1x, i5, 1x,
1057 $ i5, 1x, i5, 1x, f9.2, 1x, f8.2, 1x, a6 )
1058 9992 FORMAT( 'Finished', i6, ' tests, with the following results:' )
1059 9991 FORMAT( i5, ' tests completed and passed residual checks.' )
1060 9990 FORMAT( i5, ' tests completed without checking.' )
1061 9989 FORMAT( i5, ' tests completed and failed residual checks.' )
1062 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
1063 9987 FORMAT( 'END OF TESTS.' )
1064 9986 FORMAT( ' TRANS=''', a1, ''', M=', i5, ', N=', i5, ', NRHS=', i4,
1065 $ ', NB=', i4, ', type', i2, ', test(', i2, ')=', g12.5 )
1066*
1067 stop
1068*
1069* End of PSLSDRIVER
1070*
1071 END
subroutine psmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition psmatgen.f:4
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
integer function iceil(inum, idenom)
Definition iceil.f:2
integer function ilcm(m, n)
Definition ilcm.f:2
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pschekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pschekpad.f:3
subroutine psfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition psfillpad.f:2
subroutine psgels(trans, m, n, nrhs, a, ia, ja, desca, b, ib, jb, descb, work, lwork, info)
Definition psgels.f:3
subroutine pslacpy(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb)
Definition pslacpy.f:3
real function pslange(norm, m, n, a, ia, ja, desca, work)
Definition pslange.f:3
program pslsdriver
Definition pslsdriver.f:1
subroutine pslsinfo(summry, nout, nmat, mval, ldmval, nval, ldnval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
Definition pslsinfo.f:6
subroutine psqrt13(scale, m, n, a, ia, ja, desca, norma, iseed, work)
Definition psqrt13.f:3
real function psqrt14(trans, m, n, nrhs, a, ia, ja, desca, x, ix, jx, descx, work)
Definition psqrt14.f:3
subroutine psqrt16(trans, m, n, nrhs, a, ia, ja, desca, x, ix, jx, descx, b, ib, jb, descb, rwork, resid)
Definition psqrt16.f:3
real function psqrt17(trans, iresid, m, n, nrhs, a, ia, ja, desca, x, ix, jx, descx, b, ib, jb, descb, work, rwork)
Definition psqrt17.f:5
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267
logical function lsame(ca, cb)
Definition tools.f:1724