SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pdqrdriver.f
Go to the documentation of this file.
1 PROGRAM pdqrdriver
2*
3* -- ScaLAPACK testing driver (version 1.7) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* May 28, 2001
7*
8* Purpose
9* =======
10*
11* PDQRDRIVER is the main test program for the DOUBLE PRECISION
12* SCALAPACK QR factorization routines. This test driver performs a QR
13* QL, LQ, RQ, QP (QR factorization with column pivoting) or TZ
14* (complete orthogonal factorization) factorization and checks the
15* results.
16*
17* The program must be driven by a short data file. An annotated
18* example of a data file can be obtained by deleting the first 3
19* characters from the following 16 lines:
20* 'ScaLAPACK QR factorizations input file'
21* 'PVM machine'
22* 'QR.out' output file name (if any)
23* 6 device out
24* 6 number of factorizations
25* 'QR' 'QL' 'LQ' 'RQ' 'QP' 'TZ' factorization: QR, QL, LQ, RQ, QP, TZ
26* 4 number of problems sizes
27* 55 17 31 201 values of M
28* 5 71 31 201 values of N
29* 3 number of MB's and NB's
30* 4 3 5 values of MB
31* 4 7 3 values of NB
32* 7 number of process grids (ordered P & Q)
33* 1 2 1 4 2 3 8 values of P
34* 7 2 4 1 3 2 1 values of Q
35* 1.0 threshold
36*
37* Internal Parameters
38* ===================
39*
40* TOTMEM INTEGER, default = 2000000
41* TOTMEM is a machine-specific parameter indicating the
42* maximum amount of available memory in bytes.
43* The user should customize TOTMEM to his platform. Remember
44* to leave room in memory for the operating system, the BLACS
45* buffer, etc. For example, on a system with 8 MB of memory
46* per process (e.g., one processor on an Intel iPSC/860), the
47* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
48* code, BLACS buffer, etc). However, for PVM, we usually set
49* TOTMEM = 2000000. Some experimenting with the maximum value
50* of TOTMEM may be required.
51*
52* INTGSZ INTEGER, default = 4 bytes.
53* DBLESZ INTEGER, default = 8 bytes.
54* INTGSZ and DBLESZ indicate the length in bytes on the
55* given platform for an integer and a double precision real.
56* MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ )
57*
58* All arrays used by SCALAPACK routines are allocated from
59* this array and referenced by pointers. The integer IPA,
60* for example, is a pointer to the starting element of MEM for
61* the matrix A.
62*
63* =====================================================================
64*
65* .. Parameters ..
66 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
67 $ lld_, mb_, m_, nb_, n_, rsrc_
68 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
69 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
70 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
71 INTEGER dblesz, intgsz, memsiz, ntests, totmem
72 DOUBLE PRECISION padval
73 parameter( dblesz = 8, intgsz = 4, totmem = 2000000,
74 $ memsiz = totmem / dblesz, ntests = 20,
75 $ padval = -9923.0d+0 )
76* ..
77* .. Local Scalars ..
78 CHARACTER*2 fact
79 CHARACTER*6 passed
80 CHARACTER*7 rout
81 CHARACTER*8 routchk
82 CHARACTER*80 outfile
83 LOGICAL check
84 INTEGER i, iam, iaseed, ictxt, imidpad, info, ipa,
85 $ ipostpad, ippiv, iprepad, iptau, ipw, j, k,
86 $ kfail, kpass, kskip, ktests, l, lipiv, ltau,
87 $ lwork, m, maxmn, mb, minmn, mnp, mnq, mp,
88 $ mycol, myrow, n, nb, nfact, ngrids, nmat, nnb,
89 $ nout, npcol, nprocs, nprow, nq, workfct,
90 $ worksiz
91 REAL thresh
92 DOUBLE PRECISION anorm, fresid, nops, tmflops
93* ..
94* .. Arrays ..
95 CHARACTER*2 factor( ntests )
96 INTEGER desca( dlen_ ), ierr( 1 ), mbval( ntests ),
97 $ mval( ntests ), nbval( ntests ),
98 $ nval( ntests ), pval( ntests ), qval( ntests )
99 DOUBLE PRECISION ctime( 1 ), mem( memsiz ), wtime( 1 )
100* ..
101* .. External Subroutines ..
102 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
103 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
104 $ blacs_pinfo, descinit, igsum2d, pdchekpad,
111* ..
112* .. External Functions ..
113 LOGICAL lsamen
114 INTEGER iceil, numroc
115 DOUBLE PRECISION pdlange
116 EXTERNAL iceil, lsamen, numroc, pdlange
117* ..
118* .. Intrinsic Functions ..
119 INTRINSIC dble, max, min
120* ..
121* .. Data Statements ..
122 DATA ktests, kpass, kfail, kskip /4*0/
123* ..
124* .. Executable Statements ..
125*
126* Get starting information
127*
128 CALL blacs_pinfo( iam, nprocs )
129 iaseed = 100
130 CALL pdqrinfo( outfile, nout, nfact, factor, ntests, nmat, mval,
131 $ ntests, nval, ntests, nnb, mbval, ntests, nbval,
132 $ ntests, ngrids, pval, ntests, qval, ntests,
133 $ thresh, mem, iam, nprocs )
134 check = ( thresh.GE.0.0e+0 )
135*
136* Loop over the different factorization types
137*
138 DO 40 i = 1, nfact
139*
140 fact = factor( i )
141*
142* Print headings
143*
144 IF( iam.EQ.0 ) THEN
145 WRITE( nout, fmt = * )
146 IF( lsamen( 2, fact, 'QR' ) ) THEN
147 rout = 'PDGEQRF'
148 routchk = 'PDGEQRRV'
149 WRITE( nout, fmt = 9986 )
150 $ 'QR factorization tests.'
151 ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN
152 rout = 'PDGEQLF'
153 routchk = 'PDGEQLRV'
154 WRITE( nout, fmt = 9986 )
155 $ 'QL factorization tests.'
156 ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN
157 rout = 'PDGELQF'
158 routchk = 'PDGELQRV'
159 WRITE( nout, fmt = 9986 )
160 $ 'LQ factorization tests.'
161 ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN
162 rout = 'PDGERQF'
163 routchk = 'PDGERQRV'
164 WRITE( nout, fmt = 9986 )
165 $ 'RQ factorization tests.'
166 ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN
167 rout = 'PDGEQPF'
168 routchk = 'PDGEQRRV'
169 WRITE( nout, fmt = 9986 )
170 $ 'QR factorization with column pivoting tests.'
171 ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN
172 rout = 'PDTZRZF'
173 routchk = 'PDTZRZRV'
174 WRITE( nout, fmt = 9986 )
175 $ 'Complete orthogonal factorization tests.'
176 END IF
177 WRITE( nout, fmt = * )
178 WRITE( nout, fmt = 9995 )
179 WRITE( nout, fmt = 9994 )
180 WRITE( nout, fmt = * )
181 END IF
182*
183* Loop over different process grids
184*
185 DO 30 j = 1, ngrids
186*
187 nprow = pval( j )
188 npcol = qval( j )
189*
190* Make sure grid information is correct
191*
192 ierr( 1 ) = 0
193 IF( nprow.LT.1 ) THEN
194 IF( iam.EQ.0 )
195 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
196 ierr( 1 ) = 1
197 ELSE IF( npcol.LT.1 ) THEN
198 IF( iam.EQ.0 )
199 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
200 ierr( 1 ) = 1
201 ELSE IF( nprow*npcol.GT.nprocs ) THEN
202 IF( iam.EQ.0 )
203 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
204 ierr( 1 ) = 1
205 END IF
206*
207 IF( ierr( 1 ).GT.0 ) THEN
208 IF( iam.EQ.0 )
209 $ WRITE( nout, fmt = 9997 ) 'grid'
210 kskip = kskip + 1
211 GO TO 30
212 END IF
213*
214* Define process grid
215*
216 CALL blacs_get( -1, 0, ictxt )
217 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
218 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
219*
220* Go to bottom of loop if this case doesn't use my process
221*
222 IF( myrow.GE.nprow .OR. mycol.GE.npcol )
223 $ GO TO 30
224*
225 DO 20 k = 1, nmat
226*
227 m = mval( k )
228 n = nval( k )
229*
230* Make sure matrix information is correct
231*
232 ierr(1) = 0
233 IF( m.LT.1 ) THEN
234 IF( iam.EQ.0 )
235 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'M', m
236 ierr( 1 ) = 1
237 ELSE IF( n.LT.1 ) THEN
238 IF( iam.EQ.0 )
239 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
240 ierr( 1 ) = 1
241 END IF
242*
243* Make sure no one had error
244*
245 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
246*
247 IF( ierr( 1 ).GT.0 ) THEN
248 IF( iam.EQ.0 )
249 $ WRITE( nout, fmt = 9997 ) 'matrix'
250 kskip = kskip + 1
251 GO TO 20
252 END IF
253*
254* Loop over different blocking sizes
255*
256 DO 10 l = 1, nnb
257*
258 mb = mbval( l )
259 nb = nbval( l )
260*
261* Make sure mb is legal
262*
263 ierr( 1 ) = 0
264 IF( mb.LT.1 ) THEN
265 ierr( 1 ) = 1
266 IF( iam.EQ.0 )
267 $ WRITE( nout, fmt = 9999 ) 'MB', 'MB', mb
268 END IF
269*
270* Check all processes for an error
271*
272 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
273 $ 0 )
274*
275 IF( ierr( 1 ).GT.0 ) THEN
276 IF( iam.EQ.0 )
277 $ WRITE( nout, fmt = 9997 ) 'MB'
278 kskip = kskip + 1
279 GO TO 10
280 END IF
281*
282* Make sure nb is legal
283*
284 ierr( 1 ) = 0
285 IF( nb.LT.1 ) THEN
286 ierr( 1 ) = 1
287 IF( iam.EQ.0 )
288 $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
289 END IF
290*
291* Check all processes for an error
292*
293 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
294 $ 0 )
295*
296 IF( ierr( 1 ).GT.0 ) THEN
297 IF( iam.EQ.0 )
298 $ WRITE( nout, fmt = 9997 ) 'NB'
299 kskip = kskip + 1
300 GO TO 10
301 END IF
302*
303* Padding constants
304*
305 mp = numroc( m, mb, myrow, 0, nprow )
306 nq = numroc( n, nb, mycol, 0, npcol )
307 mnp = numroc( min( m, n ), mb, myrow, 0, nprow )
308 mnq = numroc( min( m, n ), nb, mycol, 0, npcol )
309 IF( check ) THEN
310 iprepad = max( mb, mp )
311 imidpad = nb
312 ipostpad = max( nb, nq )
313 ELSE
314 iprepad = 0
315 imidpad = 0
316 ipostpad = 0
317 END IF
318*
319* Initialize the array descriptor for the matrix A
320*
321 CALL descinit( desca, m, n, mb, nb, 0, 0, ictxt,
322 $ max( 1, mp ) + imidpad, ierr( 1 ) )
323*
324* Check all processes for an error
325*
326 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
327 $ 0 )
328*
329 IF( ierr( 1 ).LT.0 ) THEN
330 IF( iam.EQ.0 )
331 $ WRITE( nout, fmt = 9997 ) 'descriptor'
332 kskip = kskip + 1
333 GO TO 10
334 END IF
335*
336* Assign pointers into MEM for ScaLAPACK arrays, A is
337* allocated starting at position MEM( IPREPAD+1 )
338*
339 ipa = iprepad+1
340 iptau = ipa + desca( lld_ ) * nq + ipostpad + iprepad
341*
342 IF( lsamen( 2, fact, 'QR' ) ) THEN
343*
344 ltau = mnq
345 ipw = iptau + ltau + ipostpad + iprepad
346*
347* Figure the amount of workspace required by the QR
348* factorization
349*
350 lwork = desca( nb_ ) * ( mp + nq + desca( nb_ ) )
351 workfct = lwork + ipostpad
352 worksiz = workfct
353*
354 IF( check ) THEN
355*
356* Figure the amount of workspace required by the
357* checking routines PDLAFCHK, PDGEQRRV and
358* PDLANGE
359*
360 worksiz = lwork + mp*desca( nb_ ) + ipostpad
361*
362 END IF
363*
364 ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN
365*
366 ltau = nq
367 ipw = iptau + ltau + ipostpad + iprepad
368*
369* Figure the amount of workspace required by the QL
370* factorization
371*
372 lwork = desca( nb_ ) * ( mp + nq + desca( nb_ ) )
373 workfct = lwork + ipostpad
374 worksiz = workfct
375*
376 IF( check ) THEN
377*
378* Figure the amount of workspace required by the
379* checking routines PDLAFCHK, PDGEQLRV and
380* PDLANGE
381*
382 worksiz = lwork + mp*desca( nb_ ) + ipostpad
383*
384 END IF
385*
386 ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN
387*
388 ltau = mnp
389 ipw = iptau + ltau + ipostpad + iprepad
390*
391* Figure the amount of workspace required by the LQ
392* factorization
393*
394 lwork = desca( mb_ ) * ( mp + nq + desca( mb_ ) )
395 workfct = lwork + ipostpad
396 worksiz = workfct
397*
398 IF( check ) THEN
399*
400* Figure the amount of workspace required by the
401* checking routines PDLAFCHK, PDGELQRV and
402* PDLANGE
403*
404 worksiz = lwork +
405 $ max( mp*desca( nb_ ), nq*desca( mb_ )
406 $ ) + ipostpad
407*
408 END IF
409*
410 ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN
411*
412 ltau = mp
413 ipw = iptau + ltau + ipostpad + iprepad
414*
415* Figure the amount of workspace required by the QR
416* factorization
417*
418 lwork = desca( mb_ ) * ( mp + nq + desca( mb_ ) )
419 workfct = lwork + ipostpad
420 worksiz = workfct
421*
422 IF( check ) THEN
423*
424* Figure the amount of workspace required by the
425* checking routines PDLAFCHK, PDGERQRV and
426* PDLANGE
427*
428 worksiz = lwork +
429 $ max( mp*desca( nb_ ), nq*desca( mb_ )
430 $ ) + ipostpad
431*
432 END IF
433*
434 ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN
435*
436 ltau = mnq
437 ippiv = iptau + ltau + ipostpad + iprepad
438 lipiv = iceil( intgsz*nq, dblesz )
439 ipw = ippiv + lipiv + ipostpad + iprepad
440*
441* Figure the amount of workspace required by the
442* factorization i.e from IPW on.
443*
444 lwork = max( 3, mp + max( 1, nq ) ) + 2 * nq
445 workfct = lwork + ipostpad
446 worksiz = workfct
447*
448 IF( check ) THEN
449*
450* Figure the amount of workspace required by the
451* checking routines PDLAFCHK, PDGEQRRV,
452* PDLANGE.
453*
454 worksiz = max( worksiz - ipostpad,
455 $ desca( nb_ )*( 2*mp + nq + desca( nb_ ) ) ) +
456 $ ipostpad
457 END IF
458*
459 ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN
460*
461 ltau = mp
462 ipw = iptau + ltau + ipostpad + iprepad
463*
464* Figure the amount of workspace required by the TZ
465* factorization
466*
467 lwork = desca( mb_ ) * ( mp + nq + desca( mb_ ) )
468 workfct = lwork + ipostpad
469 worksiz = workfct
470*
471 IF( check ) THEN
472*
473* Figure the amount of workspace required by the
474* checking routines PDLAFCHK, PDTZRZRV and
475* PDLANGE
476*
477 worksiz = lwork +
478 $ max( mp*desca( nb_ ), nq*desca( mb_ )
479 $ ) + ipostpad
480*
481 END IF
482*
483 END IF
484*
485* Check for adequate memory for problem size
486*
487 ierr( 1 ) = 0
488 IF( ipw+worksiz.GT.memsiz ) THEN
489 IF( iam.EQ.0 )
490 $ WRITE( nout, fmt = 9996 )
491 $ fact // ' factorization',
492 $ ( ipw+worksiz )*dblesz
493 ierr( 1 ) = 1
494 END IF
495*
496* Check all processes for an error
497*
498 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
499 $ 0 )
500*
501 IF( ierr( 1 ).GT.0 ) THEN
502 IF( iam.EQ.0 )
503 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
504 kskip = kskip + 1
505 GO TO 10
506 END IF
507*
508* Generate the matrix A
509*
510 CALL pdmatgen( ictxt, 'N', 'N', desca( m_ ),
511 $ desca( n_ ), desca( mb_ ),
512 $ desca( nb_ ), mem( ipa ),
513 $ desca( lld_ ), desca( rsrc_ ),
514 $ desca( csrc_ ), iaseed, 0, mp, 0, nq,
515 $ myrow, mycol, nprow, npcol )
516*
517* Need the Infinity of A for checking
518*
519 IF( check ) THEN
520 CALL pdfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
521 $ desca( lld_ ), iprepad, ipostpad,
522 $ padval )
523 IF( lsamen( 2, fact, 'QP' ) ) THEN
524 CALL pdfillpad( ictxt, lipiv, 1,
525 $ mem( ippiv-iprepad ), lipiv,
526 $ iprepad, ipostpad, padval )
527 END IF
528 CALL pdfillpad( ictxt, ltau, 1,
529 $ mem( iptau-iprepad ), ltau,
530 $ iprepad, ipostpad, padval )
531 CALL pdfillpad( ictxt, worksiz-ipostpad, 1,
532 $ mem( ipw-iprepad ),
533 $ worksiz-ipostpad,
534 $ iprepad, ipostpad, padval )
535 anorm = pdlange( 'I', m, n, mem( ipa ), 1, 1,
536 $ desca, mem( ipw ) )
537 CALL pdchekpad( ictxt, 'PDLANGE', mp, nq,
538 $ mem( ipa-iprepad ), desca( lld_ ),
539 $ iprepad, ipostpad, padval )
540 CALL pdchekpad( ictxt, 'PDLANGE',
541 $ worksiz-ipostpad, 1,
542 $ mem( ipw-iprepad ),
543 $ worksiz-ipostpad, iprepad,
544 $ ipostpad, padval )
545 CALL pdfillpad( ictxt, workfct-ipostpad, 1,
546 $ mem( ipw-iprepad ),
547 $ workfct-ipostpad,
548 $ iprepad, ipostpad, padval )
549 END IF
550*
551 CALL slboot()
552 CALL blacs_barrier( ictxt, 'All' )
553*
554* Perform QR factorizations
555*
556 IF( lsamen( 2, fact, 'QR' ) ) THEN
557 CALL sltimer( 1 )
558 CALL pdgeqrf( m, n, mem( ipa ), 1, 1, desca,
559 $ mem( iptau ), mem( ipw ), lwork,
560 $ info )
561 CALL sltimer( 1 )
562 ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN
563 CALL sltimer( 1 )
564 CALL pdgeqlf( m, n, mem( ipa ), 1, 1, desca,
565 $ mem( iptau ), mem( ipw ), lwork,
566 $ info )
567 CALL sltimer( 1 )
568 ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN
569 CALL sltimer( 1 )
570 CALL pdgelqf( m, n, mem( ipa ), 1, 1, desca,
571 $ mem( iptau ), mem( ipw ), lwork,
572 $ info )
573 CALL sltimer( 1 )
574 ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN
575 CALL sltimer( 1 )
576 CALL pdgerqf( m, n, mem( ipa ), 1, 1, desca,
577 $ mem( iptau ), mem( ipw ), lwork,
578 $ info )
579 CALL sltimer( 1 )
580 ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN
581 CALL sltimer( 1 )
582 CALL pdgeqpf( m, n, mem( ipa ), 1, 1, desca,
583 $ mem( ippiv ), mem( iptau ),
584 $ mem( ipw ), lwork, info )
585 CALL sltimer( 1 )
586 ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN
587 CALL sltimer( 1 )
588 IF( n.GE.m )
589 $ CALL pdtzrzf( m, n, mem( ipa ), 1, 1, desca,
590 $ mem( iptau ), mem( ipw ), lwork,
591 $ info )
592 CALL sltimer( 1 )
593 END IF
594*
595 IF( check ) THEN
596*
597* Check for memory overwrite in factorization
598*
599 CALL pdchekpad( ictxt, rout, mp, nq,
600 $ mem( ipa-iprepad ), desca( lld_ ),
601 $ iprepad, ipostpad, padval )
602 CALL pdchekpad( ictxt, rout, ltau, 1,
603 $ mem( iptau-iprepad ), ltau,
604 $ iprepad, ipostpad, padval )
605 IF( lsamen( 2, fact, 'QP' ) ) THEN
606 CALL pdchekpad( ictxt, rout, lipiv, 1,
607 $ mem( ippiv-iprepad ), lipiv,
608 $ iprepad, ipostpad, padval )
609 END IF
610 CALL pdchekpad( ictxt, rout, workfct-ipostpad, 1,
611 $ mem( ipw-iprepad ),
612 $ workfct-ipostpad, iprepad,
613 $ ipostpad, padval )
614 CALL pdfillpad( ictxt, worksiz-ipostpad, 1,
615 $ mem( ipw-iprepad ),
616 $ worksiz-ipostpad,
617 $ iprepad, ipostpad, padval )
618*
619 IF( lsamen( 2, fact, 'QR' ) ) THEN
620*
621* Compute residual = ||A-Q*R|| / (||A||*N*eps)
622*
623 CALL pdgeqrrv( m, n, mem( ipa ), 1, 1, desca,
624 $ mem( iptau ), mem( ipw ) )
625 CALL pdlafchk( 'No', 'No', m, n, mem( ipa ), 1,
626 $ 1, desca, iaseed, anorm, fresid,
627 $ mem( ipw ) )
628 ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN
629*
630* Compute residual = ||A-Q*L|| / (||A||*N*eps)
631*
632 CALL pdgeqlrv( m, n, mem( ipa ), 1, 1, desca,
633 $ mem( iptau ), mem( ipw ) )
634 CALL pdlafchk( 'No', 'No', m, n, mem( ipa ), 1,
635 $ 1, desca, iaseed, anorm, fresid,
636 $ mem( ipw ) )
637 ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN
638*
639* Compute residual = ||A-L*Q|| / (||A||*N*eps)
640*
641 CALL pdgelqrv( m, n, mem( ipa ), 1, 1, desca,
642 $ mem( iptau ), mem( ipw ) )
643 CALL pdlafchk( 'No', 'No', m, n, mem( ipa ), 1,
644 $ 1, desca, iaseed, anorm, fresid,
645 $ mem( ipw ) )
646 ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN
647*
648* Compute residual = ||A-R*Q|| / (||A||*N*eps)
649*
650 CALL pdgerqrv( m, n, mem( ipa ), 1, 1, desca,
651 $ mem( iptau ), mem( ipw ) )
652 CALL pdlafchk( 'No', 'No', m, n, mem( ipa ), 1,
653 $ 1, desca, iaseed, anorm, fresid,
654 $ mem( ipw ) )
655 ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN
656*
657* Compute residual = ||AP-Q*R|| / (||A||*N*eps)
658*
659 CALL pdgeqrrv( m, n, mem( ipa ), 1, 1, desca,
660 $ mem( iptau ), mem( ipw ) )
661 ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN
662*
663* Compute residual = ||A-T*Z|| / (||A||*N*eps)
664*
665 IF( n.GE.m ) THEN
666 CALL pdtzrzrv( m, n, mem( ipa ), 1, 1, desca,
667 $ mem( iptau ), mem( ipw ) )
668 END IF
669 CALL pdlafchk( 'No', 'No', m, n, mem( ipa ), 1,
670 $ 1, desca, iaseed, anorm, fresid,
671 $ mem( ipw ) )
672 END IF
673*
674* Check for memory overwrite
675*
676 CALL pdchekpad( ictxt, routchk, mp, nq,
677 $ mem( ipa-iprepad ), desca( lld_ ),
678 $ iprepad, ipostpad, padval )
679 CALL pdchekpad( ictxt, routchk, ltau, 1,
680 $ mem( iptau-iprepad ), ltau,
681 $ iprepad, ipostpad, padval )
682 CALL pdchekpad( ictxt, routchk, worksiz-ipostpad,
683 $ 1, mem( ipw-iprepad ),
684 $ worksiz-ipostpad, iprepad,
685 $ ipostpad, padval )
686*
687 IF( lsamen( 2, fact, 'QP' ) ) THEN
688*
689 CALL pdqppiv( m, n, mem( ipa ), 1, 1, desca,
690 $ mem( ippiv ) )
691*
692* Check for memory overwrite
693*
694 CALL pdchekpad( ictxt, 'PDQPPIV', mp, nq,
695 $ mem( ipa-iprepad ),
696 $ desca( lld_ ),
697 $ iprepad, ipostpad, padval )
698 CALL pdchekpad( ictxt, 'PDQPPIV', lipiv, 1,
699 $ mem( ippiv-iprepad ), lipiv,
700 $ iprepad, ipostpad, padval )
701*
702 CALL pdlafchk( 'No', 'No', m, n, mem( ipa ), 1,
703 $ 1, desca, iaseed, anorm, fresid,
704 $ mem( ipw ) )
705*
706* Check for memory overwrite
707*
708 CALL pdchekpad( ictxt, 'PDLAFCHK', mp, nq,
709 $ mem( ipa-iprepad ),
710 $ desca( lld_ ),
711 $ iprepad, ipostpad, padval )
712 CALL pdchekpad( ictxt, 'PDLAFCHK',
713 $ worksiz-ipostpad, 1,
714 $ mem( ipw-iprepad ),
715 $ worksiz-ipostpad, iprepad,
716 $ ipostpad, padval )
717 END IF
718*
719* Test residual and detect NaN result
720*
721 IF( lsamen( 2, fact, 'TZ' ) .AND. n.LT.m ) THEN
722 kskip = kskip + 1
723 passed = 'BYPASS'
724 ELSE
725 IF( fresid.LE.thresh .AND.
726 $ (fresid-fresid).EQ.0.0d+0 ) THEN
727 kpass = kpass + 1
728 passed = 'PASSED'
729 ELSE
730 kfail = kfail + 1
731 passed = 'FAILED'
732 END IF
733 END IF
734*
735 ELSE
736*
737* Don't perform the checking, only timing
738*
739 kpass = kpass + 1
740 fresid = fresid - fresid
741 passed = 'BYPASS'
742*
743 END IF
744*
745* Gather maximum of all CPU and WALL clock timings
746*
747 CALL slcombine( ictxt, 'All', '>', 'W', 1, 1, wtime )
748 CALL slcombine( ictxt, 'All', '>', 'C', 1, 1, ctime )
749*
750* Print results
751*
752 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
753*
754 minmn = min( m, n )
755 maxmn = max( m, n )
756*
757 IF( lsamen( 2, fact, 'TZ' ) ) THEN
758 IF( m.GE.n ) THEN
759 nops = 0.0d+0
760 ELSE
761*
762* 5/2 ( M^2 N - M^3 ) + 5/2 N M + 1/2 M^2 for
763* complete orthogonal factorization (M <= N).
764*
765 nops = ( 5.0d+0 * (
766 $ dble( n )*( dble( m )**2 ) -
767 $ dble( m )**3 +
768 $ dble( n )*dble( m ) ) +
769 $ dble( m )**2 ) / 2.0d+0
770 END IF
771*
772 ELSE
773*
774* 2 M N^2 - 2/3 N^2 + M N + N^2 for QR type
775* factorization when M >= N.
776*
777 nops = 2.0d+0 * ( dble( minmn )**2 ) *
778 $ ( dble( maxmn )-dble( minmn ) / 3.0d+0 ) +
779 $ ( dble( maxmn )+dble( minmn ) )*dble( minmn )
780 END IF
781*
782* Print WALL time
783*
784 IF( wtime( 1 ).GT.0.0d+0 ) THEN
785 tmflops = nops / ( wtime( 1 ) * 1.0d+6 )
786 ELSE
787 tmflops = 0.0d+0
788 END IF
789 IF( wtime( 1 ).GE.0.0d+0 )
790 $ WRITE( nout, fmt = 9993 ) 'WALL', m, n, mb, nb,
791 $ nprow, npcol, wtime( 1 ), tmflops,
792 $ passed, fresid
793*
794* Print CPU time
795*
796 IF( ctime( 1 ).GT.0.0d+0 ) THEN
797 tmflops = nops / ( ctime( 1 ) * 1.0d+6 )
798 ELSE
799 tmflops = 0.0d+0
800 END IF
801 IF( ctime( 1 ).GE.0.0d+0 )
802 $ WRITE( nout, fmt = 9993 ) 'CPU ', m, n, mb, nb,
803 $ nprow, npcol, ctime( 1 ), tmflops,
804 $ passed, fresid
805*
806 END IF
807*
808 10 CONTINUE
809*
810 20 CONTINUE
811*
812 CALL blacs_gridexit( ictxt )
813*
814 30 CONTINUE
815*
816 40 CONTINUE
817*
818* Print out ending messages and close output file
819*
820 IF( iam.EQ.0 ) THEN
821 ktests = kpass + kfail + kskip
822 WRITE( nout, fmt = * )
823 WRITE( nout, fmt = 9992 ) ktests
824 IF( check ) THEN
825 WRITE( nout, fmt = 9991 ) kpass
826 WRITE( nout, fmt = 9989 ) kfail
827 ELSE
828 WRITE( nout, fmt = 9990 ) kpass
829 END IF
830 WRITE( nout, fmt = 9988 ) kskip
831 WRITE( nout, fmt = * )
832 WRITE( nout, fmt = * )
833 WRITE( nout, fmt = 9987 )
834 IF( nout.NE.6 .AND. nout.NE.0 )
835 $ CLOSE ( nout )
836 END IF
837*
838 CALL blacs_exit( 0 )
839*
840 9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
841 $ '; It should be at least 1' )
842 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
843 $ i4 )
844 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
845 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
846 $ i11 )
847 9995 FORMAT( 'TIME M N MB NB P Q Fact Time ',
848 $ ' MFLOPS CHECK Residual' )
849 9994 FORMAT( '---- ------ ------ --- --- ----- ----- --------- ',
850 $ '----------- ------ --------' )
851 9993 FORMAT( a4, 1x, i6, 1x, i6, 1x, i3, 1x, i3, 1x, i5, 1x, i5, 1x,
852 $ f9.2, 1x, f11.2, 1x, a6, 2x, g8.1 )
853 9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
854 9991 FORMAT( i5, ' tests completed and passed residual checks.' )
855 9990 FORMAT( i5, ' tests completed without checking.' )
856 9989 FORMAT( i5, ' tests completed and failed residual checks.' )
857 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
858 9987 FORMAT( 'END OF TESTS.' )
859 9986 FORMAT( a )
860*
861 stop
862*
863* End of PDQRDRIVER
864*
865 END
866*
867 SUBROUTINE pdqppiv( M, N, A, IA, JA, DESCA, IPIV )
868*
869* -- ScaLAPACK routine (version 1.7) --
870* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
871* and University of California, Berkeley.
872* May 1, 1997
873*
874* .. Scalar Arguments ..
875 INTEGER IA, JA, M, N
876* ..
877* .. Array Arguments ..
878 INTEGER DESCA( * ), IPIV( * )
879 DOUBLE PRECISION A( * )
880* ..
881*
882* Purpose
883* =======
884*
885* PDQPPIV applies to sub( A ) = A(IA:IA+M-1,JA:JA+N-1) the pivots
886* returned by PDGEQPF in reverse order for checking purposes.
887*
888* Notes
889* =====
890*
891* Each global data object is described by an associated description
892* vector. This vector stores the information required to establish
893* the mapping between an object element and its corresponding process
894* and memory location.
895*
896* Let A be a generic term for any 2D block cyclicly distributed array.
897* Such a global array has an associated description vector DESCA.
898* In the following comments, the character _ should be read as
899* "of the global array".
900*
901* NOTATION STORED IN EXPLANATION
902* --------------- -------------- --------------------------------------
903* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
904* DTYPE_A = 1.
905* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
906* the BLACS process grid A is distribu-
907* ted over. The context itself is glo-
908* bal, but the handle (the integer
909* value) may vary.
910* M_A (global) DESCA( M_ ) The number of rows in the global
911* array A.
912* N_A (global) DESCA( N_ ) The number of columns in the global
913* array A.
914* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
915* the rows of the array.
916* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
917* the columns of the array.
918* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
919* row of the array A is distributed.
920* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
921* first column of the array A is
922* distributed.
923* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
924* array. LLD_A >= MAX(1,LOCr(M_A)).
925*
926* Let K be the number of rows or columns of a distributed matrix,
927* and assume that its process grid has dimension p x q.
928* LOCr( K ) denotes the number of elements of K that a process
929* would receive if K were distributed over the p processes of its
930* process column.
931* Similarly, LOCc( K ) denotes the number of elements of K that a
932* process would receive if K were distributed over the q processes of
933* its process row.
934* The values of LOCr() and LOCc() may be determined via a call to the
935* ScaLAPACK tool function, NUMROC:
936* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
937* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
938* An upper bound for these quantities may be computed by:
939* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
940* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
941*
942* Arguments
943* =========
944*
945* M (global input) INTEGER
946* The number of rows to be operated on, i.e. the number of rows
947* of the distributed submatrix sub( A ). M >= 0.
948*
949* N (global input) INTEGER
950* The number of columns to be operated on, i.e. the number of
951* columns of the distributed submatrix sub( A ). N >= 0.
952*
953* A (local input/local output) DOUBLE PRECISION pointer into the
954* local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
955* On entry, the local pieces of the M-by-N distributed matrix
956* sub( A ) which is to be permuted. On exit, the local pieces
957* of the distributed permuted submatrix sub( A ) * Inv( P ).
958*
959* IA (global input) INTEGER
960* The row index in the global array A indicating the first
961* row of sub( A ).
962*
963* JA (global input) INTEGER
964* The column index in the global array A indicating the
965* first column of sub( A ).
966*
967* DESCA (global and local input) INTEGER array of dimension DLEN_.
968* The array descriptor for the distributed matrix A.
969*
970* IPIV (local input) INTEGER array, dimension LOCc(JA+N-1).
971* On exit, if IPIV(I) = K, the local i-th column of sub( A )*P
972* was the global K-th column of sub( A ). IPIV is tied to the
973* distributed matrix A.
974*
975* =====================================================================
976*
977* .. Parameters ..
978 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
979 $ LLD_, MB_, M_, NB_, N_, RSRC_
980 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
981 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
982 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
983* ..
984* .. Local Scalars ..
985 INTEGER IACOL, ICOFFA, ICTXT, IITMP, IPVT, IPCOL,
986 $ IPROW, ITMP, J, JJ, JJA, KK, MYCOL, MYROW,
987 $ NPCOL, NPROW, NQ
988* ..
989* .. External Subroutines ..
990 EXTERNAL blacs_gridinfo, igebr2d, igebs2d, igerv2d,
991 $ igesd2d, igamn2d, infog1l, pdswap
992* ..
993* .. External Functions ..
994 INTEGER INDXL2G, NUMROC
995 EXTERNAL indxl2g, numroc
996* ..
997* .. Intrinsic Functions ..
998 INTRINSIC min, mod
999* ..
1000* .. Executable Statements ..
1001*
1002* Get grid parameters
1003*
1004 ictxt = desca( ctxt_ )
1005 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
1006 CALL infog1l( ja, desca( nb_ ), npcol, mycol, desca( csrc_ ), jja,
1007 $ iacol )
1008 icoffa = mod( ja-1, desca( nb_ ) )
1009 nq = numroc( n+icoffa, desca( nb_ ), mycol, iacol, npcol )
1010 IF( mycol.EQ.iacol )
1011 $ nq = nq - icoffa
1012*
1013 DO 20 j = ja, ja+n-2
1014*
1015 ipvt = ja+n-1
1016 itmp = ja+n
1017*
1018* Find first the local minimum candidate for pivoting
1019*
1020 CALL infog1l( j, desca( nb_ ), npcol, mycol, desca( csrc_ ),
1021 $ jj, iacol )
1022 DO 10 kk = jj, jja+nq-1
1023 IF( ipiv( kk ).LT.ipvt )THEN
1024 iitmp = kk
1025 ipvt = ipiv( kk )
1026 END IF
1027 10 CONTINUE
1028*
1029* Find the global minimum pivot
1030*
1031 CALL igamn2d( ictxt, 'Rowwise', ' ', 1, 1, ipvt, 1, iprow,
1032 $ ipcol, 1, -1, mycol )
1033*
1034* Broadcast the corresponding index to the other process columns
1035*
1036 IF( mycol.EQ.ipcol ) THEN
1037 itmp = indxl2g( iitmp, desca( nb_ ), mycol, desca( csrc_ ),
1038 $ npcol )
1039 CALL igebs2d( ictxt, 'Rowwise', ' ', 1, 1, itmp, 1 )
1040 IF( ipcol.NE.iacol ) THEN
1041 CALL igerv2d( ictxt, 1, 1, ipiv( iitmp ), 1, myrow,
1042 $ iacol )
1043 ELSE
1044 IF( mycol.EQ.iacol )
1045 $ ipiv( iitmp ) = ipiv( jj )
1046 END IF
1047 ELSE
1048 CALL igebr2d( ictxt, 'Rowwise', ' ', 1, 1, itmp, 1, myrow,
1049 $ ipcol )
1050 IF( mycol.EQ.iacol .AND. ipcol.NE.iacol )
1051 $ CALL igesd2d( ictxt, 1, 1, ipiv( jj ), 1, myrow, ipcol )
1052 END IF
1053*
1054* Swap the columns of A
1055*
1056 CALL pdswap( m, a, ia, itmp, desca, 1, a, ia, j, desca, 1 )
1057*
1058 20 CONTINUE
1059*
1060* End of PDQPPIV
1061*
1062 END
subroutine pdlafchk(aform, diag, m, n, a, ia, ja, desca, iaseed, anorm, fresid, work)
Definition pdlafchk.f:3
subroutine pdmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pdmatgen.f:4
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
integer function iceil(inum, idenom)
Definition iceil.f:2
subroutine infog1l(gindx, nb, nprocs, myroc, isrcproc, lindx, rocsrc)
Definition infog1l.f:3
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
logical function lsamen(n, ca, cb)
Definition pblastst.f:1457
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pdchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pdchekpad.f:3
subroutine pdfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pdfillpad.f:2
subroutine pdgelqf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition pdgelqf.f:3
subroutine pdgelqrv(m, n, a, ia, ja, desca, tau, work)
Definition pdgelqrv.f:2
subroutine pdgeqlf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition pdgeqlf.f:3
subroutine pdgeqlrv(m, n, a, ia, ja, desca, tau, work)
Definition pdgeqlrv.f:2
subroutine pdgeqpf(m, n, a, ia, ja, desca, ipiv, tau, work, lwork, info)
Definition pdgeqpf.f:3
subroutine pdgeqrf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition pdgeqrf.f:3
subroutine pdgeqrrv(m, n, a, ia, ja, desca, tau, work)
Definition pdgeqrrv.f:2
subroutine pdgerqf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition pdgerqf.f:3
subroutine pdgerqrv(m, n, a, ia, ja, desca, tau, work)
Definition pdgerqrv.f:2
double precision function pdlange(norm, m, n, a, ia, ja, desca, work)
Definition pdlange.f:3
subroutine pdqppiv(m, n, a, ia, ja, desca, ipiv)
Definition pdqrdriver.f:868
program pdqrdriver
Definition pdqrdriver.f:1
subroutine pdqrinfo(summry, nout, nfact, factor, ldfact, nmat, mval, ldmval, nval, ldnval, nnb, mbval, ldmbval, nbval, ldnbval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
Definition pdqrinfo.f:6
subroutine pdtzrzf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition pdtzrzf.f:3
subroutine pdtzrzrv(m, n, a, ia, ja, desca, tau, work)
Definition pdtzrzrv.f:2
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267