ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdqrdriver.f
Go to the documentation of this file.
1  PROGRAM pdqrdriver
2 *
3 * -- ScaLAPACK testing driver (version 1.7) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * May 28, 2001
7 *
8 * Purpose
9 * =======
10 *
11 * PDQRDRIVER is the main test program for the DOUBLE PRECISION
12 * SCALAPACK QR factorization routines. This test driver performs a QR
13 * QL, LQ, RQ, QP (QR factorization with column pivoting) or TZ
14 * (complete orthogonal factorization) factorization and checks the
15 * results.
16 *
17 * The program must be driven by a short data file. An annotated
18 * example of a data file can be obtained by deleting the first 3
19 * characters from the following 16 lines:
20 * 'ScaLAPACK QR factorizations input file'
21 * 'PVM machine'
22 * 'QR.out' output file name (if any)
23 * 6 device out
24 * 6 number of factorizations
25 * 'QR' 'QL' 'LQ' 'RQ' 'QP' 'TZ' factorization: QR, QL, LQ, RQ, QP, TZ
26 * 4 number of problems sizes
27 * 55 17 31 201 values of M
28 * 5 71 31 201 values of N
29 * 3 number of MB's and NB's
30 * 4 3 5 values of MB
31 * 4 7 3 values of NB
32 * 7 number of process grids (ordered P & Q)
33 * 1 2 1 4 2 3 8 values of P
34 * 7 2 4 1 3 2 1 values of Q
35 * 1.0 threshold
36 *
37 * Internal Parameters
38 * ===================
39 *
40 * TOTMEM INTEGER, default = 2000000
41 * TOTMEM is a machine-specific parameter indicating the
42 * maximum amount of available memory in bytes.
43 * The user should customize TOTMEM to his platform. Remember
44 * to leave room in memory for the operating system, the BLACS
45 * buffer, etc. For example, on a system with 8 MB of memory
46 * per process (e.g., one processor on an Intel iPSC/860), the
47 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
48 * code, BLACS buffer, etc). However, for PVM, we usually set
49 * TOTMEM = 2000000. Some experimenting with the maximum value
50 * of TOTMEM may be required.
51 *
52 * INTGSZ INTEGER, default = 4 bytes.
53 * DBLESZ INTEGER, default = 8 bytes.
54 * INTGSZ and DBLESZ indicate the length in bytes on the
55 * given platform for an integer and a double precision real.
56 * MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ )
57 *
58 * All arrays used by SCALAPACK routines are allocated from
59 * this array and referenced by pointers. The integer IPA,
60 * for example, is a pointer to the starting element of MEM for
61 * the matrix A.
62 *
63 * =====================================================================
64 *
65 * .. Parameters ..
66  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
67  $ lld_, mb_, m_, nb_, n_, rsrc_
68  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
69  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
70  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
71  INTEGER dblesz, intgsz, memsiz, ntests, totmem
72  DOUBLE PRECISION padval
73  parameter( dblesz = 8, intgsz = 4, totmem = 2000000,
74  $ memsiz = totmem / dblesz, ntests = 20,
75  $ padval = -9923.0d+0 )
76 * ..
77 * .. Local Scalars ..
78  CHARACTER*2 fact
79  CHARACTER*6 passed
80  CHARACTER*7 rout
81  CHARACTER*8 routchk
82  CHARACTER*80 outfile
83  LOGICAL check
84  INTEGER i, iam, iaseed, ictxt, imidpad, info, ipa,
85  $ ipostpad, ippiv, iprepad, iptau, ipw, j, k,
86  $ kfail, kpass, kskip, ktests, l, lipiv, ltau,
87  $ lwork, m, maxmn, mb, minmn, mnp, mnq, mp,
88  $ mycol, myrow, n, nb, nfact, ngrids, nmat, nnb,
89  $ nout, npcol, nprocs, nprow, nq, workfct,
90  $ worksiz
91  REAL thresh
92  DOUBLE PRECISION anorm, fresid, nops, tmflops
93 * ..
94 * .. Arrays ..
95  CHARACTER*2 factor( ntests )
96  INTEGER desca( dlen_ ), ierr( 1 ), mbval( ntests ),
97  $ mval( ntests ), nbval( ntests ),
98  $ nval( ntests ), pval( ntests ), qval( ntests )
99  DOUBLE PRECISION ctime( 1 ), mem( memsiz ), wtime( 1 )
100 * ..
101 * .. External Subroutines ..
102  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
103  $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
104  $ blacs_pinfo, descinit, igsum2d, pdchekpad,
111 * ..
112 * .. External Functions ..
113  LOGICAL lsamen
114  INTEGER iceil, numroc
115  DOUBLE PRECISION pdlange
116  EXTERNAL iceil, lsamen, numroc, pdlange
117 * ..
118 * .. Intrinsic Functions ..
119  INTRINSIC dble, max, min
120 * ..
121 * .. Data Statements ..
122  DATA ktests, kpass, kfail, kskip /4*0/
123 * ..
124 * .. Executable Statements ..
125 *
126 * Get starting information
127 *
128  CALL blacs_pinfo( iam, nprocs )
129  iaseed = 100
130  CALL pdqrinfo( outfile, nout, nfact, factor, ntests, nmat, mval,
131  $ ntests, nval, ntests, nnb, mbval, ntests, nbval,
132  $ ntests, ngrids, pval, ntests, qval, ntests,
133  $ thresh, mem, iam, nprocs )
134  check = ( thresh.GE.0.0e+0 )
135 *
136 * Loop over the different factorization types
137 *
138  DO 40 i = 1, nfact
139 *
140  fact = factor( i )
141 *
142 * Print headings
143 *
144  IF( iam.EQ.0 ) THEN
145  WRITE( nout, fmt = * )
146  IF( lsamen( 2, fact, 'QR' ) ) THEN
147  rout = 'PDGEQRF'
148  routchk = 'PDGEQRRV'
149  WRITE( nout, fmt = 9986 )
150  $ 'QR factorization tests.'
151  ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN
152  rout = 'PDGEQLF'
153  routchk = 'PDGEQLRV'
154  WRITE( nout, fmt = 9986 )
155  $ 'QL factorization tests.'
156  ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN
157  rout = 'PDGELQF'
158  routchk = 'PDGELQRV'
159  WRITE( nout, fmt = 9986 )
160  $ 'LQ factorization tests.'
161  ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN
162  rout = 'PDGERQF'
163  routchk = 'PDGERQRV'
164  WRITE( nout, fmt = 9986 )
165  $ 'RQ factorization tests.'
166  ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN
167  rout = 'PDGEQPF'
168  routchk = 'PDGEQRRV'
169  WRITE( nout, fmt = 9986 )
170  $ 'QR factorization with column pivoting tests.'
171  ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN
172  rout = 'PDTZRZF'
173  routchk = 'PDTZRZRV'
174  WRITE( nout, fmt = 9986 )
175  $ 'Complete orthogonal factorization tests.'
176  END IF
177  WRITE( nout, fmt = * )
178  WRITE( nout, fmt = 9995 )
179  WRITE( nout, fmt = 9994 )
180  WRITE( nout, fmt = * )
181  END IF
182 *
183 * Loop over different process grids
184 *
185  DO 30 j = 1, ngrids
186 *
187  nprow = pval( j )
188  npcol = qval( j )
189 *
190 * Make sure grid information is correct
191 *
192  ierr( 1 ) = 0
193  IF( nprow.LT.1 ) THEN
194  IF( iam.EQ.0 )
195  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
196  ierr( 1 ) = 1
197  ELSE IF( npcol.LT.1 ) THEN
198  IF( iam.EQ.0 )
199  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
200  ierr( 1 ) = 1
201  ELSE IF( nprow*npcol.GT.nprocs ) THEN
202  IF( iam.EQ.0 )
203  $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
204  ierr( 1 ) = 1
205  END IF
206 *
207  IF( ierr( 1 ).GT.0 ) THEN
208  IF( iam.EQ.0 )
209  $ WRITE( nout, fmt = 9997 ) 'grid'
210  kskip = kskip + 1
211  GO TO 30
212  END IF
213 *
214 * Define process grid
215 *
216  CALL blacs_get( -1, 0, ictxt )
217  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
218  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
219 *
220 * Go to bottom of loop if this case doesn't use my process
221 *
222  IF( myrow.GE.nprow .OR. mycol.GE.npcol )
223  $ GO TO 30
224 *
225  DO 20 k = 1, nmat
226 *
227  m = mval( k )
228  n = nval( k )
229 *
230 * Make sure matrix information is correct
231 *
232  ierr(1) = 0
233  IF( m.LT.1 ) THEN
234  IF( iam.EQ.0 )
235  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'M', m
236  ierr( 1 ) = 1
237  ELSE IF( n.LT.1 ) THEN
238  IF( iam.EQ.0 )
239  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
240  ierr( 1 ) = 1
241  END IF
242 *
243 * Make sure no one had error
244 *
245  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
246 *
247  IF( ierr( 1 ).GT.0 ) THEN
248  IF( iam.EQ.0 )
249  $ WRITE( nout, fmt = 9997 ) 'matrix'
250  kskip = kskip + 1
251  GO TO 20
252  END IF
253 *
254 * Loop over different blocking sizes
255 *
256  DO 10 l = 1, nnb
257 *
258  mb = mbval( l )
259  nb = nbval( l )
260 *
261 * Make sure mb is legal
262 *
263  ierr( 1 ) = 0
264  IF( mb.LT.1 ) THEN
265  ierr( 1 ) = 1
266  IF( iam.EQ.0 )
267  $ WRITE( nout, fmt = 9999 ) 'MB', 'MB', mb
268  END IF
269 *
270 * Check all processes for an error
271 *
272  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
273  $ 0 )
274 *
275  IF( ierr( 1 ).GT.0 ) THEN
276  IF( iam.EQ.0 )
277  $ WRITE( nout, fmt = 9997 ) 'MB'
278  kskip = kskip + 1
279  GO TO 10
280  END IF
281 *
282 * Make sure nb is legal
283 *
284  ierr( 1 ) = 0
285  IF( nb.LT.1 ) THEN
286  ierr( 1 ) = 1
287  IF( iam.EQ.0 )
288  $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
289  END IF
290 *
291 * Check all processes for an error
292 *
293  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
294  $ 0 )
295 *
296  IF( ierr( 1 ).GT.0 ) THEN
297  IF( iam.EQ.0 )
298  $ WRITE( nout, fmt = 9997 ) 'NB'
299  kskip = kskip + 1
300  GO TO 10
301  END IF
302 *
303 * Padding constants
304 *
305  mp = numroc( m, mb, myrow, 0, nprow )
306  nq = numroc( n, nb, mycol, 0, npcol )
307  mnp = numroc( min( m, n ), mb, myrow, 0, nprow )
308  mnq = numroc( min( m, n ), nb, mycol, 0, npcol )
309  IF( check ) THEN
310  iprepad = max( mb, mp )
311  imidpad = nb
312  ipostpad = max( nb, nq )
313  ELSE
314  iprepad = 0
315  imidpad = 0
316  ipostpad = 0
317  END IF
318 *
319 * Initialize the array descriptor for the matrix A
320 *
321  CALL descinit( desca, m, n, mb, nb, 0, 0, ictxt,
322  $ max( 1, mp ) + imidpad, ierr( 1 ) )
323 *
324 * Check all processes for an error
325 *
326  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
327  $ 0 )
328 *
329  IF( ierr( 1 ).LT.0 ) THEN
330  IF( iam.EQ.0 )
331  $ WRITE( nout, fmt = 9997 ) 'descriptor'
332  kskip = kskip + 1
333  GO TO 10
334  END IF
335 *
336 * Assign pointers into MEM for ScaLAPACK arrays, A is
337 * allocated starting at position MEM( IPREPAD+1 )
338 *
339  ipa = iprepad+1
340  iptau = ipa + desca( lld_ ) * nq + ipostpad + iprepad
341 *
342  IF( lsamen( 2, fact, 'QR' ) ) THEN
343 *
344  ltau = mnq
345  ipw = iptau + ltau + ipostpad + iprepad
346 *
347 * Figure the amount of workspace required by the QR
348 * factorization
349 *
350  lwork = desca( nb_ ) * ( mp + nq + desca( nb_ ) )
351  workfct = lwork + ipostpad
352  worksiz = workfct
353 *
354  IF( check ) THEN
355 *
356 * Figure the amount of workspace required by the
357 * checking routines PDLAFCHK, PDGEQRRV and
358 * PDLANGE
359 *
360  worksiz = lwork + mp*desca( nb_ ) + ipostpad
361 *
362  END IF
363 *
364  ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN
365 *
366  ltau = nq
367  ipw = iptau + ltau + ipostpad + iprepad
368 *
369 * Figure the amount of workspace required by the QL
370 * factorization
371 *
372  lwork = desca( nb_ ) * ( mp + nq + desca( nb_ ) )
373  workfct = lwork + ipostpad
374  worksiz = workfct
375 *
376  IF( check ) THEN
377 *
378 * Figure the amount of workspace required by the
379 * checking routines PDLAFCHK, PDGEQLRV and
380 * PDLANGE
381 *
382  worksiz = lwork + mp*desca( nb_ ) + ipostpad
383 *
384  END IF
385 *
386  ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN
387 *
388  ltau = mnp
389  ipw = iptau + ltau + ipostpad + iprepad
390 *
391 * Figure the amount of workspace required by the LQ
392 * factorization
393 *
394  lwork = desca( mb_ ) * ( mp + nq + desca( mb_ ) )
395  workfct = lwork + ipostpad
396  worksiz = workfct
397 *
398  IF( check ) THEN
399 *
400 * Figure the amount of workspace required by the
401 * checking routines PDLAFCHK, PDGELQRV and
402 * PDLANGE
403 *
404  worksiz = lwork +
405  $ max( mp*desca( nb_ ), nq*desca( mb_ )
406  $ ) + ipostpad
407 *
408  END IF
409 *
410  ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN
411 *
412  ltau = mp
413  ipw = iptau + ltau + ipostpad + iprepad
414 *
415 * Figure the amount of workspace required by the QR
416 * factorization
417 *
418  lwork = desca( mb_ ) * ( mp + nq + desca( mb_ ) )
419  workfct = lwork + ipostpad
420  worksiz = workfct
421 *
422  IF( check ) THEN
423 *
424 * Figure the amount of workspace required by the
425 * checking routines PDLAFCHK, PDGERQRV and
426 * PDLANGE
427 *
428  worksiz = lwork +
429  $ max( mp*desca( nb_ ), nq*desca( mb_ )
430  $ ) + ipostpad
431 *
432  END IF
433 *
434  ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN
435 *
436  ltau = mnq
437  ippiv = iptau + ltau + ipostpad + iprepad
438  lipiv = iceil( intgsz*nq, dblesz )
439  ipw = ippiv + lipiv + ipostpad + iprepad
440 *
441 * Figure the amount of workspace required by the
442 * factorization i.e from IPW on.
443 *
444  lwork = max( 3, mp + max( 1, nq ) ) + 2 * nq
445  workfct = lwork + ipostpad
446  worksiz = workfct
447 *
448  IF( check ) THEN
449 *
450 * Figure the amount of workspace required by the
451 * checking routines PDLAFCHK, PDGEQRRV,
452 * PDLANGE.
453 *
454  worksiz = max( worksiz - ipostpad,
455  $ desca( nb_ )*( 2*mp + nq + desca( nb_ ) ) ) +
456  $ ipostpad
457  END IF
458 *
459  ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN
460 *
461  ltau = mp
462  ipw = iptau + ltau + ipostpad + iprepad
463 *
464 * Figure the amount of workspace required by the TZ
465 * factorization
466 *
467  lwork = desca( mb_ ) * ( mp + nq + desca( mb_ ) )
468  workfct = lwork + ipostpad
469  worksiz = workfct
470 *
471  IF( check ) THEN
472 *
473 * Figure the amount of workspace required by the
474 * checking routines PDLAFCHK, PDTZRZRV and
475 * PDLANGE
476 *
477  worksiz = lwork +
478  $ max( mp*desca( nb_ ), nq*desca( mb_ )
479  $ ) + ipostpad
480 *
481  END IF
482 *
483  END IF
484 *
485 * Check for adequate memory for problem size
486 *
487  ierr( 1 ) = 0
488  IF( ipw+worksiz.GT.memsiz ) THEN
489  IF( iam.EQ.0 )
490  $ WRITE( nout, fmt = 9996 )
491  $ fact // ' factorization',
492  $ ( ipw+worksiz )*dblesz
493  ierr( 1 ) = 1
494  END IF
495 *
496 * Check all processes for an error
497 *
498  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
499  $ 0 )
500 *
501  IF( ierr( 1 ).GT.0 ) THEN
502  IF( iam.EQ.0 )
503  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
504  kskip = kskip + 1
505  GO TO 10
506  END IF
507 *
508 * Generate the matrix A
509 *
510  CALL pdmatgen( ictxt, 'N', 'N', desca( m_ ),
511  $ desca( n_ ), desca( mb_ ),
512  $ desca( nb_ ), mem( ipa ),
513  $ desca( lld_ ), desca( rsrc_ ),
514  $ desca( csrc_ ), iaseed, 0, mp, 0, nq,
515  $ myrow, mycol, nprow, npcol )
516 *
517 * Need the Infinity of A for checking
518 *
519  IF( check ) THEN
520  CALL pdfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
521  $ desca( lld_ ), iprepad, ipostpad,
522  $ padval )
523  IF( lsamen( 2, fact, 'QP' ) ) THEN
524  CALL pdfillpad( ictxt, lipiv, 1,
525  $ mem( ippiv-iprepad ), lipiv,
526  $ iprepad, ipostpad, padval )
527  END IF
528  CALL pdfillpad( ictxt, ltau, 1,
529  $ mem( iptau-iprepad ), ltau,
530  $ iprepad, ipostpad, padval )
531  CALL pdfillpad( ictxt, worksiz-ipostpad, 1,
532  $ mem( ipw-iprepad ),
533  $ worksiz-ipostpad,
534  $ iprepad, ipostpad, padval )
535  anorm = pdlange( 'I', m, n, mem( ipa ), 1, 1,
536  $ desca, mem( ipw ) )
537  CALL pdchekpad( ictxt, 'PDLANGE', mp, nq,
538  $ mem( ipa-iprepad ), desca( lld_ ),
539  $ iprepad, ipostpad, padval )
540  CALL pdchekpad( ictxt, 'PDLANGE',
541  $ worksiz-ipostpad, 1,
542  $ mem( ipw-iprepad ),
543  $ worksiz-ipostpad, iprepad,
544  $ ipostpad, padval )
545  CALL pdfillpad( ictxt, workfct-ipostpad, 1,
546  $ mem( ipw-iprepad ),
547  $ workfct-ipostpad,
548  $ iprepad, ipostpad, padval )
549  END IF
550 *
551  CALL slboot()
552  CALL blacs_barrier( ictxt, 'All' )
553 *
554 * Perform QR factorizations
555 *
556  IF( lsamen( 2, fact, 'QR' ) ) THEN
557  CALL sltimer( 1 )
558  CALL pdgeqrf( m, n, mem( ipa ), 1, 1, desca,
559  $ mem( iptau ), mem( ipw ), lwork,
560  $ info )
561  CALL sltimer( 1 )
562  ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN
563  CALL sltimer( 1 )
564  CALL pdgeqlf( m, n, mem( ipa ), 1, 1, desca,
565  $ mem( iptau ), mem( ipw ), lwork,
566  $ info )
567  CALL sltimer( 1 )
568  ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN
569  CALL sltimer( 1 )
570  CALL pdgelqf( m, n, mem( ipa ), 1, 1, desca,
571  $ mem( iptau ), mem( ipw ), lwork,
572  $ info )
573  CALL sltimer( 1 )
574  ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN
575  CALL sltimer( 1 )
576  CALL pdgerqf( m, n, mem( ipa ), 1, 1, desca,
577  $ mem( iptau ), mem( ipw ), lwork,
578  $ info )
579  CALL sltimer( 1 )
580  ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN
581  CALL sltimer( 1 )
582  CALL pdgeqpf( m, n, mem( ipa ), 1, 1, desca,
583  $ mem( ippiv ), mem( iptau ),
584  $ mem( ipw ), lwork, info )
585  CALL sltimer( 1 )
586  ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN
587  CALL sltimer( 1 )
588  IF( n.GE.m )
589  $ CALL pdtzrzf( m, n, mem( ipa ), 1, 1, desca,
590  $ mem( iptau ), mem( ipw ), lwork,
591  $ info )
592  CALL sltimer( 1 )
593  END IF
594 *
595  IF( check ) THEN
596 *
597 * Check for memory overwrite in factorization
598 *
599  CALL pdchekpad( ictxt, rout, mp, nq,
600  $ mem( ipa-iprepad ), desca( lld_ ),
601  $ iprepad, ipostpad, padval )
602  CALL pdchekpad( ictxt, rout, ltau, 1,
603  $ mem( iptau-iprepad ), ltau,
604  $ iprepad, ipostpad, padval )
605  IF( lsamen( 2, fact, 'QP' ) ) THEN
606  CALL pdchekpad( ictxt, rout, lipiv, 1,
607  $ mem( ippiv-iprepad ), lipiv,
608  $ iprepad, ipostpad, padval )
609  END IF
610  CALL pdchekpad( ictxt, rout, workfct-ipostpad, 1,
611  $ mem( ipw-iprepad ),
612  $ workfct-ipostpad, iprepad,
613  $ ipostpad, padval )
614  CALL pdfillpad( ictxt, worksiz-ipostpad, 1,
615  $ mem( ipw-iprepad ),
616  $ worksiz-ipostpad,
617  $ iprepad, ipostpad, padval )
618 *
619  IF( lsamen( 2, fact, 'QR' ) ) THEN
620 *
621 * Compute residual = ||A-Q*R|| / (||A||*N*eps)
622 *
623  CALL pdgeqrrv( m, n, mem( ipa ), 1, 1, desca,
624  $ mem( iptau ), mem( ipw ) )
625  CALL pdlafchk( 'No', 'No', m, n, mem( ipa ), 1,
626  $ 1, desca, iaseed, anorm, fresid,
627  $ mem( ipw ) )
628  ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN
629 *
630 * Compute residual = ||A-Q*L|| / (||A||*N*eps)
631 *
632  CALL pdgeqlrv( m, n, mem( ipa ), 1, 1, desca,
633  $ mem( iptau ), mem( ipw ) )
634  CALL pdlafchk( 'No', 'No', m, n, mem( ipa ), 1,
635  $ 1, desca, iaseed, anorm, fresid,
636  $ mem( ipw ) )
637  ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN
638 *
639 * Compute residual = ||A-L*Q|| / (||A||*N*eps)
640 *
641  CALL pdgelqrv( m, n, mem( ipa ), 1, 1, desca,
642  $ mem( iptau ), mem( ipw ) )
643  CALL pdlafchk( 'No', 'No', m, n, mem( ipa ), 1,
644  $ 1, desca, iaseed, anorm, fresid,
645  $ mem( ipw ) )
646  ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN
647 *
648 * Compute residual = ||A-R*Q|| / (||A||*N*eps)
649 *
650  CALL pdgerqrv( m, n, mem( ipa ), 1, 1, desca,
651  $ mem( iptau ), mem( ipw ) )
652  CALL pdlafchk( 'No', 'No', m, n, mem( ipa ), 1,
653  $ 1, desca, iaseed, anorm, fresid,
654  $ mem( ipw ) )
655  ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN
656 *
657 * Compute residual = ||AP-Q*R|| / (||A||*N*eps)
658 *
659  CALL pdgeqrrv( m, n, mem( ipa ), 1, 1, desca,
660  $ mem( iptau ), mem( ipw ) )
661  ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN
662 *
663 * Compute residual = ||A-T*Z|| / (||A||*N*eps)
664 *
665  IF( n.GE.m ) THEN
666  CALL pdtzrzrv( m, n, mem( ipa ), 1, 1, desca,
667  $ mem( iptau ), mem( ipw ) )
668  END IF
669  CALL pdlafchk( 'No', 'No', m, n, mem( ipa ), 1,
670  $ 1, desca, iaseed, anorm, fresid,
671  $ mem( ipw ) )
672  END IF
673 *
674 * Check for memory overwrite
675 *
676  CALL pdchekpad( ictxt, routchk, mp, nq,
677  $ mem( ipa-iprepad ), desca( lld_ ),
678  $ iprepad, ipostpad, padval )
679  CALL pdchekpad( ictxt, routchk, ltau, 1,
680  $ mem( iptau-iprepad ), ltau,
681  $ iprepad, ipostpad, padval )
682  CALL pdchekpad( ictxt, routchk, worksiz-ipostpad,
683  $ 1, mem( ipw-iprepad ),
684  $ worksiz-ipostpad, iprepad,
685  $ ipostpad, padval )
686 *
687  IF( lsamen( 2, fact, 'QP' ) ) THEN
688 *
689  CALL pdqppiv( m, n, mem( ipa ), 1, 1, desca,
690  $ mem( ippiv ) )
691 *
692 * Check for memory overwrite
693 *
694  CALL pdchekpad( ictxt, 'PDQPPIV', mp, nq,
695  $ mem( ipa-iprepad ),
696  $ desca( lld_ ),
697  $ iprepad, ipostpad, padval )
698  CALL pdchekpad( ictxt, 'PDQPPIV', lipiv, 1,
699  $ mem( ippiv-iprepad ), lipiv,
700  $ iprepad, ipostpad, padval )
701 *
702  CALL pdlafchk( 'No', 'No', m, n, mem( ipa ), 1,
703  $ 1, desca, iaseed, anorm, fresid,
704  $ mem( ipw ) )
705 *
706 * Check for memory overwrite
707 *
708  CALL pdchekpad( ictxt, 'PDLAFCHK', mp, nq,
709  $ mem( ipa-iprepad ),
710  $ desca( lld_ ),
711  $ iprepad, ipostpad, padval )
712  CALL pdchekpad( ictxt, 'PDLAFCHK',
713  $ worksiz-ipostpad, 1,
714  $ mem( ipw-iprepad ),
715  $ worksiz-ipostpad, iprepad,
716  $ ipostpad, padval )
717  END IF
718 *
719 * Test residual and detect NaN result
720 *
721  IF( lsamen( 2, fact, 'TZ' ) .AND. n.LT.m ) THEN
722  kskip = kskip + 1
723  passed = 'BYPASS'
724  ELSE
725  IF( fresid.LE.thresh .AND.
726  $ (fresid-fresid).EQ.0.0d+0 ) THEN
727  kpass = kpass + 1
728  passed = 'PASSED'
729  ELSE
730  kfail = kfail + 1
731  passed = 'FAILED'
732  END IF
733  END IF
734 *
735  ELSE
736 *
737 * Don't perform the checking, only timing
738 *
739  kpass = kpass + 1
740  fresid = fresid - fresid
741  passed = 'BYPASS'
742 *
743  END IF
744 *
745 * Gather maximum of all CPU and WALL clock timings
746 *
747  CALL slcombine( ictxt, 'All', '>', 'W', 1, 1, wtime )
748  CALL slcombine( ictxt, 'All', '>', 'C', 1, 1, ctime )
749 *
750 * Print results
751 *
752  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
753 *
754  minmn = min( m, n )
755  maxmn = max( m, n )
756 *
757  IF( lsamen( 2, fact, 'TZ' ) ) THEN
758  IF( m.GE.n ) THEN
759  nops = 0.0d+0
760  ELSE
761 *
762 * 5/2 ( M^2 N - M^3 ) + 5/2 N M + 1/2 M^2 for
763 * complete orthogonal factorization (M <= N).
764 *
765  nops = ( 5.0d+0 * (
766  $ dble( n )*( dble( m )**2 ) -
767  $ dble( m )**3 +
768  $ dble( n )*dble( m ) ) +
769  $ dble( m )**2 ) / 2.0d+0
770  END IF
771 *
772  ELSE
773 *
774 * 2 M N^2 - 2/3 N^2 + M N + N^2 for QR type
775 * factorization when M >= N.
776 *
777  nops = 2.0d+0 * ( dble( minmn )**2 ) *
778  $ ( dble( maxmn )-dble( minmn ) / 3.0d+0 ) +
779  $ ( dble( maxmn )+dble( minmn ) )*dble( minmn )
780  END IF
781 *
782 * Print WALL time
783 *
784  IF( wtime( 1 ).GT.0.0d+0 ) THEN
785  tmflops = nops / ( wtime( 1 ) * 1.0d+6 )
786  ELSE
787  tmflops = 0.0d+0
788  END IF
789  IF( wtime( 1 ).GE.0.0d+0 )
790  $ WRITE( nout, fmt = 9993 ) 'WALL', m, n, mb, nb,
791  $ nprow, npcol, wtime( 1 ), tmflops,
792  $ passed, fresid
793 *
794 * Print CPU time
795 *
796  IF( ctime( 1 ).GT.0.0d+0 ) THEN
797  tmflops = nops / ( ctime( 1 ) * 1.0d+6 )
798  ELSE
799  tmflops = 0.0d+0
800  END IF
801  IF( ctime( 1 ).GE.0.0d+0 )
802  $ WRITE( nout, fmt = 9993 ) 'CPU ', m, n, mb, nb,
803  $ nprow, npcol, ctime( 1 ), tmflops,
804  $ passed, fresid
805 *
806  END IF
807 *
808  10 CONTINUE
809 *
810  20 CONTINUE
811 *
812  CALL blacs_gridexit( ictxt )
813 *
814  30 CONTINUE
815 *
816  40 CONTINUE
817 *
818 * Print out ending messages and close output file
819 *
820  IF( iam.EQ.0 ) THEN
821  ktests = kpass + kfail + kskip
822  WRITE( nout, fmt = * )
823  WRITE( nout, fmt = 9992 ) ktests
824  IF( check ) THEN
825  WRITE( nout, fmt = 9991 ) kpass
826  WRITE( nout, fmt = 9989 ) kfail
827  ELSE
828  WRITE( nout, fmt = 9990 ) kpass
829  END IF
830  WRITE( nout, fmt = 9988 ) kskip
831  WRITE( nout, fmt = * )
832  WRITE( nout, fmt = * )
833  WRITE( nout, fmt = 9987 )
834  IF( nout.NE.6 .AND. nout.NE.0 )
835  $ CLOSE ( nout )
836  END IF
837 *
838  CALL blacs_exit( 0 )
839 *
840  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
841  $ '; It should be at least 1' )
842  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
843  $ i4 )
844  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
845  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
846  $ i11 )
847  9995 FORMAT( 'TIME M N MB NB P Q Fact Time ',
848  $ ' MFLOPS CHECK Residual' )
849  9994 FORMAT( '---- ------ ------ --- --- ----- ----- --------- ',
850  $ '----------- ------ --------' )
851  9993 FORMAT( a4, 1x, i6, 1x, i6, 1x, i3, 1x, i3, 1x, i5, 1x, i5, 1x,
852  $ f9.2, 1x, f11.2, 1x, a6, 2x, g8.1 )
853  9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
854  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
855  9990 FORMAT( i5, ' tests completed without checking.' )
856  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
857  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
858  9987 FORMAT( 'END OF TESTS.' )
859  9986 FORMAT( a )
860 *
861  stop
862 *
863 * End of PDQRDRIVER
864 *
865  END
866 *
867  SUBROUTINE pdqppiv( M, N, A, IA, JA, DESCA, IPIV )
868 *
869 * -- ScaLAPACK routine (version 1.7) --
870 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
871 * and University of California, Berkeley.
872 * May 1, 1997
873 *
874 * .. Scalar Arguments ..
875  INTEGER IA, JA, M, N
876 * ..
877 * .. Array Arguments ..
878  INTEGER DESCA( * ), IPIV( * )
879  DOUBLE PRECISION A( * )
880 * ..
881 *
882 * Purpose
883 * =======
884 *
885 * PDQPPIV applies to sub( A ) = A(IA:IA+M-1,JA:JA+N-1) the pivots
886 * returned by PDGEQPF in reverse order for checking purposes.
887 *
888 * Notes
889 * =====
890 *
891 * Each global data object is described by an associated description
892 * vector. This vector stores the information required to establish
893 * the mapping between an object element and its corresponding process
894 * and memory location.
895 *
896 * Let A be a generic term for any 2D block cyclicly distributed array.
897 * Such a global array has an associated description vector DESCA.
898 * In the following comments, the character _ should be read as
899 * "of the global array".
900 *
901 * NOTATION STORED IN EXPLANATION
902 * --------------- -------------- --------------------------------------
903 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
904 * DTYPE_A = 1.
905 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
906 * the BLACS process grid A is distribu-
907 * ted over. The context itself is glo-
908 * bal, but the handle (the integer
909 * value) may vary.
910 * M_A (global) DESCA( M_ ) The number of rows in the global
911 * array A.
912 * N_A (global) DESCA( N_ ) The number of columns in the global
913 * array A.
914 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
915 * the rows of the array.
916 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
917 * the columns of the array.
918 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
919 * row of the array A is distributed.
920 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
921 * first column of the array A is
922 * distributed.
923 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
924 * array. LLD_A >= MAX(1,LOCr(M_A)).
925 *
926 * Let K be the number of rows or columns of a distributed matrix,
927 * and assume that its process grid has dimension p x q.
928 * LOCr( K ) denotes the number of elements of K that a process
929 * would receive if K were distributed over the p processes of its
930 * process column.
931 * Similarly, LOCc( K ) denotes the number of elements of K that a
932 * process would receive if K were distributed over the q processes of
933 * its process row.
934 * The values of LOCr() and LOCc() may be determined via a call to the
935 * ScaLAPACK tool function, NUMROC:
936 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
937 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
938 * An upper bound for these quantities may be computed by:
939 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
940 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
941 *
942 * Arguments
943 * =========
944 *
945 * M (global input) INTEGER
946 * The number of rows to be operated on, i.e. the number of rows
947 * of the distributed submatrix sub( A ). M >= 0.
948 *
949 * N (global input) INTEGER
950 * The number of columns to be operated on, i.e. the number of
951 * columns of the distributed submatrix sub( A ). N >= 0.
952 *
953 * A (local input/local output) DOUBLE PRECISION pointer into the
954 * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
955 * On entry, the local pieces of the M-by-N distributed matrix
956 * sub( A ) which is to be permuted. On exit, the local pieces
957 * of the distributed permuted submatrix sub( A ) * Inv( P ).
958 *
959 * IA (global input) INTEGER
960 * The row index in the global array A indicating the first
961 * row of sub( A ).
962 *
963 * JA (global input) INTEGER
964 * The column index in the global array A indicating the
965 * first column of sub( A ).
966 *
967 * DESCA (global and local input) INTEGER array of dimension DLEN_.
968 * The array descriptor for the distributed matrix A.
969 *
970 * IPIV (local input) INTEGER array, dimension LOCc(JA+N-1).
971 * On exit, if IPIV(I) = K, the local i-th column of sub( A )*P
972 * was the global K-th column of sub( A ). IPIV is tied to the
973 * distributed matrix A.
974 *
975 * =====================================================================
976 *
977 * .. Parameters ..
978  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
979  $ LLD_, MB_, M_, NB_, N_, RSRC_
980  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
981  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
982  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
983 * ..
984 * .. Local Scalars ..
985  INTEGER IACOL, ICOFFA, ICTXT, IITMP, IPVT, IPCOL,
986  $ IPROW, ITMP, J, JJ, JJA, KK, MYCOL, MYROW,
987  $ NPCOL, NPROW, NQ
988 * ..
989 * .. External Subroutines ..
990  EXTERNAL blacs_gridinfo, igebr2d, igebs2d, igerv2d,
991  $ igesd2d, igamn2d, infog1l, pdswap
992 * ..
993 * .. External Functions ..
994  INTEGER INDXL2G, NUMROC
995  EXTERNAL indxl2g, numroc
996 * ..
997 * .. Intrinsic Functions ..
998  INTRINSIC min, mod
999 * ..
1000 * .. Executable Statements ..
1001 *
1002 * Get grid parameters
1003 *
1004  ictxt = desca( ctxt_ )
1005  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
1006  CALL infog1l( ja, desca( nb_ ), npcol, mycol, desca( csrc_ ), jja,
1007  $ iacol )
1008  icoffa = mod( ja-1, desca( nb_ ) )
1009  nq = numroc( n+icoffa, desca( nb_ ), mycol, iacol, npcol )
1010  IF( mycol.EQ.iacol )
1011  $ nq = nq - icoffa
1012 *
1013  DO 20 j = ja, ja+n-2
1014 *
1015  ipvt = ja+n-1
1016  itmp = ja+n
1017 *
1018 * Find first the local minimum candidate for pivoting
1019 *
1020  CALL infog1l( j, desca( nb_ ), npcol, mycol, desca( csrc_ ),
1021  $ jj, iacol )
1022  DO 10 kk = jj, jja+nq-1
1023  IF( ipiv( kk ).LT.ipvt )THEN
1024  iitmp = kk
1025  ipvt = ipiv( kk )
1026  END IF
1027  10 CONTINUE
1028 *
1029 * Find the global minimum pivot
1030 *
1031  CALL igamn2d( ictxt, 'Rowwise', ' ', 1, 1, ipvt, 1, iprow,
1032  $ ipcol, 1, -1, mycol )
1033 *
1034 * Broadcast the corresponding index to the other process columns
1035 *
1036  IF( mycol.EQ.ipcol ) THEN
1037  itmp = indxl2g( iitmp, desca( nb_ ), mycol, desca( csrc_ ),
1038  $ npcol )
1039  CALL igebs2d( ictxt, 'Rowwise', ' ', 1, 1, itmp, 1 )
1040  IF( ipcol.NE.iacol ) THEN
1041  CALL igerv2d( ictxt, 1, 1, ipiv( iitmp ), 1, myrow,
1042  $ iacol )
1043  ELSE
1044  IF( mycol.EQ.iacol )
1045  $ ipiv( iitmp ) = ipiv( jj )
1046  END IF
1047  ELSE
1048  CALL igebr2d( ictxt, 'Rowwise', ' ', 1, 1, itmp, 1, myrow,
1049  $ ipcol )
1050  IF( mycol.EQ.iacol .AND. ipcol.NE.iacol )
1051  $ CALL igesd2d( ictxt, 1, 1, ipiv( jj ), 1, myrow, ipcol )
1052  END IF
1053 *
1054 * Swap the columns of A
1055 *
1056  CALL pdswap( m, a, ia, itmp, desca, 1, a, ia, j, desca, 1 )
1057 *
1058  20 CONTINUE
1059 *
1060 * End of PDQPPIV
1061 *
1062  END
max
#define max(A, B)
Definition: pcgemr.c:180
pdqrinfo
subroutine pdqrinfo(SUMMRY, NOUT, NFACT, FACTOR, LDFACT, NMAT, MVAL, LDMVAL, NVAL, LDNVAL, NNB, MBVAL, LDMBVAL, NBVAL, LDNBVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, NPROCS)
Definition: pdqrinfo.f:6
infog1l
subroutine infog1l(GINDX, NB, NPROCS, MYROC, ISRCPROC, LINDX, ROCSRC)
Definition: infog1l.f:3
pdgerqf
subroutine pdgerqf(M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: pdgerqf.f:3
pdchekpad
subroutine pdchekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pdchekpad.f:3
pdqrdriver
program pdqrdriver
Definition: pdqrdriver.f:1
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
pdgerqrv
subroutine pdgerqrv(M, N, A, IA, JA, DESCA, TAU, WORK)
Definition: pdgerqrv.f:2
pdqppiv
subroutine pdqppiv(M, N, A, IA, JA, DESCA, IPIV)
Definition: pdqrdriver.f:868
pdgeqlf
subroutine pdgeqlf(M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: pdgeqlf.f:3
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
pdmatgen
subroutine pdmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: pdmatgen.f:4
pdgeqlrv
subroutine pdgeqlrv(M, N, A, IA, JA, DESCA, TAU, WORK)
Definition: pdgeqlrv.f:2
pdlange
double precision function pdlange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pdlange.f:3
pdlafchk
subroutine pdlafchk(AFORM, DIAG, M, N, A, IA, JA, DESCA, IASEED, ANORM, FRESID, WORK)
Definition: pdlafchk.f:3
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
lsamen
logical function lsamen(N, CA, CB)
Definition: pblastst.f:1457
pdtzrzrv
subroutine pdtzrzrv(M, N, A, IA, JA, DESCA, TAU, WORK)
Definition: pdtzrzrv.f:2
pdfillpad
subroutine pdfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pdfillpad.f:2
pdgeqrf
subroutine pdgeqrf(M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: pdgeqrf.f:3
pdgelqrv
subroutine pdgelqrv(M, N, A, IA, JA, DESCA, TAU, WORK)
Definition: pdgelqrv.f:2
pdgeqpf
subroutine pdgeqpf(M, N, A, IA, JA, DESCA, IPIV, TAU, WORK, LWORK, INFO)
Definition: pdgeqpf.f:3
pdgeqrrv
subroutine pdgeqrrv(M, N, A, IA, JA, DESCA, TAU, WORK)
Definition: pdgeqrrv.f:2
pdgelqf
subroutine pdgelqf(M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: pdgelqf.f:3
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267
pdtzrzf
subroutine pdtzrzf(M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: pdtzrzf.f:3
min
#define min(A, B)
Definition: pcgemr.c:181
iceil
integer function iceil(INUM, IDENOM)
Definition: iceil.f:2