ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
psqrdriver.f
Go to the documentation of this file.
1  PROGRAM psqrdriver
2 *
3 * -- ScaLAPACK testing driver (version 1.7) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * May 28, 2001
7 *
8 * Purpose
9 * =======
10 *
11 * PSQRDRIVER is the main test program for the REAL
12 * SCALAPACK QR factorization routines. This test driver performs a QR
13 * QL, LQ, RQ, QP (QR factorization with column pivoting) or TZ
14 * (complete orthogonal factorization) factorization and checks the
15 * results.
16 *
17 * The program must be driven by a short data file. An annotated
18 * example of a data file can be obtained by deleting the first 3
19 * characters from the following 16 lines:
20 * 'ScaLAPACK QR factorizations input file'
21 * 'PVM machine'
22 * 'QR.out' output file name (if any)
23 * 6 device out
24 * 6 number of factorizations
25 * 'QR' 'QL' 'LQ' 'RQ' 'QP' 'TZ' factorization: QR, QL, LQ, RQ, QP, TZ
26 * 4 number of problems sizes
27 * 55 17 31 201 values of M
28 * 5 71 31 201 values of N
29 * 3 number of MB's and NB's
30 * 4 3 5 values of MB
31 * 4 7 3 values of NB
32 * 7 number of process grids (ordered P & Q)
33 * 1 2 1 4 2 3 8 values of P
34 * 7 2 4 1 3 2 1 values of Q
35 * 1.0 threshold
36 *
37 * Internal Parameters
38 * ===================
39 *
40 * TOTMEM INTEGER, default = 2000000
41 * TOTMEM is a machine-specific parameter indicating the
42 * maximum amount of available memory in bytes.
43 * The user should customize TOTMEM to his platform. Remember
44 * to leave room in memory for the operating system, the BLACS
45 * buffer, etc. For example, on a system with 8 MB of memory
46 * per process (e.g., one processor on an Intel iPSC/860), the
47 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
48 * code, BLACS buffer, etc). However, for PVM, we usually set
49 * TOTMEM = 2000000. Some experimenting with the maximum value
50 * of TOTMEM may be required.
51 *
52 * INTGSZ INTEGER, default = 4 bytes.
53 * REALSZ INTEGER, default = 4 bytes.
54 * INTGSZ and REALSZ indicate the length in bytes on the
55 * given platform for an integer and a single precision real.
56 * MEM REAL array, dimension ( TOTMEM / REALSZ )
57 *
58 * All arrays used by SCALAPACK routines are allocated from
59 * this array and referenced by pointers. The integer IPA,
60 * for example, is a pointer to the starting element of MEM for
61 * the matrix A.
62 *
63 * =====================================================================
64 *
65 * .. Parameters ..
66  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
67  $ lld_, mb_, m_, nb_, n_, rsrc_
68  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
69  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
70  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
71  INTEGER intgsz, memsiz, ntests, realsz, totmem
72  REAL padval
73  parameter( intgsz = 4, realsz = 4, totmem = 2000000,
74  $ memsiz = totmem / realsz, ntests = 20,
75  $ padval = -9923.0e+0 )
76 * ..
77 * .. Local Scalars ..
78  CHARACTER*2 fact
79  CHARACTER*6 passed
80  CHARACTER*7 rout
81  CHARACTER*8 routchk
82  CHARACTER*80 outfile
83  LOGICAL check
84  INTEGER i, iam, iaseed, ictxt, imidpad, info, ipa,
85  $ ipostpad, ippiv, iprepad, iptau, ipw, j, k,
86  $ kfail, kpass, kskip, ktests, l, lipiv, ltau,
87  $ lwork, m, maxmn, mb, minmn, mnp, mnq, mp,
88  $ mycol, myrow, n, nb, nfact, ngrids, nmat, nnb,
89  $ nout, npcol, nprocs, nprow, nq, workfct,
90  $ worksiz
91  REAL anorm, fresid, thresh
92  DOUBLE PRECISION nops, tmflops
93 * ..
94 * .. Arrays ..
95  CHARACTER*2 factor( ntests )
96  INTEGER desca( dlen_ ), ierr( 1 ), mbval( ntests ),
97  $ mval( ntests ), nbval( ntests ),
98  $ nval( ntests ), pval( ntests ), qval( ntests )
99  REAL mem( memsiz )
100  DOUBLE PRECISION ctime( 1 ), wtime( 1 )
101 * ..
102 * .. External Subroutines ..
103  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
104  $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
105  $ blacs_pinfo, descinit, igsum2d, pschekpad,
112 * ..
113 * .. External Functions ..
114  LOGICAL lsamen
115  INTEGER iceil, numroc
116  REAL pslange
117  EXTERNAL iceil, lsamen, numroc, pslange
118 * ..
119 * .. Intrinsic Functions ..
120  INTRINSIC dble, max, min
121 * ..
122 * .. Data Statements ..
123  DATA ktests, kpass, kfail, kskip /4*0/
124 * ..
125 * .. Executable Statements ..
126 *
127 * Get starting information
128 *
129  CALL blacs_pinfo( iam, nprocs )
130  iaseed = 100
131  CALL psqrinfo( outfile, nout, nfact, factor, ntests, nmat, mval,
132  $ ntests, nval, ntests, nnb, mbval, ntests, nbval,
133  $ ntests, ngrids, pval, ntests, qval, ntests,
134  $ thresh, mem, iam, nprocs )
135  check = ( thresh.GE.0.0e+0 )
136 *
137 * Loop over the different factorization types
138 *
139  DO 40 i = 1, nfact
140 *
141  fact = factor( i )
142 *
143 * Print headings
144 *
145  IF( iam.EQ.0 ) THEN
146  WRITE( nout, fmt = * )
147  IF( lsamen( 2, fact, 'QR' ) ) THEN
148  rout = 'PSGEQRF'
149  routchk = 'PSGEQRRV'
150  WRITE( nout, fmt = 9986 )
151  $ 'QR factorization tests.'
152  ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN
153  rout = 'PSGEQLF'
154  routchk = 'PSGEQLRV'
155  WRITE( nout, fmt = 9986 )
156  $ 'QL factorization tests.'
157  ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN
158  rout = 'PSGELQF'
159  routchk = 'PSGELQRV'
160  WRITE( nout, fmt = 9986 )
161  $ 'LQ factorization tests.'
162  ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN
163  rout = 'PSGERQF'
164  routchk = 'PSGERQRV'
165  WRITE( nout, fmt = 9986 )
166  $ 'RQ factorization tests.'
167  ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN
168  rout = 'PSGEQPF'
169  routchk = 'PSGEQRRV'
170  WRITE( nout, fmt = 9986 )
171  $ 'QR factorization with column pivoting tests.'
172  ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN
173  rout = 'PSTZRZF'
174  routchk = 'PSTZRZRV'
175  WRITE( nout, fmt = 9986 )
176  $ 'Complete orthogonal factorization tests.'
177  END IF
178  WRITE( nout, fmt = * )
179  WRITE( nout, fmt = 9995 )
180  WRITE( nout, fmt = 9994 )
181  WRITE( nout, fmt = * )
182  END IF
183 *
184 * Loop over different process grids
185 *
186  DO 30 j = 1, ngrids
187 *
188  nprow = pval( j )
189  npcol = qval( j )
190 *
191 * Make sure grid information is correct
192 *
193  ierr( 1 ) = 0
194  IF( nprow.LT.1 ) THEN
195  IF( iam.EQ.0 )
196  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
197  ierr( 1 ) = 1
198  ELSE IF( npcol.LT.1 ) THEN
199  IF( iam.EQ.0 )
200  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
201  ierr( 1 ) = 1
202  ELSE IF( nprow*npcol.GT.nprocs ) THEN
203  IF( iam.EQ.0 )
204  $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
205  ierr( 1 ) = 1
206  END IF
207 *
208  IF( ierr( 1 ).GT.0 ) THEN
209  IF( iam.EQ.0 )
210  $ WRITE( nout, fmt = 9997 ) 'grid'
211  kskip = kskip + 1
212  GO TO 30
213  END IF
214 *
215 * Define process grid
216 *
217  CALL blacs_get( -1, 0, ictxt )
218  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
219  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
220 *
221 * Go to bottom of loop if this case doesn't use my process
222 *
223  IF( myrow.GE.nprow .OR. mycol.GE.npcol )
224  $ GO TO 30
225 *
226  DO 20 k = 1, nmat
227 *
228  m = mval( k )
229  n = nval( k )
230 *
231 * Make sure matrix information is correct
232 *
233  ierr(1) = 0
234  IF( m.LT.1 ) THEN
235  IF( iam.EQ.0 )
236  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'M', m
237  ierr( 1 ) = 1
238  ELSE IF( n.LT.1 ) THEN
239  IF( iam.EQ.0 )
240  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
241  ierr( 1 ) = 1
242  END IF
243 *
244 * Make sure no one had error
245 *
246  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
247 *
248  IF( ierr( 1 ).GT.0 ) THEN
249  IF( iam.EQ.0 )
250  $ WRITE( nout, fmt = 9997 ) 'matrix'
251  kskip = kskip + 1
252  GO TO 20
253  END IF
254 *
255 * Loop over different blocking sizes
256 *
257  DO 10 l = 1, nnb
258 *
259  mb = mbval( l )
260  nb = nbval( l )
261 *
262 * Make sure mb is legal
263 *
264  ierr( 1 ) = 0
265  IF( mb.LT.1 ) THEN
266  ierr( 1 ) = 1
267  IF( iam.EQ.0 )
268  $ WRITE( nout, fmt = 9999 ) 'MB', 'MB', mb
269  END IF
270 *
271 * Check all processes for an error
272 *
273  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
274  $ 0 )
275 *
276  IF( ierr( 1 ).GT.0 ) THEN
277  IF( iam.EQ.0 )
278  $ WRITE( nout, fmt = 9997 ) 'MB'
279  kskip = kskip + 1
280  GO TO 10
281  END IF
282 *
283 * Make sure nb is legal
284 *
285  ierr( 1 ) = 0
286  IF( nb.LT.1 ) THEN
287  ierr( 1 ) = 1
288  IF( iam.EQ.0 )
289  $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
290  END IF
291 *
292 * Check all processes for an error
293 *
294  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
295  $ 0 )
296 *
297  IF( ierr( 1 ).GT.0 ) THEN
298  IF( iam.EQ.0 )
299  $ WRITE( nout, fmt = 9997 ) 'NB'
300  kskip = kskip + 1
301  GO TO 10
302  END IF
303 *
304 * Padding constants
305 *
306  mp = numroc( m, mb, myrow, 0, nprow )
307  nq = numroc( n, nb, mycol, 0, npcol )
308  mnp = numroc( min( m, n ), mb, myrow, 0, nprow )
309  mnq = numroc( min( m, n ), nb, mycol, 0, npcol )
310  IF( check ) THEN
311  iprepad = max( mb, mp )
312  imidpad = nb
313  ipostpad = max( nb, nq )
314  ELSE
315  iprepad = 0
316  imidpad = 0
317  ipostpad = 0
318  END IF
319 *
320 * Initialize the array descriptor for the matrix A
321 *
322  CALL descinit( desca, m, n, mb, nb, 0, 0, ictxt,
323  $ max( 1, mp ) + imidpad, ierr( 1 ) )
324 *
325 * Check all processes for an error
326 *
327  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
328  $ 0 )
329 *
330  IF( ierr( 1 ).LT.0 ) THEN
331  IF( iam.EQ.0 )
332  $ WRITE( nout, fmt = 9997 ) 'descriptor'
333  kskip = kskip + 1
334  GO TO 10
335  END IF
336 *
337 * Assign pointers into MEM for ScaLAPACK arrays, A is
338 * allocated starting at position MEM( IPREPAD+1 )
339 *
340  ipa = iprepad+1
341  iptau = ipa + desca( lld_ ) * nq + ipostpad + iprepad
342 *
343  IF( lsamen( 2, fact, 'QR' ) ) THEN
344 *
345  ltau = mnq
346  ipw = iptau + ltau + ipostpad + iprepad
347 *
348 * Figure the amount of workspace required by the QR
349 * factorization
350 *
351  lwork = desca( nb_ ) * ( mp + nq + desca( nb_ ) )
352  workfct = lwork + ipostpad
353  worksiz = workfct
354 *
355  IF( check ) THEN
356 *
357 * Figure the amount of workspace required by the
358 * checking routines PSLAFCHK, PSGEQRRV and
359 * PSLANGE
360 *
361  worksiz = lwork + mp*desca( nb_ ) + ipostpad
362 *
363  END IF
364 *
365  ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN
366 *
367  ltau = nq
368  ipw = iptau + ltau + ipostpad + iprepad
369 *
370 * Figure the amount of workspace required by the QL
371 * factorization
372 *
373  lwork = desca( nb_ ) * ( mp + nq + desca( nb_ ) )
374  workfct = lwork + ipostpad
375  worksiz = workfct
376 *
377  IF( check ) THEN
378 *
379 * Figure the amount of workspace required by the
380 * checking routines PSLAFCHK, PSGEQLRV and
381 * PSLANGE
382 *
383  worksiz = lwork + mp*desca( nb_ ) + ipostpad
384 *
385  END IF
386 *
387  ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN
388 *
389  ltau = mnp
390  ipw = iptau + ltau + ipostpad + iprepad
391 *
392 * Figure the amount of workspace required by the LQ
393 * factorization
394 *
395  lwork = desca( mb_ ) * ( mp + nq + desca( mb_ ) )
396  workfct = lwork + ipostpad
397  worksiz = workfct
398 *
399  IF( check ) THEN
400 *
401 * Figure the amount of workspace required by the
402 * checking routines PSLAFCHK, PSGELQRV and
403 * PSLANGE
404 *
405  worksiz = lwork +
406  $ max( mp*desca( nb_ ), nq*desca( mb_ )
407  $ ) + ipostpad
408 *
409  END IF
410 *
411  ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN
412 *
413  ltau = mp
414  ipw = iptau + ltau + ipostpad + iprepad
415 *
416 * Figure the amount of workspace required by the QR
417 * factorization
418 *
419  lwork = desca( mb_ ) * ( mp + nq + desca( mb_ ) )
420  workfct = lwork + ipostpad
421  worksiz = workfct
422 *
423  IF( check ) THEN
424 *
425 * Figure the amount of workspace required by the
426 * checking routines PSLAFCHK, PSGERQRV and
427 * PSLANGE
428 *
429  worksiz = lwork +
430  $ max( mp*desca( nb_ ), nq*desca( mb_ )
431  $ ) + ipostpad
432 *
433  END IF
434 *
435  ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN
436 *
437  ltau = mnq
438  ippiv = iptau + ltau + ipostpad + iprepad
439  lipiv = iceil( intgsz*nq, realsz )
440  ipw = ippiv + lipiv + ipostpad + iprepad
441 *
442 * Figure the amount of workspace required by the
443 * factorization i.e from IPW on.
444 *
445  lwork = max( 3, mp + max( 1, nq ) ) + 2 * nq
446  workfct = lwork + ipostpad
447  worksiz = workfct
448 *
449  IF( check ) THEN
450 *
451 * Figure the amount of workspace required by the
452 * checking routines PSLAFCHK, PSGEQRRV,
453 * PSLANGE.
454 *
455  worksiz = max( worksiz - ipostpad,
456  $ desca( nb_ )*( 2*mp + nq + desca( nb_ ) ) ) +
457  $ ipostpad
458  END IF
459 *
460  ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN
461 *
462  ltau = mp
463  ipw = iptau + ltau + ipostpad + iprepad
464 *
465 * Figure the amount of workspace required by the TZ
466 * factorization
467 *
468  lwork = desca( mb_ ) * ( mp + nq + desca( mb_ ) )
469  workfct = lwork + ipostpad
470  worksiz = workfct
471 *
472  IF( check ) THEN
473 *
474 * Figure the amount of workspace required by the
475 * checking routines PSLAFCHK, PSTZRZRV and
476 * PSLANGE
477 *
478  worksiz = lwork +
479  $ max( mp*desca( nb_ ), nq*desca( mb_ )
480  $ ) + ipostpad
481 *
482  END IF
483 *
484  END IF
485 *
486 * Check for adequate memory for problem size
487 *
488  ierr( 1 ) = 0
489  IF( ipw+worksiz.GT.memsiz ) THEN
490  IF( iam.EQ.0 )
491  $ WRITE( nout, fmt = 9996 )
492  $ fact // ' factorization',
493  $ ( ipw+worksiz )*realsz
494  ierr( 1 ) = 1
495  END IF
496 *
497 * Check all processes for an error
498 *
499  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
500  $ 0 )
501 *
502  IF( ierr( 1 ).GT.0 ) THEN
503  IF( iam.EQ.0 )
504  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
505  kskip = kskip + 1
506  GO TO 10
507  END IF
508 *
509 * Generate the matrix A
510 *
511  CALL psmatgen( ictxt, 'N', 'N', desca( m_ ),
512  $ desca( n_ ), desca( mb_ ),
513  $ desca( nb_ ), mem( ipa ),
514  $ desca( lld_ ), desca( rsrc_ ),
515  $ desca( csrc_ ), iaseed, 0, mp, 0, nq,
516  $ myrow, mycol, nprow, npcol )
517 *
518 * Need the Infinity of A for checking
519 *
520  IF( check ) THEN
521  CALL psfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
522  $ desca( lld_ ), iprepad, ipostpad,
523  $ padval )
524  IF( lsamen( 2, fact, 'QP' ) ) THEN
525  CALL psfillpad( ictxt, lipiv, 1,
526  $ mem( ippiv-iprepad ), lipiv,
527  $ iprepad, ipostpad, padval )
528  END IF
529  CALL psfillpad( ictxt, ltau, 1,
530  $ mem( iptau-iprepad ), ltau,
531  $ iprepad, ipostpad, padval )
532  CALL psfillpad( ictxt, worksiz-ipostpad, 1,
533  $ mem( ipw-iprepad ),
534  $ worksiz-ipostpad,
535  $ iprepad, ipostpad, padval )
536  anorm = pslange( 'I', m, n, mem( ipa ), 1, 1,
537  $ desca, mem( ipw ) )
538  CALL pschekpad( ictxt, 'PSLANGE', mp, nq,
539  $ mem( ipa-iprepad ), desca( lld_ ),
540  $ iprepad, ipostpad, padval )
541  CALL pschekpad( ictxt, 'PSLANGE',
542  $ worksiz-ipostpad, 1,
543  $ mem( ipw-iprepad ),
544  $ worksiz-ipostpad, iprepad,
545  $ ipostpad, padval )
546  CALL psfillpad( ictxt, workfct-ipostpad, 1,
547  $ mem( ipw-iprepad ),
548  $ workfct-ipostpad,
549  $ iprepad, ipostpad, padval )
550  END IF
551 *
552  CALL slboot()
553  CALL blacs_barrier( ictxt, 'All' )
554 *
555 * Perform QR factorizations
556 *
557  IF( lsamen( 2, fact, 'QR' ) ) THEN
558  CALL sltimer( 1 )
559  CALL psgeqrf( m, n, mem( ipa ), 1, 1, desca,
560  $ mem( iptau ), mem( ipw ), lwork,
561  $ info )
562  CALL sltimer( 1 )
563  ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN
564  CALL sltimer( 1 )
565  CALL psgeqlf( m, n, mem( ipa ), 1, 1, desca,
566  $ mem( iptau ), mem( ipw ), lwork,
567  $ info )
568  CALL sltimer( 1 )
569  ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN
570  CALL sltimer( 1 )
571  CALL psgelqf( m, n, mem( ipa ), 1, 1, desca,
572  $ mem( iptau ), mem( ipw ), lwork,
573  $ info )
574  CALL sltimer( 1 )
575  ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN
576  CALL sltimer( 1 )
577  CALL psgerqf( m, n, mem( ipa ), 1, 1, desca,
578  $ mem( iptau ), mem( ipw ), lwork,
579  $ info )
580  CALL sltimer( 1 )
581  ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN
582  CALL sltimer( 1 )
583  CALL psgeqpf( m, n, mem( ipa ), 1, 1, desca,
584  $ mem( ippiv ), mem( iptau ),
585  $ mem( ipw ), lwork, info )
586  CALL sltimer( 1 )
587  ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN
588  CALL sltimer( 1 )
589  IF( n.GE.m )
590  $ CALL pstzrzf( m, n, mem( ipa ), 1, 1, desca,
591  $ mem( iptau ), mem( ipw ), lwork,
592  $ info )
593  CALL sltimer( 1 )
594  END IF
595 *
596  IF( check ) THEN
597 *
598 * Check for memory overwrite in factorization
599 *
600  CALL pschekpad( ictxt, rout, mp, nq,
601  $ mem( ipa-iprepad ), desca( lld_ ),
602  $ iprepad, ipostpad, padval )
603  CALL pschekpad( ictxt, rout, ltau, 1,
604  $ mem( iptau-iprepad ), ltau,
605  $ iprepad, ipostpad, padval )
606  IF( lsamen( 2, fact, 'QP' ) ) THEN
607  CALL pschekpad( ictxt, rout, lipiv, 1,
608  $ mem( ippiv-iprepad ), lipiv,
609  $ iprepad, ipostpad, padval )
610  END IF
611  CALL pschekpad( ictxt, rout, workfct-ipostpad, 1,
612  $ mem( ipw-iprepad ),
613  $ workfct-ipostpad, iprepad,
614  $ ipostpad, padval )
615  CALL psfillpad( ictxt, worksiz-ipostpad, 1,
616  $ mem( ipw-iprepad ),
617  $ worksiz-ipostpad,
618  $ iprepad, ipostpad, padval )
619 *
620  IF( lsamen( 2, fact, 'QR' ) ) THEN
621 *
622 * Compute residual = ||A-Q*R|| / (||A||*N*eps)
623 *
624  CALL psgeqrrv( m, n, mem( ipa ), 1, 1, desca,
625  $ mem( iptau ), mem( ipw ) )
626  CALL pslafchk( 'No', 'No', m, n, mem( ipa ), 1,
627  $ 1, desca, iaseed, anorm, fresid,
628  $ mem( ipw ) )
629  ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN
630 *
631 * Compute residual = ||A-Q*L|| / (||A||*N*eps)
632 *
633  CALL psgeqlrv( m, n, mem( ipa ), 1, 1, desca,
634  $ mem( iptau ), mem( ipw ) )
635  CALL pslafchk( 'No', 'No', m, n, mem( ipa ), 1,
636  $ 1, desca, iaseed, anorm, fresid,
637  $ mem( ipw ) )
638  ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN
639 *
640 * Compute residual = ||A-L*Q|| / (||A||*N*eps)
641 *
642  CALL psgelqrv( m, n, mem( ipa ), 1, 1, desca,
643  $ mem( iptau ), mem( ipw ) )
644  CALL pslafchk( 'No', 'No', m, n, mem( ipa ), 1,
645  $ 1, desca, iaseed, anorm, fresid,
646  $ mem( ipw ) )
647  ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN
648 *
649 * Compute residual = ||A-R*Q|| / (||A||*N*eps)
650 *
651  CALL psgerqrv( m, n, mem( ipa ), 1, 1, desca,
652  $ mem( iptau ), mem( ipw ) )
653  CALL pslafchk( 'No', 'No', m, n, mem( ipa ), 1,
654  $ 1, desca, iaseed, anorm, fresid,
655  $ mem( ipw ) )
656  ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN
657 *
658 * Compute residual = ||AP-Q*R|| / (||A||*N*eps)
659 *
660  CALL psgeqrrv( m, n, mem( ipa ), 1, 1, desca,
661  $ mem( iptau ), mem( ipw ) )
662  ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN
663 *
664 * Compute residual = ||A-T*Z|| / (||A||*N*eps)
665 *
666  IF( n.GE.m ) THEN
667  CALL pstzrzrv( m, n, mem( ipa ), 1, 1, desca,
668  $ mem( iptau ), mem( ipw ) )
669  END IF
670  CALL pslafchk( 'No', 'No', m, n, mem( ipa ), 1,
671  $ 1, desca, iaseed, anorm, fresid,
672  $ mem( ipw ) )
673  END IF
674 *
675 * Check for memory overwrite
676 *
677  CALL pschekpad( ictxt, routchk, mp, nq,
678  $ mem( ipa-iprepad ), desca( lld_ ),
679  $ iprepad, ipostpad, padval )
680  CALL pschekpad( ictxt, routchk, ltau, 1,
681  $ mem( iptau-iprepad ), ltau,
682  $ iprepad, ipostpad, padval )
683  CALL pschekpad( ictxt, routchk, worksiz-ipostpad,
684  $ 1, mem( ipw-iprepad ),
685  $ worksiz-ipostpad, iprepad,
686  $ ipostpad, padval )
687 *
688  IF( lsamen( 2, fact, 'QP' ) ) THEN
689 *
690  CALL psqppiv( m, n, mem( ipa ), 1, 1, desca,
691  $ mem( ippiv ) )
692 *
693 * Check for memory overwrite
694 *
695  CALL pschekpad( ictxt, 'PSQPPIV', mp, nq,
696  $ mem( ipa-iprepad ),
697  $ desca( lld_ ),
698  $ iprepad, ipostpad, padval )
699  CALL pschekpad( ictxt, 'PSQPPIV', lipiv, 1,
700  $ mem( ippiv-iprepad ), lipiv,
701  $ iprepad, ipostpad, padval )
702 *
703  CALL pslafchk( 'No', 'No', m, n, mem( ipa ), 1,
704  $ 1, desca, iaseed, anorm, fresid,
705  $ mem( ipw ) )
706 *
707 * Check for memory overwrite
708 *
709  CALL pschekpad( ictxt, 'PSLAFCHK', mp, nq,
710  $ mem( ipa-iprepad ),
711  $ desca( lld_ ),
712  $ iprepad, ipostpad, padval )
713  CALL pschekpad( ictxt, 'PSLAFCHK',
714  $ worksiz-ipostpad, 1,
715  $ mem( ipw-iprepad ),
716  $ worksiz-ipostpad, iprepad,
717  $ ipostpad, padval )
718  END IF
719 *
720 * Test residual and detect NaN result
721 *
722  IF( lsamen( 2, fact, 'TZ' ) .AND. n.LT.m ) THEN
723  kskip = kskip + 1
724  passed = 'BYPASS'
725  ELSE
726  IF( fresid.LE.thresh .AND.
727  $ (fresid-fresid).EQ.0.0e+0 ) THEN
728  kpass = kpass + 1
729  passed = 'PASSED'
730  ELSE
731  kfail = kfail + 1
732  passed = 'FAILED'
733  END IF
734  END IF
735 *
736  ELSE
737 *
738 * Don't perform the checking, only timing
739 *
740  kpass = kpass + 1
741  fresid = fresid - fresid
742  passed = 'BYPASS'
743 *
744  END IF
745 *
746 * Gather maximum of all CPU and WALL clock timings
747 *
748  CALL slcombine( ictxt, 'All', '>', 'W', 1, 1, wtime )
749  CALL slcombine( ictxt, 'All', '>', 'C', 1, 1, ctime )
750 *
751 * Print results
752 *
753  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
754 *
755  minmn = min( m, n )
756  maxmn = max( m, n )
757 *
758  IF( lsamen( 2, fact, 'TZ' ) ) THEN
759  IF( m.GE.n ) THEN
760  nops = 0.0d+0
761  ELSE
762 *
763 * 5/2 ( M^2 N - M^3 ) + 5/2 N M + 1/2 M^2 for
764 * complete orthogonal factorization (M <= N).
765 *
766  nops = ( 5.0d+0 * (
767  $ dble( n )*( dble( m )**2 ) -
768  $ dble( m )**3 +
769  $ dble( n )*dble( m ) ) +
770  $ dble( m )**2 ) / 2.0d+0
771  END IF
772 *
773  ELSE
774 *
775 * 2 M N^2 - 2/3 N^2 + M N + N^2 for QR type
776 * factorization when M >= N.
777 *
778  nops = 2.0d+0 * ( dble( minmn )**2 ) *
779  $ ( dble( maxmn )-dble( minmn ) / 3.0d+0 ) +
780  $ ( dble( maxmn )+dble( minmn ) )*dble( minmn )
781  END IF
782 *
783 * Print WALL time
784 *
785  IF( wtime( 1 ).GT.0.0d+0 ) THEN
786  tmflops = nops / ( wtime( 1 ) * 1.0d+6 )
787  ELSE
788  tmflops = 0.0d+0
789  END IF
790  IF( wtime( 1 ).GE.0.0d+0 )
791  $ WRITE( nout, fmt = 9993 ) 'WALL', m, n, mb, nb,
792  $ nprow, npcol, wtime( 1 ), tmflops,
793  $ passed, fresid
794 *
795 * Print CPU time
796 *
797  IF( ctime( 1 ).GT.0.0d+0 ) THEN
798  tmflops = nops / ( ctime( 1 ) * 1.0d+6 )
799  ELSE
800  tmflops = 0.0d+0
801  END IF
802  IF( ctime( 1 ).GE.0.0d+0 )
803  $ WRITE( nout, fmt = 9993 ) 'CPU ', m, n, mb, nb,
804  $ nprow, npcol, ctime( 1 ), tmflops,
805  $ passed, fresid
806 *
807  END IF
808 *
809  10 CONTINUE
810 *
811  20 CONTINUE
812 *
813  CALL blacs_gridexit( ictxt )
814 *
815  30 CONTINUE
816 *
817  40 CONTINUE
818 *
819 * Print out ending messages and close output file
820 *
821  IF( iam.EQ.0 ) THEN
822  ktests = kpass + kfail + kskip
823  WRITE( nout, fmt = * )
824  WRITE( nout, fmt = 9992 ) ktests
825  IF( check ) THEN
826  WRITE( nout, fmt = 9991 ) kpass
827  WRITE( nout, fmt = 9989 ) kfail
828  ELSE
829  WRITE( nout, fmt = 9990 ) kpass
830  END IF
831  WRITE( nout, fmt = 9988 ) kskip
832  WRITE( nout, fmt = * )
833  WRITE( nout, fmt = * )
834  WRITE( nout, fmt = 9987 )
835  IF( nout.NE.6 .AND. nout.NE.0 )
836  $ CLOSE ( nout )
837  END IF
838 *
839  CALL blacs_exit( 0 )
840 *
841  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
842  $ '; It should be at least 1' )
843  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
844  $ i4 )
845  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
846  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
847  $ i11 )
848  9995 FORMAT( 'TIME M N MB NB P Q Fact Time ',
849  $ ' MFLOPS CHECK Residual' )
850  9994 FORMAT( '---- ------ ------ --- --- ----- ----- --------- ',
851  $ '----------- ------ --------' )
852  9993 FORMAT( a4, 1x, i6, 1x, i6, 1x, i3, 1x, i3, 1x, i5, 1x, i5, 1x,
853  $ f9.2, 1x, f11.2, 1x, a6, 2x, g8.1 )
854  9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
855  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
856  9990 FORMAT( i5, ' tests completed without checking.' )
857  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
858  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
859  9987 FORMAT( 'END OF TESTS.' )
860  9986 FORMAT( a )
861 *
862  stop
863 *
864 * End of PSQRDRIVER
865 *
866  END
867 *
868  SUBROUTINE psqppiv( M, N, A, IA, JA, DESCA, IPIV )
869 *
870 * -- ScaLAPACK routine (version 1.7) --
871 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
872 * and University of California, Berkeley.
873 * May 1, 1997
874 *
875 * .. Scalar Arguments ..
876  INTEGER IA, JA, M, N
877 * ..
878 * .. Array Arguments ..
879  INTEGER DESCA( * ), IPIV( * )
880  REAL A( * )
881 * ..
882 *
883 * Purpose
884 * =======
885 *
886 * PSQPPIV applies to sub( A ) = A(IA:IA+M-1,JA:JA+N-1) the pivots
887 * returned by PSGEQPF in reverse order for checking purposes.
888 *
889 * Notes
890 * =====
891 *
892 * Each global data object is described by an associated description
893 * vector. This vector stores the information required to establish
894 * the mapping between an object element and its corresponding process
895 * and memory location.
896 *
897 * Let A be a generic term for any 2D block cyclicly distributed array.
898 * Such a global array has an associated description vector DESCA.
899 * In the following comments, the character _ should be read as
900 * "of the global array".
901 *
902 * NOTATION STORED IN EXPLANATION
903 * --------------- -------------- --------------------------------------
904 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
905 * DTYPE_A = 1.
906 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
907 * the BLACS process grid A is distribu-
908 * ted over. The context itself is glo-
909 * bal, but the handle (the integer
910 * value) may vary.
911 * M_A (global) DESCA( M_ ) The number of rows in the global
912 * array A.
913 * N_A (global) DESCA( N_ ) The number of columns in the global
914 * array A.
915 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
916 * the rows of the array.
917 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
918 * the columns of the array.
919 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
920 * row of the array A is distributed.
921 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
922 * first column of the array A is
923 * distributed.
924 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
925 * array. LLD_A >= MAX(1,LOCr(M_A)).
926 *
927 * Let K be the number of rows or columns of a distributed matrix,
928 * and assume that its process grid has dimension p x q.
929 * LOCr( K ) denotes the number of elements of K that a process
930 * would receive if K were distributed over the p processes of its
931 * process column.
932 * Similarly, LOCc( K ) denotes the number of elements of K that a
933 * process would receive if K were distributed over the q processes of
934 * its process row.
935 * The values of LOCr() and LOCc() may be determined via a call to the
936 * ScaLAPACK tool function, NUMROC:
937 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
938 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
939 * An upper bound for these quantities may be computed by:
940 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
941 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
942 *
943 * Arguments
944 * =========
945 *
946 * M (global input) INTEGER
947 * The number of rows to be operated on, i.e. the number of rows
948 * of the distributed submatrix sub( A ). M >= 0.
949 *
950 * N (global input) INTEGER
951 * The number of columns to be operated on, i.e. the number of
952 * columns of the distributed submatrix sub( A ). N >= 0.
953 *
954 * A (local input/local output) REAL pointer into the
955 * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
956 * On entry, the local pieces of the M-by-N distributed matrix
957 * sub( A ) which is to be permuted. On exit, the local pieces
958 * of the distributed permuted submatrix sub( A ) * Inv( P ).
959 *
960 * IA (global input) INTEGER
961 * The row index in the global array A indicating the first
962 * row of sub( A ).
963 *
964 * JA (global input) INTEGER
965 * The column index in the global array A indicating the
966 * first column of sub( A ).
967 *
968 * DESCA (global and local input) INTEGER array of dimension DLEN_.
969 * The array descriptor for the distributed matrix A.
970 *
971 * IPIV (local input) INTEGER array, dimension LOCc(JA+N-1).
972 * On exit, if IPIV(I) = K, the local i-th column of sub( A )*P
973 * was the global K-th column of sub( A ). IPIV is tied to the
974 * distributed matrix A.
975 *
976 * =====================================================================
977 *
978 * .. Parameters ..
979  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
980  $ LLD_, MB_, M_, NB_, N_, RSRC_
981  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
982  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
983  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
984 * ..
985 * .. Local Scalars ..
986  INTEGER IACOL, ICOFFA, ICTXT, IITMP, IPVT, IPCOL,
987  $ IPROW, ITMP, J, JJ, JJA, KK, MYCOL, MYROW,
988  $ NPCOL, NPROW, NQ
989 * ..
990 * .. External Subroutines ..
991  EXTERNAL blacs_gridinfo, igebr2d, igebs2d, igerv2d,
992  $ igesd2d, igamn2d, infog1l, psswap
993 * ..
994 * .. External Functions ..
995  INTEGER INDXL2G, NUMROC
996  EXTERNAL indxl2g, numroc
997 * ..
998 * .. Intrinsic Functions ..
999  INTRINSIC min, mod
1000 * ..
1001 * .. Executable Statements ..
1002 *
1003 * Get grid parameters
1004 *
1005  ictxt = desca( ctxt_ )
1006  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
1007  CALL infog1l( ja, desca( nb_ ), npcol, mycol, desca( csrc_ ), jja,
1008  $ iacol )
1009  icoffa = mod( ja-1, desca( nb_ ) )
1010  nq = numroc( n+icoffa, desca( nb_ ), mycol, iacol, npcol )
1011  IF( mycol.EQ.iacol )
1012  $ nq = nq - icoffa
1013 *
1014  DO 20 j = ja, ja+n-2
1015 *
1016  ipvt = ja+n-1
1017  itmp = ja+n
1018 *
1019 * Find first the local minimum candidate for pivoting
1020 *
1021  CALL infog1l( j, desca( nb_ ), npcol, mycol, desca( csrc_ ),
1022  $ jj, iacol )
1023  DO 10 kk = jj, jja+nq-1
1024  IF( ipiv( kk ).LT.ipvt )THEN
1025  iitmp = kk
1026  ipvt = ipiv( kk )
1027  END IF
1028  10 CONTINUE
1029 *
1030 * Find the global minimum pivot
1031 *
1032  CALL igamn2d( ictxt, 'Rowwise', ' ', 1, 1, ipvt, 1, iprow,
1033  $ ipcol, 1, -1, mycol )
1034 *
1035 * Broadcast the corresponding index to the other process columns
1036 *
1037  IF( mycol.EQ.ipcol ) THEN
1038  itmp = indxl2g( iitmp, desca( nb_ ), mycol, desca( csrc_ ),
1039  $ npcol )
1040  CALL igebs2d( ictxt, 'Rowwise', ' ', 1, 1, itmp, 1 )
1041  IF( ipcol.NE.iacol ) THEN
1042  CALL igerv2d( ictxt, 1, 1, ipiv( iitmp ), 1, myrow,
1043  $ iacol )
1044  ELSE
1045  IF( mycol.EQ.iacol )
1046  $ ipiv( iitmp ) = ipiv( jj )
1047  END IF
1048  ELSE
1049  CALL igebr2d( ictxt, 'Rowwise', ' ', 1, 1, itmp, 1, myrow,
1050  $ ipcol )
1051  IF( mycol.EQ.iacol .AND. ipcol.NE.iacol )
1052  $ CALL igesd2d( ictxt, 1, 1, ipiv( jj ), 1, myrow, ipcol )
1053  END IF
1054 *
1055 * Swap the columns of A
1056 *
1057  CALL psswap( m, a, ia, itmp, desca, 1, a, ia, j, desca, 1 )
1058 *
1059  20 CONTINUE
1060 *
1061 * End of PSQPPIV
1062 *
1063  END
pslafchk
subroutine pslafchk(AFORM, DIAG, M, N, A, IA, JA, DESCA, IASEED, ANORM, FRESID, WORK)
Definition: pslafchk.f:3
pstzrzrv
subroutine pstzrzrv(M, N, A, IA, JA, DESCA, TAU, WORK)
Definition: pstzrzrv.f:2
psgelqrv
subroutine psgelqrv(M, N, A, IA, JA, DESCA, TAU, WORK)
Definition: psgelqrv.f:2
max
#define max(A, B)
Definition: pcgemr.c:180
infog1l
subroutine infog1l(GINDX, NB, NPROCS, MYROC, ISRCPROC, LINDX, ROCSRC)
Definition: infog1l.f:3
pslange
real function pslange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pslange.f:3
psqrinfo
subroutine psqrinfo(SUMMRY, NOUT, NFACT, FACTOR, LDFACT, NMAT, MVAL, LDMVAL, NVAL, LDNVAL, NNB, MBVAL, LDMBVAL, NBVAL, LDNBVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, NPROCS)
Definition: psqrinfo.f:6
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
psgeqrrv
subroutine psgeqrrv(M, N, A, IA, JA, DESCA, TAU, WORK)
Definition: psgeqrrv.f:2
pschekpad
subroutine pschekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pschekpad.f:3
psgelqf
subroutine psgelqf(M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: psgelqf.f:3
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
psmatgen
subroutine psmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: psmatgen.f:4
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
lsamen
logical function lsamen(N, CA, CB)
Definition: pblastst.f:1457
psgeqrf
subroutine psgeqrf(M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: psgeqrf.f:3
psqppiv
subroutine psqppiv(M, N, A, IA, JA, DESCA, IPIV)
Definition: psqrdriver.f:869
psfillpad
subroutine psfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: psfillpad.f:2
psgerqrv
subroutine psgerqrv(M, N, A, IA, JA, DESCA, TAU, WORK)
Definition: psgerqrv.f:2
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267
psgeqpf
subroutine psgeqpf(M, N, A, IA, JA, DESCA, IPIV, TAU, WORK, LWORK, INFO)
Definition: psgeqpf.f:3
psgeqlf
subroutine psgeqlf(M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: psgeqlf.f:3
pstzrzf
subroutine pstzrzf(M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: pstzrzf.f:3
psgeqlrv
subroutine psgeqlrv(M, N, A, IA, JA, DESCA, TAU, WORK)
Definition: psgeqlrv.f:2
min
#define min(A, B)
Definition: pcgemr.c:181
iceil
integer function iceil(INUM, IDENOM)
Definition: iceil.f:2
psgerqf
subroutine psgerqf(M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: psgerqf.f:3
psqrdriver
program psqrdriver
Definition: psqrdriver.f:1