ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pcqrdriver.f
Go to the documentation of this file.
1  PROGRAM pcqrdriver
2 *
3 * -- ScaLAPACK testing driver (version 1.7) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * May 28, 2001
7 *
8 * Purpose
9 * =======
10 *
11 * PCQRDRIVER is the main test program for the COMPLEX
12 * SCALAPACK QR factorization routines. This test driver performs a QR
13 * QL, LQ, RQ, QP (QR factorization with column pivoting) or TZ
14 * (complete unitary factorization) factorization and checks the
15 * results.
16 *
17 * The program must be driven by a short data file. An annotated
18 * example of a data file can be obtained by deleting the first 3
19 * characters from the following 16 lines:
20 * 'ScaLAPACK QR factorizations input file'
21 * 'PVM machine'
22 * 'QR.out' output file name (if any)
23 * 6 device out
24 * 6 number of factorizations
25 * 'QR' 'QL' 'LQ' 'RQ' 'QP' 'TZ' factorization: QR, QL, LQ, RQ, QP, TZ
26 * 4 number of problems sizes
27 * 55 17 31 201 values of M
28 * 5 71 31 201 values of N
29 * 3 number of MB's and NB's
30 * 4 3 5 values of MB
31 * 4 7 3 values of NB
32 * 7 number of process grids (ordered P & Q)
33 * 1 2 1 4 2 3 8 values of P
34 * 7 2 4 1 3 2 1 values of Q
35 * 1.0 threshold
36 *
37 * Internal Parameters
38 * ===================
39 *
40 * TOTMEM INTEGER, default = 2000000
41 * TOTMEM is a machine-specific parameter indicating the
42 * maximum amount of available memory in bytes.
43 * The user should customize TOTMEM to his platform. Remember
44 * to leave room in memory for the operating system, the BLACS
45 * buffer, etc. For example, on a system with 8 MB of memory
46 * per process (e.g., one processor on an Intel iPSC/860), the
47 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
48 * code, BLACS buffer, etc). However, for PVM, we usually set
49 * TOTMEM = 2000000. Some experimenting with the maximum value
50 * of TOTMEM may be required.
51 *
52 * INTGSZ INTEGER, default = 4 bytes.
53 * REALSZ INTEGER, default = 4 bytes.
54 * CPLXSZ INTEGER, default = 8 bytes.
55 * INTGSZ, REALSZ and CPLXSZ indicate the length in bytes on
56 * the given platform for an integer, a single precision real
57 * and a single precision complex.
58 * MEM COMPLEX array, dimension ( TOTMEM / CPLXSZ )
59 *
60 * All arrays used by SCALAPACK routines are allocated from
61 * this array and referenced by pointers. The integer IPA,
62 * for example, is a pointer to the starting element of MEM for
63 * the matrix A.
64 *
65 * =====================================================================
66 *
67 * .. Parameters ..
68  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
69  $ lld_, mb_, m_, nb_, n_, rsrc_
70  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
71  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
72  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
73  INTEGER cplxsz, intgsz, memsiz, ntests, realsz, totmem
74  COMPLEX padval
75  parameter( cplxsz = 8, intgsz = 4, realsz = 4,
76  $ totmem = 2000000, memsiz = totmem / cplxsz,
77  $ ntests = 20,
78  $ padval = ( -9923.0e+0, -9923.0e+0 ) )
79 * ..
80 * .. Local Scalars ..
81  CHARACTER*2 fact
82  CHARACTER*6 passed
83  CHARACTER*7 rout
84  CHARACTER*8 routchk
85  CHARACTER*80 outfile
86  LOGICAL check
87  INTEGER i, iam, iaseed, ictxt, imidpad, info, ipa,
88  $ ipostpad, ippiv, iprepad, iptau, iprw, ipw, j,
89  $ k, kfail, kpass, kskip, ktests, l, lipiv,
90  $ lrwork, ltau, lwork, m, maxmn, mb, minmn, mnp,
91  $ mnq, mp, mycol, myrow, n, nb, nfact, ngrids,
92  $ nmat, nnb, nout, npcol, nprocs, nprow, nq,
93  $ workfct, workrfct, worksiz
94  REAL anorm, fresid, thresh
95  DOUBLE PRECISION nops, tmflops
96 * ..
97 * .. Arrays ..
98  CHARACTER*2 factor( ntests )
99  INTEGER desca( dlen_ ), ierr( 1 ), mbval( ntests ),
100  $ mval( ntests ), nbval( ntests ),
101  $ nval( ntests ), pval( ntests ), qval( ntests )
102  DOUBLE PRECISION ctime( 1 ), wtime( 1 )
103  COMPLEX mem( memsiz )
104 * ..
105 * .. External Subroutines ..
106  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
107  $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
108  $ blacs_pinfo, descinit, igsum2d, pcchekpad,
115 * ..
116 * .. External Functions ..
117  LOGICAL lsamen
118  INTEGER iceil, numroc
119  REAL pclange
120  EXTERNAL iceil, lsamen, numroc, pclange
121 * ..
122 * .. Intrinsic Functions ..
123  INTRINSIC dble, max, min
124 * ..
125 * .. Data Statements ..
126  DATA ktests, kpass, kfail, kskip /4*0/
127 * ..
128 * .. Executable Statements ..
129 *
130 * Get starting information
131 *
132  CALL blacs_pinfo( iam, nprocs )
133  iaseed = 100
134  CALL pcqrinfo( outfile, nout, nfact, factor, ntests, nmat, mval,
135  $ ntests, nval, ntests, nnb, mbval, ntests, nbval,
136  $ ntests, ngrids, pval, ntests, qval, ntests,
137  $ thresh, mem, iam, nprocs )
138  check = ( thresh.GE.0.0e+0 )
139 *
140 * Loop over the different factorization types
141 *
142  DO 40 i = 1, nfact
143 *
144  fact = factor( i )
145 *
146 * Print headings
147 *
148  IF( iam.EQ.0 ) THEN
149  WRITE( nout, fmt = * )
150  IF( lsamen( 2, fact, 'QR' ) ) THEN
151  rout = 'PCGEQRF'
152  routchk = 'PCGEQRRV'
153  WRITE( nout, fmt = 9986 )
154  $ 'QR factorization tests.'
155  ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN
156  rout = 'PCGEQLF'
157  routchk = 'PCGEQLRV'
158  WRITE( nout, fmt = 9986 )
159  $ 'QL factorization tests.'
160  ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN
161  rout = 'PCGELQF'
162  routchk = 'PCGELQRV'
163  WRITE( nout, fmt = 9986 )
164  $ 'LQ factorization tests.'
165  ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN
166  rout = 'PCGERQF'
167  routchk = 'PCGERQRV'
168  WRITE( nout, fmt = 9986 )
169  $ 'RQ factorization tests.'
170  ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN
171  rout = 'PCGEQPF'
172  routchk = 'PCGEQRRV'
173  WRITE( nout, fmt = 9986 )
174  $ 'QR factorization with column pivoting tests.'
175  ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN
176  rout = 'PCTZRZF'
177  routchk = 'PCTZRZRV'
178  WRITE( nout, fmt = 9986 )
179  $ 'Complete unitary factorization tests.'
180  END IF
181  WRITE( nout, fmt = * )
182  WRITE( nout, fmt = 9995 )
183  WRITE( nout, fmt = 9994 )
184  WRITE( nout, fmt = * )
185  END IF
186 *
187 * Loop over different process grids
188 *
189  DO 30 j = 1, ngrids
190 *
191  nprow = pval( j )
192  npcol = qval( j )
193 *
194 * Make sure grid information is correct
195 *
196  ierr( 1 ) = 0
197  IF( nprow.LT.1 ) THEN
198  IF( iam.EQ.0 )
199  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
200  ierr( 1 ) = 1
201  ELSE IF( npcol.LT.1 ) THEN
202  IF( iam.EQ.0 )
203  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
204  ierr( 1 ) = 1
205  ELSE IF( nprow*npcol.GT.nprocs ) THEN
206  IF( iam.EQ.0 )
207  $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
208  ierr( 1 ) = 1
209  END IF
210 *
211  IF( ierr( 1 ).GT.0 ) THEN
212  IF( iam.EQ.0 )
213  $ WRITE( nout, fmt = 9997 ) 'grid'
214  kskip = kskip + 1
215  GO TO 30
216  END IF
217 *
218 * Define process grid
219 *
220  CALL blacs_get( -1, 0, ictxt )
221  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
222  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
223 *
224 * Go to bottom of loop if this case doesn't use my process
225 *
226  IF( myrow.GE.nprow .OR. mycol.GE.npcol )
227  $ GO TO 30
228 *
229  DO 20 k = 1, nmat
230 *
231  m = mval( k )
232  n = nval( k )
233 *
234 * Make sure matrix information is correct
235 *
236  ierr(1) = 0
237  IF( m.LT.1 ) THEN
238  IF( iam.EQ.0 )
239  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'M', m
240  ierr( 1 ) = 1
241  ELSE IF( n.LT.1 ) THEN
242  IF( iam.EQ.0 )
243  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
244  ierr( 1 ) = 1
245  END IF
246 *
247 * Make sure no one had error
248 *
249  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
250 *
251  IF( ierr( 1 ).GT.0 ) THEN
252  IF( iam.EQ.0 )
253  $ WRITE( nout, fmt = 9997 ) 'matrix'
254  kskip = kskip + 1
255  GO TO 20
256  END IF
257 *
258 * Loop over different blocking sizes
259 *
260  DO 10 l = 1, nnb
261 *
262  mb = mbval( l )
263  nb = nbval( l )
264 *
265 * Make sure mb is legal
266 *
267  ierr( 1 ) = 0
268  IF( mb.LT.1 ) THEN
269  ierr( 1 ) = 1
270  IF( iam.EQ.0 )
271  $ WRITE( nout, fmt = 9999 ) 'MB', 'MB', mb
272  END IF
273 *
274 * Check all processes for an error
275 *
276  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
277  $ 0 )
278 *
279  IF( ierr( 1 ).GT.0 ) THEN
280  IF( iam.EQ.0 )
281  $ WRITE( nout, fmt = 9997 ) 'MB'
282  kskip = kskip + 1
283  GO TO 10
284  END IF
285 *
286 * Make sure nb is legal
287 *
288  ierr( 1 ) = 0
289  IF( nb.LT.1 ) THEN
290  ierr( 1 ) = 1
291  IF( iam.EQ.0 )
292  $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
293  END IF
294 *
295 * Check all processes for an error
296 *
297  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
298  $ 0 )
299 *
300  IF( ierr( 1 ).GT.0 ) THEN
301  IF( iam.EQ.0 )
302  $ WRITE( nout, fmt = 9997 ) 'NB'
303  kskip = kskip + 1
304  GO TO 10
305  END IF
306 *
307 * Padding constants
308 *
309  mp = numroc( m, mb, myrow, 0, nprow )
310  nq = numroc( n, nb, mycol, 0, npcol )
311  mnp = numroc( min( m, n ), mb, myrow, 0, nprow )
312  mnq = numroc( min( m, n ), nb, mycol, 0, npcol )
313  IF( check ) THEN
314  iprepad = max( mb, mp )
315  imidpad = nb
316  ipostpad = max( nb, nq )
317  ELSE
318  iprepad = 0
319  imidpad = 0
320  ipostpad = 0
321  END IF
322 *
323 * Initialize the array descriptor for the matrix A
324 *
325  CALL descinit( desca, m, n, mb, nb, 0, 0, ictxt,
326  $ max( 1, mp ) + imidpad, ierr( 1 ) )
327 *
328 * Check all processes for an error
329 *
330  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
331  $ 0 )
332 *
333  IF( ierr( 1 ).LT.0 ) THEN
334  IF( iam.EQ.0 )
335  $ WRITE( nout, fmt = 9997 ) 'descriptor'
336  kskip = kskip + 1
337  GO TO 10
338  END IF
339 *
340 * Assign pointers into MEM for ScaLAPACK arrays, A is
341 * allocated starting at position MEM( IPREPAD+1 )
342 *
343  ipa = iprepad+1
344  iptau = ipa + desca( lld_ ) * nq + ipostpad + iprepad
345 *
346  IF( lsamen( 2, fact, 'QR' ) ) THEN
347 *
348  ltau = mnq
349  ipw = iptau + ltau + ipostpad + iprepad
350 *
351 * Figure the amount of workspace required by the QR
352 * factorization
353 *
354  lwork = desca( nb_ ) * ( mp + nq + desca( nb_ ) )
355  workfct = lwork + ipostpad
356  worksiz = workfct
357 *
358  IF( check ) THEN
359 *
360 * Figure the amount of workspace required by the
361 * checking routines PCLAFCHK, PCGEQRRV and
362 * PCLANGE
363 *
364  worksiz = lwork + mp*desca( nb_ ) + ipostpad
365 *
366  END IF
367 *
368  ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN
369 *
370  ltau = nq
371  ipw = iptau + ltau + ipostpad + iprepad
372 *
373 * Figure the amount of workspace required by the QL
374 * factorization
375 *
376  lwork = desca( nb_ ) * ( mp + nq + desca( nb_ ) )
377  workfct = lwork + ipostpad
378  worksiz = workfct
379 *
380  IF( check ) THEN
381 *
382 * Figure the amount of workspace required by the
383 * checking routines PCLAFCHK, PCGEQLRV and
384 * PCLANGE
385 *
386  worksiz = lwork + mp*desca( nb_ ) + ipostpad
387 *
388  END IF
389 *
390  ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN
391 *
392  ltau = mnp
393  ipw = iptau + ltau + ipostpad + iprepad
394 *
395 * Figure the amount of workspace required by the LQ
396 * factorization
397 *
398  lwork = desca( mb_ ) * ( mp + nq + desca( mb_ ) )
399  workfct = lwork + ipostpad
400  worksiz = workfct
401 *
402  IF( check ) THEN
403 *
404 * Figure the amount of workspace required by the
405 * checking routines PCLAFCHK, PCGELQRV and
406 * PCLANGE
407 *
408  worksiz = lwork +
409  $ max( mp*desca( nb_ ), nq*desca( mb_ )
410  $ ) + ipostpad
411 *
412  END IF
413 *
414  ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN
415 *
416  ltau = mp
417  ipw = iptau + ltau + ipostpad + iprepad
418 *
419 * Figure the amount of workspace required by the QR
420 * factorization
421 *
422  lwork = desca( mb_ ) * ( mp + nq + desca( mb_ ) )
423  workfct = lwork + ipostpad
424  worksiz = workfct
425 *
426  IF( check ) THEN
427 *
428 * Figure the amount of workspace required by the
429 * checking routines PCLAFCHK, PCGERQRV and
430 * PCLANGE
431 *
432  worksiz = lwork +
433  $ max( mp*desca( nb_ ), nq*desca( mb_ )
434  $ ) + ipostpad
435 *
436  END IF
437 *
438  ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN
439 *
440  ltau = mnq
441  ippiv = iptau + ltau + ipostpad + iprepad
442  lipiv = iceil( intgsz*nq, cplxsz )
443  ipw = ippiv + lipiv + ipostpad + iprepad
444 *
445 * Figure the amount of workspace required by the
446 * factorization i.e from IPW on.
447 *
448  lwork = max( 3, mp + max( 1, nq ) )
449  workfct = lwork + ipostpad
450  lrwork = max( 1, 2 * nq )
451  workrfct = iceil( lrwork*realsz, cplxsz ) +
452  $ ipostpad
453  iprw = ipw + workfct + iprepad
454  worksiz = workfct + iprepad + workrfct
455 *
456  IF( check ) THEN
457 *
458 * Figure the amount of workspace required by the
459 * checking routines PCLAFCHK, PCGEQRRV,
460 * PCLANGE.
461 *
462  worksiz = max( worksiz - ipostpad,
463  $ desca( nb_ )*( 2*mp + nq + desca( nb_ ) ) ) +
464  $ ipostpad
465  END IF
466 *
467  ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN
468 *
469  ltau = mp
470  ipw = iptau + ltau + ipostpad + iprepad
471 *
472 * Figure the amount of workspace required by the TZ
473 * factorization
474 *
475  lwork = desca( mb_ ) * ( mp + nq + desca( mb_ ) )
476  workfct = lwork + ipostpad
477  worksiz = workfct
478 *
479  IF( check ) THEN
480 *
481 * Figure the amount of workspace required by the
482 * checking routines PCLAFCHK, PCTZRZRV and
483 * PCLANGE
484 *
485  worksiz = lwork +
486  $ max( mp*desca( nb_ ), nq*desca( mb_ )
487  $ ) + ipostpad
488 *
489  END IF
490 *
491  END IF
492 *
493 * Check for adequate memory for problem size
494 *
495  ierr( 1 ) = 0
496  IF( ipw+worksiz.GT.memsiz ) THEN
497  IF( iam.EQ.0 )
498  $ WRITE( nout, fmt = 9996 )
499  $ fact // ' factorization',
500  $ ( ipw+worksiz )*cplxsz
501  ierr( 1 ) = 1
502  END IF
503 *
504 * Check all processes for an error
505 *
506  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
507  $ 0 )
508 *
509  IF( ierr( 1 ).GT.0 ) THEN
510  IF( iam.EQ.0 )
511  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
512  kskip = kskip + 1
513  GO TO 10
514  END IF
515 *
516 * Generate the matrix A
517 *
518  CALL pcmatgen( ictxt, 'N', 'N', desca( m_ ),
519  $ desca( n_ ), desca( mb_ ),
520  $ desca( nb_ ), mem( ipa ),
521  $ desca( lld_ ), desca( rsrc_ ),
522  $ desca( csrc_ ), iaseed, 0, mp, 0, nq,
523  $ myrow, mycol, nprow, npcol )
524 *
525 * Need the Infinity of A for checking
526 *
527  IF( check ) THEN
528  CALL pcfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
529  $ desca( lld_ ), iprepad, ipostpad,
530  $ padval )
531  IF( lsamen( 2, fact, 'QP' ) ) THEN
532  CALL pcfillpad( ictxt, lipiv, 1,
533  $ mem( ippiv-iprepad ), lipiv,
534  $ iprepad, ipostpad, padval )
535  END IF
536  CALL pcfillpad( ictxt, ltau, 1,
537  $ mem( iptau-iprepad ), ltau,
538  $ iprepad, ipostpad, padval )
539  CALL pcfillpad( ictxt, worksiz-ipostpad, 1,
540  $ mem( ipw-iprepad ),
541  $ worksiz-ipostpad,
542  $ iprepad, ipostpad, padval )
543  anorm = pclange( 'I', m, n, mem( ipa ), 1, 1,
544  $ desca, mem( ipw ) )
545  CALL pcchekpad( ictxt, 'PCLANGE', mp, nq,
546  $ mem( ipa-iprepad ), desca( lld_ ),
547  $ iprepad, ipostpad, padval )
548  CALL pcchekpad( ictxt, 'PCLANGE',
549  $ worksiz-ipostpad, 1,
550  $ mem( ipw-iprepad ),
551  $ worksiz-ipostpad, iprepad,
552  $ ipostpad, padval )
553  IF( lsamen( 2, fact, 'QP' ) ) THEN
554  CALL pcfillpad( ictxt, workrfct-ipostpad, 1,
555  $ mem( iprw-iprepad ),
556  $ workrfct-ipostpad,
557  $ iprepad, ipostpad, padval )
558  END IF
559  CALL pcfillpad( ictxt, workfct-ipostpad, 1,
560  $ mem( ipw-iprepad ),
561  $ workfct-ipostpad,
562  $ iprepad, ipostpad, padval )
563  END IF
564 *
565  CALL slboot()
566  CALL blacs_barrier( ictxt, 'All' )
567 *
568 * Perform QR factorizations
569 *
570  IF( lsamen( 2, fact, 'QR' ) ) THEN
571  CALL sltimer( 1 )
572  CALL pcgeqrf( m, n, mem( ipa ), 1, 1, desca,
573  $ mem( iptau ), mem( ipw ), lwork,
574  $ info )
575  CALL sltimer( 1 )
576  ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN
577  CALL sltimer( 1 )
578  CALL pcgeqlf( m, n, mem( ipa ), 1, 1, desca,
579  $ mem( iptau ), mem( ipw ), lwork,
580  $ info )
581  CALL sltimer( 1 )
582  ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN
583  CALL sltimer( 1 )
584  CALL pcgelqf( m, n, mem( ipa ), 1, 1, desca,
585  $ mem( iptau ), mem( ipw ), lwork,
586  $ info )
587  CALL sltimer( 1 )
588  ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN
589  CALL sltimer( 1 )
590  CALL pcgerqf( m, n, mem( ipa ), 1, 1, desca,
591  $ mem( iptau ), mem( ipw ), lwork,
592  $ info )
593  CALL sltimer( 1 )
594  ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN
595  CALL sltimer( 1 )
596  CALL pcgeqpf( m, n, mem( ipa ), 1, 1, desca,
597  $ mem( ippiv ), mem( iptau ),
598  $ mem( ipw ), lwork, mem( iprw ),
599  $ lrwork, info )
600  CALL sltimer( 1 )
601  ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN
602  CALL sltimer( 1 )
603  IF( n.GE.m )
604  $ CALL pctzrzf( m, n, mem( ipa ), 1, 1, desca,
605  $ mem( iptau ), mem( ipw ), lwork,
606  $ info )
607  CALL sltimer( 1 )
608  END IF
609 *
610  IF( check ) THEN
611 *
612 * Check for memory overwrite in factorization
613 *
614  CALL pcchekpad( ictxt, rout, mp, nq,
615  $ mem( ipa-iprepad ), desca( lld_ ),
616  $ iprepad, ipostpad, padval )
617  CALL pcchekpad( ictxt, rout, ltau, 1,
618  $ mem( iptau-iprepad ), ltau,
619  $ iprepad, ipostpad, padval )
620  IF( lsamen( 2, fact, 'QP' ) ) THEN
621  CALL pcchekpad( ictxt, rout, lipiv, 1,
622  $ mem( ippiv-iprepad ), lipiv,
623  $ iprepad, ipostpad, padval )
624  CALL pcchekpad( ictxt, rout, workrfct-ipostpad,
625  $ 1, mem( iprw-iprepad ),
626  $ workrfct-ipostpad,
627  $ iprepad, ipostpad, padval )
628  END IF
629  CALL pcchekpad( ictxt, rout, workfct-ipostpad, 1,
630  $ mem( ipw-iprepad ),
631  $ workfct-ipostpad, iprepad,
632  $ ipostpad, padval )
633  CALL pcfillpad( ictxt, worksiz-ipostpad, 1,
634  $ mem( ipw-iprepad ),
635  $ worksiz-ipostpad,
636  $ iprepad, ipostpad, padval )
637 *
638  IF( lsamen( 2, fact, 'QR' ) ) THEN
639 *
640 * Compute residual = ||A-Q*R|| / (||A||*N*eps)
641 *
642  CALL pcgeqrrv( m, n, mem( ipa ), 1, 1, desca,
643  $ mem( iptau ), mem( ipw ) )
644  CALL pclafchk( 'No', 'No', m, n, mem( ipa ), 1,
645  $ 1, desca, iaseed, anorm, fresid,
646  $ mem( ipw ) )
647  ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN
648 *
649 * Compute residual = ||A-Q*L|| / (||A||*N*eps)
650 *
651  CALL pcgeqlrv( m, n, mem( ipa ), 1, 1, desca,
652  $ mem( iptau ), mem( ipw ) )
653  CALL pclafchk( 'No', 'No', m, n, mem( ipa ), 1,
654  $ 1, desca, iaseed, anorm, fresid,
655  $ mem( ipw ) )
656  ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN
657 *
658 * Compute residual = ||A-L*Q|| / (||A||*N*eps)
659 *
660  CALL pcgelqrv( m, n, mem( ipa ), 1, 1, desca,
661  $ mem( iptau ), mem( ipw ) )
662  CALL pclafchk( 'No', 'No', m, n, mem( ipa ), 1,
663  $ 1, desca, iaseed, anorm, fresid,
664  $ mem( ipw ) )
665  ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN
666 *
667 * Compute residual = ||A-R*Q|| / (||A||*N*eps)
668 *
669  CALL pcgerqrv( m, n, mem( ipa ), 1, 1, desca,
670  $ mem( iptau ), mem( ipw ) )
671  CALL pclafchk( 'No', 'No', m, n, mem( ipa ), 1,
672  $ 1, desca, iaseed, anorm, fresid,
673  $ mem( ipw ) )
674  ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN
675 *
676 * Compute residual = ||AP-Q*R|| / (||A||*N*eps)
677 *
678  CALL pcgeqrrv( m, n, mem( ipa ), 1, 1, desca,
679  $ mem( iptau ), mem( ipw ) )
680  ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN
681 *
682 * Compute residual = ||A-T*Z|| / (||A||*N*eps)
683 *
684  IF( n.GE.m ) THEN
685  CALL pctzrzrv( m, n, mem( ipa ), 1, 1, desca,
686  $ mem( iptau ), mem( ipw ) )
687  END IF
688  CALL pclafchk( 'No', 'No', m, n, mem( ipa ), 1,
689  $ 1, desca, iaseed, anorm, fresid,
690  $ mem( ipw ) )
691  END IF
692 *
693 * Check for memory overwrite
694 *
695  CALL pcchekpad( ictxt, routchk, mp, nq,
696  $ mem( ipa-iprepad ), desca( lld_ ),
697  $ iprepad, ipostpad, padval )
698  CALL pcchekpad( ictxt, routchk, ltau, 1,
699  $ mem( iptau-iprepad ), ltau,
700  $ iprepad, ipostpad, padval )
701  CALL pcchekpad( ictxt, routchk, worksiz-ipostpad,
702  $ 1, mem( ipw-iprepad ),
703  $ worksiz-ipostpad, iprepad,
704  $ ipostpad, padval )
705 *
706  IF( lsamen( 2, fact, 'QP' ) ) THEN
707 *
708  CALL pcqppiv( m, n, mem( ipa ), 1, 1, desca,
709  $ mem( ippiv ) )
710 *
711 * Check for memory overwrite
712 *
713  CALL pcchekpad( ictxt, 'PCQPPIV', mp, nq,
714  $ mem( ipa-iprepad ),
715  $ desca( lld_ ),
716  $ iprepad, ipostpad, padval )
717  CALL pcchekpad( ictxt, 'PCQPPIV', lipiv, 1,
718  $ mem( ippiv-iprepad ), lipiv,
719  $ iprepad, ipostpad, padval )
720 *
721  CALL pclafchk( 'No', 'No', m, n, mem( ipa ), 1,
722  $ 1, desca, iaseed, anorm, fresid,
723  $ mem( ipw ) )
724 *
725 * Check for memory overwrite
726 *
727  CALL pcchekpad( ictxt, 'PCLAFCHK', mp, nq,
728  $ mem( ipa-iprepad ),
729  $ desca( lld_ ),
730  $ iprepad, ipostpad, padval )
731  CALL pcchekpad( ictxt, 'PCLAFCHK',
732  $ worksiz-ipostpad, 1,
733  $ mem( ipw-iprepad ),
734  $ worksiz-ipostpad, iprepad,
735  $ ipostpad, padval )
736  END IF
737 *
738 * Test residual and detect NaN result
739 *
740  IF( lsamen( 2, fact, 'TZ' ) .AND. n.LT.m ) THEN
741  kskip = kskip + 1
742  passed = 'BYPASS'
743  ELSE
744  IF( fresid.LE.thresh .AND.
745  $ (fresid-fresid).EQ.0.0e+0 ) THEN
746  kpass = kpass + 1
747  passed = 'PASSED'
748  ELSE
749  kfail = kfail + 1
750  passed = 'FAILED'
751  END IF
752  END IF
753 *
754  ELSE
755 *
756 * Don't perform the checking, only timing
757 *
758  kpass = kpass + 1
759  fresid = fresid - fresid
760  passed = 'BYPASS'
761 *
762  END IF
763 *
764 * Gather maximum of all CPU and WALL clock timings
765 *
766  CALL slcombine( ictxt, 'All', '>', 'W', 1, 1, wtime )
767  CALL slcombine( ictxt, 'All', '>', 'C', 1, 1, ctime )
768 *
769 * Print results
770 *
771  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
772 *
773  minmn = min( m, n )
774  maxmn = max( m, n )
775 *
776  IF( lsamen( 2, fact, 'TZ' ) ) THEN
777  IF( m.GE.n ) THEN
778  nops = 0.0d+0
779  ELSE
780 *
781 * 9 ( M^2 N - M^3 ) + 13 M N - M^2 for
782 * complete unitary factorization (M <= N).
783 *
784  nops = 9.0d+0 * (
785  $ dble( n )*( dble( m )**2 ) -
786  $ dble( m )**3 ) +
787  $ 13.0d+0*dble( n )*dble( m ) -
788  $ dble( m )**2
789  END IF
790 *
791  ELSE
792 *
793 * 8 M N^2 - 8/3 N^2 + 6 M N + 8 N^2 for QR type
794 * factorization when M >= N.
795 *
796  nops = 8.0d+0 * ( dble( minmn )**2 ) *
797  $ ( dble( maxmn )-dble( minmn ) / 3.0d+0 ) +
798  $ ( 6.0d+0 * dble( maxmn ) +
799  $ 8.0d+0 * dble( minmn ) ) *
800  $ dble( minmn )
801  END IF
802 *
803 * Print WALL time
804 *
805  IF( wtime( 1 ).GT.0.0d+0 ) THEN
806  tmflops = nops / ( wtime( 1 ) * 1.0d+6 )
807  ELSE
808  tmflops = 0.0d+0
809  END IF
810  IF( wtime( 1 ).GE.0.0d+0 )
811  $ WRITE( nout, fmt = 9993 ) 'WALL', m, n, mb, nb,
812  $ nprow, npcol, wtime( 1 ), tmflops,
813  $ passed, fresid
814 *
815 * Print CPU time
816 *
817  IF( ctime( 1 ).GT.0.0d+0 ) THEN
818  tmflops = nops / ( ctime( 1 ) * 1.0d+6 )
819  ELSE
820  tmflops = 0.0d+0
821  END IF
822  IF( ctime( 1 ).GE.0.0d+0 )
823  $ WRITE( nout, fmt = 9993 ) 'CPU ', m, n, mb, nb,
824  $ nprow, npcol, ctime( 1 ), tmflops,
825  $ passed, fresid
826 *
827  END IF
828 *
829  10 CONTINUE
830 *
831  20 CONTINUE
832 *
833  CALL blacs_gridexit( ictxt )
834 *
835  30 CONTINUE
836 *
837  40 CONTINUE
838 *
839 * Print out ending messages and close output file
840 *
841  IF( iam.EQ.0 ) THEN
842  ktests = kpass + kfail + kskip
843  WRITE( nout, fmt = * )
844  WRITE( nout, fmt = 9992 ) ktests
845  IF( check ) THEN
846  WRITE( nout, fmt = 9991 ) kpass
847  WRITE( nout, fmt = 9989 ) kfail
848  ELSE
849  WRITE( nout, fmt = 9990 ) kpass
850  END IF
851  WRITE( nout, fmt = 9988 ) kskip
852  WRITE( nout, fmt = * )
853  WRITE( nout, fmt = * )
854  WRITE( nout, fmt = 9987 )
855  IF( nout.NE.6 .AND. nout.NE.0 )
856  $ CLOSE ( nout )
857  END IF
858 *
859  CALL blacs_exit( 0 )
860 *
861  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
862  $ '; It should be at least 1' )
863  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
864  $ i4 )
865  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
866  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
867  $ i11 )
868  9995 FORMAT( 'TIME M N MB NB P Q Fact Time ',
869  $ ' MFLOPS CHECK Residual' )
870  9994 FORMAT( '---- ------ ------ --- --- ----- ----- --------- ',
871  $ '----------- ------ --------' )
872  9993 FORMAT( a4, 1x, i6, 1x, i6, 1x, i3, 1x, i3, 1x, i5, 1x, i5, 1x,
873  $ f9.2, 1x, f11.2, 1x, a6, 2x, g8.1 )
874  9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
875  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
876  9990 FORMAT( i5, ' tests completed without checking.' )
877  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
878  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
879  9987 FORMAT( 'END OF TESTS.' )
880  9986 FORMAT( a )
881 *
882  stop
883 *
884 * End of PCQRDRIVER
885 *
886  END
887 *
888  SUBROUTINE pcqppiv( M, N, A, IA, JA, DESCA, IPIV )
889 *
890 * -- ScaLAPACK routine (version 1.7) --
891 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
892 * and University of California, Berkeley.
893 * May 1, 1997
894 *
895 * .. Scalar Arguments ..
896  INTEGER IA, JA, M, N
897 * ..
898 * .. Array Arguments ..
899  INTEGER DESCA( * ), IPIV( * )
900  COMPLEX A( * )
901 * ..
902 *
903 * Purpose
904 * =======
905 *
906 * PCQPPIV applies to sub( A ) = A(IA:IA+M-1,JA:JA+N-1) the pivots
907 * returned by PCGEQPF in reverse order for checking purposes.
908 *
909 * Notes
910 * =====
911 *
912 * Each global data object is described by an associated description
913 * vector. This vector stores the information required to establish
914 * the mapping between an object element and its corresponding process
915 * and memory location.
916 *
917 * Let A be a generic term for any 2D block cyclicly distributed array.
918 * Such a global array has an associated description vector DESCA.
919 * In the following comments, the character _ should be read as
920 * "of the global array".
921 *
922 * NOTATION STORED IN EXPLANATION
923 * --------------- -------------- --------------------------------------
924 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
925 * DTYPE_A = 1.
926 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
927 * the BLACS process grid A is distribu-
928 * ted over. The context itself is glo-
929 * bal, but the handle (the integer
930 * value) may vary.
931 * M_A (global) DESCA( M_ ) The number of rows in the global
932 * array A.
933 * N_A (global) DESCA( N_ ) The number of columns in the global
934 * array A.
935 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
936 * the rows of the array.
937 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
938 * the columns of the array.
939 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
940 * row of the array A is distributed.
941 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
942 * first column of the array A is
943 * distributed.
944 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
945 * array. LLD_A >= MAX(1,LOCr(M_A)).
946 *
947 * Let K be the number of rows or columns of a distributed matrix,
948 * and assume that its process grid has dimension p x q.
949 * LOCr( K ) denotes the number of elements of K that a process
950 * would receive if K were distributed over the p processes of its
951 * process column.
952 * Similarly, LOCc( K ) denotes the number of elements of K that a
953 * process would receive if K were distributed over the q processes of
954 * its process row.
955 * The values of LOCr() and LOCc() may be determined via a call to the
956 * ScaLAPACK tool function, NUMROC:
957 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
958 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
959 * An upper bound for these quantities may be computed by:
960 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
961 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
962 *
963 * Arguments
964 * =========
965 *
966 * M (global input) INTEGER
967 * The number of rows to be operated on, i.e. the number of rows
968 * of the distributed submatrix sub( A ). M >= 0.
969 *
970 * N (global input) INTEGER
971 * The number of columns to be operated on, i.e. the number of
972 * columns of the distributed submatrix sub( A ). N >= 0.
973 *
974 * A (local input/local output) COMPLEX pointer into the
975 * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
976 * On entry, the local pieces of the M-by-N distributed matrix
977 * sub( A ) which is to be permuted. On exit, the local pieces
978 * of the distributed permuted submatrix sub( A ) * Inv( P ).
979 *
980 * IA (global input) INTEGER
981 * The row index in the global array A indicating the first
982 * row of sub( A ).
983 *
984 * JA (global input) INTEGER
985 * The column index in the global array A indicating the
986 * first column of sub( A ).
987 *
988 * DESCA (global and local input) INTEGER array of dimension DLEN_.
989 * The array descriptor for the distributed matrix A.
990 *
991 * IPIV (local input) INTEGER array, dimension LOCc(JA+N-1).
992 * On exit, if IPIV(I) = K, the local i-th column of sub( A )*P
993 * was the global K-th column of sub( A ). IPIV is tied to the
994 * distributed matrix A.
995 *
996 * =====================================================================
997 *
998 * .. Parameters ..
999  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
1000  $ LLD_, MB_, M_, NB_, N_, RSRC_
1001  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
1002  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
1003  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
1004 * ..
1005 * .. Local Scalars ..
1006  INTEGER IACOL, ICOFFA, ICTXT, IITMP, IPVT, IPCOL,
1007  $ IPROW, ITMP, J, JJ, JJA, KK, MYCOL, MYROW,
1008  $ NPCOL, NPROW, NQ
1009 * ..
1010 * .. External Subroutines ..
1011  EXTERNAL blacs_gridinfo, igebr2d, igebs2d, igerv2d,
1012  $ igesd2d, igamn2d, infog1l, pcswap
1013 * ..
1014 * .. External Functions ..
1015  INTEGER INDXL2G, NUMROC
1016  EXTERNAL indxl2g, numroc
1017 * ..
1018 * .. Intrinsic Functions ..
1019  INTRINSIC min, mod
1020 * ..
1021 * .. Executable Statements ..
1022 *
1023 * Get grid parameters
1024 *
1025  ictxt = desca( ctxt_ )
1026  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
1027  CALL infog1l( ja, desca( nb_ ), npcol, mycol, desca( csrc_ ), jja,
1028  $ iacol )
1029  icoffa = mod( ja-1, desca( nb_ ) )
1030  nq = numroc( n+icoffa, desca( nb_ ), mycol, iacol, npcol )
1031  IF( mycol.EQ.iacol )
1032  $ nq = nq - icoffa
1033 *
1034  DO 20 j = ja, ja+n-2
1035 *
1036  ipvt = ja+n-1
1037  itmp = ja+n
1038 *
1039 * Find first the local minimum candidate for pivoting
1040 *
1041  CALL infog1l( j, desca( nb_ ), npcol, mycol, desca( csrc_ ),
1042  $ jj, iacol )
1043  DO 10 kk = jj, jja+nq-1
1044  IF( ipiv( kk ).LT.ipvt )THEN
1045  iitmp = kk
1046  ipvt = ipiv( kk )
1047  END IF
1048  10 CONTINUE
1049 *
1050 * Find the global minimum pivot
1051 *
1052  CALL igamn2d( ictxt, 'Rowwise', ' ', 1, 1, ipvt, 1, iprow,
1053  $ ipcol, 1, -1, mycol )
1054 *
1055 * Broadcast the corresponding index to the other process columns
1056 *
1057  IF( mycol.EQ.ipcol ) THEN
1058  itmp = indxl2g( iitmp, desca( nb_ ), mycol, desca( csrc_ ),
1059  $ npcol )
1060  CALL igebs2d( ictxt, 'Rowwise', ' ', 1, 1, itmp, 1 )
1061  IF( ipcol.NE.iacol ) THEN
1062  CALL igerv2d( ictxt, 1, 1, ipiv( iitmp ), 1, myrow,
1063  $ iacol )
1064  ELSE
1065  IF( mycol.EQ.iacol )
1066  $ ipiv( iitmp ) = ipiv( jj )
1067  END IF
1068  ELSE
1069  CALL igebr2d( ictxt, 'Rowwise', ' ', 1, 1, itmp, 1, myrow,
1070  $ ipcol )
1071  IF( mycol.EQ.iacol .AND. ipcol.NE.iacol )
1072  $ CALL igesd2d( ictxt, 1, 1, ipiv( jj ), 1, myrow, ipcol )
1073  END IF
1074 *
1075 * Swap the columns of A
1076 *
1077  CALL pcswap( m, a, ia, itmp, desca, 1, a, ia, j, desca, 1 )
1078 *
1079  20 CONTINUE
1080 *
1081 * End of PCQPPIV
1082 *
1083  END
pcgerqf
subroutine pcgerqf(M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: pcgerqf.f:3
pcgeqrrv
subroutine pcgeqrrv(M, N, A, IA, JA, DESCA, TAU, WORK)
Definition: pcgeqrrv.f:2
pcqrinfo
subroutine pcqrinfo(SUMMRY, NOUT, NFACT, FACTOR, LDFACT, NMAT, MVAL, LDMVAL, NVAL, LDNVAL, NNB, MBVAL, LDMBVAL, NBVAL, LDNBVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, NPROCS)
Definition: pcqrinfo.f:6
pclafchk
subroutine pclafchk(AFORM, DIAG, M, N, A, IA, JA, DESCA, IASEED, ANORM, FRESID, WORK)
Definition: pclafchk.f:3
max
#define max(A, B)
Definition: pcgemr.c:180
infog1l
subroutine infog1l(GINDX, NB, NPROCS, MYROC, ISRCPROC, LINDX, ROCSRC)
Definition: infog1l.f:3
pcgeqpf
subroutine pcgeqpf(M, N, A, IA, JA, DESCA, IPIV, TAU, WORK, LWORK, RWORK, LRWORK, INFO)
Definition: pcgeqpf.f:3
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
pctzrzrv
subroutine pctzrzrv(M, N, A, IA, JA, DESCA, TAU, WORK)
Definition: pctzrzrv.f:2
pcgelqrv
subroutine pcgelqrv(M, N, A, IA, JA, DESCA, TAU, WORK)
Definition: pcgelqrv.f:2
pcgelqf
subroutine pcgelqf(M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: pcgelqf.f:3
pcgerqrv
subroutine pcgerqrv(M, N, A, IA, JA, DESCA, TAU, WORK)
Definition: pcgerqrv.f:2
pcchekpad
subroutine pcchekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pcchekpad.f:3
pcmatgen
subroutine pcmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: pcmatgen.f:4
pcgeqlf
subroutine pcgeqlf(M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: pcgeqlf.f:3
pcqppiv
subroutine pcqppiv(M, N, A, IA, JA, DESCA, IPIV)
Definition: pcqrdriver.f:889
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
pclange
real function pclange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pclange.f:3
pcfillpad
subroutine pcfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pcfillpad.f:2
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
lsamen
logical function lsamen(N, CA, CB)
Definition: pblastst.f:1457
pcgeqlrv
subroutine pcgeqlrv(M, N, A, IA, JA, DESCA, TAU, WORK)
Definition: pcgeqlrv.f:2
pcgeqrf
subroutine pcgeqrf(M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: pcgeqrf.f:3
pcqrdriver
program pcqrdriver
Definition: pcqrdriver.f:1
pctzrzf
subroutine pctzrzf(M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: pctzrzf.f:3
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267
min
#define min(A, B)
Definition: pcgemr.c:181
iceil
integer function iceil(INUM, IDENOM)
Definition: iceil.f:2