14 LOGICAL balance, comphess, compresi,
16 LOGICAL debug, prn, timesteps, barr,
18 INTEGER slv_min, slv_max
19 parameter( debug = .false.,
28 $ slv_min = 2, slv_max = 2,
29 $ uni_lapack = .true. )
30 INTEGER n, nb, arsrc, acsrc
35 $ arsrc = 0, acsrc = 0 )
36 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dt_,
37 $ lld_, mb_, m_, nb_, n_, rsrc_
38 parameter( block_cyclic_2d = 1, dlen_ = 9, dt_ = 1,
39 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
40 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
41 INTEGER dpalloc, intallc
42 INTEGER dpsiz, intsz, nout, izero
43 parameter( dpsiz = 8, dpalloc = 8 000 000,
44 $ intsz = 4, intallc = 8 000 000,
45 $ nout = 6, izero = 0)
47 parameter( zero = 0.0, one = 1.0, two = 2.0 )
50 INTEGER ictxt, iam, nprow, npcol, myrow, mycol,
51 $ sys_nprocs, nprocs, arows, acols, temp_ictxt
53 INTEGER info, ktop, kbot, ilo, ihi, i
54 INTEGER ipa, ipacpy, ipq, wr1, wi1, wr2, wi2, ipw1,
56 INTEGER totit, sweeps, totns, hess
58 DOUBLE PRECISION stamp, tottime, t_ba, t_gen, t_hs, t_sch, t_qr,
59 $ t_res, itpereig, swpspeig, nspeig, speedup,
61 REAL rnorm, anorm, r1, orth, o1, o2, dpdum, elem1,
67 INTEGER desca( dlen_ ), descq( dlen_ ), descvec( dlen_ )
69 REAL,
ALLOCATABLE :: mem(:)
70 INTEGER,
ALLOCATABLE :: imem(:)
73 INTRINSIC int, float, sqrt,
max,
min
78 DOUBLE PRECISION mpi_wtime
79 EXTERNAL blacs_pinfo, blacs_get, blacs_gridinit,
80 $ blacs_gridinfo, blacs_gridexit, blacs_exit
82 EXTERNAL sgebal, sgehrd
89 CALL blacs_pinfo( iam, sys_nprocs )
90 nprow = int( sqrt( float(sys_nprocs) ) )
91 npcol = sys_nprocs / nprow
92 CALL blacs_get( 0, 0, ictxt )
93 CALL blacs_gridinit( ictxt,
'2D', nprow, npcol )
94 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
97 IF ( ictxt.LT.0 )
GO TO 777
103 tottime = mpi_wtime()
111 ALLOCATE ( mem( dpalloc ), stat = info )
113 WRITE(*,*)
'% Could not allocate MEM. INFO = ', info
116 ALLOCATE ( imem( intallc ), stat = info )
118 WRITE(*,*)
'% Could not allocate IMEM. INFO = ', info
121 mem( 1:dpalloc ) = zero
122 imem( 1:intallc ) = izero
126 eps =
pslamch( ictxt,
'Epsilon' )
132 WRITE(*,*)
'ScaLAPACK Test for PSHSEQR'
134 WRITE(*,*)
'epsilon = ', eps
135 WRITE(*,*)
'threshold = ', thresh
137 WRITE(*,*)
'Residual and Orthogonality Residual computed by:'
139 WRITE(*,*)
'Residual = ',
140 $
' || T - Q^T*A*Q ||_F / ( ||A||_F * eps * sqrt(N) )'
142 WRITE(*,*)
'Orthogonality = ',
143 $
' MAX( || I - Q^T*Q ||_F, || I - Q*Q^T ||_F ) / ',
147 $
'Test passes if both residuals are less then threshold'
149 WRITE( nout, fmt = 9995 )
150 WRITE( nout, fmt = 9994 )
157 DO solver = slv_max, slv_min, -1
169 arows =
numroc( n, nb, myrow, 0, nprow )
170 acols =
numroc( n, nb, mycol, 0, npcol )
174 IF( debug )
WRITE(*,*)
'% #', iam,
': Set up descriptors...'
175 IF( barr )
CALL blacs_barrier(ictxt,
'A')
176 CALL descinit( desca, n, n, nb, nb,
min(arsrc,nprow-1),
177 $
min(npcol-1,acsrc), temp_ictxt,
max(1, arows), info )
178 IF ( info.NE.0 )
THEN
179 WRITE(*,*)
"% DESCINIT DESCA failed, INFO =", info
182 CALL descinit( descq, n, n, nb, nb,
min(arsrc,nprow-1),
183 $
min(npcol-1,acsrc), temp_ictxt,
max(1, arows), info )
184 IF ( info.NE.0 )
THEN
185 WRITE(*,*)
"% DESCINIT DESCQ failed, INFO =", info
188 CALL descinit( descvec, n, 1, n, 1,
min(arsrc,nprow-1),
189 $
min(npcol-1,acsrc), temp_ictxt, n, info )
190 IF ( info.NE.0 )
THEN
191 WRITE(*,*)
"% DESCINIT DESCVEC failed, INFO =", info
197 IF( debug )
WRITE(*,*)
'% #', iam,
': Assign pointers...'
199 ipacpy = ipa + desca( lld_ ) * acols
200 ipq = ipacpy + desca( lld_ ) * acols
201 wr1 = ipq + descq( lld_ ) * acols
206 ipw2 = ipw1 + desca( lld_ ) * acols
207 IF( debug )
WRITE(*,*)
'% (IPW2,DPALLOC):', ipw2, dpalloc
209 IF( ipw2+desca(lld_)*acols .GT. dpalloc+1 )
THEN
210 WRITE(*,*)
'% Not enough DP memory!'
220 IF( barr )
CALL blacs_barrier(ictxt,
'A')
221 CALL pslaset(
'All over', n, n, zero, one, mem(ipq), 1, 1,
223 CALL psmatgen2( temp_ictxt,
'Random',
'NoDiagDominant',
224 $ n, n, nb, nb, mem(ipa), desca( lld_ ), 0, 0, 7, 0,
225 $ arows, 0, acols, myrow, mycol, nprow, npcol )
226 IF( .NOT. comphess )
THEN
227 CALL pslaset(
'Lower triangular', n-2, n-2, zero, zero,
228 $ mem(ipa), 3, 1, desca )
229 CALL pslaset(
'All over', n, n, zero, one, mem(ipq),
232 $
CALL pslaset(
'Lower triangular', ktop-1, ktop-1,
233 $ zero, zero, mem(ipa), 2, 1, descq )
235 $
CALL pslaset(
'Lower triangular', n-kbot, n-kbot,
236 $ zero, zero, mem(ipa), kbot+1, kbot, descq )
241 IF( barr )
CALL blacs_barrier(ictxt,
'A')
243 IF( comphess .AND. balance )
THEN
244 IF( nprocs.EQ.1 .AND. solver.NE.2 .AND. uni_lapack )
THEN
245 IF( debug )
WRITE(*,*)
'% #', iam,
': == dgebal =='
246 CALL sgebal(
'Both', n, mem(ipa), desca(lld_), ilo,
248 IF ( info.NE.0 )
THEN
249 WRITE(*,*)
"% SGEBAL failed, INFO =", info
253 IF( debug )
WRITE(*,*)
'% #', iam,
': == pdgebal =='
254 CALL psgebal(
'Both', n, mem(ipa), desca, ilo, ihi,
256 IF ( info.NE.0 )
THEN
257 WRITE(*,*)
"% PSGEBAL failed, INFO =", info
261 ELSEIF( comphess )
THEN
268 t_ba = mpi_wtime() - t_ba
271 IF( debug )
WRITE(*,*)
'% #', iam,
': ILO,IHI=',ilo,ihi
275 IF( barr )
CALL blacs_barrier(ictxt,
'A')
276 IF( debug )
WRITE(*,*)
'% #', iam,
': Copy matrix A'
277 CALL pslacpy(
'All', n, n, mem(ipa), 1, 1, desca, mem(ipacpy),
283 $
CALL pslaprnt( n, n, mem(ipacpy), 1, 1, desca, 0, 0,
284 $
'A', nout, mem(ipw1) )
285 t_gen = t_gen + mpi_wtime() - stamp - t_ba
292 IF( .NOT. comphess )
GO TO 30
296 IF( barr )
CALL blacs_barrier(ictxt,
'A')
297 IF( debug )
WRITE(*,*)
'% #', iam,
298 $
': Reduce to Hessenberg form...N=',n, ilo,ihi
300 IF( nprocs.EQ.1 .AND. solver.NE.2 .AND. uni_lapack )
THEN
301 IF( debug )
WRITE(*,*)
'% #', iam,
': == dgehrd =='
302 CALL sgehrd( n, ilo, ihi, mem(ipa), desca(lld_),
303 $ mem(ipw1), mem(ipw2), -1, info )
304 IF (dpalloc-ipw2.LT.mem(ipw2))
THEN
305 WRITE(*,*)
"% Not enough memory for SGEHRD"
308 CALL sgehrd( n, ilo, ihi, mem(ipa), desca(lld_),
309 $ mem(ipw1), mem(ipw2), dpalloc-ipw2, info )
310 IF ( info.NE.0 )
THEN
311 WRITE(*,*)
"% SGEHRD failed, INFO =", info
315 IF( debug )
WRITE(*,*)
'% #', iam,
': == pdgehrd =='
316 CALL psgehrd( n, ilo, ihi, mem(ipa), 1, 1, desca, mem(ipw1),
317 $ mem(ipw2), -1, info )
318 IF (dpalloc-ipw2.LT.mem(ipw2))
THEN
319 WRITE(*,*)
"% Not enough memory for PSGEHRD"
322 CALL psgehrd( n, ilo, ihi, mem(ipa), 1, 1, desca, mem(ipw1),
323 $ mem(ipw2), dpalloc-ipw2, info )
324 IF ( info.NE.0 )
THEN
325 WRITE(*,*)
"% PSGEHRD failed, INFO =", info
332 IF( barr )
CALL blacs_barrier(ictxt,
'A')
333 IF( debug )
WRITE(*,*)
'% #', iam,
':Form Q explicitly'
335 IF( debug )
WRITE(*,*)
'% #', iam,
': == pdormhr =='
336 CALL psormhr(
'L',
'N', n, n, ilo, ihi, mem(ipa), 1, 1,
337 $ desca, mem(ipw1), mem(ipq), 1, 1, descq, mem(ipw2),
339 IF (dpalloc-ipw2.LT.mem(ipw2))
THEN
340 WRITE(*,*)
"% Not enough memory for PSORMHR"
343 CALL psormhr(
'L',
'N', n, n, ilo, ihi, mem(ipa), 1, 1,
344 $ desca, mem(ipw1), mem(ipq), 1, 1, descq, mem(ipw2),
345 $ dpalloc-ipw2, info )
346 IF ( info.NE.0 )
THEN
347 WRITE(*,*)
"% PSORMHR failed, INFO =", info
353 CALL pslaset(
'Lower triangular', n-2, n-2, zero, zero,
354 $ mem(ipa), 3, 1, desca )
359 CALL pslaprnt( n, n, mem(ipa), 1, 1, desca, 0, 0,
'H', nout,
361 CALL pslaprnt( n, n, mem(ipq), 1, 1, descq, 0, 0,
'Q', nout,
366 t_hs = mpi_wtime() - t_hs
372 IF( barr )
CALL blacs_barrier(ictxt,
'A')
374 IF( solver.EQ.1 )
THEN
375 IF( debug )
WRITE(*,*)
'% #', iam,
': == pdlaqr1 =='
377 CALL pslaqr1( .true., .true., n, ilo, ihi, mem(ipa), desca,
378 $ mem(wr1), mem(wi1), ilo, ihi, mem(ipq), descq,
379 $ mem(ipw1), -1, imem, -1, info )
380 IF (dpalloc-ipw1.LT.mem(ipw1))
THEN
381 WRITE(*,*)
"% Not enough DP memory for PSLAQR1"
384 IF (intallc.LT.imem(1))
THEN
385 WRITE(*,*)
"% Not enough INT memory for PSLAQR1"
388 CALL pslaqr1( .true., .true., n, ilo, ihi, mem(ipa), desca,
389 $ mem(wr1), mem(wi1), ilo, ihi, mem(ipq), descq,
390 $ mem(ipw1), dpalloc-ipw1+1, imem, intallc, info )
392 WRITE(*,*)
"% PSLAQR1: INFO =", info
394 ELSEIF( solver.EQ.2 )
THEN
395 IF( debug )
WRITE(*,*)
'% #', iam,
': == pdhseqr =='
397 IF( barr )
CALL blacs_barrier(ictxt,
'A')
398 CALL pshseqr(
'Schur',
'Vectors', n, ilo, ihi, mem(ipa),
399 $ desca, mem(wr2), mem(wi2), mem(ipq), descq, mem(ipw1),
400 $ -1, imem, -1, info )
401 IF( barr )
CALL blacs_barrier(ictxt,
'A')
402 IF (dpalloc-ipw1.LT.mem(ipw1))
THEN
403 WRITE(*,*)
"% Not enough DP memory for PSHSEQR"
406 IF (intallc.LT.imem(1))
THEN
407 WRITE(*,*)
"% Not enough INT memory for PSHSEQR"
410 IF( barr )
CALL blacs_barrier(ictxt,
'A')
411 CALL pshseqr(
'Schur',
'Vectors', n, ilo, ihi, mem(ipa),
412 $ desca, mem(wr2), mem(wi2), mem(ipq), descq, mem(ipw1),
413 $ dpalloc-ipw1+1, imem, intallc, info )
415 WRITE(*,*)
"% PSHSEQR: INFO =", info
418 WRITE(*,*)
'% ERROR: Illegal SOLVER number!'
421 t_qr = mpi_wtime() - t_qr
424 t_sch = t_sch + t_qr + t_hs + t_ba
428 itpereig = float(totit) / float(n)
429 swpspeig = float(sweeps) / float(n)
430 nspeig = float(totns) / float(n)
435 CALL pslaprnt( n, n, mem(ipa), 1, 1, desca, 0, 0,
'T',
437 CALL pslaprnt( n, n, mem(ipq), 1, 1, descq, 0, 0,
'Z',
447 CALL pselget(
'All',
'1-Tree', elem1, mem(ipa), i, i-1,
452 CALL pselget(
'All',
'1-Tree', elem2, mem(ipa), i+1, i,
455 CALL pselget(
'All',
'1-Tree', elem3, mem(ipa), i+2, i+1,
460 IF( elem2.NE.zero .AND. abs(elem1)+abs(elem2)+abs(elem3).GT.
461 $ abs(elem2) ) hess = hess + 1
472 IF( debug )
WRITE(*,*)
'% #', iam,
': Compute residuals 1'
473 IF( debug )
WRITE(*,*)
'% #', iam,
': pdgemm 3'
474 CALL psgemm(
'N',
'N', n, n, n, one, mem(ipacpy), 1, 1,
475 $ desca, mem(ipq), 1, 1, descq, zero, mem(ipw1), 1, 1,
477 IF( debug )
WRITE(*,*)
'% #', iam,
': pdgemm 4'
478 IF( debug )
WRITE(*,*)
'% #', iam,
': N=',n
479 IF( debug )
WRITE(*,*)
'% #', iam,
': DESCA=',desca(1:dlen_)
480 IF( debug )
WRITE(*,*)
'% #', iam,
': DESCQ=',descq(1:dlen_)
481 CALL psgemm(
'T',
'N', n, n, n, -one, mem(ipq), 1, 1,
482 $ descq, mem(ipw1), 1, 1, desca, one, mem(ipa), 1, 1,
484 r1 =
pslange(
'Frobenius', n, n, mem(ipa), 1, 1, desca,
486 anorm =
pslange(
'Frobenius', n, n, mem(ipacpy), 1, 1,
488 IF( anorm.GT.zero )
THEN
489 rnorm = r1 / (anorm*eps*sqrt(float(n)))
498 IF( debug )
WRITE(*,*)
'% #', iam,
': Compute residuals 2'
499 CALL pslaset(
'All', n, n, zero, one, mem(ipw1), 1, 1,
501 CALL pslacpy(
'All', n, n, mem(ipq), 1, 1, descq, mem(ipw2),
503 CALL psgemm(
'T',
'N', n, n, n, -one, mem(ipq), 1, 1, descq,
504 $ mem(ipw2), 1, 1, descq, one, mem(ipw1), 1, 1, descq )
505 o1 =
pslange(
'Frobenius', n, n, mem(ipw1), 1, 1, descq,
507 CALL pslaset(
'All', n, n, zero, one, mem(ipw1), 1, 1,
509 CALL psgemm(
'N',
'T', n, n, n, -one, mem(ipq), 1, 1, descq,
510 $ mem(ipw2), 1, 1, descq, one, mem(ipw1), 1, 1, descq )
511 o2 =
pslange(
'Frobenius', n, n, mem(ipw1), 1, 1, descq,
513 orth =
max(o1,o2) / (eps*float(n))
518 t_res = t_res + mpi_wtime() - stamp
521 tottime = mpi_wtime() - tottime
528 IF( (orth.GT.thresh).OR.(rnorm.GT.thresh) )
THEN
533 IF( debug )
WRITE(*,*)
'% #', iam,
': Print results...'
535 WRITE( nout, fmt = 9993 ) n, nb, nprow, npcol, t_qr, passed
537 CALL blacs_barrier( ictxt,
'All' )
545 DEALLOCATE( mem, imem )
547 CALL blacs_gridexit( ictxt )
555 6666
FORMAT(a2,a3,a6,a4,a5,a6,a3,a3,a3,a9,a9,a9,a8,a8,a9,a9,a9,a9,a9,
556 $ a9,a9,a9,a9,a9,a9,a5,a5,a8,a5,a5)
557 7777
FORMAT(a2,i3,i6,i4,i5,i6,i3,i3,i3,f9.2,f9.2,f9.2,f8.2,f8.2,f9.2,
558 $ f9.2,f9.2,f9.2,f9.2,f9.2,f9.2,f9.2,e9.2,e9.2,e9.2,i5,i5,
560 9995
FORMAT(
' N NB P Q QR Time CHECK' )
561 9994
FORMAT(
'----- --- ---- ---- -------- ------' )
562 9993
FORMAT( i5, 1x, i3, 1x, i4, 1x, i4, 1x, f8.2, 1x, a6 )