14 LOGICAL balance, comphess, compresi,
16 LOGICAL debug, prn, timesteps, barr,
18 INTEGER slv_min, slv_max
19 parameter( debug = .false.,
28 $ slv_min = 2, slv_max = 2,
29 $ uni_lapack = .true. )
30 INTEGER n, nb, arsrc, acsrc
35 $ arsrc = 0, acsrc = 0 )
36 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dt_,
37 $ lld_, mb_, m_, nb_, n_, rsrc_
38 parameter( block_cyclic_2d = 1, dlen_ = 9, dt_ = 1,
39 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
40 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
41 INTEGER dpalloc, intallc
42 INTEGER dpsiz, intsz, nout, izero
43 parameter( dpsiz = 8, dpalloc = 8 000 000,
44 $ intsz = 4, intallc = 8 000 000,
45 $ nout = 6, izero = 0)
46 DOUBLE PRECISION zero, one, two
47 parameter( zero = 0.0d+00, one = 1.0d+00, two = 2.0d+00 )
50 INTEGER ictxt, iam, nprow, npcol, myrow, mycol,
51 $ sys_nprocs, nprocs, arows, acols, temp_ictxt
53 INTEGER info, ktop, kbot, ilo, ihi, i
54 INTEGER ipa, ipacpy, ipq, wr1, wi1, wr2, wi2, ipw1,
56 INTEGER totit, sweeps, totns, hess
57 DOUBLE PRECISION eps, thresh
58 DOUBLE PRECISION stamp, tottime, t_ba, t_gen, t_hs, t_sch, t_qr,
59 $ t_res, itpereig, swpspeig, nspeig, speedup,
61 DOUBLE PRECISION rnorm, anorm, r1, orth, o1, o2, dpdum, elem1,
67 INTEGER desca( dlen_ ), descq( dlen_ ), descvec( dlen_ )
68 DOUBLE PRECISION scale( n )
69 DOUBLE PRECISION,
ALLOCATABLE :: mem(:)
70 INTEGER,
ALLOCATABLE :: imem(:)
73 INTRINSIC int, dble, sqrt,
max,
min
78 EXTERNAL blacs_pinfo, blacs_get, blacs_gridinit,
79 $ blacs_gridinfo, blacs_gridexit, blacs_exit
81 EXTERNAL dgebal, dgehrd
88 CALL blacs_pinfo( iam, sys_nprocs )
89 nprow = int( sqrt( dble(sys_nprocs) ) )
90 npcol = sys_nprocs / nprow
91 CALL blacs_get( 0, 0, ictxt )
92 CALL blacs_gridinit( ictxt,
'2D', nprow, npcol )
93 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
96 IF ( ictxt.LT.0 )
GO TO 777
102 tottime = mpi_wtime()
110 ALLOCATE ( mem( dpalloc ), stat = info )
112 WRITE(*,*)
'% Could not allocate MEM. INFO = ', info
115 ALLOCATE ( imem( intallc ), stat = info )
117 WRITE(*,*)
'% Could not allocate IMEM. INFO = ', info
120 mem( 1:dpalloc ) = zero
121 imem( 1:intallc ) = izero
125 eps =
pdlamch( ictxt,
'Epsilon' )
131 WRITE(*,*)
'ScaLAPACK Test for PDHSEQR'
133 WRITE(*,*)
'epsilon = ', eps
134 WRITE(*,*)
'threshold = ', thresh
136 WRITE(*,*)
'Residual and Orthogonality Residual computed by:'
138 WRITE(*,*)
'Residual = ',
139 $
' || T - Q^T*A*Q ||_F / ( ||A||_F * eps * sqrt(N) )'
141 WRITE(*,*)
'Orthogonality = ',
142 $
' MAX( || I - Q^T*Q ||_F, || I - Q*Q^T ||_F ) / ',
146 $
'Test passes if both residuals are less then threshold'
148 WRITE( nout, fmt = 9995 )
149 WRITE( nout, fmt = 9994 )
156 DO solver = slv_max, slv_min, -1
168 arows =
numroc( n, nb, myrow, 0, nprow )
169 acols =
numroc( n, nb, mycol, 0, npcol )
173 IF( debug )
WRITE(*,*)
'% #', iam,
': Set up descriptors...'
174 IF( barr )
CALL blacs_barrier(ictxt,
'A')
175 CALL descinit( desca, n, n, nb, nb,
min(arsrc,nprow-1),
176 $
min(npcol-1,acsrc), temp_ictxt,
max(1, arows), info )
177 IF ( info.NE.0 )
THEN
178 WRITE(*,*)
"% DESCINIT DESCA failed, INFO =", info
181 CALL descinit( descq, n, n, nb, nb,
min(arsrc,nprow-1),
182 $
min(npcol-1,acsrc), temp_ictxt,
max(1, arows), info )
183 IF ( info.NE.0 )
THEN
184 WRITE(*,*)
"% DESCINIT DESCQ failed, INFO =", info
187 CALL descinit( descvec, n, 1, n, 1,
min(arsrc,nprow-1),
188 $
min(npcol-1,acsrc), temp_ictxt, n, info )
189 IF ( info.NE.0 )
THEN
190 WRITE(*,*)
"% DESCINIT DESCVEC failed, INFO =", info
196 IF( debug )
WRITE(*,*)
'% #', iam,
': Assign pointers...'
198 ipacpy = ipa + desca( lld_ ) * acols
199 ipq = ipacpy + desca( lld_ ) * acols
200 wr1 = ipq + descq( lld_ ) * acols
205 ipw2 = ipw1 + desca( lld_ ) * acols
206 IF( debug )
WRITE(*,*)
'% (IPW2,DPALLOC):', ipw2, dpalloc
208 IF( ipw2+desca(lld_)*acols .GT. dpalloc+1 )
THEN
209 WRITE(*,*)
'% Not enough DP memory!'
219 IF( barr )
CALL blacs_barrier(ictxt,
'A')
220 CALL pdlaset(
'All over', n, n, zero, one, mem(ipq), 1, 1,
222 CALL pdmatgen2( temp_ictxt,
'Random',
'NoDiagDominant',
223 $ n, n, nb, nb, mem(ipa), desca( lld_ ), 0, 0, 7, 0,
224 $ arows, 0, acols, myrow, mycol, nprow, npcol )
225 IF( .NOT. comphess )
THEN
226 CALL pdlaset(
'Lower triangular', n-2, n-2, zero, zero,
227 $ mem(ipa), 3, 1, desca )
228 CALL pdlaset(
'All over', n, n, zero, one, mem(ipq),
231 $
CALL pdlaset(
'Lower triangular', ktop-1, ktop-1,
232 $ zero, zero, mem(ipa), 2, 1, descq )
234 $
CALL pdlaset(
'Lower triangular', n-kbot, n-kbot,
235 $ zero, zero, mem(ipa), kbot+1, kbot, descq )
240 IF( barr )
CALL blacs_barrier(ictxt,
'A')
242 IF( comphess .AND. balance )
THEN
243 IF( nprocs.EQ.1 .AND. solver.NE.2 .AND. uni_lapack )
THEN
244 IF( debug )
WRITE(*,*)
'% #', iam,
': == dgebal =='
245 CALL dgebal(
'Both', n, mem(ipa), desca(lld_), ilo,
247 IF ( info.NE.0 )
THEN
248 WRITE(*,*)
"% DGEBAL failed, INFO =", info
252 IF( debug )
WRITE(*,*)
'% #', iam,
': == pdgebal =='
253 CALL pdgebal(
'Both', n, mem(ipa), desca, ilo, ihi,
255 IF ( info.NE.0 )
THEN
256 WRITE(*,*)
"% PDGEBAL failed, INFO =", info
260 ELSEIF( comphess )
THEN
267 t_ba = mpi_wtime() - t_ba
270 IF( debug )
WRITE(*,*)
'% #', iam,
': ILO,IHI=',ilo,ihi
274 IF( barr )
CALL blacs_barrier(ictxt,
'A')
275 IF( debug )
WRITE(*,*)
'% #', iam,
': Copy matrix A'
276 CALL pdlacpy(
'All', n, n, mem(ipa), 1, 1, desca, mem(ipacpy),
282 $
CALL pdlaprnt( n, n, mem(ipacpy), 1, 1, desca, 0, 0,
283 $
'A', nout, mem(ipw1) )
284 t_gen = t_gen + mpi_wtime() - stamp - t_ba
291 IF( .NOT. comphess )
GO TO 30
295 IF( barr )
CALL blacs_barrier(ictxt,
'A')
296 IF( debug )
WRITE(*,*)
'% #', iam,
297 $
': Reduce to Hessenberg form...N=',n, ilo,ihi
299 IF( nprocs.EQ.1 .AND. solver.NE.2 .AND. uni_lapack )
THEN
300 IF( debug )
WRITE(*,*)
'% #', iam,
': == dgehrd =='
301 CALL dgehrd( n, ilo, ihi, mem(ipa), desca(lld_),
302 $ mem(ipw1), mem(ipw2), -1, info )
303 IF (dpalloc-ipw2.LT.mem(ipw2))
THEN
304 WRITE(*,*)
"% Not enough memory for DGEHRD"
307 CALL dgehrd( n, ilo, ihi, mem(ipa), desca(lld_),
308 $ mem(ipw1), mem(ipw2), dpalloc-ipw2, info )
309 IF ( info.NE.0 )
THEN
310 WRITE(*,*)
"% DGEHRD failed, INFO =", info
314 IF( debug )
WRITE(*,*)
'% #', iam,
': == pdgehrd =='
315 CALL pdgehrd( n, ilo, ihi, mem(ipa), 1, 1, desca, mem(ipw1),
316 $ mem(ipw2), -1, info )
317 IF (dpalloc-ipw2.LT.mem(ipw2))
THEN
318 WRITE(*,*)
"% Not enough memory for PDGEHRD"
321 CALL pdgehrd( n, ilo, ihi, mem(ipa), 1, 1, desca, mem(ipw1),
322 $ mem(ipw2), dpalloc-ipw2, info )
323 IF ( info.NE.0 )
THEN
324 WRITE(*,*)
"% PDGEHRD failed, INFO =", info
331 IF( barr )
CALL blacs_barrier(ictxt,
'A')
332 IF( debug )
WRITE(*,*)
'% #', iam,
':Form Q explicitly'
334 IF( debug )
WRITE(*,*)
'% #', iam,
': == pdormhr =='
335 CALL pdormhr(
'L',
'N', n, n, ilo, ihi, mem(ipa), 1, 1,
336 $ desca, mem(ipw1), mem(ipq), 1, 1, descq, mem(ipw2),
338 IF (dpalloc-ipw2.LT.mem(ipw2))
THEN
339 WRITE(*,*)
"% Not enough memory for PDORMHR"
342 CALL pdormhr(
'L',
'N', n, n, ilo, ihi, mem(ipa), 1, 1,
343 $ desca, mem(ipw1), mem(ipq), 1, 1, descq, mem(ipw2),
344 $ dpalloc-ipw2, info )
345 IF ( info.NE.0 )
THEN
346 WRITE(*,*)
"% PDORMHR failed, INFO =", info
352 CALL pdlaset(
'Lower triangular', n-2, n-2, zero, zero,
353 $ mem(ipa), 3, 1, desca )
358 CALL pdlaprnt( n, n, mem(ipa), 1, 1, desca, 0, 0,
'H', nout,
360 CALL pdlaprnt( n, n, mem(ipq), 1, 1, descq, 0, 0,
'Q', nout,
365 t_hs = mpi_wtime() - t_hs
371 IF( barr )
CALL blacs_barrier(ictxt,
'A')
373 IF( solver.EQ.1 )
THEN
374 IF( debug )
WRITE(*,*)
'% #', iam,
': == pdlaqr1 =='
376 CALL pdlaqr1( .true., .true., n, ilo, ihi, mem(ipa), desca,
377 $ mem(wr1), mem(wi1), ilo, ihi, mem(ipq), descq,
378 $ mem(ipw1), -1, imem, -1, info )
379 IF (dpalloc-ipw1.LT.mem(ipw1))
THEN
380 WRITE(*,*)
"% Not enough DP memory for PDLAQR1"
383 IF (intallc.LT.imem(1))
THEN
384 WRITE(*,*)
"% Not enough INT memory for PDLAQR1"
387 CALL pdlaqr1( .true., .true., n, ilo, ihi, mem(ipa), desca,
388 $ mem(wr1), mem(wi1), ilo, ihi, mem(ipq), descq,
389 $ mem(ipw1), dpalloc-ipw1+1, imem, intallc, info )
391 WRITE(*,*)
"% PDLAQR1: INFO =", info
393 ELSEIF( solver.EQ.2 )
THEN
394 IF( debug )
WRITE(*,*)
'% #', iam,
': == pdhseqr =='
396 IF( barr )
CALL blacs_barrier(ictxt,
'A')
397 CALL pdhseqr(
'Schur',
'Vectors', n, ilo, ihi, mem(ipa),
398 $ desca, mem(wr2), mem(wi2), mem(ipq), descq, mem(ipw1),
399 $ -1, imem, -1, info )
400 IF( barr )
CALL blacs_barrier(ictxt,
'A')
401 IF (dpalloc-ipw1.LT.mem(ipw1))
THEN
402 WRITE(*,*)
"% Not enough DP memory for PDHSEQR"
405 IF (intallc.LT.imem(1))
THEN
406 WRITE(*,*)
"% Not enough INT memory for PDHSEQR"
409 IF( barr )
CALL blacs_barrier(ictxt,
'A')
410 CALL pdhseqr(
'Schur',
'Vectors', n, ilo, ihi, mem(ipa),
411 $ desca, mem(wr2), mem(wi2), mem(ipq), descq, mem(ipw1),
412 $ dpalloc-ipw1+1, imem, intallc, info )
414 WRITE(*,*)
"% PDHSEQR: INFO =", info
417 WRITE(*,*)
'% ERROR: Illegal SOLVER number!'
420 t_qr = mpi_wtime() - t_qr
423 t_sch = t_sch + t_qr + t_hs + t_ba
427 itpereig = dble(totit) / dble(n)
428 swpspeig = dble(sweeps) / dble(n)
429 nspeig = dble(totns) / dble(n)
434 CALL pdlaprnt( n, n, mem(ipa), 1, 1, desca, 0, 0,
'T',
436 CALL pdlaprnt( n, n, mem(ipq), 1, 1, descq, 0, 0,
'Z',
446 CALL pdelget(
'All',
'1-Tree', elem1, mem(ipa), i, i-1,
451 CALL pdelget(
'All',
'1-Tree', elem2, mem(ipa), i+1, i,
454 CALL pdelget(
'All',
'1-Tree', elem3, mem(ipa), i+2, i+1,
459 IF( elem2.NE.zero .AND. abs(elem1)+abs(elem2)+abs(elem3).GT.
460 $ abs(elem2) ) hess = hess + 1
471 IF( debug )
WRITE(*,*)
'% #', iam,
': Compute residuals 1'
472 IF( debug )
WRITE(*,*)
'% #', iam,
': pdgemm 3'
473 CALL pdgemm(
'N',
'N', n, n, n, one, mem(ipacpy), 1, 1,
474 $ desca, mem(ipq), 1, 1, descq, zero, mem(ipw1), 1, 1,
476 IF( debug )
WRITE(*,*)
'% #', iam,
': pdgemm 4'
477 IF( debug )
WRITE(*,*)
'% #', iam,
': N=',n
478 IF( debug )
WRITE(*,*)
'% #', iam,
': DESCA=',desca(1:dlen_)
479 IF( debug )
WRITE(*,*)
'% #', iam,
': DESCQ=',descq(1:dlen_)
480 CALL pdgemm(
'T',
'N', n, n, n, -one, mem(ipq), 1, 1,
481 $ descq, mem(ipw1), 1, 1, desca, one, mem(ipa), 1, 1,
483 r1 =
pdlange(
'Frobenius', n, n, mem(ipa), 1, 1, desca,
485 anorm =
pdlange(
'Frobenius', n, n, mem(ipacpy), 1, 1,
487 IF( anorm.GT.zero )
THEN
488 rnorm = r1 / (anorm*eps*sqrt(dble(n)))
497 IF( debug )
WRITE(*,*)
'% #', iam,
': Compute residuals 2'
498 CALL pdlaset(
'All', n, n, zero, one, mem(ipw1), 1, 1,
500 CALL pdlacpy(
'All', n, n, mem(ipq), 1, 1, descq, mem(ipw2),
502 CALL pdgemm(
'T',
'N', n, n, n, -one, mem(ipq), 1, 1, descq,
503 $ mem(ipw2), 1, 1, descq, one, mem(ipw1), 1, 1, descq )
504 o1 =
pdlange(
'Frobenius', n, n, mem(ipw1), 1, 1, descq,
506 CALL pdlaset(
'All', n, n, zero, one, mem(ipw1), 1, 1,
508 CALL pdgemm(
'N',
'T', n, n, n, -one, mem(ipq), 1, 1, descq,
509 $ mem(ipw2), 1, 1, descq, one, mem(ipw1), 1, 1, descq )
510 o2 =
pdlange(
'Frobenius', n, n, mem(ipw1), 1, 1, descq,
512 orth =
max(o1,o2) / (eps*dble(n))
517 t_res = t_res + mpi_wtime() - stamp
520 tottime = mpi_wtime() - tottime
527 IF( (orth.GT.thresh).OR.(rnorm.GT.thresh) )
THEN
532 IF( debug )
WRITE(*,*)
'% #', iam,
': Print results...'
534 WRITE( nout, fmt = 9993 ) n, nb, nprow, npcol, t_qr, passed
536 CALL blacs_barrier( ictxt,
'All' )
544 DEALLOCATE( mem, imem )
546 CALL blacs_gridexit( ictxt )
554 6666
FORMAT(a2,a3,a6,a4,a5,a6,a3,a3,a3,a9,a9,a9,a8,a8,a9,a9,a9,a9,a9,
555 $ a9,a9,a9,a9,a9,a9,a5,a5,a8,a5,a5)
556 7777
FORMAT(a2,i3,i6,i4,i5,i6,i3,i3,i3,f9.2,f9.2,f9.2,f8.2,f8.2,f9.2,
557 $ f9.2,f9.2,f9.2,f9.2,f9.2,f9.2,f9.2,e9.2,e9.2,e9.2,i5,i5,
559 9995
FORMAT(
' N NB P Q QR Time CHECK' )
560 9994
FORMAT(
'----- --- ---- ---- -------- ------' )
561 9993
FORMAT( i5, 1x, i3, 1x, i4, 1x, i4, 1x, f8.2, 1x, a6 )