/* Livermore Loops coded in C Latest File Modification 20 Oct 92, * by Tim Peters, Kendall Square Res. Corp. tim@ksr.com, ksr!tim@uunet.uu.net * SUBROUTINE KERNEL( TK) replaces the Fortran routine in LFK Test program. ************************************************************************ * * * KERNEL executes 24 samples of "C" computation * * * * TK(1) - total cpu time to execute only the 24 kernels.* * TK(2) - total Flops executed by the 24 Kernels * * * ************************************************************************ * * * L. L. N. L. " C " K E R N E L S: M F L O P S * * * * These kernels measure " C " numerical computation * * rates for a spectrum of cpu-limited computational * * structures or benchmarks. Mathematical through-put * * is measured in units of millions of floating-point * * operations executed per second, called Megaflops/sec. * * * * Fonzi's Law: There is not now and there never will be a language * * in which it is the least bit difficult to write * * bad programs. * * F.H.MCMAHON 1972 * ************************************************************************ *Originally from Greg Astfalk, AT&T, P.O.Box 900, Princeton, NJ. 08540* * by way of Frank McMahon (LLNL). * * * * REFERENCE * * * * F.H.McMahon, The Livermore Fortran Kernels: * * A Computer Test Of The Numerical Performance Range, * * Lawrence Livermore National Laboratory, * * Livermore, California, UCRL-53745, December 1986. * * * * from: National Technical Information Service * * U.S. Department of Commerce * * 5285 Port Royal Road * * Springfield, VA. 22161 * * * * Changes made to correct many array subscripting problems, * * make more readable (added #define's), include the original * * FORTRAN versions of the runs as comments, and make more * * portable by Kelly O'Hair (LLNL) and Chuck Rasbold (LLNL). * * * ************************************************************************ */ #include #include /* Type-specifiers for function declarations */ #ifdef CRAY #define CALLED_FROM_FORTRAN fortran #define FORTRAN_FUNCTION fortran #else #define CALLED_FROM_FORTRAN #define FORTRAN_FUNCTION extern #endif /* These are the names to be used for the functions */ #ifdef CRAY #define KERNEL kernel #define TRACE trace #define SPACE space #define TEST test #define TRACK track #else #define KERNEL kernel_ #define TRACE trace_ #define SPACE space_ #define TEST test_ #define TRACK track_ #endif /* Type-specifiers for the structs that map to common */ #ifdef CRAY #define COMMON_BLOCK fortran #else #define COMMON_BLOCK extern #endif /* Names of structs (or common blocks) */ #ifdef CRAY #define ALPHA alpha #define BETA beta #define SPACES spaces #define SPACER spacer #define SPACE0 space0 #define SPACEI spacei #define ISPACE ispace #define SPACE1 space1 #define SPACE2 space2 #else #define ALPHA alpha_ #define BETA beta_ #define SPACES spaces_ #define SPACER spacer_ #define SPACE0 space0_ #define SPACEI spacei_ #define ISPACE ispace_ #define SPACE1 space1_ #define SPACE2 space2_ #endif /* Declare the four fortran functions called */ FORTRAN_FUNCTION TRACE(); FORTRAN_FUNCTION SPACE(); FORTRAN_FUNCTION TEST(); FORTRAN_FUNCTION TRACK(); /* Define the structs or COMMON BLOCKS */ COMMON_BLOCK struct { long Mk; long Ik; long Im; long Ml; long Il; long Mruns; long Nruns; long Jr; long Npfs[47][3][8]; } ALPHA ; #define mk ALPHA.Mk #define ik ALPHA.Ik #define im ALPHA.Im #define ml ALPHA.Ml #define il ALPHA.Il #define mruns ALPHA.Mruns; #define nruns ALPHA.Nruns; #define jr ALPHA.Jr #define npfs ALPHA.Npfs COMMON_BLOCK struct { double Tic; double Times[47][3][8]; double See[3][8][3][5]; double Terrs[47][3][8]; double Csums[47][3][8]; double Fopn[47][3][8]; double Dos[47][3][8]; } BETA ; #define tic BETA.Tic #define times BETA.Times #define see BETA.See #define terrs BETA.Terrs #define csums BETA.Csums #define fopn BETA.Fopn #define dos BETA.Dos COMMON_BLOCK struct { long Ion; long J5; long K2; long K3; long MULTI; long Laps; long Loop; long M; long Kr; long It; long N13h; long Ibuf; long Npass; long Nfail; long N; long N1; long N2; long N13; long N213; long N813; long N14; long N16; long N416; long N21; long Nt1; long Nt2; } SPACES ; #define ion SPACES.Ion #define j5 SPACES.J5 #define k2 SPACES.K2 #define k3 SPACES.K3 #define multi SPACES.MULTI #define laps SPACES.Laps #define loop SPACES.Loop #define m SPACES.M #define kr SPACES.Kr #define it SPACES.It #define n13h SPACES.N13h #define ibuf SPACES.Ibuf #define npass SPACES.Npass #define nfail SPACES.Nfail #define n SPACES.N #define n1 SPACES.N1 #define n2 SPACES.N2 #define n13 SPACES.N13 #define n213 SPACES.N213 #define n813 SPACES.N813 #define n14 SPACES.N14 #define n16 SPACES.N16 #define n416 SPACES.N416 #define n21 SPACES.N21 #define nt1 SPACES.Nt1 #define nt2 SPACES.Nt2 COMMON_BLOCK struct { double A11; double A12; double A13; double A21; double A22; double A23; double A31; double A32; double A33; double Ar; double Br; double C0; double Cr; double Di; double Dk; double Dm22; double Dm23; double Dm24; double Dm25; double Dm26; double Dm27; double Dm28; double Dn; double E3; double E6; double Expmax; double Flx; double Q; double Qa; double R; double Ri; double S; double Scale; double Sig; double Stb5; double T; double Xnc; double Xnei; double Xnm; } SPACER ; #define a11 SPACER.A11 #define a12 SPACER.A12 #define a13 SPACER.A13 #define a21 SPACER.A21 #define a22 SPACER.A22 #define a23 SPACER.A23 #define a31 SPACER.A31 #define a32 SPACER.A32 #define a33 SPACER.A33 #define ar SPACER.Ar #define br SPACER.Br #define c0 SPACER.C0 #define cr SPACER.Cr #define di SPACER.Di #define dk SPACER.Dk #define dm22 SPACER.Dm22 #define dm23 SPACER.Dm23 #define dm24 SPACER.Dm24 #define dm25 SPACER.Dm25 #define dm26 SPACER.Dm26 #define dm27 SPACER.Dm27 #define dm28 SPACER.Dm28 #define dn SPACER.Dn #define e3 SPACER.E3 #define e6 SPACER.E6 #define expmax SPACER.Expmax #define flx SPACER.Flx #define q SPACER.Q #define qa SPACER.Qa #define r SPACER.R #define ri SPACER.Ri #define s SPACER.S #define scale SPACER.Scale #define sig SPACER.Sig #define stb5 SPACER.Stb5 #define t SPACER.T #define xnc SPACER.Xnc #define xnei SPACER.Xnei #define xnm SPACER.Xnm COMMON_BLOCK struct { double Time[47]; double Csum[47]; double Ww[47]; double Wt[47]; double Ticks; double Fr[9]; double Terr1[47]; double Sumw[7]; double Start; double Skale[47]; double Bias[47]; double Ws[95]; double Total[47]; double Flopn[47]; long Iq[7]; long Npf; long Npfs1[47]; } SPACE0 ; #define time SPACE0.Time #define csum SPACE0.Csum #define ww SPACE0.Ww #define wt SPACE0.Wt #define ticks SPACE0.Ticks #define fr SPACE0.Fr #define terr1 SPACE0.Terr1 #define sumw SPACE0.Sumw #define start SPACE0.Start #define skale SPACE0.Skale #define bias SPACE0.Bias #define ws SPACE0.Ws #define total SPACE0.Total #define flopn SPACE0.Flopn #define iq SPACE0.Iq #define npf SPACE0.Npf #define npfs1 SPACE0.Npfs1 COMMON_BLOCK struct { double Wtp[3]; long Mult[3]; long Ispan[3][47]; long Ipass[3][47]; } SPACEI ; #define wtp SPACEI.Wtp #define mult SPACEI.Mult #define ispan SPACEI.Ispan #define ipass SPACEI.Ipass COMMON_BLOCK struct { long E[96]; long F[96]; long Ix[1001]; long Ir[1001]; long Zone[300]; } ISPACE ; #define e ISPACE.E #define f ISPACE.F #define ix ISPACE.Ix #define ir ISPACE.Ir #define zone ISPACE.Zone COMMON_BLOCK struct { double U[1001]; double V[1001]; double W[1001]; double X[1001]; double Y[1001]; double Z[1001]; double G[1001]; double Du1[101]; double Du2[101]; double Du3[101]; double Grd[1001]; double Dex[1001]; double Xi[1001]; double Ex[1001]; double Ex1[1001]; double Dex1[1001]; double Vx[1001]; double Xx[1001]; double Rx[1001]; double Rh[2048]; double Vsp[101]; double Vstp[101]; double Vxne[101]; double Vxnd[101]; double Ve3[101]; double Vlr[101]; double Vlin[101]; double B5[101]; double Plan[300]; double D[300]; double Sa[101]; double Sb[101]; } SPACE1 ; #define u SPACE1.U #define v SPACE1.V #define w SPACE1.W #define x SPACE1.X #define y SPACE1.Y #define z SPACE1.Z #define g SPACE1.G #define du1 SPACE1.Du1 #define du2 SPACE1.Du2 #define du3 SPACE1.Du3 #define grd SPACE1.Grd #define dex SPACE1.Dex #define xi SPACE1.Xi #define ex SPACE1.Ex #define ex1 SPACE1.Ex1 #define dex1 SPACE1.Dex1 #define vx SPACE1.Vx #define xx SPACE1.Xx #define rx SPACE1.Rx #define rh SPACE1.Rh #define vsp SPACE1.Vsp #define vstp SPACE1.Vstp #define vxne SPACE1.Vxne #define vxnd SPACE1.Vxnd #define ve3 SPACE1.Ve3 #define vlr SPACE1.Vlr #define vlin SPACE1.Vlin #define b5 SPACE1.B5 #define plan SPACE1.Plan #define d SPACE1.D #define sa SPACE1.Sa #define sb SPACE1.Sb COMMON_BLOCK struct { double P[512][4]; double Px[101][25]; double Cx[101][25]; double Vy[25][101]; double Vh[7][101]; double Vf[7][101]; double Vg[7][101]; double Vs[7][101]; double Za[7][101]; double Zp[7][101]; double Zq[7][101]; double Zr[7][101]; double Zm[7][101]; double Zb[7][101]; double Zu[7][101]; double Zv[7][101]; double Zz[7][101]; double B[64][64]; double C[64][64]; double H[64][64]; double U1[2][101][5]; double U2[2][101][5]; double U3[2][101][5]; } SPACE2 ; #define p SPACE2.P #define px SPACE2.Px #define cx SPACE2.Cx #define vy SPACE2.Vy #define vh SPACE2.Vh #define vf SPACE2.Vf #define vg SPACE2.Vg #define vs SPACE2.Vs #define za SPACE2.Za #define zp SPACE2.Zp #define zq SPACE2.Zq #define zr SPACE2.Zr #define zm SPACE2.Zm #define zb SPACE2.Zb #define zu SPACE2.Zu #define zv SPACE2.Zv #define zz SPACE2.Zz #define b SPACE2.B #define c SPACE2.C #define h SPACE2.H #define u1 SPACE2.U1 #define u2 SPACE2.U2 #define u3 SPACE2.U3 /* KERNEL routine */ CALLED_FROM_FORTRAN KERNEL( TK ) double TK[6]; { #pragma nodyneqv #pragma o=i long argument , k , l , ipnt , ipntp , i; long lw , j , nl1 , nl2 , kx , ky , ip , kn; long i1 , j1 , i2 , j2 , nz , ink , jn , kb5i; long ii , lb , j4 , ng; double tmp , temp, sum, som; char name[8]; /* * Prologue */ for(k=0; k<7; k++) name[k] = "kernel"[k]; TRACE( &name ); SPACE(); argument = 0; TEST( &argument ); /* ******************************************************************* * Kernel 1 -- hydro fragment ******************************************************************* * DO 1 L = 1,Loop * DO 1 k = 1,n * 1 X(k)= Q + Y(k)*(R*ZX(k+10) + T*ZX(k+11)) */ for ( l=1 ; l<=loop ; l++ ) { for ( k=0 ; k0 ); } argument = 2; TEST( &argument ); /* ******************************************************************* * Kernel 3 -- inner product ******************************************************************* * DO 3 L= 1,Loop * Q= 0.0 * DO 3 k= 1,n * 3 Q= Q + Z(k)*X(k) */ for ( l=1 ; l<=loop ; l++ ) { q = 0.0; for ( k=0 ; k= ng ) { vy[j][k] = 0.0; continue; } if ( vh[j+1][k] > vh[j][k] ) { t = ar; } else { t = br; } if ( vf[j][k] < vf[j][k-1] ) { if ( vh[j][k-1] > vh[j+1][k-1] ) r = vh[j][k-1]; else r = vh[j+1][k-1]; s = vf[j][k-1]; } else { if ( vh[j][k] > vh[j+1][k] ) r = vh[j][k]; else r = vh[j+1][k]; s = vf[j][k]; } vy[j][k] = sqrt( vg[j][k]*vg[j][k] + r*r )* t/s; if ( (k+1) >= nz ) { vs[j][k] = 0.0; continue; } if ( vf[j][k] < vf[j-1][k] ) { if ( vg[j-1][k] > vg[j-1][k+1] ) r = vg[j-1][k]; else r = vg[j-1][k+1]; s = vf[j-1][k]; t = br; } else { if ( vg[j][k] > vg[j][k+1] ) r = vg[j][k]; else r = vg[j][k+1]; s = vf[j][k]; t = ar; } vs[j][k] = sqrt( vh[j][k]*vh[j][k] + r*r )* t / s; } } } argument = 15; TEST( &argument ); /* ******************************************************************* * Kernel 16 -- Monte Carlo search loop ******************************************************************* * II= n/3 * LB= II+II * k2= 0 * k3= 0 * DO 485 L= 1,Loop * m= 1 *405 i1= m *410 j2= (n+n)*(m-1)+1 * DO 470 k= 1,n * k2= k2+1 * j4= j2+k+k * j5= ZONE(j4) * IF( j5-n ) 420,475,450 *415 IF( j5-n+II ) 430,425,425 *420 IF( j5-n+LB ) 435,415,415 *425 IF( PLAN(j5)-R) 445,480,440 *430 IF( PLAN(j5)-S) 445,480,440 *435 IF( PLAN(j5)-T) 445,480,440 *440 IF( ZONE(j4-1)) 455,485,470 *445 IF( ZONE(j4-1)) 470,485,455 *450 k3= k3+1 * IF( D(j5)-(D(j5-1)*(T-D(j5-2))**2+(S-D(j5-3))**2 * . +(R-D(j5-4))**2)) 445,480,440 *455 m= m+1 * IF( m-ZONE(1) ) 465,465,460 *460 m= 1 *465 IF( i1-m) 410,480,410 *470 CONTINUE *475 CONTINUE *480 CONTINUE *485 CONTINUE */ ii = n / 3; lb = ii + ii; k3 = k2 = 0; for ( l=1 ; l<=loop ; l++ ) { i1 = m = 1; label410: j2 = ( n + n )*( m - 1 ) + 1; for ( k=1 ; k<=n ; k++ ) { k2++; j4 = j2 + k + k; j5 = zone[j4-1]; if ( j5 < n ) { if ( j5+lb < n ) { /* 420 */ tmp = plan[j5-1] - t; /* 435 */ } else { if ( j5+ii < n ) { /* 415 */ tmp = plan[j5-1] - s; /* 430 */ } else { tmp = plan[j5-1] - r; /* 425 */ } } } else if( j5 == n ) { break; /* 475 */ } else { k3++; /* 450 */ tmp=(d[j5-1]-(d[j5-2]*(t-d[j5-3])*(t-d[j5-3])+(s-d[j5-4])* (s-d[j5-4])+(r-d[j5-5])*(r-d[j5-5]))); } if ( tmp < 0.0 ) { if ( zone[j4-2] < 0 ) /* 445 */ continue; /* 470 */ else if ( !zone[j4-2] ) break; /* 480 */ } else if ( tmp ) { if ( zone[j4-2] > 0 ) /* 440 */ continue; /* 470 */ else if ( !zone[j4-2] ) break; /* 480 */ } else break; /* 485 */ m++; /* 455 */ if ( m > zone[0] ) m = 1; /* 460 */ if ( i1-m ) /* 465 */ goto label410; else break; } } argument = 16; TEST( &argument ); /* ******************************************************************* * Kernel 17 -- implicit, conditional computation ******************************************************************* * DO 62 L= 1,Loop * i= n * j= 1 * INK= -1 * SCALE= 5./3. * XNM= 1./3. * E6= 1.03/3.07 * GO TO 61 *60 E6= XNM*VSP(i)+VSTP(i) * VXNE(i)= E6 * XNM= E6 * VE3(i)= E6 * i= i+INK * IF( i.EQ.j) GO TO 62 *61 E3= XNM*VLR(i) +VLIN(i) * XNEI= VXNE(i) * VXND(i)= E6 * XNC= SCALE*E3 * IF( XNM .GT.XNC) GO TO 60 * IF( XNEI.GT.XNC) GO TO 60 * VE3(i)= E3 * E6= E3+E3-XNM * VXNE(i)= E3+E3-XNEI * XNM= E6 * i= i+INK * IF( i.NE.j) GO TO 61 * 62 CONTINUE */ for ( l=1 ; l<=loop ; l++ ) { i = n-1; j = 0; ink = -1; scale = 5.0 / 3.0; xnm = 1.0 / 3.0; e6 = 1.03 / 3.07; goto l61; l60: e6 = xnm*vsp[i] + vstp[i]; vxne[i] = e6; xnm = e6; ve3[i] = e6; i += ink; if ( i==j ) goto l62; l61: e3 = xnm*vlr[i] + vlin[i]; xnei = vxne[i]; vxnd[i] = e6; xnc = scale*e3; if ( xnm > xnc ) goto l60; if ( xnei > xnc ) goto l60; ve3[i] = e3; e6 = e3 + e3 - xnm; vxne[i] = e3 + e3 - xnei; xnm = e6; i += ink; if ( i != j ) goto l61; l62:; } argument = 17; TEST( &argument ); /* ******************************************************************* * Kernel 18 - 2-D explicit hydrodynamics fragment ******************************************************************* * DO 75 L= 1,Loop * T= 0.0037 * S= 0.0041 * KN= 6 * JN= n * DO 70 k= 2,KN * DO 70 j= 2,JN * ZA(j,k)= (ZP(j-1,k+1)+ZQ(j-1,k+1)-ZP(j-1,k)-ZQ(j-1,k)) * . *(ZR(j,k)+ZR(j-1,k))/(ZM(j-1,k)+ZM(j-1,k+1)) * ZB(j,k)= (ZP(j-1,k)+ZQ(j-1,k)-ZP(j,k)-ZQ(j,k)) * . *(ZR(j,k)+ZR(j,k-1))/(ZM(j,k)+ZM(j-1,k)) * 70 CONTINUE * DO 72 k= 2,KN * DO 72 j= 2,JN * ZU(j,k)= ZU(j,k)+S*(ZA(j,k)*(ZZ(j,k)-ZZ(j+1,k)) * . -ZA(j-1,k) *(ZZ(j,k)-ZZ(j-1,k)) * . -ZB(j,k) *(ZZ(j,k)-ZZ(j,k-1)) * . +ZB(j,k+1) *(ZZ(j,k)-ZZ(j,k+1))) * ZV(j,k)= ZV(j,k)+S*(ZA(j,k)*(ZR(j,k)-ZR(j+1,k)) * . -ZA(j-1,k) *(ZR(j,k)-ZR(j-1,k)) * . -ZB(j,k) *(ZR(j,k)-ZR(j,k-1)) * . +ZB(j,k+1) *(ZR(j,k)-ZR(j,k+1))) * 72 CONTINUE * DO 75 k= 2,KN * DO 75 j= 2,JN * ZR(j,k)= ZR(j,k)+T*ZU(j,k) * ZZ(j,k)= ZZ(j,k)+T*ZV(j,k) * 75 CONTINUE */ for ( l=1 ; l<=loop ; l++ ) { t = 0.0037; s = 0.0041; kn = 6; jn = n; for ( k=1 ; k dn ) dn = s; } x[k] = ( ( w[k] + v[k]*dn )* xx[k] + u[k] ) / ( vx[k] + v[k]*dn ); xx[k+1] = ( x[k] - xx[k] )* dn + xx[k]; } } argument = 20; TEST( &argument ); /* ******************************************************************* * Kernel 21 -- matrix*matrix product ******************************************************************* * DO 21 L= 1,Loop * DO 21 k= 1,25 * DO 21 i= 1,25 * DO 21 j= 1,n * PX(i,j)= PX(i,j) +VY(i,k) * CX(k,j) * 21 CONTINUE */ for ( l=1 ; l<=loop ; l++ ) { for ( k=0 ; k<25 ; k++ ) { for ( i=0 ; i<25 ; i++ ) { #pragma nohazard for ( j=0 ; j