/* Header file for super parallel fft library */ #ifndef _spfft_h #define _spfft_h /* OPTIONS: ------- Edit the following parameters to change precision, the sign of the exponential function, and the scaling of forwards and inverse transforms. Remember to recompile everything after changing parameters! */ /* PRECISION: #define float_type 1 for single precision #define float_type 2 for double precision */ #define float_type 1 /* EXPONENTIAL SIGN: #define exp_sign 1 for exp(2*pi*i...) in forwards transform and exp(-2*pi*i...) in the inverse. #define exp_sign -1 for exp(-2*pi*i...) in forwards transform and exp(2*pi*i...) in the inverse. */ #define exp_sign 1 /* SCALING: #define fft_scaling 0 for no scaling in forwards transform and no scaling in inverse transform. #define fft_scaling 1 for no scaling in forwards transform and scaling by 1/N in inverse transform. #define fft_scaling 2 for scaling by 1/N in forwards transform and no scaling in inverse transform. #define fft_scaling 3 for scaling by 1/sqrt(N) in both forwards and inverse transform. */ #define fft_scaling 1 /* -------- END OPTIONS */ /* For later convenience, we label the float type as FLTYP */ #if (float_type == 1) #define FLTYP float #else #define FLTYP double #endif /* c-include files */ #include #include #include #include #include #include #include "p3pack.h" #include "genutil.h" /* Define type PLCPLX (plural complex) as array of two plural FLTYP */ typedef plural FLTYP PLCPLX[2]; /* Define control structure for the FFT computations */ struct FFTCTL { PLCPLX *twdfac; /* list of twiddle factors */ plural unsigned char *twdpt; /* pointer to tw-factor list */ short lsttwp; /* length of list twdpt */ short *actgr,nactgr; /* list of and no. of active groups */ int mb[3], mbits,actcnt; /* mem-config, no. of mem-bits, no. of act bts. */ /* bit-masks for: memory bits, x- and y-proc, active bits, active groups */ unsigned membits,xbits,ybits,actbits,*agrbits; FLTYP scale; /* scaling of inverse transform */ /* arrays to find identities of f-coeffs (see spfft.doc) */ plural unsigned *fc,*ifc; /* Bit linear permuts: unscrambling, scrambl., post permutation, post*unscr, memory_interlaced->memory_bits left, inverse of that */ BL_PERMUT unscr_blp,scr_blp,post_blp,final_blp,mem2l_blp,l2mem_blp; PERMUT_ROUT final_rout; /* routing of final permutation */ char symmetry; /* 0 for none, 1 for real data */ /* routing of real->complex unpacking */ plural unsigned short *sPackRout1, *sPackRout2; }; /* Function declarations */ struct FFTCTL *fft_init(); struct FFTCTL *fft_ainit(); void smb_fft(); void sp_fft(); void fft_srt(); void fft_free(); void fft_permut(); /* in case M_PI is not defined in math.h */ #ifndef M_PI #define M_PI 3.14159265358979323846 #endif /* Macros */ /* for easier optimization we define the crucial parts of the code in the preprocessor (inline expansion instead of funciton call): */ /* define macro to compute the memory and proc. address for a data bit given the long address */ #define mp_adr(lad,mad,pad,mb)\ { unsigned _tmp;\ _tmp = lad;\ mad = _tmp & ((1<<((mb)[0]))-1); /* m0 bits */ \ _tmp >>= (mb)[0];\ pad = _tmp & (nxproc-1); /* nx bits */ \ _tmp >>= lnxproc;\ mad |= (_tmp & ((1<<((mb)[1]))-1)) << ((mb)[0]); /* m1 bits */ \ _tmp >>= (mb)[1];\ pad |= (_tmp &(nyproc-1)) << lnxproc; /* ny bits */ \ _tmp >>= lnyproc;\ mad |= _tmp << (((mb)[0])+((mb)[1])); /* m2 bits */ \ } /* define macro to compute the long address for a data bit on each proc */ #define long_adr(lad,mad,mb) \ { unsigned _tmp;\ _tmp = (mad) >> ((mb)[0]+(mb)[1]); /* find m2 bits */ \ lad = (_tmp << lnyproc) | iyproc; /* lad = (m2,iy) */ \ if ((mb)[1]){\ lad <<= (mb)[1]; /* lad = (m2,iy,0) */ \ _tmp = ((mad) >> ((mb)[0])) & ((1<<((mb)[1])) -1); /* find m1 bits */ \ lad |= _tmp; /* lad = (m2,iy,m1) */ \ }\ lad <<= lnxproc; /* lad = (m2,iy,m1,0) */ \ lad |= ixproc; /* lad = (m2,iy,m1,ix) */ \ if ((mb)[0]){\ lad <<= (mb)[0]; /* lad = (m2,iy,m1,ix,0) */ \ _tmp = (mad) & ((1<<((mb)[0])) -1); /* find m0 bits */ \ lad |= _tmp; /* lad = (m2,iy,m1,ix,m0) */ \ }\ } #endif