为了完成比赛,需要到Intel官网下载oneMKL包。
实现方法如下:首先生成随机数
void Random(float *input,int N)
{
VSLStreamStatePtr stream;
vslNewStream(&stream,VSL_BRNG_MT19937,1);
vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD,stream,N*N,input,0.0f,1.0f);
vslDeleteStream(&stream);
}
利用fftw3做fft
/*
 * Time the single-precision 2-D real-to-complex FFT of an N x N matrix with
 * FFTW3 and print the CPU time (measured with clock()) for 1, 10, 100 and
 * 1000 consecutive executions of the same plan.
 *
 * input:  N*N real samples. Planning with FFTW_MEASURE overwrites the arrays
 *         it is given, so the samples are saved before planning and restored
 *         afterwards; the timed transforms therefore run on the caller's data.
 * output: destination for the transform result; the comparison routine in this
 *         file reads N*(N/2+1) complex values from such a buffer.
 * N:      matrix dimension; must be positive.
 *
 * On failure (bad N, or the planner returns NULL) a message is written to
 * stderr and no timing lines are printed.
 */
void FFTW3(float *input,fftwf_complex *output,int N)
{
    static const int repeat_counts[] = {1, 10, 100, 1000};
    const size_t n_runs = sizeof repeat_counts / sizeof repeat_counts[0];
    unsigned plan_flags = FFTW_MEASURE;
    size_t n_real;
    float *saved;
    fftwf_plan plan; /* single-precision plan type, matching the fftwf_ API */

    if (N <= 0) {
        fprintf(stderr, "FFTW3: invalid size N=%d\n", N);
        return;
    }
    n_real = (size_t)N * (size_t)N;

    /* Keep a copy of the input because FFTW_MEASURE planning clobbers it. */
    saved = fftwf_malloc(n_real * sizeof *saved);
    if (saved != NULL) {
        for (size_t i = 0; i < n_real; i++) {
            saved[i] = input[i];
        }
    } else {
        /* Without a backup, fall back to a planner mode that leaves the arrays alone. */
        fprintf(stderr, "FFTW3: could not back up input, planning with FFTW_ESTIMATE\n");
        plan_flags = FFTW_ESTIMATE;
    }

    plan = fftwf_plan_dft_r2c_2d(N, N, input, output, plan_flags);

    if (saved != NULL) {
        for (size_t i = 0; i < n_real; i++) {
            input[i] = saved[i];
        }
        fftwf_free(saved);
    }

    if (plan == NULL) {
        fprintf(stderr, "FFTW3: plan creation failed\n");
        return;
    }

    for (size_t run = 0; run < n_runs; run++) {
        int iter;
        clock_t start = clock();
        for (iter = 0; iter < repeat_counts[run]; iter++) {
            fftwf_execute(plan);
        }
        clock_t end = clock();
        /* iter equals the number of executions once the loop has finished. */
        printf("when time=%-10d , FFTW3 use %f\n", iter, (double)(end - start) / CLOCKS_PER_SEC);
    }

    fftwf_destroy_plan(plan);
}
利用oneMKL做FFT
/*
 * Time the single-precision 2-D real-to-complex FFT of an N x N matrix with
 * the oneMKL DFTI interface and print the CPU time (measured with clock())
 * for 1, 10, 100 and 1000 consecutive forward transforms.
 *
 * input:  N*N real samples, row stride N (see `rs`).
 * output: complex result stored out of place in complex-complex
 *         conjugate-even format, row stride N/2+1 (see `cs`).
 * N:      matrix dimension.
 *
 * Every DFTI status is checked. If the descriptor cannot be set up, the error
 * text is written to stderr and no timing lines are printed. The descriptor is
 * released before returning.
 */
void MKL(float *input,MKL_Complex8 *output,int N)
{
    static const int repeat_counts[] = {1, 10, 100, 1000};
    const size_t n_runs = sizeof repeat_counts / sizeof repeat_counts[0];
    MKL_LONG sizeN[2] = {N, N};
    MKL_LONG rs[3] = {0, N, 1};          /* real input strides: offset, row, column */
    MKL_LONG cs[3] = {0, N / 2 + 1, 1};  /* complex output strides: offset, row, column */
    DFTI_DESCRIPTOR_HANDLE handle = NULL;
    MKL_LONG status;

    status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_REAL, 2, sizeN);
    if (status == DFTI_NO_ERROR) {
        status = DftiSetValue(handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
    }
    if (status == DFTI_NO_ERROR) {
        status = DftiSetValue(handle, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
    }
    if (status == DFTI_NO_ERROR) {
        status = DftiSetValue(handle, DFTI_INPUT_STRIDES, rs);
    }
    if (status == DFTI_NO_ERROR) {
        status = DftiSetValue(handle, DFTI_OUTPUT_STRIDES, cs);
    }
    if (status == DFTI_NO_ERROR) {
        status = DftiCommitDescriptor(handle);
    }
    if (status != DFTI_NO_ERROR) {
        fprintf(stderr, "MKL: descriptor setup failed: %s\n", DftiErrorMessage(status));
        if (handle != NULL) {
            DftiFreeDescriptor(&handle);
        }
        return;
    }

    for (size_t run = 0; run < n_runs; run++) {
        int iter;
        clock_t start = clock();
        for (iter = 0; iter < repeat_counts[run]; iter++) {
            status = DftiComputeForward(handle, input, output);
        }
        clock_t end = clock();
        /* iter equals the number of transforms once the loop has finished. */
        printf("when time=%-10d , oneMKL use %f\n", iter, (double)(end - start) / CLOCKS_PER_SEC);
        if (status != DFTI_NO_ERROR) {
            fprintf(stderr, "MKL: DftiComputeForward failed: %s\n", DftiErrorMessage(status));
        }
    }

    /* Release the descriptor so repeated calls do not leak it. */
    DftiFreeDescriptor(&handle);
}
比较性能和残差
/*
 * Compare two complex result buffers element by element and print a verdict.
 *
 * data1: FFTW-style complex array (index [0] = real part, [1] = imaginary part).
 * data2: MKL_Complex8 array (.real / .imag fields).
 * N:     matrix dimension; (N/2+1)*N elements are read from each buffer.
 *
 * Prints "Result Error" if any real or imaginary part differs by more than
 * 1e-5 in absolute value, otherwise "Result Correct".
 *
 * NOTE(review): the tolerance is absolute; for large-magnitude single-precision
 * results it may be stricter than float rounding allows. Confirm it is intended.
 */
void Compare(fftwf_complex *data1,MKL_Complex8 *data2,int N)
{
    const float tolerance = 0.00001;
    const int count = (N / 2 + 1) * N;
    int mismatch = 0;
    int idx = 0;

    /* Scan every element; a single out-of-tolerance component marks a mismatch. */
    while (idx < count) {
        float diff_re = fabs(data2[idx].real - data1[idx][0]);
        float diff_im = fabs(data2[idx].imag - data1[idx][1]);

        if (diff_re > tolerance) {
            mismatch = 1;
        } else if (diff_im > tolerance) {
            mismatch = 1;
        }
        idx++;
    }

    if (mismatch != 1) {
        printf("Result Correct\n");
        return;
    }
    printf("Result Error\n");
}
感谢英特尔提供的比赛机会,让我认识到了做FFT的新工具;借助oneMKL的强大性能,矩阵运算可以获得更好的性能。