最近在做地震勘探的全波形反演,用分频反演的方法,需要对地震波场按照特定的频段进行傅里叶变换,这要用到DFT。在CPU上,DFT的计算非常耗时,当处理三维数据时耗时更加严重,所以,本人用CUDA+SU(seismic Unix),在GPU上来做DFT。话不多说,直接上代码:
程序代码:
#include <stdio.h>
#include "time.h"
#include "par.h"
#include "su.h"
#include "segy.h"
#include <cuda.h>
#include <cuda_runtime.h>
#include <helper_cuda.h>
#include "common.h"
#include "cufft.h"
#include <mpi.h>
/****************************** self documentation ****************************/
const char *sdoc[] = {
" ",
" ",
NULL};
/*
* Authors:CUP
*
* References:
*
*/
/****************************** end self doc **********************************/
__global__ void P_fft(int nx,int ny,int nf,cufftComplex *d_cf,float *d_p,
cufftComplex *d_caf)
{
unsigned int ix = threadIdx.x + blockIdx.x * blockDim.x;
unsigned int iy = threadIdx.y + blockIdx.y * blockDim.y;
unsigned int idy = ix * ny + iy;
if (ix<nx && iy<ny )
{
d_p[idy]=idy;
for (int i = 0; i < nf; ++i)
{
d_caf[idy].x += d_cf[i].x*d_p[idy];
d_caf[idy].y += d_cf[i].y*d_p[idy];
}
}
}
int main(int argc, char **argv)
{
time_t start,finish;
double duration;
int it;
int nx,ny,nf,nt;
int dimx=8,dimy=8;
float damp=1;
float dt=0.002;
float t=0;
float *invf;
float *d_p;
/* Transform real field to complex field */
cufftComplex *h_cf;
cufftComplex *d_cf;
cufftComplex *d_caf;
cufftComplex *h_caf;
/* hook up getpar to handle the parameters */
initargs(argc,argv);
requestdoc(0);
/* get required parameters */
if (!getparint("nx",&nx)) err("must specify nx!");
if (!getparint("ny",&ny)) err("must specify ny!");
if (!getparint("nt",&nt)) err("must specify nt!");;
/* get optional parameters */
if (!getparint("nt",&nt)) nt = 0;
if (!getparfloat("dt",&dt)) dt = 1.0;
nf = countparval("invf");
warn(" nx=%d,ny=%d,nt=%d",nx,ny,nt);
warn(" nt=%d;dt=%f",nt,dt);
warn(" nf=%d",nf);
invf = alloc1float(nf);
getparfloat("invf", invf);
for (int iff=0;iff<nf;iff++)
{
warn("invf[%d]=%f",iff,invf[iff]);
}
h_cf = (cufftComplex *)malloc(sizeof(cufftComplex) * nf);
cudaMalloc((void**)&d_cf, sizeof(cufftComplex)*nf);
cudaMalloc((void**)&d_caf, sizeof(cufftComplex)*nx*ny);
cudaMalloc((void**)&d_p, sizeof(float)*nx*ny);
h_caf = (cufftComplex *)malloc(sizeof(cufftComplex)*nx*ny);
CHECK(cudaMemset (d_p, 1, nx*ny*sizeof(float)));
dim3 block(dimx,dimy);
dim3 grid( (nx + block.x - 1) / block.x,(ny + block.y - 1) / block.y);
for ( it = 0,t=0.0; it < nt; ++it,t+=dt)
{
CHECK(cudaMemset (d_caf, 0, nx*ny*sizeof(cufftComplex)));
for (int iff=0;iff<nf;iff++)
{
h_cf[iff].x=exp(-damp*t)*cos(2.0*PI*invf[iff]*t);
h_cf[iff].y=exp(-damp*t)*sin(2.0*PI*invf[iff]*t);
}
CHECK(cudaMemcpy(d_cf,h_cf,sizeof(cufftComplex)*nf,
cudaMemcpyHostToDevice));
P_fft<<<grid,block>>>(nx,ny,nf,d_cf,d_p,d_caf);
CHECK(cudaMemcpy(h_caf,d_caf,sizeof(cufftComplex)*nx*ny,
cudaMemcpyDeviceToHost));
}
for (int i = 0; i < nx*ny; ++i)
{
warn("h_caf[%d].x=%f",i,h_caf[i].x);
warn("h_caf[%d].y=%f",i,h_caf[i].y);
}
free(h_cf);
free(h_caf);
free1float(invf);
CHECK(cudaFree(d_cf));
CHECK(cudaFree(d_caf));
CHECK(cudaFree(d_p));
return(0);
}
运行脚本:
#!/bin/sh
#
mpirun -np 1 SRC/complex_test nx=200 ny=100 nt=1000 dt=0.001 invf=0.5,1,1.5,2,2.5
此代码不限于地震反演,其他方向也可用。希望对大家有帮助,欢迎留言~~