参考:https://stackoverflow.com/questions/8857063/cuda-pycuda-how-to-write-complex-numbers-errorsclass-cucomplex-has-no
#include <pycuda-complex.hpp>

// Element-wise addition: z[i] = x[i] + y[i].
//
// Each thread processes exactly one element, located at its flat global index
// computed from the .x components of the thread/block indices (1D launch).
//
// Preconditions:
//   - There is no bounds check, so x, y and z must each hold at least
//     gridDim.x * blockDim.x elements; size the launch to match the data.
//   - T must provide operator+ and assignment usable in device code.
template <typename T>
__global__ void kernel(const T *x, const T *y, T *z)
{
    int tid = threadIdx.x + blockDim.x * blockIdx.x;

    z[tid] = x[tid] + y[tid];
}

// Short aliases for the single- and double-precision complex types in the
// pycuda namespace.
typedef pycuda::complex<float> scmplx;
typedef pycuda::complex<double> dcmplx;

// Explicit instantiations: one entry point is emitted per element type, so the
// compiled module contains real and complex kernels in both precisions.
template void kernel<float>(const float *, const float *, float *);
template void kernel<double>(const double *, const double *, double *);
template void kernel<scmplx>(const scmplx *, const scmplx *, scmplx *);
template void kernel<dcmplx>(const dcmplx *, const dcmplx *, dcmplx *);
This gives you single- and double-precision versions of the trivial kernel for both real and complex element types, and it compiles with nvcc along these lines:
$ nvcc -arch=sm_20 -Xptxas="-v" -I$HOME/pycuda-2011.1.2/src/cuda -c scmplx.cu
ptxas info : Compiling entry function '_Z6kernelIN6pycuda7complexIdEEEvPKT_S5_PS3_' for 'sm_20'
ptxas info : Used 12 registers, 44 bytes cmem[0], 168 bytes cmem[2], 4 bytes cmem[16]
ptxas info : Compiling entry function '_Z6kernelIN6pycuda7complexIfEEEvPKT_S5_PS3_' for 'sm_20'
ptxas info : Used 8 registers, 44 bytes cmem[0], 168 bytes cmem[2]
ptxas info : Compiling entry function '_Z6kernelIdEvPKT_S2_PS0_' for 'sm_20'
ptxas info : Used 8 registers, 44 bytes cmem[0], 168 bytes cmem[2]
ptxas info : Compiling entry function '_Z6kernelIfEvPKT_S2_PS0_' for 'sm_20'
ptxas info : Used 4 registers, 44 bytes cmem[0], 168 bytes cmem[2]