device全局变量的使用主要用到了两个函数:
cudaMemcpyToSymbol()
cudaMemcpyFromSymbol()
// Round trip through a __constant__ array: copy the 256-float host array `data` into constData, then copy constData back into `data`.
__constant__ float constData[256]; float data[256]; cudaMemcpyToSymbol(constData, data, sizeof(data)); cudaMemcpyFromSymbol(data, constData, sizeof(data));
// Set a single __device__ float from a host variable; the source argument is the host address (&value), the size is one float.
__device__ float devData; float value = 3.14f; cudaMemcpyToSymbol(devData, &value, sizeof(float));
// Store a cudaMalloc'ed pointer into a __device__ pointer variable: only the pointer value (sizeof(ptr) bytes) is copied, not the 256-float buffer it refers to.
__device__ float* devPointer; float* ptr; cudaMalloc(&ptr, 256 * sizeof(float)); cudaMemcpyToSymbol(devPointer, &ptr, sizeof(ptr));
假设下面的代码保存在 var.cu 中:
#include <iostream>
#include <cuda.h>
#include <cuda_runtime.h>
using namespace std;
// Device-side global integer. Host code reaches it through the symbol-copy
// API (cudaMemcpyToSymbol / cudaMemcpyFromSymbol) rather than by address.
__device__ int d_data;

// Kernel that overwrites d_data with the fixed value 567.
// Takes no arguments and has no thread-index guard, so every launched
// thread performs the same store of the same value.
__global__ void changeData()
{
    const int replacement = 567;
    d_data = replacement;
}
// Reports a failed CUDA runtime call on stderr, including the runtime's own
// description of the error. Returns true when `status` is cudaSuccess.
static bool cudaOk(cudaError_t status, const char* what)
{
    if (status == cudaSuccess)
        return true;
    cerr << what << " failed: " << cudaGetErrorString(status) << endl;
    return false;
}

// Demo driver: prints the initial host value, runs changeData on the device,
// copies d_data back into h_data with cudaMemcpyFromSymbol and prints it.
// Returns 0 on success and 1 if any CUDA call fails, so a failure is visible
// to the calling shell (the previous version exited with status 0 on error).
int main()
{
    int h_data = 123;

    if (!cudaOk(cudaSetDevice(0), "cudaSetDevice"))
        return 1;

    cout << "before kernel, h_data = " << h_data << endl;

    changeData<<<1, 1>>>();
    // A kernel launch returns nothing; configuration errors must be fetched
    // explicitly with cudaGetLastError().
    if (!cudaOk(cudaGetLastError(), "changeData launch"))
        return 1;
    // Synchronize so that a fault inside the kernel is reported here instead
    // of being blamed on the copy that follows.
    if (!cudaOk(cudaDeviceSynchronize(), "changeData execution"))
        return 1;

    // Copy the device symbol's value into the host variable.
    if (!cudaOk(cudaMemcpyFromSymbol(&h_data, d_data, sizeof(h_data)),
                "cudaMemcpyFromSymbol"))
        return 1;

    cout << h_data << endl;

    if (!cudaOk(cudaDeviceReset(), "cudaDeviceReset"))
        return 1;
    return 0;
}
编译文件:nvcc var.cu -o var -arch=sm_35
运行可执行文件: ./var
before kernel, h_data = 123
567