// A small example of vector addition implemented with CUDA:
// each thread computes the sum of one pair of corresponding elements
// of the two input vectors.

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <cutil_inline.h>

// Host-side buffers (NULL until allocated so cleanup can test them safely).
float* h_A = NULL;
float* h_B = NULL;
float* h_C = NULL;

// Device-side buffers (NULL until allocated so cleanup can test them safely).
float* d_A = NULL;
float* d_B = NULL;
float* d_C = NULL;

// When true, Cleanup does not wait for ENTER before exiting.
bool noprompt = false;

// Functions
void Cleanup(void);
void RandomInit(float*, int);
void ParseArguments(int, char**);
static void CleanupAndExit(int exitCode);

// Device code.
//
// Element-wise vector addition: C[i] = A[i] + B[i] for 0 <= i < N.
//
// Launch layout: 1D grid of 1D blocks, one thread per element. The grid must
// supply at least N threads in total; surplus threads in the last block are
// masked off by the bounds check. No shared memory is used.
//
//   A, B  input vectors (device pointers), at least N floats each
//   C     output vector (device pointer), at least N floats
//   N     number of elements to process
__global__ void VecAdd(const float* A, const float* B, float* C, int N)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < N)
        C[i] = A[i] + B[i];
}

// Host code.
//
// Allocates and fills two random host vectors, copies them to the device,
// launches VecAdd, copies the result back, and compares it against a sum
// computed on the host. Prints PASSED or FAILED; the process exit status is
// EXIT_SUCCESS only when the verification passes.
int main(int argc, char** argv)
{
    printf("Vector addition\n");
    const int N = 50000;
    const size_t size = N * sizeof(float);
    ParseArguments(argc, argv);

    // Allocate the host vectors; bail out with a failure status if any
    // allocation fails (the cleanup path frees whichever ones succeeded).
    h_A = (float*)malloc(size);
    h_B = (float*)malloc(size);
    h_C = (float*)malloc(size);
    if (h_A == NULL || h_B == NULL || h_C == NULL) {
        fprintf(stderr, "Failed to allocate host memory\n");
        CleanupAndExit(EXIT_FAILURE);
    }

    // Initialize input vectors
    RandomInit(h_A, N);
    RandomInit(h_B, N);

    // Allocate vectors in device memory
    cutilSafeCall(cudaMalloc((void**)&d_A, size));
    cutilSafeCall(cudaMalloc((void**)&d_B, size));
    cutilSafeCall(cudaMalloc((void**)&d_C, size));

    // Copy vectors from host memory to device memory
    cutilSafeCall(cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice));
    cutilSafeCall(cudaMemcpy(d_B, h_B, size, cudaMemcpyHostToDevice));

    // Invoke kernel: round the block count up so every element gets a thread.
    int threadsPerBlock = 256;
    int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock;
    VecAdd<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, N);
    cutilCheckMsg("kernel launch failure");
#ifdef _DEBUG
    // NOTE(review): cudaThreadSynchronize is deprecated in newer toolkits in
    // favour of cudaDeviceSynchronize; kept for the cutil-era toolkit this
    // file targets.
    cutilSafeCall(cudaThreadSynchronize());
#endif

    // Copy result from device memory to host memory;
    // h_C contains the result in host memory.
    cutilSafeCall(cudaMemcpy(h_C, d_C, size, cudaMemcpyDeviceToHost));

    // Verify result: stop at the first element that differs from the host sum
    // by more than the tolerance.
    int i;
    for (i = 0; i < N; i++) {
        // printf("%f\t%f\t%f\n", h_A[i], h_B[i], h_C[i]);
        float sum = h_A[i] + h_B[i];
        if (fabs(h_C[i] - sum) > 1e-5)
            break;
    }
    const bool passed = (i == N);
    printf("%s \n", passed ? "PASSED" : "FAILED");

    // Does not return: releases resources and exits with the test status.
    CleanupAndExit(passed ? EXIT_SUCCESS : EXIT_FAILURE);
}

// Releases every device and host buffer that was allocated, calls
// cudaThreadExit(), optionally waits for ENTER (unless noprompt is set),
// and terminates the process with the given exit status.
static void CleanupAndExit(int exitCode)
{
    // Free device memory
    if (d_A) cudaFree(d_A);
    if (d_B) cudaFree(d_B);
    if (d_C) cudaFree(d_C);

    // Free host memory
    if (h_A) free(h_A);
    if (h_B) free(h_B);
    if (h_C) free(h_C);

    // NOTE(review): cudaThreadExit is deprecated in newer toolkits in favour
    // of cudaDeviceReset; kept for the cutil-era toolkit this file targets.
    cutilSafeCall(cudaThreadExit());

    if (!noprompt) {
        printf("\nPress ENTER to exit...\n");
        fflush(stdout);
        fflush(stderr);
        getchar();
    }

    exit(exitCode);
}

// Releases all resources and exits the process with status 0.
// Kept with its original signature for existing callers; error paths in this
// file use CleanupAndExit so that failures are reflected in the exit status.
void Cleanup()
{
    CleanupAndExit(EXIT_SUCCESS);
}

// Fills data[0..n-1] with pseudo-random floats in [0, 1] drawn from rand().
// rand() is not seeded here, so the sequence depends on whatever seed the
// process currently has.
void RandomInit(float* data, int n)
{
    for (int i = 0; i < n; i++) {
        data[i] = rand() / (float)RAND_MAX;
    }
}

// Parses program arguments: sets noprompt when "--noprompt" or "-noprompt"
// appears among the command-line arguments.
void ParseArguments(int argc, char** argv)
{
    // Start at 1: argv[0] is the program name, not an argument.
    for (int i = 1; i < argc; i++) {
        if (strcmp(argv[i], "--noprompt") == 0 ||
            strcmp(argv[i], "-noprompt") == 0) {
            noprompt = true;
            break;
        }
    }
}