#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
// Block-size constant. NOTE(review): nothing in the visible portion of this
// file references it (the kernel launch below uses a literal <<<1, 1>>>).
#define BLOCK_SIZE 16
/*
 * Terminates the program when a CUDA runtime call did not succeed.
 *
 *   err  - status code returned by a CUDA runtime API call
 *   file - source file of the call site (normally __FILE__)
 *   line - source line of the call site (normally __LINE__)
 *
 * Returns normally only when err == cudaSuccess. Otherwise prints the
 * runtime's description of the error together with the call site and exits
 * with EXIT_FAILURE.
 */
static void HandleError(cudaError_t err, const char *file, int line)
{
    if (err == cudaSuccess)
    {
        return;
    }
    // Diagnostics belong on stderr so they stay separate from the program's
    // regular stdout output and are not held back by stdout buffering.
    fprintf(stderr, "%s in %s at line %d\n", cudaGetErrorString(err), file, line);
    exit(EXIT_FAILURE);
}
// Wraps a CUDA runtime call: forwards its status to HandleError together with
// the file and line of the call site.
#define HANDLE_ERROR(err) (HandleError((err), __FILE__, __LINE__))
// Aborts with a diagnostic on stderr when a host pointer is NULL (e.g. a
// failed host allocation). The do { ... } while (0) wrapper makes the macro
// expand to exactly one statement, so `if (c) HANDLE_NULL(p); else ...`
// parses as intended instead of breaking on the trailing semicolon.
#define HANDLE_NULL( a ) do { if ((a) == NULL) { \
    fprintf(stderr, "Host memory failed in %s at line %d\n", \
            __FILE__, __LINE__); \
    exit(EXIT_FAILURE); } } while (0)
/*
 * Fills number[0 .. size-1] with pseudo-random values in the range 0..9,
 * consuming one rand() value per element in index order.
 * A size of zero or less writes nothing.
 */
static void GenerateNumbers(int *number, int size)
{
    int *cursor = number;
    int remaining = size;
    while (remaining > 0)
    {
        // Beware of int overflow: each value is reduced to a single digit.
        *cursor = rand() % 10;
        ++cursor;
        --remaining;
    }
}
/*
 * Selects the first CUDA device whose properties can be queried and whose
 * major compute capability is at least 1, and makes it the current device.
 *
 * Returns true when a device was selected; returns false (after printing the
 * reason to stderr) when the device count cannot be obtained, no device is
 * present, no device qualifies, or the device cannot be made current.
 */
static bool InitCUDA()
{
    // Initialized so a failed query can never leave an indeterminate value.
    int count = 0;
    cudaError_t err = cudaGetDeviceCount(&count);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "cudaGetDeviceCount failed: %s\n", cudaGetErrorString(err));
        return false;
    }
    if (count == 0)
    {
        fprintf(stderr, "There is no device.\n");
        return false;
    }

    // Scan for the first usable device; devices whose properties cannot be
    // read are skipped rather than treated as fatal.
    int device = 0;
    for (; device < count; device++)
    {
        cudaDeviceProp prop;
        if (cudaGetDeviceProperties(&prop, device) == cudaSuccess && prop.major >= 1)
        {
            break;
        }
    }
    if (device >= count)
    {
        fprintf(stderr, "There is no device supporting CUDA 1.x.\n");
        return false;
    }

    // Do not report success unless the device was actually made current.
    err = cudaSetDevice(device);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "cudaSetDevice(%d) failed: %s\n", device, cudaGetErrorString(err));
        return false;
    }
    return true;
}
/*
 * Kernel: computes the sum of the squares of num[0 .. size-1] and stores it
 * in *result.
 *
 * There is no thread indexing: every launched thread walks the whole array
 * on its own and writes the same total to *result. The launch in main uses
 * a single thread (<<<1, 1>>>). No shared memory is used.
 * The accumulator is a plain int, so large inputs can overflow.
 */
__global__ static void sumOfSquares(int *num, int size, int* result)
{
    int total = 0;
    int idx = 0;
    while (idx < size)
    {
        const int value = num[idx];
        total += value * value;
        ++idx;
    }
    *result = total;
}
/*
 * Program entry point: computes the sum of squares of 256 random digits on
 * the GPU, recomputes it on the CPU, and prints both along with whether they
 * match.
 *
 * Returns -1 when no CUDA device could be initialized, 0 otherwise. Any CUDA
 * runtime failure after initialization terminates the process through
 * HANDLE_ERROR.
 */
int main(int argc, char *argv[])
{
    if (!InitCUDA())
    {
        return -1;
    }
    printf("CUDA initialized.\n");

    // Reduce an array to a single value: add up the square of every element.
    const int DATA_SIZE = 256;
    int data[DATA_SIZE];
    GenerateNumbers(data, DATA_SIZE);

    int *gpudata = NULL;
    int *result = NULL;
    HANDLE_ERROR(cudaMalloc((void **)&gpudata, sizeof(int) * DATA_SIZE));
    HANDLE_ERROR(cudaMalloc((void **)&result, sizeof(int)));
    HANDLE_ERROR(cudaMemcpy(gpudata, data, sizeof(int) * DATA_SIZE, cudaMemcpyHostToDevice));

    sumOfSquares<<<1, 1, 0>>>(gpudata, DATA_SIZE, result);
    // A kernel launch returns no status itself; launch failures (such as a bad
    // configuration) are only visible through cudaGetLastError().
    HANDLE_ERROR(cudaGetLastError());

    // The blocking device-to-host copy waits for the kernel and reports any
    // error raised while it executed.
    int sum = 0;
    HANDLE_ERROR(cudaMemcpy(&sum, result, sizeof(int), cudaMemcpyDeviceToHost));
    HANDLE_ERROR(cudaFree(gpudata));
    HANDLE_ERROR(cudaFree(result));
    printf("sum (GPU): %d\n", sum);

    // CPU reference used to validate the GPU result.
    int sumCPU = 0;
    for (int i = 0; i < DATA_SIZE; i++)
    {
        sumCPU += data[i] * data[i];
    }
    printf("sum (CPU): %d\n", sumCPU);
    printf("Result %s\n", sum == sumCPU ? "OK" : "Wrong");

    // Remember to release the device.
    HANDLE_ERROR(cudaDeviceReset());
    return 0;
}
// CUDA programming introductory example 3 --- array summation
// (web-page residue: "latest recommended article published 2024-06-02 07:45:00")