# CUDA入门学习笔记（一）

#### CUDA高性能并行计算

##### 运行第一个 CUDA 程序
###### 学习几个概念

<<<Dg，Db>>> (args)
Dg：网格中的线程块数，Db：每个线程块中的线程数目，args：传入核函数的参数

__global__ 是标志着核函数的标识符，核函数从主机端调用并在设备端执行
__host__ 函数从主机端调用在主机执行
__device__ 函数从设备端调用并在设备端执行


CUDA 运行时 API 可以将输入数据传输到设备端和将结果传回到主机端


cudaMalloc()    函数可以分配设备端内存
cudaMemcpy()    将数据传入或者传出设备
cudaFree()      释放掉设备中不再使用的内存
size_t：        代表内存大小的专用变量类型
cudaError_t     错误处理的专用变量类型


##### 将源程序.cpp转为核函数.cu

dist_v1 中 main.cpp

#include <math.h> //Include standard math library containing sqrt.
#define N 64 // Specify a constant value for array length.

// Map an integer index onto an evenly spaced float coordinate.
// Index 0 maps to 0 and index n-1 maps to 1; the result is i / (n - 1)
// evaluated in single precision.
float scale(int i, int n)
{
    const float numerator = static_cast<float>(i);
    const float denominator = static_cast<float>(n - 1);
    return numerator / denominator;
}

// Compute the distance between 2 points on a line.
// Returns the non-negative separation of x1 and x2, evaluated as the
// square root of the squared difference.
float distance(float x1, float x2)
{
    const float delta = x2 - x1;
    // sqrtf keeps the computation in single precision regardless of which
    // sqrt overloads <math.h> exposes, so no float -> double -> float
    // round trip is introduced.
    return sqrtf(delta * delta);
}

int main()
{
    // Reference point from which every distance is measured.
    const float ref = 0.5f;

    // Result buffer of N floats, zero-initialized; each entry is
    // overwritten by the loop below.
    float out[N] = { 0.0f };

    // For each index: convert it to a coordinate with scale(), then
    // store that coordinate's distance from ref in the matching slot.
    int idx = 0;
    while (idx < N)
    {
        out[idx] = distance(scale(idx, N), ref);
        ++idx;
    }

    return 0;
}


distanceKernel<<<N/TPB,TPB>>>(d_out,ref,N)

#include <stdio.h>
#define N 64
#define TPB 32

// Device-side helper: map an integer index onto an evenly spaced float
// coordinate, computed as i / (n - 1) in single precision.
__device__ float scale(int i,int n)
{
    const float numerator = static_cast<float>(i);
    const float denominator = static_cast<float>(n - 1);
    return numerator / denominator;
}

// Device-side helper: distance between two points on a line, evaluated as
// the square root of the squared difference (always non-negative).
__device__ float distance(float x1,float x2)
{
    const float delta = x2 - x1;
    // Use the explicit single-precision sqrtf so the kernel never depends
    // on overload resolution to stay out of double-precision math.
    return sqrtf(delta * delta);
}

// Kernel: each thread handles one array element. The thread derives its
// global index i from a 1D grid of 1D blocks, converts i to a coordinate
// with scale(i, len), writes that coordinate's distance from ref into
// d_out[i], and prints the result.
//
//   d_out : device array receiving the distances; must hold at least
//           len floats
//   ref   : reference point the distances are measured from
//   len   : number of elements to compute
__global__ void distanceKernel(float *d_out,float ref, int len)
{
    const int i = blockIdx.x*blockDim.x + threadIdx.x;

    // The launch may supply more threads than elements (the grid size is
    // rounded to whole blocks); surplus threads must not write past the
    // end of d_out.
    if (i >= len)
    {
        return;
    }

    const float x = scale(i,len);
    d_out[i] = distance(x,ref);
    printf("i = %2d: dist from %f to %f is %f.\n",i,ref,x,d_out[i]);
}

// Host entry point: allocates a device array of N floats, launches
// distanceKernel over it, waits for the kernel to finish, and frees the
// array. Returns 0 on success and 1 if any CUDA call reports an error.
int main()
{
    const float ref = 0.5f;

    // Declare a pointer for an array of floats
    float *d_out = 0;

    // Allocate device memory to store the output array
    cudaError_t err = cudaMalloc(&d_out, N*sizeof(float));
    if (err != cudaSuccess)
    {
        fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    // Launch kernel to compute and store distance values.
    // N/TPB blocks of TPB threads yields exactly one thread per element
    // only when N is a multiple of TPB (true for N = 64, TPB = 32).
    distanceKernel<<<N/TPB,TPB>>>(d_out,ref,N);

    // A kernel launch returns no status of its own; an invalid launch
    // configuration is reported through cudaGetLastError().
    err = cudaGetLastError();
    if (err == cudaSuccess)
    {
        // Block until the kernel has finished so that faults raised while
        // it runs are reported here and the buffered device-side printf
        // output is flushed before the program exits.
        err = cudaDeviceSynchronize();
    }
    if (err != cudaSuccess)
    {
        fprintf(stderr, "distanceKernel failed: %s\n", cudaGetErrorString(err));
    }

    // Free the memory (also on the failure path, so nothing leaks)
    const cudaError_t freeErr = cudaFree(d_out);
    if (freeErr != cudaSuccess)
    {
        fprintf(stderr, "cudaFree failed: %s\n", cudaGetErrorString(freeErr));
    }

    return (err == cudaSuccess && freeErr == cudaSuccess) ? 0 : 1;
}


©️2019 CSDN 皮肤主题: 大白 设计师: CSDN官方博客