自己敲的第一个并行计算程序
/**
实现线程的显示
*/
#include <stdio.h>
#include <iostream>
#include <cuda_runtime.h> // For the CUDA runtime routines (prefixed with "cuda_")
#include <DEVICE_LAUNCH_PARAMETERS.h> //我在查询中找到的头文件 有可能有别的表达方式
/**
* CUDA Kernel Device code
*
*/
/**
 * Kernel: each thread stores its own index within its block (threadIdx.x,
 * converted to float) into x at the thread's flat global index.
 *
 * The global index is blockIdx.x * blockDim.x + threadIdx.x, so only the
 * x-dimension of the launch is used. No bounds check is performed: x must
 * hold at least gridDim.x * blockDim.x floats.
 */
__global__ void ShowData(float *x)
{
    const int globalIdx = blockIdx.x * blockDim.x + threadIdx.x;
    x[globalIdx] = static_cast<float>(threadIdx.x);
}
/**
 * Host entry point.
 *
 * Launches ShowData over 2 blocks of 8 threads, copies the 16 resulting
 * floats back to the host and prints one "index value" line per element.
 *
 * Returns 0 on success. If the host allocation or any CUDA call fails, a
 * diagnostic is written to stderr, the buffers allocated so far are released
 * and EXIT_FAILURE is returned.
 */
int main()
{
    const int nBlocks = 2;
    const int nThreads = 8;
    const int nSizes = nBlocks * nThreads;
    const size_t nBytes = (size_t)nSizes * sizeof(float);
    float *pfHost = NULL;
    float *pfDevice = NULL;
    cudaError_t err = cudaSuccess;
    int n;

    // Allocate host and device storage.
    pfHost = (float*)malloc(nBytes);
    if (pfHost == NULL)
    {
        fprintf(stderr, "Failed to allocate host buffer!\n");
        return EXIT_FAILURE;
    }

    err = cudaMalloc((void**)&pfDevice, nBytes);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to allocate device buffer (error code %s)!\n", cudaGetErrorString(err));
        free(pfHost);
        return EXIT_FAILURE;
    }

    // Run the kernel: one thread per element of the device buffer.
    ShowData<<<nBlocks, nThreads>>>(pfDevice);

    // A kernel launch returns nothing; launch errors must be queried explicitly.
    err = cudaGetLastError();
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to launch ShowData kernel (error code %s)!\n", cudaGetErrorString(err));
        cudaFree(pfDevice);
        free(pfHost);
        return EXIT_FAILURE;
    }

    // Blocking copy on the default stream, issued after the kernel launch.
    err = cudaMemcpy(pfHost, pfDevice, nBytes, cudaMemcpyDeviceToHost);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to copy results from device to host (error code %s)!\n", cudaGetErrorString(err));
        cudaFree(pfDevice);
        free(pfHost);
        return EXIT_FAILURE;
    }

    for (n = 0; n < nSizes; n++)
    {
        printf("%d %f \n", n, pfHost[n]);
    }

    err = cudaFree(pfDevice);
    if (err != cudaSuccess)
    {
        // Results were already printed, so only warn.
        fprintf(stderr, "Failed to free device buffer (error code %s)!\n", cudaGetErrorString(err));
    }
    free(pfHost);

    // Runs the shell command "pause" before returning (a Windows shell command).
    system("pause");
    return 0;
}
参考：《GPGPU编程技术——从GLSL、CUDA到OpenCL》第四章代码，书第134页。
用 NVIDIA Visual Profiler 软件分析时，需要在主函数末尾加上释放线程的函数，如下：
#include <stdio.h>
#include <iostream>
#include <cuda_runtime.h> // For the CUDA runtime routines (prefixed with "cuda_")
#include <DEVICE_LAUNCH_PARAMETERS.h> //我在查询中找到的头文件 有可能有别的表达方式
/**
* CUDA Kernel Device code
*
*/
/**
 * Kernel: records, for every launched thread, its index inside its block.
 *
 * Thread t of block b writes (float)t to x[b * blockDim.x + t]; only the
 * x-dimension of the launch is consulted. There is no bounds check, so x
 * must be large enough for every launched thread.
 */
__global__ void ShowData(float *x)
{
    const unsigned int lane = threadIdx.x;
    const int slot = lane + blockDim.x * blockIdx.x;
    x[slot] = (float)lane;
}
/**
 * Host entry point (profiler-friendly variant).
 *
 * Launches ShowData over 2 blocks of 8 threads, copies the 16 resulting
 * floats back to the host, prints one "index value" line per element and
 * finally resets the device, which is needed when analysing the program with
 * NVIDIA Visual Profiler.
 *
 * Returns 0 on success. If the host allocation or any CUDA call before the
 * output fails, a diagnostic is written to stderr, the buffers allocated so
 * far are released and EXIT_FAILURE is returned.
 */
int main()
{
    const int nBlocks = 2;
    const int nThreads = 8;
    const int nSizes = nBlocks * nThreads;
    const size_t nBytes = (size_t)nSizes * sizeof(float);
    float *pfHost = NULL;
    float *pfDevice = NULL;
    cudaError_t err = cudaSuccess;
    int n;

    // Allocate host and device storage.
    pfHost = (float*)malloc(nBytes);
    if (pfHost == NULL)
    {
        fprintf(stderr, "Failed to allocate host buffer!\n");
        return EXIT_FAILURE;
    }

    err = cudaMalloc((void**)&pfDevice, nBytes);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to allocate device buffer (error code %s)!\n", cudaGetErrorString(err));
        free(pfHost);
        return EXIT_FAILURE;
    }

    // Run the kernel: one thread per element of the device buffer.
    ShowData<<<nBlocks, nThreads>>>(pfDevice);

    // A kernel launch returns nothing; launch errors must be queried explicitly.
    err = cudaGetLastError();
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to launch ShowData kernel (error code %s)!\n", cudaGetErrorString(err));
        cudaFree(pfDevice);
        free(pfHost);
        return EXIT_FAILURE;
    }

    // Blocking copy on the default stream, issued after the kernel launch.
    err = cudaMemcpy(pfHost, pfDevice, nBytes, cudaMemcpyDeviceToHost);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to copy results from device to host (error code %s)!\n", cudaGetErrorString(err));
        cudaFree(pfDevice);
        free(pfHost);
        return EXIT_FAILURE;
    }

    for (n = 0; n < nSizes; n++)
    {
        printf("%d %f \n", n, pfHost[n]);
    }

    err = cudaFree(pfDevice);
    if (err != cudaSuccess)
    {
        // Results were already printed, so only warn.
        fprintf(stderr, "Failed to free device buffer (error code %s)!\n", cudaGetErrorString(err));
    }
    free(pfHost);

    // Reset the device before exiting; add this when analysing with NVIDIA
    // Visual Profiler. cudaDeviceReset() replaces the deprecated cudaThreadExit().
    err = cudaDeviceReset();
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to reset the device (error code %s)!\n", cudaGetErrorString(err));
    }

    // Runs the shell command "pause" before returning (a Windows shell command).
    system("pause");
    return 0;
}