获取计算机终端运行的.cu文件的指令
Ubuntu如下:
nvcc -arch=sm_50 -ccbin /usr/bin/g++ -o ./build/test test.cu && ./build/test
如果 nvcc 命令可用,说明你的 CUDA 安装成功;此外还需要验证 cuDNN。
验证安装:
cuda
nvidia-smi
这个命令应该输出 GPU 的基础信息,说明 CUDA 安装成功。
cudnn
cat /usr/local/cuda/include/cudnn.h | grep CUDNN_MAJOR -A 2
如果没有输出,直接找找这个文件夹中有没有此头文件。
环境添加
打开文件
sudo gedit ~/.bashrc
添加以下内容:每次启动终端时会自动把 CUDA 路径加入环境变量。注意对应自己的安装路径,有时候是 cuda,有时候是 cuda-12.0,cd 到 /usr/local 目录下查看即可。
#cuda添加
export PATH=/usr/local/cuda/bin:$PATH
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
export LIBRARY_PATH=$LIBRARY_PATH:/usr/local/cuda/lib64
#cpp配置
export CXX=/usr/bin/g++
export CC=/usr/bin/gcc
激活
source ~/.bashrc
指令介绍
介绍一下指令结构,nvcc
是CUDA编程,-arch=sm_50
与gpu型号有关可以不加试试,除非出错;-ccbin /usr/bin/g++
指定g++编译器的位置,-o ./build/test
输出可以是.o后缀也可以省去。&&
同时进行./build/test
这个生成文件。
这里可以直接用最后的测试一下,在配置vscode
VScode 配置包含详细解释
首先编辑任务文件task.json,这个文件只需要设置两个
{
"tasks":[],//这里配置任务
"version": "2.0.0"//默认即可
}
task配置:
[
{
"type": "shell",//任务类型,选在shell中即可
"label": "cuda-cpp-opencv",//这个任务的名字
"command": "nvcc",//使用的编译器,直接用nvcc系统会自己寻找路径,也可以直接写上路径
//"command": "/usr/local/cuda/bin/nvcc",//使用的编译器,直接用nvcc系统会自己寻找路径,找不到的情况下,也可以直接写上路径
"args":[//nvcc 后面的配置,按照终端指令填写即可
"-arch=sm_50",//设置架构,对应gpu型号
"-ccbin","/usr/bin/g++",//这是为了避免nvcc找不到g++添加的
"-g","${file}","mytools.cpp",//设置执行文件路径,"${file}"是你自己的test.cu。"mytools.cpp",就是你的需要链接的其他.c(自定义的一个工具包.c库与头文件对应)或者.o(其它项目编译后的输出)文件
"-I","/file/**",//头文件夹的目录,不需要精确到文件名,到上层目录即可,** 表示当前目录和一级子目录一键添加进来
"-L","第三方的lib库",
"-std=c++11",//c++版本,可以改成其他的
"-o", "${workspaceFolder}/build/${fileBasenameNoExtension}",//输出文件
"-Xcompiler", "-pthread" // 将-pthread选项传递给g++,nvcc与c++还是有点区别有的没有,所以用这个把需要用的c++语言库添加到nvcc编译中去
],
},{}//后面还可以添加其他任务
]
最后launch.json,理论上这两个就可以F5编译文件了
需要配置
{
"version": "0.2.0",//json文件用[]和{}交替表示包含关系
"configurations":[],//编译时运行设置,设置launch debuger
"compounds":[//组合多个 configurations,一次启动多个 launch debugger
{
"name": "Compound",
"configurations": []//填入前面 configurations 中各配置的 name,可以同时执行多个 launch debugger
}
],
}
对于configurations设置
"configurations": [
{
"name": "opencv4.9.0 debuge",//名字
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/build/${fileBasenameNoExtension}.out",//这里只是单纯执行这个文件
"args": [],//不知道干嘛的,可能是一些debuger操作用的
"stopAtEntry": true, //这里如果为 false,则说明调试直接运行。(反之则停止)
"cwd": "${workspaceFolder}",
"environment": [ ],//系统实在找不到的库文件用这个设置
"externalConsole": true,//是否调用外部cmd
"MIMode": "gdb",
"setupCommands": [
{
"description": "Enable pretty-printing for gdb",
"text": "-enable-pretty-printing",
"ignoreFailures": false
},
],
"preLaunchTask": "cuda-cpp-opencv",//执行此launch前执行的task,必须与task的label一致
}
]
所有文件,测试可以执行
task
{
"tasks": [
{
"type": "shell",
"label": "cuda-cpp-opencv",
"command": "nvcc",
"args": [
// "-fdiagnostics-color=always",
"-arch=sm_50",//设置架构,对应gpu型号
"-ccbin","/usr/bin/g++",
"-g","${file}",//不能删除, "-g"可以省略
"-std=c++11",
"-o",
"${workspaceFolder}/build/${fileBasenameNoExtension}",//Ubuntu
],
"problemMatcher":[
"$gcc"
],
"options": {
"env": {
}
},
"group": {
"kind": "build",
"isDefault": true
},
"detail": "包含opencv490和hdf5文件,还包含cuda" //项目简介
}
],
"version": "2.0.0"
}
launch
{
"version": "0.2.0",
"configurations": [
{
"name": "cuda-opencv4.9.0 debuge",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/build/${fileBasenameNoExtension}",
"args": [],
"stopAtEntry": true, //这里如果为 false,则说明调试直接运行。(反之则停止)
"cwd": "${workspaceFolder}",
"environment": [],
"externalConsole": true,//是否调用外部cmd
"MIMode": "gdb",
"setupCommands": [
{
"description": "Enable pretty-printing for gdb",
"text": "-enable-pretty-printing",
"ignoreFailures": false
},
],
"preLaunchTask": "cuda-cpp-opencv",
}
],
"compounds": [
{
"name": "Compound",
"configurations": ["cuda-opencv4.9.0 debuge"]//可继续添加其他配置的 name
}
]
}
其他优化,代码补全,高亮
先设置好 c_cpp_properties.json 文件的 C++ 语言高亮,
然后在 settings.json 中把 .cu 文件关联到 C++ 即可,两者语法有些不同,只要编译没问题就行了。
settings.json 文件有 user 和 workspace 两版,区别自己网上搜索即可。
"files.associations": {
"*.cu": "cpp",
测试
test.cu 文件
#include <cuda_runtime.h>
#include <iostream>
// CUDA kernel for vector addition
// Element-wise vector addition: z[i] = x[i] + y[i] for every i in [0, n).
// Expects a 1-D grid of 1-D blocks with at least n threads in total;
// surplus threads at the grid tail return immediately.
__global__ void vec_add(double *x, double *y, double *z, int n) {
    const int idx = threadIdx.x + blockIdx.x * blockDim.x;
    if (idx >= n) {
        return;  // guard: the grid rarely divides n evenly
    }
    z[idx] = x[idx] + y[idx];
}
// Abort the whole program with a diagnostic when a CUDA runtime call failed.
// err: status code returned by the CUDA runtime call being checked.
// msg: human-readable description of the operation that was attempted.
void checkCudaError(cudaError_t err, const char* msg) {
    if (err == cudaSuccess) {
        return;  // nothing to report
    }
    std::cerr << "Error: " << msg << " (" << cudaGetErrorString(err) << ")" << std::endl;
    exit(EXIT_FAILURE);
}
// Self-test: adds two length-N vectors on the GPU and prints a slice of the
// result. With x[i] = i and y[i] = 2i, every printed value should equal 3i.
int main() {
    const int N = 1000000;            // vector length
    const int bs = 256;               // threads per block
    const int gs = (N + bs - 1) / bs; // ceil-div: blocks needed to cover N

    // Print basic info about the GPU being used; check these calls too, for
    // consistency with every other CUDA call in this program.
    cudaDeviceProp prop;
    int device;
    checkCudaError(cudaGetDevice(&device), "Getting device");
    checkCudaError(cudaGetDeviceProperties(&prop, device), "Getting device properties");
    std::cout << "Using GPU device " << device << ": " << prop.name << std::endl;

    // Host buffers.
    double *h_x = new double[N];
    double *h_y = new double[N];
    double *h_z = new double[N];

    // Initialize inputs: x[i] = i, y[i] = 2i, so the expected sum is 3i.
    for (int i = 0; i < N; ++i) {
        h_x[i] = i * 1.0;
        h_y[i] = i * 2.0;
    }

    // Device memory allocation.
    double *d_x, *d_y, *d_z;
    checkCudaError(cudaMalloc(&d_x, N * sizeof(double)), "Allocating d_x");
    checkCudaError(cudaMalloc(&d_y, N * sizeof(double)), "Allocating d_y");
    checkCudaError(cudaMalloc(&d_z, N * sizeof(double)), "Allocating d_z");

    // Copy input data from host to device.
    checkCudaError(cudaMemcpy(d_x, h_x, N * sizeof(double), cudaMemcpyHostToDevice), "Copying h_x to d_x");
    checkCudaError(cudaMemcpy(d_y, h_y, N * sizeof(double), cudaMemcpyHostToDevice), "Copying h_y to d_y");

    // Launch the kernel. cudaGetLastError catches launch-configuration errors
    // (e.g. "no kernel image" from a wrong -arch); cudaDeviceSynchronize
    // surfaces asynchronous execution errors with a clear message instead of
    // letting them leak into the next memcpy's error report.
    vec_add<<<gs, bs>>>(d_x, d_y, d_z, N);
    checkCudaError(cudaGetLastError(), "Kernel launch");
    checkCudaError(cudaDeviceSynchronize(), "Kernel execution");

    // Copy the result from device to host (blocking).
    checkCudaError(cudaMemcpy(h_z, d_z, N * sizeof(double), cudaMemcpyDeviceToHost), "Copying d_z to h_z");

    // Spot-check: print 11 values around index 10000 (indices 9999..10009).
    for (int i = 9999; i < 10010; ++i) {
        std::cout << h_z[i] << " ";
    }
    std::cout << std::endl;

    // Free device memory.
    checkCudaError(cudaFree(d_x), "Freeing d_x");
    checkCudaError(cudaFree(d_y), "Freeing d_y");
    checkCudaError(cudaFree(d_z), "Freeing d_z");

    // Free host memory.
    delete[] h_x;
    delete[] h_y;
    delete[] h_z;
    return 0;
}
F5编译执行或者直接终端执行nvcc -arch=sm_50 -ccbin /usr/bin/g++ -o ./build/test test.cu && ./build/test
也可
输出
Using GPU device 0: NVIDIA GeForce 940MX
29997 30000 30003 30006 30009 30012 30015 30018 30021 30024 30027
[1] + Done "/usr/bin/gdb" --interpreter=mi --tty=${DbgTerm} 0<"/tmp/Microsoft-MIEngine-In-lyqqgcbv.lu1" 1>"/tmp/Microsoft-MIEngine-Out-k5hflfpd.w2l"
Press any key to continue...
如果报错如下,就是 -arch=sm_50 的计算能力版本没对上,
网上搜一下你的 GPU 对应的 compute capability(计算能力)版本,改成对应的 sm_XX 即可。
Using GPU device 0: NVIDIA GeForce 940MX
Error: Kernel launch (no kernel image is available for execution on the device)
找不到路径多用which g++
和which nvcc
来找,which+名字
操作主要针对此项目工作空间,最好不要修改usr的配置,项目文件夹下的文件结构