These notes are mainly a record of my personal projects; the experience described is for reference only. In the spirit of open source, please send feedback on any problems or suggestions so I can improve them.
Option 1: Binding Python from C++
Motivation
Python is mostly used for designing deep learning algorithms, while C++ is mostly used for building software. It is possible to have C++ launch a Python script for training, but across many repeated training and inference runs the overhead of spawning the interpreter each time is excessive. The goal is therefore to call Python modules and functions directly from C++.
Feasibility
Python ships with a C API that C/C++ programs can call directly. A simple demo:
#include <Python.h>

int main() {
    // Initialize the Python interpreter
    Py_Initialize();
    // Run a line of Python code
    PyRun_SimpleString("print('Hello from Python!')");
    // Shut down the interpreter
    Py_Finalize();
    return 0;
}
The Python code is still written ahead of time; the C++ project then integrates it by importing modules and calling functions, as sketched below. Third-party libraries such as Boost.Python, pybind11, and SWIG can be used for this.
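Staying with the bare C API for a moment, importing a module and calling one of its functions looks roughly like the sketch below; the module name "algo" and function name "run" are placeholders, not names from this project:

#include <Python.h>
#include <cstdio>

int main() {
    Py_Initialize();

    // Import a module from sys.path ("algo" is a placeholder name).
    PyObject* module = PyImport_ImportModule("algo");
    if (module) {
        // Look up the attribute "run" and call it with a single int argument.
        PyObject* func = PyObject_GetAttrString(module, "run");
        if (func && PyCallable_Check(func)) {
            PyObject* args = Py_BuildValue("(i)", 42);
            PyObject* result = PyObject_CallObject(func, args);
            if (result) {
                std::printf("result: %ld\n", PyLong_AsLong(result));
                Py_DECREF(result);
            }
            Py_DECREF(args);
        }
        Py_XDECREF(func);
        Py_DECREF(module);
    }

    Py_Finalize();
    return 0;
}

The manual reference counting (Py_DECREF and friends) is exactly the bookkeeping that Boost.Python, pybind11, and SWIG automate.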
Desired outcome
Being able to call Python modules and functions from Qt/C++ to implement algorithms and application software.
A simple demo:
#include <pybind11/embed.h>  // py::scoped_interpreter lives here, not in pybind11.h

namespace py = pybind11;

int main() {
    py::scoped_interpreter guard{};  // start the Python interpreter
    py::module_ sys = py::module_::import("sys");
    sys.attr("path").attr("append")("/path/to/your/python/deep_learning_scripts");  // add the script directory
    py::module_ dlModule = py::module_::import("train_model");  // import the training module
    dlModule.attr("train")("my_dataset.csv", "model_output");   // call the training function
    return 0;
}
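Toward the Qt goal stated above, the interpreter should be started once and kept alive for the whole application. A minimal sketch, assuming Qt Widgets and pybind11 are both configured; the module and function names are the same hypothetical ones as in the demo:

#include <pybind11/embed.h>
#include <QApplication>
#include <QPushButton>

namespace py = pybind11;

int main(int argc, char* argv[]) {
    QApplication app(argc, argv);
    py::scoped_interpreter guard{};  // lives as long as the application

    QPushButton button("Train");
    QObject::connect(&button, &QPushButton::clicked, [] {
        // Hypothetical module/function, as in the demo above.
        py::module_::import("train_model").attr("train")("my_dataset.csv", "model_output");
    });
    button.show();
    return app.exec();
}

In a real application the training call should be moved off the GUI thread, since a long-running train() would freeze the event loop.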
pybind11 repository: https://github.com/pybind/pybind11
pybind11 provides seamless interoperability between C++11 and Python. It is a lightweight, header-only library that exposes C++ types in Python and vice versa, and is mainly used to create Python bindings for existing C++ code.
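The binding direction mentioned above looks like this; a minimal sketch of exposing a C++ function as a Python module (the module name "example" is arbitrary):

#include <pybind11/pybind11.h>

// A plain C++ function to expose to Python.
int add(int a, int b) { return a + b; }

// Builds into a Python extension module named "example";
// from Python: import example; example.add(1, 2)
PYBIND11_MODULE(example, m) {
    m.doc() = "minimal pybind11 example";
    m.def("add", &add, "Add two integers");
}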
Option 2: Using PyTorch's C++ API (libtorch)
Configuring libtorch in Visual Studio 2019
Link: https://blog.csdn.net/qq_44747572/article/details/121510739
When configuring libtorch, be sure to distinguish CPU vs. GPU and Debug vs. Release, because each combination corresponds to a different download.
https://blog.csdn.net/weixin_43742643/article/details/115218126
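A quick way to confirm that the linked build matches the machine is to query CUDA availability at startup; a minimal check:

#include <torch/torch.h>
#include <iostream>

int main() {
    // Prints 1 only when the GPU (CUDA) build of libtorch is linked
    // and a CUDA device is visible; the CPU build always prints 0.
    std::cout << "CUDA available: " << torch::cuda::is_available() << std::endl;
    std::cout << "Device count:   " << torch::cuda::device_count() << std::endl;
    return 0;
}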
Running MNIST classification with libtorch on the CPU
#include <torch/torch.h>
#include <torch/script.h>
#include <opencv2/opencv.hpp>
#include <iostream>
#include <iomanip>
/* Blog: 陨星落云
https://blog.csdn.net/qq_28368377/article/details/122289830
*/
// Define the network model
struct Net : torch::nn::Module {
    // Constructor: build the network structure
    Net() {
        // Construct and register three fully connected layers.
        fc1 = register_module("fc1", torch::nn::Linear(784, 64));
        fc2 = register_module("fc2", torch::nn::Linear(64, 32));
        fc3 = register_module("fc3", torch::nn::Linear(32, 10));
    }

    // Forward pass through the network
    torch::Tensor forward(torch::Tensor x) {
        // Flatten the input to (batch, 784), then apply the layers.
        x = torch::relu(fc1->forward(x.reshape({ x.size(0), 784 })));
        x = torch::dropout(x, /*p=*/0.5, /*train=*/is_training());
        x = torch::relu(fc2->forward(x));
        x = torch::log_softmax(fc3->forward(x), /*dim=*/1);
        return x;
    }

    // Submodules, registered in the constructor.
    torch::nn::Linear fc1{ nullptr }, fc2{ nullptr }, fc3{ nullptr };
};
int main() {
    // Create a new network.
    auto net = std::make_shared<Net>();

    // Create a data loader for the MNIST dataset and set the batch size.
    auto data_loader = torch::data::make_data_loader(
        torch::data::datasets::MNIST("D:\\MNIST_data\\MNIST\\raw\\")
            .map(torch::data::transforms::Stack<>()),
        /*batch_size=*/64);

    // Build the optimizer.
    torch::optim::SGD optimizer(net->parameters(), /*lr=*/0.01);

    for (size_t epoch = 1; epoch <= 10; ++epoch) {
        size_t batch_index = 0;
        // Iterate the data loader to yield batches from the dataset.
        for (auto& batch : *data_loader) {
            // Reset gradients.
            optimizer.zero_grad();
            // Feed data into the network.
            torch::Tensor prediction = net->forward(batch.data);
            // Compute the loss.
            torch::Tensor loss = torch::nll_loss(prediction, batch.target);
            // Backpropagate.
            loss.backward();
            // Update the parameters.
            optimizer.step();
            // Every 100 batches, print the loss and save the model.
            if (++batch_index % 100 == 0) {
                std::cout << "Epoch: " << epoch << " | Batch: " << batch_index
                          << " | Loss: " << loss.item<float>() << std::endl;
                // Save the model.
                torch::save(net, "net.pt");
            }
        }
    }
}
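To reuse the checkpoint written by torch::save above, construct a fresh network and restore its parameters with torch::load; a minimal sketch, assuming the same Net definition:

#include <torch/torch.h>
#include <iostream>

int main() {
    auto net = std::make_shared<Net>();
    torch::load(net, "net.pt");  // restore the parameters saved during training
    net->eval();

    // Run a dummy flattened 28x28 input through the restored network.
    torch::Tensor x = torch::randn({ 1, 784 });
    std::cout << net->forward(x) << std::endl;
    return 0;
}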
Demo for testing with OpenCV and a trained model:
https://oldpan.me/archives/pytorch-windows-libtorch
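The core of such a test is converting a cv::Mat into a torch::Tensor. A minimal sketch for a grayscale MNIST-style image; the file name is a placeholder:

#include <torch/torch.h>
#include <opencv2/opencv.hpp>
#include <iostream>

int main() {
    // Load as single-channel grayscale ("digit.png" is a placeholder).
    cv::Mat img = cv::imread("digit.png", cv::IMREAD_GRAYSCALE);
    if (img.empty()) return 1;
    cv::resize(img, img, cv::Size(28, 28));

    // Wrap the uint8 pixel buffer as NCHW and normalize to [0, 1];
    // .to() copies, so the tensor no longer aliases the Mat's memory.
    torch::Tensor t = torch::from_blob(img.data, { 1, 1, 28, 28 }, torch::kUInt8)
                          .to(torch::kFloat32)
                          .div(255.0);
    std::cout << t.sizes() << std::endl;  // [1, 1, 28, 28]
    return 0;
}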
Training in C++ with libtorch (GPU) (Visual Studio 2019)
Basic environment setup:
https://allentdan.github.io/2020/12/16/pytorch%E9%83%A8%E7%BD%B2torchscript%E7%AF%87/
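That guide revolves around loading a TorchScript model exported from Python with torch.jit.trace or torch.jit.script; the C++ loading side is short. A sketch, with the model path and input shape as placeholders:

#include <torch/script.h>
#include <iostream>
#include <vector>

int main() {
    // Load a TorchScript module ("traced_model.pt" is a placeholder).
    torch::jit::script::Module module = torch::jit::load("traced_model.pt");
    module.to(torch::kCUDA);  // or torch::kCPU for the CPU build
    module.eval();

    // Dummy forward pass; the input shape depends on the exported model.
    std::vector<torch::jit::IValue> inputs;
    inputs.push_back(torch::randn({ 1, 3, 224, 224 }, torch::kCUDA));
    torch::Tensor out = module.forward(inputs).toTensor();
    std::cout << out.sizes() << std::endl;
    return 0;
}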
c10::NotImplementedError appears:
Problem 1: Unhandled exception at 0x00007FFDC1DF4ED9 (in pytorch-semantic3070.exe): Microsoft C++ exception: c10::NotImplementedError at memory location 0x000000558F3CE790. The CPU path runs fine; the GPU path throws the error above.
Problem 2: "Could not run 'aten::empty_strided' with arguments from the 'CUDA' backend" appears.
The fix for both problems is the same: under Linker -> Command Line -> Additional Options, add:
/INCLUDE:?searchsorted_cuda@native@at@@YA?AVTensor@2@AEBV32@0_N1@Z
Unable to call CUDA
Additional Dependencies:
opencv_world470d.lib
opencv_world470.lib
c10.lib
caffe2_nvrtc.lib
c10_cuda.lib
torch.lib
torch_cuda.lib
torch_cuda_cu.lib
torch_cuda_cpp.lib
torch_cpu.lib
-INCLUDE:?warp_size@cuda@at@@YAHXZ
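The -INCLUDE:?warp_size@cuda@at@@YAHXZ option forces the linker to keep a symbol exported by torch_cuda, without which the CUDA libraries can be optimized away. An equivalent workaround is to reference such a symbol from code; a sketch (requires a CUDA device at runtime):

#include <ATen/cuda/CUDAContext.h>
#include <iostream>

int main() {
    // Touching a torch_cuda symbol keeps the linker from dropping
    // the CUDA libraries (same effect as the -INCLUDE flag above).
    std::cout << "CUDA warp size: " << at::cuda::warp_size() << std::endl;
    return 0;
}

The full MNIST training example (GPU when available):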
#include <torch/torch.h>
#include <cstddef>
#include <cstdio>
#include <iostream>
#include <string>
#include <vector>
// Where to find the MNIST dataset.
const char* kDataRoot = "D:/proj/cpp/deploy_test/deploy_test/data/MNIST/raw";
// The batch size for training.
const int64_t kTrainBatchSize = 64;
// The batch size for testing.
const int64_t kTestBatchSize = 1000;
// The number of epochs to train.
const int64_t kNumberOfEpochs = 10;
// After how many batches to log a new update with the loss value.
const int64_t kLogInterval = 10;
struct Net : torch::nn::Module {
Net()
: conv1(torch::nn::Conv2dOptions(1, 10, /*kernel_size=*/5)),
conv2(torch::nn::Conv2dOptions(10, 20, /*kernel_size=*/5)),
fc1(320, 50),
fc2(50, 10) {
register_module("conv1", conv1);
register_module("conv2", conv2);
register_module("conv2_drop", conv2_drop);
register_module("fc1", fc1);
register_module("fc2", fc2);
}
torch::Tensor forward(torch::Tensor x) {
x = torch::relu(torch::max_pool2d(conv1->forward(x), 2));
x = torch::relu(
torch::max_pool2d(conv2_drop->forward(conv2->forward(x)), 2));
x = x.view({ -1, 320 });
x = torch::relu(fc1->forward(x));
x = torch::dropout(x, /*p=*/0.5, /*training=*/is_training());
x = fc2->forward(x);
return torch::log_softmax(x, /*dim=*/1);
}
torch::nn::Conv2d conv1;
torch::nn::Conv2d conv2;
torch::nn::Dropout2d conv2_drop;
torch::nn::Linear fc1;
torch::nn::Linear fc2;
};
template <typename DataLoader>
void train(
size_t epoch,
Net& model,
torch::Device device,
DataLoader& data_loader,
torch::optim::Optimizer& optimizer,
size_t dataset_size) {
model.train();
size_t batch_idx = 0;
for (auto& batch : data_loader) {
auto data = batch.data.to(device), targets = batch.target.to(device);
optimizer.zero_grad();
auto output = model.forward(data);
auto loss = torch::nll_loss(output, targets);
AT_ASSERT(!std::isnan(loss.template item<float>()));
loss.backward();
optimizer.step();
if (batch_idx++ % kLogInterval == 0) {
std::printf(
"\rTrain Epoch: %ld [%5ld/%5ld] Loss: %.4f",
epoch,
batch_idx * batch.data.size(0),
dataset_size,
loss.template item<float>());
}
}
}
template <typename DataLoader>
void test(
Net& model,
torch::Device device,
DataLoader& data_loader,
size_t dataset_size) {
torch::NoGradGuard no_grad;
model.eval();
double test_loss = 0;
int32_t correct = 0;
for (const auto& batch : data_loader) {
auto data = batch.data.to(device), targets = batch.target.to(device);
auto output = model.forward(data);
test_loss += torch::nll_loss(
output,
targets,
/*weight=*/{},
torch::Reduction::Sum)
.template item<float>();
auto pred = output.argmax(1);
correct += pred.eq(targets).sum().template item<int64_t>();
}
test_loss /= dataset_size;
std::printf(
"\nTest set: Average loss: %.4f | Accuracy: %.3f\n",
test_loss,
static_cast<double>(correct) / dataset_size);
}
auto main() -> int {
torch::manual_seed(1);
torch::DeviceType device_type;
if (torch::cuda::is_available()) {
std::cout << "CUDA available! Training on GPU." << std::endl;
device_type = torch::kCUDA;
}
else {
std::cout << "Training on CPU." << std::endl;
device_type = torch::kCPU;
}
torch::Device device(device_type);
Net model;
model.to(device);
auto train_dataset = torch::data::datasets::MNIST(kDataRoot)
.map(torch::data::transforms::Normalize<>(0.1307, 0.3081))
.map(torch::data::transforms::Stack<>());
const size_t train_dataset_size = train_dataset.size().value();
auto train_loader =
torch::data::make_data_loader<torch::data::samplers::SequentialSampler>(
std::move(train_dataset), kTrainBatchSize);
auto test_dataset = torch::data::datasets::MNIST(
kDataRoot, torch::data::datasets::MNIST::Mode::kTest)
.map(torch::data::transforms::Normalize<>(0.1307, 0.3081))
.map(torch::data::transforms::Stack<>());
const size_t test_dataset_size = test_dataset.size().value();
auto test_loader =
torch::data::make_data_loader(std::move(test_dataset), kTestBatchSize);
torch::optim::SGD optimizer(
model.parameters(), torch::optim::SGDOptions(0.01).momentum(0.5));
for (size_t epoch = 1; epoch <= kNumberOfEpochs; ++epoch) {
train(epoch, model, device, *train_loader, optimizer, train_dataset_size);
test(model, device, *test_loader, test_dataset_size);
}
}
Training succeeds.
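One thing the GPU example omits is checkpointing. Since model there is a plain value rather than a std::shared_ptr, the archive API can be used directly; a minimal sketch:

#include <torch/torch.h>
#include <string>

// Sketch: checkpoint a plain module value such as `Net model;` above.
void save_checkpoint(torch::nn::Module& model, const std::string& path) {
    torch::serialize::OutputArchive archive;
    model.save(archive);    // serialize parameters and buffers
    archive.save_to(path);  // write the archive to disk
}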