cudaMallocHost(void **, size_t): cannot convert argument 1 from "float **" to "void **"

When building a CUDA project, a call to cudaMallocHost failed to compile with the error "cannot convert argument 1 from 'float **' to 'void **'". The problem came from including the wrong CUDA header: changing the include from "cuda_runtime_api.h" to the proper header, <cuda_runtime.h>, resolved the conversion error.

Compiler error: cudaMallocHost(void **, size_t): cannot convert argument 1 from "float **" to "void **"

It turned out that the wrong CUDA header was being included.

Header that triggered the error:

#include "cuda_runtime_api.h"

Header after the fix:

#include <cuda_runtime.h>
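
For context, the conversion error itself comes from C++ not allowing an implicit float** to void** conversion: the C-style API declared in cuda_runtime_api.h only exposes cudaMallocHost(void **, size_t), so the pointer has to be cast explicitly. My understanding is that <cuda_runtime.h> additionally provides templated C++ overloads that accept a typed pointer directly, which is why switching the header lets the uncast call compile. A minimal illustrative sketch (the buffer name and size are made up for the example):

#include <cuda_runtime.h>   // also pulls in cuda_runtime_api.h

int main() {
    float* h_buf = nullptr;   // hypothetical pinned host buffer
    size_t count = 1024;

    // Option 1: explicit cast to void**, matching the C API
    // cudaMallocHost(void**, size_t); works with either header.
    cudaMallocHost((void**)&h_buf, count * sizeof(float));
    cudaFreeHost(h_buf);

    // Option 2: pass the typed pointer directly. This is the call that
    // fails when only cuda_runtime_api.h is visible; with <cuda_runtime.h>
    // it resolves to a templated overload cudaMallocHost(T**, size_t, ...).
    cudaMallocHost(&h_buf, count * sizeof(float));
    cudaFreeHost(h_buf);

    return 0;
}

Either variant compiles once <cuda_runtime.h> is included; the explicit (void**) cast is the more portable choice if a translation unit might only see the C runtime API.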

Headers referenced by the CUDA code:

#include <NvInfer.h>

// cuda_runtime
#include <cuda_runtime.h>
#include <cuda.h>

#include <math.h>
#include <stdio.h>
//#include <unistd.h>
#include <chrono>
#include <fstream>
#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
