c++ CUDA求解线性方程组Ax=b

输入是三个eigen矩阵, 其中 F = K x F = Kx F=Kx, 已知 F F F k k k, 待求解的变量是 x x x

bool linearSolverQR(Eigen::MatrixXd& m_Keig, Eigen::MatrixXd& m_xeig, Eigen::MatrixXd& m_feig)
{
    cusolverStatus_t status;
    // 获取矩阵数据的指针
    double* k_ptr = m_Keig.data();
    double* f_ptr = m_feig.data();


    double* cuda_k, * cuda_f;
    double* tau_device;
    double* cuda_Rx;
    double* workspace_device;
    int* devInfo;

    // 在CUDA中分配设备内存
    cudaMalloc((void**)&cuda_k, m_Keig.rows() * m_Keig.cols() * sizeof(double));
    cudaMalloc((void**)&cuda_f, m_feig.rows() * m_feig.cols() * sizeof(double));
    cudaMalloc((void**)&tau_device, m_Keig.cols() * sizeof(double));
    cudaMalloc((void**)&cuda_Rx, m_Keig.rows() * m_xeig.cols() * sizeof(double)); //

    // 将数据从主机内存复制到设备内存
    cudaMemcpy(cuda_k, k_ptr, m_Keig.rows() * m_Keig.cols() * sizeof(double), cudaMemcpyHostToDevice);
    cudaMemcpy(cuda_f, f_ptr, m_feig.rows() * m_feig.cols() * sizeof(double), cudaMemcpyHostToDevice);

    cudaMalloc((void**)&devInfo, sizeof(int));


    // 获取cusolverDnDgeqrf函数所需的工作空间大小
    int workspace_size;
    status = cusolverDnDgeqrf_bufferSize(cusolverHandle, m_Keig.rows(), m_Keig.cols(), cuda_k, m_Keig.rows(), &workspace_size);
    //checkCudaErrors(
    if (status != CUSOLVER_STATUS_SUCCESS) {
        std::cerr << "cusolverDnDgeqrf_bufferSize failed with error code: " << status << std::endl;
    }
    // 在设备上为 workspace_device 分配内存
    cudaMalloc(&workspace_device, workspace_size * sizeof(double));

    status = cusolverDnDgeqrf(cusolverHandle, m_Keig.rows(), m_Keig.cols(), cuda_k, m_Keig.rows(), tau_device, workspace_device, workspace_size, devInfo);

    if (status != CUSOLVER_STATUS_SUCCESS) {
        int h_info;
        cudaMemcpy(&h_info, devInfo, sizeof(int), cudaMemcpyDeviceToHost);
        std::cerr << "cusolverDnDgeqrf failed with error code: " << status << std::endl;
    }

    // 计算R*x=Q^T*F, C是R*x
    status = cusolverDnDormqr(cusolverHandle, CUBLAS_SIDE_LEFT, CUBLAS_OP_T, m_feig.rows(),
        m_feig.cols(), m_Keig.cols(), cuda_k, m_Keig.rows(), tau_device,
        cuda_f, m_feig.rows(), workspace_device, workspace_size, devInfo);
    if (status != CUSOLVER_STATUS_SUCCESS) {
        std::cerr << "cusolverDnDormqr failed with error code: " << status << std::endl;
    }


    // 解上三角方程 R * x = Q^T * b
    double alpha = 1.0;
    cublasStatus_t status_cublas = cublasDtrsm(cublasH, CUBLAS_SIDE_LEFT, CUBLAS_FILL_MODE_UPPER,
        CUBLAS_OP_N, CUBLAS_DIAG_NON_UNIT, m_xeig.rows(), m_xeig.cols(),
        &alpha, cuda_k, m_Keig.rows(), cuda_f, m_Keig.rows());
    if (status_cublas != CUBLAS_STATUS_SUCCESS) {
        fprintf(stderr, "cublasDtrsm failed with error code: %d\n", status_cublas);
    }


    Eigen::MatrixXd tmp(m_feig.rows(), m_feig.cols());
    cudaMemcpy(tmp.data(), cuda_f, sizeof(double) * m_feig.rows() * m_feig.cols(), cudaMemcpyDeviceToHost);

    m_xeig = tmp.block(0, 0, m_xeig.rows(), m_xeig.cols());
    //cout << "m_xeig" << endl;
    //cout << m_xeig << endl;


    // 释放设备内存
    cudaFree(cuda_k);
    cudaFree(cuda_f);
    cudaFree(cuda_Rx);
    cudaFree(tau_device);
    cudaFree(workspace_device);
    cudaFree(devInfo);
    return true;
}
  • 7
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值