输入是三个eigen矩阵, 其中 F = K x F = Kx F=Kx, 已知 F F F和 k k k, 待求解的变量是 x x x
bool linearSolverQR(Eigen::MatrixXd& m_Keig, Eigen::MatrixXd& m_xeig, Eigen::MatrixXd& m_feig)
{
cusolverStatus_t status;
// 获取矩阵数据的指针
double* k_ptr = m_Keig.data();
double* f_ptr = m_feig.data();
double* cuda_k, * cuda_f;
double* tau_device;
double* cuda_Rx;
double* workspace_device;
int* devInfo;
// 在CUDA中分配设备内存
cudaMalloc((void**)&cuda_k, m_Keig.rows() * m_Keig.cols() * sizeof(double));
cudaMalloc((void**)&cuda_f, m_feig.rows() * m_feig.cols() * sizeof(double));
cudaMalloc((void**)&tau_device, m_Keig.cols() * sizeof(double));
cudaMalloc((void**)&cuda_Rx, m_Keig.rows() * m_xeig.cols() * sizeof(double)); //
// 将数据从主机内存复制到设备内存
cudaMemcpy(cuda_k, k_ptr, m_Keig.rows() * m_Keig.cols() * sizeof(double), cudaMemcpyHostToDevice);
cudaMemcpy(cuda_f, f_ptr, m_feig.rows() * m_feig.cols() * sizeof(double), cudaMemcpyHostToDevice);
cudaMalloc((void**)&devInfo, sizeof(int));
// 获取cusolverDnDgeqrf函数所需的工作空间大小
int workspace_size;
status = cusolverDnDgeqrf_bufferSize(cusolverHandle, m_Keig.rows(), m_Keig.cols(), cuda_k, m_Keig.rows(), &workspace_size);
//checkCudaErrors(
if (status != CUSOLVER_STATUS_SUCCESS) {
std::cerr << "cusolverDnDgeqrf_bufferSize failed with error code: " << status << std::endl;
}
// 在设备上为 workspace_device 分配内存
cudaMalloc(&workspace_device, workspace_size * sizeof(double));
status = cusolverDnDgeqrf(cusolverHandle, m_Keig.rows(), m_Keig.cols(), cuda_k, m_Keig.rows(), tau_device, workspace_device, workspace_size, devInfo);
if (status != CUSOLVER_STATUS_SUCCESS) {
int h_info;
cudaMemcpy(&h_info, devInfo, sizeof(int), cudaMemcpyDeviceToHost);
std::cerr << "cusolverDnDgeqrf failed with error code: " << status << std::endl;
}
// 计算R*x=Q^T*F, C是R*x
status = cusolverDnDormqr(cusolverHandle, CUBLAS_SIDE_LEFT, CUBLAS_OP_T, m_feig.rows(),
m_feig.cols(), m_Keig.cols(), cuda_k, m_Keig.rows(), tau_device,
cuda_f, m_feig.rows(), workspace_device, workspace_size, devInfo);
if (status != CUSOLVER_STATUS_SUCCESS) {
std::cerr << "cusolverDnDormqr failed with error code: " << status << std::endl;
}
// 解上三角方程 R * x = Q^T * b
double alpha = 1.0;
cublasStatus_t status_cublas = cublasDtrsm(cublasH, CUBLAS_SIDE_LEFT, CUBLAS_FILL_MODE_UPPER,
CUBLAS_OP_N, CUBLAS_DIAG_NON_UNIT, m_xeig.rows(), m_xeig.cols(),
&alpha, cuda_k, m_Keig.rows(), cuda_f, m_Keig.rows());
if (status_cublas != CUBLAS_STATUS_SUCCESS) {
fprintf(stderr, "cublasDtrsm failed with error code: %d\n", status_cublas);
}
Eigen::MatrixXd tmp(m_feig.rows(), m_feig.cols());
cudaMemcpy(tmp.data(), cuda_f, sizeof(double) * m_feig.rows() * m_feig.cols(), cudaMemcpyDeviceToHost);
m_xeig = tmp.block(0, 0, m_xeig.rows(), m_xeig.cols());
//cout << "m_xeig" << endl;
//cout << m_xeig << endl;
// 释放设备内存
cudaFree(cuda_k);
cudaFree(cuda_f);
cudaFree(cuda_Rx);
cudaFree(tau_device);
cudaFree(workspace_device);
cudaFree(devInfo);
return true;
}