// hello_lu_without_pivot_cpu_simple.cpp : 此文件包含 "main" 函数。程序执行将在此处开始并结束。
//
#include <iostream>
#include <stdio.h>
#include <random>
#define FLOAT_MIN 0
#define FLOAT_MAX 10
int cpu_blasSgetrf_(float* A, int N, int lda) {
int i, j, k, imax;
float maxA, absA;
for (i = 0; i < N; i++) {//LL:: 从A[0][0] ~ A[N-1][N-1]
//第i列中,从第i+1 ~ N-1 个全部元素都要除以 A[i][i],作为L[][]
for (j = i + 1; j < N; j++) { //第j行
A[j + i * lda] /= A[i + i * lda]; //更新A(i+1,i)~ A(N-1, i)的元素, 即 L(j,i), j=i+1,...,N-1
for (k = i + 1; k < N; k++) {//第j行,第k列元素 - 系数*第i行,第k列元素
A[j + k * lda] -= A[j + i * lda] * A[i + k * lda];
}
}
}
return 0; //decomposition done
}
void LU_L_U(float* LU, float* L, float* U, int n) {
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
if (i == j) {
L[i + j * n] = 1.0f;
}
else if (i > j) {
L[i + j * n] = LU[i + j * n];
}
else {
L[i + j * n] = 0.0;
}
if (i <= j) {
U[i + j * n] = LU[i + j * n];
}
else {
U[i + j * n] = 0.0f;
}
}
}
}
void sgemm_col_major(float* L, float* U, float* L_x_U, int n) {
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
L_x_U[i + j * n] = 0.0f;
for (int k = 0; k < n; k++) {
L_x_U[i + j * n] += L[i + k * n] * U[k + j * n];
}
}
}
}
int main() {
int n = 3;
float* A = nullptr;
float* L = nullptr;
float* U = nullptr;
float* LxU = nullptr;
A = (float*)malloc(n * n * sizeof(float));
L = (float*)malloc(n * n * sizeof(float));
U = (float*)malloc(n * n * sizeof(float));
LxU = (float*)malloc(n * n * sizeof(float));
std::random_device rd;
std::default_random_engine eng(rd());
eng.seed(2023);
std::uniform_real_distribution<float> distr(FLOAT_MIN, FLOAT_MAX);
printf("A =\n\n");
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
A[i + j * n] = distr(eng);
printf(" %4.2f", A[i + j * n]);
}
printf("\n\n");
}
cpu_blasSgetrf_(A, n, n);
LU_L_U(A, L, U, n);
sgemm_col_major(L, U, LxU, n);
printf("\n");
printf("L*U =\n\n");
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
printf(" %4.2f", LxU[i + j * n]);
}
printf("\n\n");
}
return 0;
}
CPU 矩阵的 LU分解 without pivot
于 2022-09-13 02:02:20 首次发布