g++ lu_decompose.cpp
opencl@opencl-host:~/ex/LUDecomposition_pivot$ cat lu_pivot.cpp
#include <stdio.h>
#include <math.h>
#define NA 3
int LUPDecompose(float *A, int N, int lda, float Tol, int *P);
void printFloatMatrix(float* A, int N, int lda);
void printIntVector(int* pivot_h, int n);
int main(){
float A[NA*NA]=// column major
{ 3, 2, 6, 17, 4,18, 10, -2,-12 };
int pivot[NA];
int lda = NA;
LUPDecompose(A, NA, lda, 0.00001, pivot);
printf("A_LU=\n");
printFloatMatrix(A, NA, lda);
printf("\n");
printIntVector(pivot, NA);
printf("\n");
return 0;
}
/* INPUT: A - array of pointers to elements of a square matrix having dimension N, column major,
* Tol - small tolerance number to detect failure when the matrix is near degenerate
* OUTPUT: Matrix A is changed, it contains a copy of both matrices L-E and U as A=(L-E)+U such that P*A=L*U.
* The permutation matrix is not stored as a matrix, but in an integer vector P of size N+1
* containing column indexes where the permutation matrix has "1". The last element P[N]=S+N,
* where S is the number of row exchanges needed for determinant computation, det(P)=(-1)^S
*/
int LUPDecompose(float *A, int N, int lda, float Tol, int *P) {
int i, j, k, imax;
float maxA, *ptr, absA;
for (i = 0; i <= N; i++)
P[i] = i; //Unit permutation matrix, P[N] initialized with N
for (int dia = 0; dia < N; dia++) {
maxA = 0.0;
imax = dia;
for (k = dia; k < N; k++)
if ((absA = fabs(A[k + dia*lda])) > maxA) { // find out the max element in dia-th col
maxA = absA;
imax = k;
}
if (maxA < Tol) return 0; //failure, matrix is degenerate
if (imax != dia) {
//pivoting P
//j = P[dia];P[dia] = P[imax];P[imax] = j;//another style
P[dia] = imax + 1;
//pivoting rows of A
for(int col=0; col<N; col++){
float tmp;
tmp = A[dia + col*lda];
A[dia + col*lda] = A[imax + col*lda];
A[imax + col*lda] = tmp;
}
//counting pivots starting from N (for determinant)
P[N]++;
}
for (j = dia + 1; j < N; j++) {
A[j + dia*lda] /= A[dia + dia*lda];
for (k = dia + 1; k < N; k++)
A[j + k*lda] -= A[j + dia*lda] * A[dia + k*lda];
}
}
return 1; //decomposition done
}
void printFloatMatrix(float* A, int N, int lda){
for(int i=0; i<N; i++){
for(int j=0; j<N; j++){
printf(" %7.3f", A[i + j*lda]);
}
printf("\n");
}
printf("\n\n");
}
void printIntVector(int* pivot_h, int n){
for(int idx=0; idx<n; idx++){
printf("%d ",pivot_h[idx]);
}
printf("\n\n");
}