基于LU分解的矩阵求逆定点化版本,由于需要频繁移位,因此定点比浮点还耗时
浮点版本参考上一篇博客:http://blog.csdn.net/xidianzhimeng/article/details/51284206
话不过多,直接上代码,有不明白的地方留言
typedef char S8;
typedef short S16;
typedef int S32;
typedef unsigned char U8;
typedef unsigned short U16;
typedef unsigned int U32;
typedef int CPLX16;
typedef float F32;
typedef long long S64;
typedef unsigned long long U64;
typedef long long S40;
typedef unsigned long U40;
typedef int BOOL;
#define INVERT_EXPAND1 10
#define INVERT_EXPAND1_N (1<<INVERT_EXPAND1)
#define INVERT_EXPAND2 20
#define INVERT_EXPAND2_N (1<<INVERT_EXPAND2)
#define INVERT_EXPAND3 30
#define INVERT_EXPAND3_N (1<<INVERT_EXPAND3)
#define INVERT_EXPAND4 40
#define MAX_SD_ROW 200
#define MAX_SD_COL 200
S64 matrixT[MAX_SD_ROW][MAX_SD_COL];
S64 matrixB[MAX_SD_ROW][MAX_SD_COL];
// A(symmetry) : diagonal element is 18+8=26bit , other element is 18bit
BOOL MatrixInvLUFixed(S32 A[MAX_SD_ROW][MAX_SD_COL], const U32 n)
{
S32 ii;
U32 i, j, k;
F32 d;
S64 dd, s, alpha;
for (i = 0; i < n; i++)
{
// 由于已知输入为对称矩阵并且对角线上元素最大,所以省去选择主元
if (_abs(matrixT[i][i] >> 32) == 0 && _abs(matrixT[i][i] & 0xffffffff) == 0)
return FALSE;
d = _rcpsp((F32)matrixT[i][i]);
dd = (S64)(d * INVERT_EXPAND3_N);
for (j = i + 1; j < n; j++)
{
alpha = matrixT[j][i] * dd;
for (k = i + 1; k < n; k++)
{
matrixT[j][k] = matrixT[j][k] - (alpha*matrixT[i][k] >> INVERT_EXPAND3);
matrixB[j][k] = matrixB[j][k] - (alpha*matrixB[i][k] >> INVERT_EXPAND3);
}
for (k = 0; k <= i; k++)
matrixB[j][k] = matrixB[j][k] - (alpha*matrixB[i][k] >> INVERT_EXPAND3);
}
matrixT[i][i] = dd;
}
// A occupy 10bit
for (ii = n - 1; ii >= 0; ii--)
for (j = 0; j < n; j++)
{
s = matrixB[ii][j] << INVERT_EXPAND3;
for (k = ii + 1; k < n; k++)
s -= (matrixT[ii][k] * matrixB[k][j]);
matrixB[ii][j] = (s >> INVERT_EXPAND3) * matrixT[ii][ii];
A[ii][j] = (S32)(matrixB[ii][j] >> INVERT_EXPAND2);
}
return TRUE;
}