共轭梯度法求解对称正定矩阵线性方程组及推广到一般方程

闪电彬彬

已于 2022-05-16 20:28:33 修改

阅读量1.4k

点赞数

分类专栏：数学文章标签：线性代数算法共轭梯度法线性方程组

于 2022-05-16 20:15:49 首次发布

本文链接：https://blog.csdn.net/chenbb1989/article/details/124796482

版权

数学专栏收录该内容

37 篇文章 0 订阅

订阅专栏

欢迎关注更多精彩
关注我，学习常用算法与数据结构，一题多解，降维打击。

共轭梯度法求解对称正定矩阵线性方程组

原理

共轭梯度法是一种搜索算法。他与传统的高斯消元不同。
共轭梯度法原本是用来求解型下的多元二次型极小值的。

$f(x)=\frac 1 2 x^THx-b^Tx$

$H 为对称正定矩阵， b 为行向量， x 为行向量变量$

$由于二次型的极值可以求导 = 0 解得解析解， H x = b$

$故f(x)极小值对应的x^*也是Hx=b的解$

$x=H^{-1}b, 传统做法是求逆，求逆是比较复杂的操作，\\对于稀疏矩阵更是浪费空间和时间$

$共轭梯度法的思路就是通过搜索的方法直接求解 f (x) 的极值从而等价求得方程的解$

步骤

代码实现


#include <iostream>
#include <stdio.h>
#include <vector>
#include <math.h>
#include <algorithm>

using namespace std;

const double esp = 1e-7;

class Matrix {
private:
    vector<vector<double>> data;
    int row, col;

public:
    Matrix() {}

    Matrix(pair<int, int> size) {
        row = size.first;
        col = size.second;
        data.assign(row, vector<double>(col));
    }

    Matrix(pair<int, int> size, double v) : Matrix(size) {
        for(int i=0;i<row;++i)for(int j=0;j<col;++j)data[i][j]=v;
    }

    Matrix(pair<int, int> size, vector<double> v) : Matrix(size) {
        set(v);
    }

    void set(pair<int, int> p, double v) {
        data[p.first][p.second] = v;
    }

    void set(vector<double> &v) {
        assert(v.size() == row * col);
        for (int i = 0, s = 0; i < row; ++i) {
            for (int j = 0; j < col; ++j) {
                data[i][j] = v[s++];
            }
        }
    }

    double get(pair<int, int> p) const {
        return data[p.first][p.second];
    }

    pair<int, int> getSize() const {
        return pair<int, int>({row, col});
    }

    Matrix t() {
        Matrix a({col, row});
        for (int i = 0; i < col; ++i) {
            for (int j = 0; j < row; ++j) {
                a.set({i, j}, data[j][i]);
            }
        }

        return a;
    }

    Matrix operator*(const Matrix &b) {
        auto bsize = b.getSize();
        Matrix a({row, bsize.second});

        for (int i = 0; i < row; ++i) {
            for (int j = 0; j < bsize.second; ++j) {
                a.set({i, j}, 0);
                for (int k = 0; k < col; ++k) {
                    a.set({i, j}, a.get({i, j}) + data[i][k] * b.get({k, j}));
                }
            }
        }

        return a;
    }

    Matrix operator-(const Matrix &b) {
        Matrix a({row, col});
        // cout<<"r:"<<row<<"c:"<<col<<endl;
        for (int i = 0; i < row; ++i) {
            for (int j = 0; j < col; ++j) {
                // cout<<"i "<<i<<" j "<<j<<endl;
                a.set({i, j}, data[i][j] - b.get({i, j}));
            }
        }

        return a;
    }

    Matrix operator+(const Matrix &b) {
        Matrix a({row, col});

        for (int i = 0; i < row; ++i) {
            for (int j = 0; j < col; ++j) {
                a.set({i, j}, data[i][j] + b.get({i, j}));
            }
        }

        return a;
    }

    Matrix operator*(double b) {
        Matrix a = *this;

        for (int i = 0; i < row; ++i) {
            for (int j = 0; j < col; ++j) {
                a.set({i, j}, b * data[i][j]);
            }
        }

        return a;
    }

    double operator/(const Matrix &b) {
        return data[0][0] / b.get({0, 0});
    }

    bool isSymmetric() {
        if (row != col)return false;
        for (int i = 0; i < row; ++i) {
            for (int j = 0; j < col; ++j) {
                if (data[i][j] != data[j][i])return false;
            }
        }

        return true;
    }

    double norm() {
        double ans = 0;
        for (int i = 0; i < row; ++i) {
            ans += data[i][0] * data[i][0];
        }

        return sqrt(ans);
    }

    void out() {
        for (int i = 0; i < row; ++i) {
            for (int j = 0; j < col; ++j) {
                printf("%lf ", data[i][j]);
            }

            puts("");
        }
    }
};

Matrix ConjGradQuad(Matrix &b, Matrix &H, Matrix &start) {
    int nc = H.getSize().first;
    Matrix x0 = start;
    if (!H.isSymmetric())return x0;

    if (start.getSize() != b.getSize())return x0;

    Matrix r0 = b - H * x0;
    Matrix d0 = r0;
    // d0.out();
    
    for (int i = 0; i < nc; ++i) {
        auto d0t = d0.t();
        double alpha = d0t * r0 / (d0t * H * d0);
        auto x = x0 + d0 * alpha;
        auto r = r0 - H * alpha * d0;
        if (r.norm() < esp) return x;

        auto beta = r.t() * H * d0 / (d0t * H * d0);
        auto d = r - d0 * beta;

        x0 = x;
        r0 = r;
        d0 = d;
    }

    return x0;
}

void test1() {
    puts("test1");
    Matrix b({2, 1}, {3, 4});
    Matrix H({2, 2}, {1, 2, 2, 2});
    Matrix start({2, 1}, {0, 0});

    auto res = ConjGradQuad(b, H, start);
    puts("H");
    H.out();
    puts("b");
    b.out();
    puts("solution");
    res.out();
}


void test2() {
    puts("test2");
    Matrix b({4, 1}, {1, 2, 16, 8});
    Matrix H({4, 4},
             {
                     9, 18, 9, -27,
                     18, 45, 0, -45,
                     9, 0, 126, 9,
                     -27, -45, 9, 135
             });
    Matrix start({4, 1}, {0, 0,0,0});

    auto res = ConjGradQuad(b, H, start);
    puts("H");
    H.out();
    puts("b");
    b.out();
    puts("solution");
    res.out();
}


void test3() {
    puts("test3");
    int n=200;
    Matrix b({n, 1}, 0);
    b.set({0,0}, -1);
    b.set({n-1,0}, -1);

    Matrix H({n, n},0);
    for(int i=0;i<n;++i) {
        H.set({i,i},-2);
        if(i-1>=0)H.set({i,i-1},1);
        if(i+1<n)H.set({i,i+1},1);
    }

    Matrix start({n, 1}, 0);

    auto res = ConjGradQuad(b, H, start);
    puts("H");
    H.out();
    puts("b");
    b.out();
    puts("solution");
    res.out();
}


int main() {
    test3();
    return 0;
}
/*

1 2 | 5
2 1 | 3


1 2 | 3
2 2 | 4
 */

应用到一般方程

上述分析中有一个很重要的条件就是H必须是对称正定矩阵。
但是一般情况大多都不是对称的。怎么办。

转化成对称

利用公式
$A^TA是对称矩阵$
证明：
设
$A=\begin {bmatrix} a_{11}&a_{12}&...&a_{1n} \\a_{21}&a_{22}&...&a_{2n} \\ ...&...&...&...\\a_{n1}&a_{n2}&...&a_{nn} & \end {bmatrix}$

$则A^T = \begin {bmatrix} a_{11}&a_{21}&...&a_{n1} \\a_{12}&a_{22}&...&a_{n2} \\ ...&...&...&...\\a_{1n}&a_{2n}&...&a_{nn} & \end {bmatrix}$

$B=A^TA$
$(a_{1i}, a_{2i}, ...,a_{ni}) \cdot (a_{1j}, a_{2j}, ...,a_{nj})$
$a_{1i}*a_{1j}+a_{2i}*a_{2j}+ ... +a_{ni}*a_{nj}$
$(a_{1j}, a_{2j}, ...,a_{nj}) \cdot (a_{1i}, a_{2i}, ...,a_{ni})$
$a_{1j}*a_{1i}+a_{2j}*a_{2i}+ ... +a_{nj}*a_{ni}$

$\therefore B[i][j]=B[j][i], A^TA 是一个对称矩阵$

基于上述分述，可以先对方程2边都剩以A的转置。

$Hx=b ==> H^THx=H^Tb$

$H^TH 作为矩阵来求解$

改进算法


Matrix ConjGradQuadCommonH(Matrix &b, Matrix &H, Matrix &start) {
    Matrix newH = H.t()*H;
    Matrix newB = H.t()*b;
    puts("new h");
    newH.out();
    puts("new b");
    newB.out();

    return ConjGradQuad(newB, newH, start);
}

void testcommon1() {
    puts("testcommon1");

    Matrix b({2, 1}, {3,7});
    Matrix H({2,2},
             {
                     1,2,
                     3,4,
             });
    Matrix start({2, 1}, {0, 0});

    auto res = ConjGradQuadCommonH(b, H, start);
    puts("H");
    H.out();
    puts("b");
    b.out();
    puts("solution");
    res.out();
}



void testcommon2() {
    puts("testcommon2");

    Matrix b({2, 1}, {100,23});
    Matrix H({2,2},
             {
                     10,33,
                     7,92,
             });
    Matrix start({2, 1}, {0, 0});

    auto res = ConjGradQuadCommonH(b, H, start);
    puts("H");
    H.out();
    puts("b");
    b.out();
    puts("solution");
    res.out();
}

int main() {
    testcommon2();
    return 0;
}
/*

1 2 | 3
3 4 | 7
1.000000
1.000000

10 33 | 100
 7 92 | 23
 
 12.251089 
-0.682148 
 */