矩阵论补坑—— SVD分解

小刀来啦

已于 2022-04-29 21:13:12 修改

阅读量519

点赞数 1

分类专栏： python 矩阵论代码实践算法小课堂

于 2022-04-29 17:50:40 首次发布

本文链接：https://blog.csdn.net/weixin_40519529/article/details/124500976

版权

矩阵线性代数 python

算法小课堂同时被 3 个专栏收录

11 篇文章 0 订阅

订阅专栏

python

7 篇文章 0 订阅

订阅专栏

矩阵论代码实践

7 篇文章 2 订阅

订阅专栏

矩阵论补坑—— SVD分解

前言
代码实现
test example
写在最后

前言

下面的code是根据矩阵论教材中关于矩阵奇异值分解部分而写的初步代码，原理讲解的blog已经烂大街了，真正能实现的不多。算是填上去年挖的坑，有些函数可以在矩阵论专题中的其他文章中找到，初步实现了求特征值（包括实数和复数特征根）、特征向量、SVD分解等函数，适用阶数不大于10阶。本身没有优化，超过10阶太费时了，不至于，当个demo去看即可。基本原理是QR分解迭代求解特征值，再求每个特征值对应的特征向量，然后再构建矩阵 $U$ 和 $V^H$ ，比较成熟的算法是双位移的QR迭代方法，个人技拙写了个折中版本，大家笑纳@-@。

代码实现

@staticmethod
def solve_square_root(a, b, c):
	# 求二次函数根
    temp = b**2-4*a*c
    return [(-b+np.sqrt(temp+0j))/(2*a), (-b-np.sqrt(temp+0j))/(2*a)]

@classmethod
def solve_complex_root_QR_eig(self, target):
    return self.solve_square_root(1, -target[0, 0]-target[1, 1], target[0, 0]*target[1, 1]-target[0, 1]*target[1, 0])

@classmethod
def eig_QR(self, target, mode='householder', epoch=20, eps=1e-4, rnd=3, test=False):
    """eig_QR use QR_Fact to get all eig_values of target M
    *reference: https://blog.csdn.net/xfijun/article/details/109464005
    *通过不断迭代至对角线元素基本保持不变，或者只剩下2*2方块时停止，但是很耗时，暂时没有实现双位移的方法！！
    *此实现方法对于利用上Hessenberg矩阵化简的优化过程没有明显体现
    *size > 10*10 还是考虑用np的内置方法

    Args:
        target ([float or complex]]): [matrix to get eig_values]
        mode (str, optional): [transform methods]. Defaults to 'householder'.
        epoch (int, optional): [thershold to stop loops]. Defaults to 20.
        eps ([float], optional): [thershold to stop loops]. Defaults to 1e-4.
        rnd([int], optional): [The number of decimal places reserved]. Defaults to 3.
        test (bool, optional): [show checking information]. Defaults to False.
	
	Return:
		eig_list (list): eig_values of input matrix
	
	Author: Junno
	Date: 2022-04-29
    """

    assert len(target.shape) == 2
    M, N = target.shape
    assert M == N
    cnt = 0
    # one loop for QR_Fact_Eig

    def QR_epoch(A):
        nonlocal cnt, epoch, mode, eps
        Ak = deepcopy(A)
        flag = 1
        while flag:
            Ak0 = Ak.copy()
            Q, R = self.QR_Fact(Ak, mode)
            Ak = R@Q
            if np.linalg.norm(np.diag(np.abs(Ak-Ak0))) <= eps:
                flag = 0
            cnt += 1
            if cnt > epoch:
                flag = 0
        return Ak

    Ak = QR_epoch(target)
    cnt += epoch

    # check result to reloop or stop
    iter_flag = True
    while iter_flag:
        check_complex_eig = False
        eig_list = []

        for i in range(M):
            if check_complex_eig:
                check_complex_eig = False
                continue

            # check 2*2 block to get roots or continue QR_epoch loops
            if (i+1) < M and abs(Ak[i+1, i]) >= eps:
                if i+2 < M and (abs(np.sum(Ak[i+2:, i])) >= eps or abs(np.sum(Ak[i+2:, i+1])) >= eps):
                    Ak = QR_epoch(Ak)
                    break
                else:
                    check_complex_eig = True
                    if test:
                        print(Ak[i:i+2, i:i+2])
                    c_eig = self.solve_complex_root_QR_eig(
                        Ak[i:i+2, i:i+2])
                    eig_list += c_eig

            else:
                eig_list.append(Ak[i, i])

        # stop when get to the last row or last two rows is a 2*2 block
        if i == M-1 or (i == M-2 and check_complex_eig == True):
            iter_flag = False

    if test:
        print("Iteration times: ", cnt)
        print(Ak)

    # sort
    eig_list = sorted(
        eig_list, key=lambda x: np.linalg.norm(x), reverse=True)
    return np.round(eig_list, rnd)


@staticmethod
def solve_eig_vec(target, eig, dtype=np.complex64, eps=1e-3, test=False):
    """solve_eig_vec: solve according eig_vector given an eig

    Args:
        target ([float or complex]]): [matrix to get eig_vec]
        eig ([float or complex]): [one eig of target]
        dtype ([type], optional): [description]. Defaults to np.complex64.
        eps ([float]): numerical threshold.
        test (bool, optional): [show checking information]. Defaults to False.

    Returns:
        [np.array]: [eig_vector]
    
	Author: Junno
	Date: 2022-04-29
    """
    M, N = target.shape
    A = target-np.eye(M, dtype=dtype)*eig
    if test:
        print(A)
    for i in range(M):
        if test:
            print('During process in row:{}, col:{}'.format(i, i))
        if sum(abs(A[i:, i])) > eps:
            zero_index = []
            non_zero_index = []
            for k in range(i, M):
                if abs(A[k, i]):
                    non_zero_index.append(k)
                else:
                    zero_index.append(k)

            non_zero_index = sorted(
                non_zero_index, key=lambda x: abs(A[x, i]))
            sort_cols = non_zero_index+zero_index
            if test:
                print("Sort_cols index: {}".format(sort_cols))

            A[i:, :] = A[sort_cols, :]

            if test:
                print('After resorting')
                print(A)

            if np.sum(abs(A[i+1:, :])):
                prefix = -A[i+1:, i]/A[i, i]
                temp = (np.array(prefix).reshape(-1, 1)
                        )@A[i, :].reshape((1, -1))
                A[i+1:, :] += temp

            # eliminate elements with the same col index above
            # if i != 0:
            #     for j in range(i):
            #         A[j, :] += -(A[j, i]/A[i, i])*A[i, :]

            if test:
                print('After primary row transformation:')
                print(A)

        else:
            continue

    # 如果秩不是n-1，即有k重根，需要另外计算，否则所有的特征值给出的特征向量均是一样的，

    # the equation must be non-full-rank, the last row will be left
    for m in range(M-1):
        for n in range(m, N):
            if abs(A[m, n]) > eps:
                A[m, :] *= 1./A[m, n]
                break

    if test:
        print(A)

    x = np.zeros((1, M), dtype=dtype)
    for i in range(M-1, -1, -1):
        if i < M-1:
            if np.abs(A[i, i]) < eps:
                x[0, i] = 0
            else:
                x[0, i] = -np.sum(A[i, i+1:]*x[0, i+1:])/A[i, i]
        else:
            x[0, i] = 1
        if test:
            print(i, x)

    return x

@classmethod
def eig_vv(self, target, eps=1e-3, test=False):
    """eig_vv 求解矩阵的所有特征值与特征向量

    Args:
        target ([float or complex]): [target matrix]
        eps ([float]): numerical threshold.

    Returns:
        [list and array]: [list of eigs ans array of eig_vectors]

	Author: Junno
	Date: 2022-04-29
    """
    assert target.shape[0] <= 10, 'too large matrix to solve using this imperfect method, it will cost much of time, please use numpy methods instead'
    eigs = self.eig_QR(target, test=test)
    num_eigs = len(eigs)
    eig_vecs = None
    for i in range(num_eigs):
        if test:
            print('solve eigvv')
            print(eigs[i], self.solve_eig_vec(target, eigs[i], eps=eps))
        if i == 0:
            eig_vecs = self.solve_eig_vec(target, eigs[i], eps=eps)
        else:
            eig_vecs = np.concatenate(
                (eig_vecs, self.solve_eig_vec(target, eigs[i], eps=eps)), axis=0)

    return eigs, eig_vecs



@classmethod
def svd(self, target, dtype=np.complex64, eps=1e-4, test=False):
    """svd

    Args:
        target ([float or complex]]): [matrix to get eig_vec]
        dtype ([type], optional): [description]. Defaults to np.complex64.
        eps ([float]): numerical threshold.
        test (bool, optional): [show checking information]. Defaults to False.

    Returns:
        [u,v,d]: [svd components]

	Author: Junno
	Date: 2022-04-29
    """
    import cmath
    assert len(target.shape) == 2
    M, N = target.shape
    AHA = np.conj(target).T@target
    if test:
        print('AHA:')
        print(AHA)
    eigs, eig_vecs = self.eig_vv(AHA, eps=eps, test=test)
    if test:
        print(eigs, '\n', eig_vecs)
    u_v = None
    temp = False
    for j in range(eig_vecs.shape[0]):
        eig_vecs[j, :] /= np.linalg.norm(eig_vecs[j, :])

        # u_i=(1/sigma_i)*(A@eigv_i) if sigma_i>0  sigma_i=1/sqrt(eig_i)
        if np.linalg.norm(eigs[j]) > eps:
            sigma_j = cmath.sqrt(eigs[j])
            if not temp:
                u_v = 1/sigma_j*(target@eig_vecs[j, :]).reshape((-1, 1))
                temp = True
            else:
                u_v = np.concatenate(
                    (u_v, 1/sigma_j*(target@eig_vecs[j, :]).reshape((-1, 1))), axis=1)
    if test:
        print(eigs, '\n', eig_vecs)
        print('u_v')
        print(u_v)

    # expand orthogonal basis
    if u_v.shape[1] < M:
        ortho_index = []
        for i in range(u_v.shape[0]):
            if sum(abs(u_v[i, :])) < eps:
                ortho_index.append(i)
        for i in range(M-u_v.shape[1]):
            if len(ortho_index) == 0:
                u_v = np.concatenate(
                    (u_v, np.array([0 for _ in range(u_v.shape[0])]).reshape((-1, 1))), axis=1)
            else:
                u_v = np.concatenate((u_v, np.array([0 if k != ortho_index[i] else 1 for k in range(
                    u_v.shape[0])]).reshape((-1, 1))), axis=1)
        if test:
            print('After expand orthogonal basis')
            print(u_v)

    Sigma = np.zeros((M, N), dtype=dtype)
    for i in range(M):
        if eigs[i] > eps:
            Sigma[i, i] = cmath.sqrt(eigs[i])
        else:
            Sigma[i, i] = eigs[i]

    if test:
        print(Sigma)

    return u_v, np.diag(Sigma), eig_vecs

test example

N=4
A=np.random.randint(0,10,(N,N)).astype(np.float32)
A
>>>
array([[3., 1., 3., 2.],
       [0., 1., 2., 4.],
       [7., 6., 7., 2.],
       [0., 4., 6., 4.]], dtype=float32)

# np method
npu,npsig,npv=np.linalg.svd(A)
# my method
mu,msig,mv=MS.svd(A,test=False)
print(mu)
>>>
[[ 0.3072  +0.j -0.08334 +0.j  0.620199+0.j -0.717405+0.j]
 [ 0.219614+0.j  0.53638 +0.j  0.600369+0.j  0.550751+0.j]
 [ 0.777033+0.j -0.537072+0.j -0.093277+0.j  0.314457+0.j]
 [ 0.503606+0.j  0.645677+0.j -0.496209+0.j -0.28792 +0.j]]
print(msig)
>>>
[14.643497+0.j  5.399259+0.j  2.386839+0.j  0.847939+0.j]

print(mv):
>>>
[[ 0.434377-0.j  0.49192 -0.j  0.670721-0.j  0.345638+0.j]
 [-0.742606+0.j -0.034757+0.j  0.173598-0.j  0.645904+0.j]
 [ 0.50594 -0.j -0.554678+0.j -0.23833 +0.j  0.616081+0.j]
 [ 0.06136 -0.j  0.670328-0.j -0.680531+0.j  0.289435+0.j]]

np.linalg.norm(npsig-msig)
>>>
9.732064e-05

#reconstruct error
RA=mu@np.diag(msig)@mv
print(RA)
>>>
array([[ 2.999815+0.j,  0.999665+0.j,  3.000289+0.j,  2.000133+0.j],
       [-0.00005 +0.j,  0.999512+0.j,  2.000394+0.j,  4.000115+0.j],
       [ 6.999669+0.j,  6.000313+0.j,  6.999985+0.j,  1.999857+0.j],
       [ 0.000281+0.j,  3.999811+0.j,  5.999872+0.j,  4.00033 +0.j]], dtype=complex64)

np.linalg.norm(RA-A)
>>>
0.0010600829

print(npsig)
>>>
[14.643506  5.399242  2.386854  0.847844]


# 查看不同维度的方法误差
error=[]
N=[i for i in range(3,11)]
Error_N=[]
for n in N:
    for i in range(50):
        A=np.random.randn(n,n).astype(np.float32)
        npu,npsig,npv=np.linalg.svd(A)
        mu,msig,mv=MS.svd(A,test=False)
        error.append(np.linalg.norm(npsig-msig))
    Error_N.append(np.mean(error))