矩阵论补坑—— SVD分解
前言
下面的code是根据矩阵论教材中关于矩阵奇异值分解部分而写的初步代码,原理讲解的blog已经烂大街了,真正能实现的不多。算是填上去年挖的坑,有些函数可以在矩阵论专题中的其他文章中找到,初步实现了求特征值(包括实数和复数特征根)、特征向量、SVD分解等函数,适用阶数不大于10阶。本身没有优化,超过10阶太费时了,不至于,当个demo去看即可。基本原理是QR分解迭代求解特征值,再求每个特征值对应的特征向量,然后再构建矩阵 U U U和 V H V^H VH,比较成熟的算法是双位移的QR迭代方法,个人技拙写了个折中版本,大家笑纳@-@。
代码实现
@staticmethod
def solve_square_root(a, b, c):
# 求二次函数根
temp = b**2-4*a*c
return [(-b+np.sqrt(temp+0j))/(2*a), (-b-np.sqrt(temp+0j))/(2*a)]
@classmethod
def solve_complex_root_QR_eig(self, target):
return self.solve_square_root(1, -target[0, 0]-target[1, 1], target[0, 0]*target[1, 1]-target[0, 1]*target[1, 0])
@classmethod
def eig_QR(self, target, mode='householder', epoch=20, eps=1e-4, rnd=3, test=False):
"""eig_QR use QR_Fact to get all eig_values of target M
*reference: https://blog.csdn.net/xfijun/article/details/109464005
*通过不断迭代至对角线元素基本保持不变,或者只剩下2*2方块时停止,但是很耗时,暂时没有实现双位移的方法!!
*此实现方法对于利用上Hessenberg矩阵化简的优化过程没有明显体现
*size > 10*10 还是考虑用np的内置方法
Args:
target ([float or complex]]): [matrix to get eig_values]
mode (str, optional): [transform methods]. Defaults to 'householder'.
epoch (int, optional): [thershold to stop loops]. Defaults to 20.
eps ([float], optional): [thershold to stop loops]. Defaults to 1e-4.
rnd([int], optional): [The number of decimal places reserved]. Defaults to 3.
test (bool, optional): [show checking information]. Defaults to False.
Return:
eig_list (list): eig_values of input matrix
Author: Junno
Date: 2022-04-29
"""
assert len(target.shape) == 2
M, N = target.shape
assert M == N
cnt = 0
# one loop for QR_Fact_Eig
def QR_epoch(A):
nonlocal cnt, epoch, mode, eps
Ak = deepcopy(A)
flag = 1
while flag:
Ak0 = Ak.copy()
Q, R = self.QR_Fact(Ak, mode)
Ak = R@Q
if np.linalg.norm(np.diag(np.abs(Ak-Ak0))) <= eps:
flag = 0
cnt += 1
if cnt > epoch:
flag = 0
return Ak
Ak = QR_epoch(target)
cnt += epoch
# check result to reloop or stop
iter_flag = True
while iter_flag:
check_complex_eig = False
eig_list = []
for i in range(M):
if check_complex_eig:
check_complex_eig = False
continue
# check 2*2 block to get roots or continue QR_epoch loops
if (i+1) < M and abs(Ak[i+1, i]) >= eps:
if i+2 < M and (abs(np.sum(Ak[i+2:, i])) >= eps or abs(np.sum(Ak[i+2:, i+1])) >= eps):
Ak = QR_epoch(Ak)
break
else:
check_complex_eig = True
if test:
print(Ak[i:i+2, i:i+2])
c_eig = self.solve_complex_root_QR_eig(
Ak[i:i+2, i:i+2])
eig_list += c_eig
else:
eig_list.append(Ak[i, i])
# stop when get to the last row or last two rows is a 2*2 block
if i == M-1 or (i == M-2 and check_complex_eig == True):
iter_flag = False
if test:
print("Iteration times: ", cnt)
print(Ak)
# sort
eig_list = sorted(
eig_list, key=lambda x: np.linalg.norm(x), reverse=True)
return np.round(eig_list, rnd)
@staticmethod
def solve_eig_vec(target, eig, dtype=np.complex64, eps=1e-3, test=False):
"""solve_eig_vec: solve according eig_vector given an eig
Args:
target ([float or complex]]): [matrix to get eig_vec]
eig ([float or complex]): [one eig of target]
dtype ([type], optional): [description]. Defaults to np.complex64.
eps ([float]): numerical threshold.
test (bool, optional): [show checking information]. Defaults to False.
Returns:
[np.array]: [eig_vector]
Author: Junno
Date: 2022-04-29
"""
M, N = target.shape
A = target-np.eye(M, dtype=dtype)*eig
if test:
print(A)
for i in range(M):
if test:
print('During process in row:{}, col:{}'.format(i, i))
if sum(abs(A[i:, i])) > eps:
zero_index = []
non_zero_index = []
for k in range(i, M):
if abs(A[k, i]):
non_zero_index.append(k)
else:
zero_index.append(k)
non_zero_index = sorted(
non_zero_index, key=lambda x: abs(A[x, i]))
sort_cols = non_zero_index+zero_index
if test:
print("Sort_cols index: {}".format(sort_cols))
A[i:, :] = A[sort_cols, :]
if test:
print('After resorting')
print(A)
if np.sum(abs(A[i+1:, :])):
prefix = -A[i+1:, i]/A[i, i]
temp = (np.array(prefix).reshape(-1, 1)
)@A[i, :].reshape((1, -1))
A[i+1:, :] += temp
# eliminate elements with the same col index above
# if i != 0:
# for j in range(i):
# A[j, :] += -(A[j, i]/A[i, i])*A[i, :]
if test:
print('After primary row transformation:')
print(A)
else:
continue
# 如果秩不是n-1,即有k重根,需要另外计算,否则所有的特征值给出的特征向量均是一样的,
# the equation must be non-full-rank, the last row will be left
for m in range(M-1):
for n in range(m, N):
if abs(A[m, n]) > eps:
A[m, :] *= 1./A[m, n]
break
if test:
print(A)
x = np.zeros((1, M), dtype=dtype)
for i in range(M-1, -1, -1):
if i < M-1:
if np.abs(A[i, i]) < eps:
x[0, i] = 0
else:
x[0, i] = -np.sum(A[i, i+1:]*x[0, i+1:])/A[i, i]
else:
x[0, i] = 1
if test:
print(i, x)
return x
@classmethod
def eig_vv(self, target, eps=1e-3, test=False):
"""eig_vv 求解矩阵的所有特征值与特征向量
Args:
target ([float or complex]): [target matrix]
eps ([float]): numerical threshold.
Returns:
[list and array]: [list of eigs ans array of eig_vectors]
Author: Junno
Date: 2022-04-29
"""
assert target.shape[0] <= 10, 'too large matrix to solve using this imperfect method, it will cost much of time, please use numpy methods instead'
eigs = self.eig_QR(target, test=test)
num_eigs = len(eigs)
eig_vecs = None
for i in range(num_eigs):
if test:
print('solve eigvv')
print(eigs[i], self.solve_eig_vec(target, eigs[i], eps=eps))
if i == 0:
eig_vecs = self.solve_eig_vec(target, eigs[i], eps=eps)
else:
eig_vecs = np.concatenate(
(eig_vecs, self.solve_eig_vec(target, eigs[i], eps=eps)), axis=0)
return eigs, eig_vecs
@classmethod
def svd(self, target, dtype=np.complex64, eps=1e-4, test=False):
"""svd
Args:
target ([float or complex]]): [matrix to get eig_vec]
dtype ([type], optional): [description]. Defaults to np.complex64.
eps ([float]): numerical threshold.
test (bool, optional): [show checking information]. Defaults to False.
Returns:
[u,v,d]: [svd components]
Author: Junno
Date: 2022-04-29
"""
import cmath
assert len(target.shape) == 2
M, N = target.shape
AHA = np.conj(target).T@target
if test:
print('AHA:')
print(AHA)
eigs, eig_vecs = self.eig_vv(AHA, eps=eps, test=test)
if test:
print(eigs, '\n', eig_vecs)
u_v = None
temp = False
for j in range(eig_vecs.shape[0]):
eig_vecs[j, :] /= np.linalg.norm(eig_vecs[j, :])
# u_i=(1/sigma_i)*(A@eigv_i) if sigma_i>0 sigma_i=1/sqrt(eig_i)
if np.linalg.norm(eigs[j]) > eps:
sigma_j = cmath.sqrt(eigs[j])
if not temp:
u_v = 1/sigma_j*(target@eig_vecs[j, :]).reshape((-1, 1))
temp = True
else:
u_v = np.concatenate(
(u_v, 1/sigma_j*(target@eig_vecs[j, :]).reshape((-1, 1))), axis=1)
if test:
print(eigs, '\n', eig_vecs)
print('u_v')
print(u_v)
# expand orthogonal basis
if u_v.shape[1] < M:
ortho_index = []
for i in range(u_v.shape[0]):
if sum(abs(u_v[i, :])) < eps:
ortho_index.append(i)
for i in range(M-u_v.shape[1]):
if len(ortho_index) == 0:
u_v = np.concatenate(
(u_v, np.array([0 for _ in range(u_v.shape[0])]).reshape((-1, 1))), axis=1)
else:
u_v = np.concatenate((u_v, np.array([0 if k != ortho_index[i] else 1 for k in range(
u_v.shape[0])]).reshape((-1, 1))), axis=1)
if test:
print('After expand orthogonal basis')
print(u_v)
Sigma = np.zeros((M, N), dtype=dtype)
for i in range(M):
if eigs[i] > eps:
Sigma[i, i] = cmath.sqrt(eigs[i])
else:
Sigma[i, i] = eigs[i]
if test:
print(Sigma)
return u_v, np.diag(Sigma), eig_vecs
test example
N=4
A=np.random.randint(0,10,(N,N)).astype(np.float32)
A
>>>
array([[3., 1., 3., 2.],
[0., 1., 2., 4.],
[7., 6., 7., 2.],
[0., 4., 6., 4.]], dtype=float32)
# np method
npu,npsig,npv=np.linalg.svd(A)
# my method
mu,msig,mv=MS.svd(A,test=False)
print(mu)
>>>
[[ 0.3072 +0.j -0.08334 +0.j 0.620199+0.j -0.717405+0.j]
[ 0.219614+0.j 0.53638 +0.j 0.600369+0.j 0.550751+0.j]
[ 0.777033+0.j -0.537072+0.j -0.093277+0.j 0.314457+0.j]
[ 0.503606+0.j 0.645677+0.j -0.496209+0.j -0.28792 +0.j]]
print(msig)
>>>
[14.643497+0.j 5.399259+0.j 2.386839+0.j 0.847939+0.j]
print(mv):
>>>
[[ 0.434377-0.j 0.49192 -0.j 0.670721-0.j 0.345638+0.j]
[-0.742606+0.j -0.034757+0.j 0.173598-0.j 0.645904+0.j]
[ 0.50594 -0.j -0.554678+0.j -0.23833 +0.j 0.616081+0.j]
[ 0.06136 -0.j 0.670328-0.j -0.680531+0.j 0.289435+0.j]]
np.linalg.norm(npsig-msig)
>>>
9.732064e-05
#reconstruct error
RA=mu@np.diag(msig)@mv
print(RA)
>>>
array([[ 2.999815+0.j, 0.999665+0.j, 3.000289+0.j, 2.000133+0.j],
[-0.00005 +0.j, 0.999512+0.j, 2.000394+0.j, 4.000115+0.j],
[ 6.999669+0.j, 6.000313+0.j, 6.999985+0.j, 1.999857+0.j],
[ 0.000281+0.j, 3.999811+0.j, 5.999872+0.j, 4.00033 +0.j]], dtype=complex64)
np.linalg.norm(RA-A)
>>>
0.0010600829
print(npsig)
>>>
[14.643506 5.399242 2.386854 0.847844]
# 查看不同维度的方法误差
error=[]
N=[i for i in range(3,11)]
Error_N=[]
for n in N:
for i in range(50):
A=np.random.randn(n,n).astype(np.float32)
npu,npsig,npv=np.linalg.svd(A)
mu,msig,mv=MS.svd(A,test=False)
error.append(np.linalg.norm(npsig-msig))
Error_N.append(np.mean(error))
写在最后
- 应该会有很多问题,留言指正~