机器学习之多项式曲线拟合的Python实现

Python 专栏收录该内容
24 篇文章 2 订阅

1、什么是多项式曲线拟合

y ( x , W ) = w 0 + w 1 x + w 2 x 2 + … + w k x k = ∑ i = 0 k w i x i y(x,W) = w_0 + w_1x + w_2x^2 + … + w_kx^k = \sum_{i=0}^{k}w_ix^i

k k 为多项式的阶数

w 0 , … , w k w_0,…,w_k 为多项式的系数，记为 W W

E ( W ) = 1 2 ∑ i = 1 n ( y ( x i , W ) − t n ) 2 = 1 2 ( X W − T ) T ( X W − T ) E(W) = \frac{1}{2} \sum_{i=1}^{n}(y(x_i,W)-t_n)^2=\frac{1}{2} (XW-T)^T(XW-T)

W = [ w 0 w 1 ⋮ w k ] ， X = [ 1 x 1 ⋯ x 1 k 1 x 2 ⋯ x 2 k ⋮ ⋮ ⋱ ⋮ 1 x n ⋯ x n k ] W =\begin{bmatrix} w_0 \\ w_1 \\ \vdots \\ w_k \\ \end{bmatrix}，X=\begin{bmatrix} 1 & x_1 & \cdots & x_1^k \\ 1 & x_2 & \cdots & x_2^k \\ \vdots & \vdots & \ddots & \vdots \\ 1 & x_n & \cdots & x_n^k \\ \end{bmatrix}

n n 为样本的点数

k k 为拟合出的多项式的阶数

∂ E ( W ) ∂ W = X T X W − X T T W = ( X T X ) − 1 X T T \frac{\partial E(W)}{\partial W} = X^TXW-X^TT \\ W = (X^TX)^{-1}X^TT

3、如何对拟合结果做出评估

3.1、使用拟合优度指数进行评价

y y 为待拟合数值，其均值为 y ˉ \bar y ，拟合值为 y ^ \hat y ，则：

S S T = ∑ i = 1 n ( y i − y ˉ ) 2 SST = \sum_{i=1}^{n}(y_i- \bar y)^2

S S R = ∑ i = 1 n ( y ^ i − y ˉ ) 2 SSR = \sum_{i=1}^{n}(\hat y_i- \bar y)^2

S S E = ∑ i = 1 n ( y i − y ^ i ) 2 SSE = \sum_{i=1}^{n}(y_i - \hat y_i)^2

S S T = S S R + S S E SST = SSR + SSE

R 2 = S S R S S T = ∑ i = 1 n ( y ^ i − y ˉ ) 2 ∑ i = 1 n ( y i − y ^ i ) 2 = 1 − S S E S S T R^2 = \frac{SSR}{SST} = \frac{\sum_{i=1}^{n}(\hat y_i- \bar y)^2}{\sum_{i=1}^{n}(y_i - \hat y_i)^2} = 1 - \frac{SSE}{SST}

4、利用正则化改善过拟合

E ( W ) = 1 2 ∑ n = 1 N { y ( x n , W ) − t n } 2 E(W) = \frac{1}{2} \sum_{n=1}^{N} \{y(x_n,W)-t_n \}^2

1 2 \frac{1}{2} 是为了方便计算引入的

N N 为样本的个数

∣ ∣ w ∣ ∣ 2 = w T w = w 0 2 + w 1 2 + … + w k 2 ||w||^2 = wTw = {w_0}^2 + {w_1}^2 + … + {w_k}^2

E ~ ( w ) = 1 2 ∑ n = 1 N { y ( x n , w ) − t n } 2 + λ 2 ∣ ∣ w ∣ ∣ 2 \tilde{E}(w) = \frac{1}{2} \sum_{n=1}^{N} \{y(x_n,w)-t_n \}^2+\frac{\lambda}{2}||w||^2

W = ( X T X + λ E m + 1 ) − 1 X T T W=(X^TX+\lambda E_{m+1})^{-1}X^TT

W W 为多项式的系数

代码实现

import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures

from PolynomialFitting.PolynomialFittingTest import PolynomialFittingTest

class PolynomialFitting:
@staticmethod
def fitting(x_list, y_list, degree=2):
"""
拟合
:param y_list:Y坐标
:param x_list: X坐标
:param degree: 阶数
:return:w参数矩阵, r2拟合优度指数, f可以参与计算的多项式
"""
# 参数格式效验
if not isinstance(x_list, list):
raise Exception(print("X坐标参数类型错误"))
if not isinstance(y_list, list):
raise Exception(print("Y坐标参数类型错误"))
if not isinstance(degree, int):
raise Exception(print("多项式阶数错误"))

# 将list装换为ndarray
try:
x_array = np.array(x_list)
y_array = np.array(y_list)
except:
raise Exception(print("坐标转换失败"))

# 对X坐标和Y坐标的形状进行比较，不一致则无法完成拟合
x_shape = x_array.shape
y_shape = y_array.shape
if x_shape != y_shape:
raise Exception("X坐标与Y坐标无法对应")

# w为拟合后的多项式的系数
try:
w = np.polyfit(x_array, y_array, degree)
except:
raise Exception("多项式阶数过高")

f = PolynomialFitting.get_fx(w)
r2 = PolynomialFitting.get_r2(y_array, x_array, f)

return w, r2, f

@staticmethod
def fitting_with_lambda(x_list, y_list, degree=2, lambda_=0.001):
"""
正则化拟合（利用岭回归为其添加惩罚项）
:param x_list: 训练集的X坐标
:param y_list: 训练集的Y坐标
:param degree: 要拟合的多项式的阶数
:param lambda_:
:return:
"""
# 参数格式效验
if not isinstance(x_list, list):
raise Exception(print("X坐标参数类型错误"))
if not isinstance(y_list, list):
raise Exception(print("Y坐标参数类型错误"))
if not isinstance(degree, int):
raise Exception(print("多项式阶数错误"))
if not isinstance(lambda_, float):
raise Exception(print("lambda系数错误"))

if lambda_ <= 0.0:
raise Exception(print("非法的lambda值"))

# 将list装换为ndarray
try:
x_array = np.array([x_list])
y_array = np.array([y_list])
except:
raise Exception(print("坐标转换失败"))

# 对X坐标和Y坐标的形状进行比较，不一致则无法完成拟合
x_shape = x_array.shape
y_shape = y_array.shape
if x_shape != y_shape:
raise Exception("X坐标与Y坐标无法对应")

# 矩阵转置
x_array = x_array.T
y_array = y_array.T

# 设置多项式的阶数
poly = PolynomialFeatures(degree=degree)

x_list_ploy = poly.fit_transform(x_array)

# Ridge(岭回归)通过对系数大小施加惩罚来解决拟合的一些问题。
# alpha >= 0 是控制系数收缩量的复杂性参数，alpha值越大，收缩量越大，模型对共线性的鲁棒性也更强。
lr = Ridge(alpha=(lambda_ / 2))
# Ridge 用 fit 方法完成拟合，并将模型系数 w 存储在其 coef_ 成员中
lr.fit(x_list_ploy, y_array)
# 获取多项式系数(参数从低向高)
w = lr.coef_[0]
# 逆序
w_l = w.tolist()
w_l.reverse()
w = np.array(w_l)

# 获取可以参与计算的多项式表达式
f = PolynomialFitting.get_fx(w)
# 计算r2拟合优度指数
r2 = PolynomialFitting.get_r2(y_list, x_list, f)
return w, r2, f

@staticmethod
def print_polynomial(w_list):
"""
获取多项式
:param w_list: 参数列表
:return:
"""
fx = "y = "
for i in range(0, len(w_list)):
param = w_list[i]
order = len(w_list) - 1 - i
if order:
fx += "{} * x ^ {} + ".format(param, order)
else:
fx += "{}".format(param)
return fx

@staticmethod
def get_fx(w_list):
"""
获取可以参与计算的多项式
:param w_list:w参数矩阵
:return:可以参与计算的多项式
"""
f = np.poly1d(w_list)
return f

@staticmethod
def get_r2(y_ture, x_ture, f):
"""
计算拟合优度指数
:param y_ture:Y坐标真实值
:param x_ture:X坐标真实值
:param f:可以参与计算的多项式
:return:r2拟合优度指数
"""
# 计算R2
# coefficient_of_determination = r2_score(y_ture, f(x_ture))
r2 = r2_score(y_ture, f(x_ture))
return r2

@staticmethod
def get_best_fitting(x_list, y_list):
"""
获取最优拟合结果
:param x_list: X坐标数组
:param y_list: Y坐标数组
:return:
"""
degree = 1
best_degree = 1
w_r, r2_r, f_r = PolynomialFitting.fitting(x_list, y_list, degree)
while True:
try:
# 多项式拟合
w, r2, f = PolynomialFitting.fitting(x_list, y_list, degree)
# print("多项式参数列表：{}".format(w))
# print("多项式阶数：{}".format(order))
# print("拟合优度指数：{}".format(r2))
print("阶数：{}\t拟合优度指数：{}".format(degree, r2))

if r2 <= 0 or r2 > 1:
break

if w[0] == 0:
degree += 1
continue

if r2 > r2_r:
w_r = w
r2_r = r2
f_r = f
best_degree = degree

degree += 1
except:
break

# time.sleep(0.5)
print("正常结束")
return w_r, r2_r, f_r, best_degree

@staticmethod
def get_best_fitting_with(x_real, y_real):
best_degree = 2
w_r, r2_r, f_r = PolynomialFitting.fitting_with_lambda(x_real, y_real)
lambda_r = np.exp(0)

for degree in range(2, 200):
for i in range(0, -19, -1):
lambda_ = np.exp(i)
w, r2, f = PolynomialFitting.fitting_with_lambda(x_real, y_real, degree=degree, lambda_=lambda_)
print("多项式阶数：{},lambda系数：{}".format(degree, lambda_))

# print("多项式参数列表：{}".format(w_2))
# print("多项式阶数：{}".format(degree))
# print("lambda系数：{}".format(lambda_))
# print("拟合优度指数：{}".format(r2_2))
if r2 > r2_r:
w_r = w
r2_r = r2
f_r = f
best_degree = degree
lambda_r = lambda_

return w_r, r2_r, f_r, best_degree, lambda_r

if __name__ == '__main__':
# 生成测试数据
X_train, y_train = PolynomialFittingTest.create_data(100)
print(X_train)
print(y_train)
x_l = X_train.reshape((1, 100))[0].tolist()
y_l = y_train.reshape((1, 100))[0].tolist()

# 未考虑正则化
w_1, r2_1, f1, best_degree_1 = PolynomialFitting.get_best_fitting(x_l, y_l)
print("多项式参数列表：{}".format(w_1))
print("多项式阶数：{}".format(best_degree_1))
print("拟合优度指数：{}".format(r2_1))

# 考虑正则化
w_2, r2_2, f2, best_degree_2, lambda_2 = PolynomialFitting.get_best_fitting_with(x_l, y_l)
print("多项式参数列表：{}".format(w_2))
print("多项式阶数：{}".format(best_degree_2))
print("lambda系数：{}".format(lambda_2))
print("拟合优度指数：{}".format(r2_2))

# 绘制画布
plt.figure()
# 解决中文显示问题
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# 绘制散点图
plt.scatter(x_l, y_l)

# 常规拟合
y_pre_1 = f1(x_l)
plt.plot(x_l, y_pre_1, color='b', label="常规拟合")

# 绘制岭回归曲线
y_pre_2 = f2(x_l)
plt.plot(x_l, y_pre_2, color='r', label="岭回归")

# 真实曲线
y_sin = PolynomialFittingTest.sin_fun(X_train)
plt.plot(x_l, y_sin, color='g', label="$sin(x)$")

# 设置图片标题
plt.title("预测曲线")
# 显示图例
plt.legend()
plt.show()



参考资料

[Python] 多项式曲线拟合(Polynomial Curve Fitting)：https://blog.csdn.net/m0_38068229/article/details/105202554

Numpy实现多项式曲线拟合：https://www.cnblogs.com/zhjblogs/p/14725864.html

numpy进行多项式拟合：https://www.cnblogs.com/yjybupt/p/12972682.html

• 2
点赞
• 0
评论
• 1
收藏
• 打赏
• 扫一扫，分享海报

01-24
10-14

05-30
04-20 2万+
08-26 3万+
06-13 678
03-14 1万+
06-28 6603
07-06 1034
12-05 5077
01-23 1990
11-19 8万+
10-03 3473
03-21 292

MELF晓宇

¥2 ¥4 ¥6 ¥10 ¥20

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、C币套餐、付费专栏及课程。