最小二乘回归、岭回归、贝叶斯回归实现代码

最新推荐文章于 2024-04-21 00:00:00 发布

萌新待开发

最新推荐文章于 2024-04-21 00:00:00 发布

阅读量791

点赞数 4

分类专栏： ᕦ 机器学习 ᕤ 文章标签： python 机器学习线性代数

本文链接：https://blog.csdn.net/qq_44785318/article/details/120690189

版权

ᕦ 机器学习 ᕤ 专栏收录该内容

68 篇文章

订阅专栏

此代码展示了多项式回归、岭回归和贝叶斯曲线拟合的过程，并通过matplotlib和sklearn库进行动画展示。首先，生成随机数据，然后用不同次数的多项式进行最小二乘法拟合，接着使用岭回归进行拟合，并对比了贝叶斯方法的预测结果。最后，计算并比较了各种方法的误差和，同时展示了所有拟合曲线的图例。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import animation
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline


def trueY(X):
    return np.sin(X) * 0.4 +np.cos(X)**2 + (np.cos(X)+1)**(5/6)

def generate(num):
    X = np.random.rand(num)*10
    X = np.sort(np.asarray(X))
    Y = trueY(X)+ np.random.normal(0, 0.2, size=X.shape)
    return X.tolist(),Y.tolist()



class LinearRrgression(object):
    def __init__(self,w = None):
        self.w = w
        self.poly = 4
        self.matAA = []
    def least_square_fit(self,X,Y,poly=4):
        self.poly = poly
        matA = []
        for i in range(poly+1):
            matA1 = []
            for j in range(poly+1):
                tx = 0.0
                for k in range(len(X)):
                    dx = 1.0
                    for l in range(j+i):
                        dx = dx*X[k]
                    tx+=dx
                matA1.append(tx)
            matA.append(matA1)
        matB = []
        for i in range(0, poly + 1):
            ty = 0.0
            for k in range(0, len(X)):
                dy = 1.0
                for l in range(0, i):
                    dy = dy * X[k]
                ty += Y[k] * dy
            matB.append(ty)
        matB = np.array(matB)
        matAA = np.linalg.solve(matA, matB)
        self.matAA = matAA
        return matAA
    def Least_square_predict(self,x):
        x = np.sort(x)
        yya = []
        for i in range(0, len(x)):
            yy = 0.0
            for j in range(0, self.poly + 1):
                dy = 1.0
                for k in range(0, j):
                    dy *= x[i]
                dy *= self.matAA[j]
                yy += dy
            yya.append(yy)
        return yya

X,y = generate(200)
plt.scatter(X,y,alpha=0.5,c = 'b',label='generate')
plt.plot(X,trueY(X),c='r',label="true")
plt.title("0.4*sin(X)+cos(X)^2 + (cos(X)+1)^(5/6)")
plt.legend()
plt.show()
LR  =LinearRrgression()
ims = []
fig = plt.figure()
plt.title("最小二乘法曲线拟合")
plt.plot(X,trueY(X),color='r',linestyle='-',marker='',label='true',alpha=0.7)
for i in range(1, 11):
    LR.least_square_fit(X, y, i)
    yya = LR.Least_square_predict(X)
    im = plt.plot(X, yya,  alpha = (lambda x:0.6 if  i<10 else 1)(i) ,linestyle='-', marker='', label='poly:'+str(i))
    plt.legend()
    ims.append(im)
ani = animation.ArtistAnimation(fig, ims, interval=500, repeat_delay=1000)
Writer = animation.writers['ffmpeg']  # 需安装ffmpeg
writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800)
ani.save("最小二乘法多项式曲线回归.gif",writer='pillow')
plt.show()




from sklearn.linear_model import Ridge, LinearRegression


def RidgeRegression(degree, alpha):
    return Pipeline([
        ("poly", PolynomialFeatures(degree=degree)),
        ("ridge_reg", Ridge(alpha=alpha))
    ])
ims = []
fig = plt.figure()
plt.title("岭回归曲线拟合")
plt.plot(X,trueY(X),color='r',linestyle='-',marker='',label='true',alpha=0.7)
for i in range(1, 11):
    ridge1_reg = RidgeRegression(degree=i, alpha=0.00001)
    ridge1_reg.fit(np.array(X).reshape(-1, 1), y)
    ridge1_predict = ridge1_reg.predict(np.array(X).reshape(-1, 1))
    im = plt.plot(X, ridge1_predict,  alpha = (lambda x:0.7 if  i<10 else 1)(i) ,linestyle='-', marker='', label='poly:'+str(i))
    plt.legend()
    ims.append(im)
ani = animation.ArtistAnimation(fig, ims, interval=500, repeat_delay=1000)
Writer = animation.writers['ffmpeg']  # 需安装ffmpeg
writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800)
ani.save("岭回归曲线拟合.gif",writer='pillow')
plt.show()





def PolynomialRegression(degree):
    return Pipeline([
        ("poly", PolynomialFeatures(degree=degree)),
        ("lin_reg", LinearRegression())
    ])
ims = []
fig = plt.figure()
plt.title("多项式曲线拟合")
plt.plot(X,trueY(X),color='r',linestyle='-',marker='',label='true',alpha=0.7)
for i in range(1, 11):
    poly20_reg = PolynomialRegression(i)
    poly20_reg.fit(np.array(X).reshape(-1, 1), y)
    y20_predict = poly20_reg.predict(np.array(X).reshape(-1, 1))
    im = plt.plot(X, y20_predict,  alpha = (lambda x:0.6 if  i<10 else 1)(i) ,linestyle='-', marker='', label='poly:'+str(i))
    plt.legend()
    ims.append(im)
ani = animation.ArtistAnimation(fig, ims, interval=500, repeat_delay=1000)
Writer = animation.writers['ffmpeg']  # 需安装ffmpeg
writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800)
ani.save("多项式曲线拟合.gif",writer='pillow')
plt.show()






class Bayesian_curve(object):
    def __init__(self,poly = 9):
        self.poly = poly
        self.beta = 5
        self.alpha = 0.00

    def phi(self,data):
        phi_all = []
        for j in data:
            phi_all.append([j ** i for i in range(self.poly + 1)])

        phi_all = np.array(phi_all).T
        I = np.identity(self.poly + 1)
        res = np.sum([i.reshape(-1, 1).dot(i.reshape(-1, 1).T) for i in phi_all.T], axis=0)
        S = np.linalg.inv(self.beta * res + self.alpha * I)
        return phi_all, S

    # Mean of Gaussian distribution of t corresponding to x
    def m(self,data,y):
        Phi_all, S = self.phi(data)
        result = np.sum([y[i] * Phi_all.T[i] for i in range(len(y))], axis=0)

        return self.beta * (Phi_all.T).dot(S.dot(result)).reshape(-1)

    # Variance of Gaussian distribution of t corresponding to x
    def var(self,data):
        Phi_all, S = self.phi(data)
        return np.diagonal((self.beta ** -1 + (Phi_all.T).dot(S).dot(Phi_all)))
ims = []
fig = plt.figure()
plt.title("贝叶斯曲线拟合")
plt.plot(X,trueY(X),color='r',linestyle='-',marker='',label='true',alpha=0.7)
for i in range(1, 11):
    bayesian = Bayesian_curve(poly=i)
    im = plt.plot(X, bayesian.m(X,y),  alpha = (lambda x:0.6 if  i<10 else 1)(i) ,linestyle='-', marker='', label='poly:'+str(i))
    plt.legend()
    ims.append(im)
plt.fill_between(X, bayesian.m(X,y) - bayesian.var(X),bayesian.m(X,y) + bayesian.var(X), color="g", alpha=0.2)
ani = animation.ArtistAnimation(fig, ims, interval=500, repeat_delay=1000)
Writer = animation.writers['ffmpeg']  # 需安装ffmpeg
writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800)
ani.save("贝叶斯曲线拟合.gif",writer='pillow')
plt.show()

plt.title("0.4*sin(X)+cos(X)^2 + (cos(X)+1)^(5/6)")
plt.plot(X, bayesian.m(X,y),c = 'b',label='prediction')
plt.fill_between(X, bayesian.m(X,y) - bayesian.var(X),bayesian.m(X,y) + bayesian.var(X), color="g", alpha=0.2)
plt.plot(X,trueY(X),c='r',label='true')
plt.legend()
plt.show()



print("最小二乘误差和：",np.sum(np.abs(trueY(X)-yya)))
print("多项式误差和：",np.sum(np.abs(trueY(X)-y20_predict)))
print("岭回归误差和：",np.sum(np.abs(trueY(X)-ridge1_predict)))
print("贝叶斯误差和：",np.sum(np.abs(trueY(X)-bayesian.m(X,y))))
fig = plt.figure()
plt.scatter(X,trueY(X),label='true',alpha=0.7)
plt.plot(X,y20_predict,label='poly')
plt.plot(X,ridge1_predict,label='ridge')
plt.plot(X,bayesian.m(X,y),label='bayesian' )
plt.plot(X,yya,label='ols' )
plt.legend()
plt.show()