import numpy as np
import matplotlib.pyplot as plt
from matplotlib import animation
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
def trueY(X):
return np.sin(X) * 0.4 +np.cos(X)**2 + (np.cos(X)+1)**(5/6)
def generate(num):
X = np.random.rand(num)*10
X = np.sort(np.asarray(X))
Y = trueY(X)+ np.random.normal(0, 0.2, size=X.shape)
return X.tolist(),Y.tolist()
class LinearRrgression(object):
def __init__(self,w = None):
self.w = w
self.poly = 4
self.matAA = []
def least_square_fit(self,X,Y,poly=4):
self.poly = poly
matA = []
for i in range(poly+1):
matA1 = []
for j in range(poly+1):
tx = 0.0
for k in range(len(X)):
dx = 1.0
for l in range(j+i):
dx = dx*X[k]
tx+=dx
matA1.append(tx)
matA.append(matA1)
matB = []
for i in range(0, poly + 1):
ty = 0.0
for k in range(0, len(X)):
dy = 1.0
for l in range(0, i):
dy = dy * X[k]
ty += Y[k] * dy
matB.append(ty)
matB = np.array(matB)
matAA = np.linalg.solve(matA, matB)
self.matAA = matAA
return matAA
def Least_square_predict(self,x):
x = np.sort(x)
yya = []
for i in range(0, len(x)):
yy = 0.0
for j in range(0, self.poly + 1):
dy = 1.0
for k in range(0, j):
dy *= x[i]
dy *= self.matAA[j]
yy += dy
yya.append(yy)
return yya
X,y = generate(200)
plt.scatter(X,y,alpha=0.5,c = 'b',label='generate')
plt.plot(X,trueY(X),c='r',label="true")
plt.title("0.4*sin(X)+cos(X)^2 + (cos(X)+1)^(5/6)")
plt.legend()
plt.show()
LR =LinearRrgression()
ims = []
fig = plt.figure()
plt.title("最小二乘法曲线拟合")
plt.plot(X,trueY(X),color='r',linestyle='-',marker='',label='true',alpha=0.7)
for i in range(1, 11):
LR.least_square_fit(X, y, i)
yya = LR.Least_square_predict(X)
im = plt.plot(X, yya, alpha = (lambda x:0.6 if i<10 else 1)(i) ,linestyle='-', marker='', label='poly:'+str(i))
plt.legend()
ims.append(im)
ani = animation.ArtistAnimation(fig, ims, interval=500, repeat_delay=1000)
Writer = animation.writers['ffmpeg'] # 需安装ffmpeg
writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800)
ani.save("最小二乘法多项式曲线回归.gif",writer='pillow')
plt.show()
from sklearn.linear_model import Ridge, LinearRegression
def RidgeRegression(degree, alpha):
return Pipeline([
("poly", PolynomialFeatures(degree=degree)),
("ridge_reg", Ridge(alpha=alpha))
])
ims = []
fig = plt.figure()
plt.title("岭回归曲线拟合")
plt.plot(X,trueY(X),color='r',linestyle='-',marker='',label='true',alpha=0.7)
for i in range(1, 11):
ridge1_reg = RidgeRegression(degree=i, alpha=0.00001)
ridge1_reg.fit(np.array(X).reshape(-1, 1), y)
ridge1_predict = ridge1_reg.predict(np.array(X).reshape(-1, 1))
im = plt.plot(X, ridge1_predict, alpha = (lambda x:0.7 if i<10 else 1)(i) ,linestyle='-', marker='', label='poly:'+str(i))
plt.legend()
ims.append(im)
ani = animation.ArtistAnimation(fig, ims, interval=500, repeat_delay=1000)
Writer = animation.writers['ffmpeg'] # 需安装ffmpeg
writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800)
ani.save("岭回归曲线拟合.gif",writer='pillow')
plt.show()
def PolynomialRegression(degree):
return Pipeline([
("poly", PolynomialFeatures(degree=degree)),
("lin_reg", LinearRegression())
])
ims = []
fig = plt.figure()
plt.title("多项式曲线拟合")
plt.plot(X,trueY(X),color='r',linestyle='-',marker='',label='true',alpha=0.7)
for i in range(1, 11):
poly20_reg = PolynomialRegression(i)
poly20_reg.fit(np.array(X).reshape(-1, 1), y)
y20_predict = poly20_reg.predict(np.array(X).reshape(-1, 1))
im = plt.plot(X, y20_predict, alpha = (lambda x:0.6 if i<10 else 1)(i) ,linestyle='-', marker='', label='poly:'+str(i))
plt.legend()
ims.append(im)
ani = animation.ArtistAnimation(fig, ims, interval=500, repeat_delay=1000)
Writer = animation.writers['ffmpeg'] # 需安装ffmpeg
writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800)
ani.save("多项式曲线拟合.gif",writer='pillow')
plt.show()
class Bayesian_curve(object):
def __init__(self,poly = 9):
self.poly = poly
self.beta = 5
self.alpha = 0.00
def phi(self,data):
phi_all = []
for j in data:
phi_all.append([j ** i for i in range(self.poly + 1)])
phi_all = np.array(phi_all).T
I = np.identity(self.poly + 1)
res = np.sum([i.reshape(-1, 1).dot(i.reshape(-1, 1).T) for i in phi_all.T], axis=0)
S = np.linalg.inv(self.beta * res + self.alpha * I)
return phi_all, S
# Mean of Gaussian distribution of t corresponding to x
def m(self,data,y):
Phi_all, S = self.phi(data)
result = np.sum([y[i] * Phi_all.T[i] for i in range(len(y))], axis=0)
return self.beta * (Phi_all.T).dot(S.dot(result)).reshape(-1)
# Variance of Gaussian distribution of t corresponding to x
def var(self,data):
Phi_all, S = self.phi(data)
return np.diagonal((self.beta ** -1 + (Phi_all.T).dot(S).dot(Phi_all)))
ims = []
fig = plt.figure()
plt.title("贝叶斯曲线拟合")
plt.plot(X,trueY(X),color='r',linestyle='-',marker='',label='true',alpha=0.7)
for i in range(1, 11):
bayesian = Bayesian_curve(poly=i)
im = plt.plot(X, bayesian.m(X,y), alpha = (lambda x:0.6 if i<10 else 1)(i) ,linestyle='-', marker='', label='poly:'+str(i))
plt.legend()
ims.append(im)
plt.fill_between(X, bayesian.m(X,y) - bayesian.var(X),bayesian.m(X,y) + bayesian.var(X), color="g", alpha=0.2)
ani = animation.ArtistAnimation(fig, ims, interval=500, repeat_delay=1000)
Writer = animation.writers['ffmpeg'] # 需安装ffmpeg
writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800)
ani.save("贝叶斯曲线拟合.gif",writer='pillow')
plt.show()
plt.title("0.4*sin(X)+cos(X)^2 + (cos(X)+1)^(5/6)")
plt.plot(X, bayesian.m(X,y),c = 'b',label='prediction')
plt.fill_between(X, bayesian.m(X,y) - bayesian.var(X),bayesian.m(X,y) + bayesian.var(X), color="g", alpha=0.2)
plt.plot(X,trueY(X),c='r',label='true')
plt.legend()
plt.show()
print("最小二乘误差和:",np.sum(np.abs(trueY(X)-yya)))
print("多项式误差和:",np.sum(np.abs(trueY(X)-y20_predict)))
print("岭回归误差和:",np.sum(np.abs(trueY(X)-ridge1_predict)))
print("贝叶斯误差和:",np.sum(np.abs(trueY(X)-bayesian.m(X,y))))
fig = plt.figure()
plt.scatter(X,trueY(X),label='true',alpha=0.7)
plt.plot(X,y20_predict,label='poly')
plt.plot(X,ridge1_predict,label='ridge')
plt.plot(X,bayesian.m(X,y),label='bayesian' )
plt.plot(X,yya,label='ols' )
plt.legend()
plt.show()