# Polynomial regression still builds on linear regression: lift the data to a
# higher dimension (add polynomial feature columns), then fit an ordinary
# linear regression on the expanded features.
import numpy as np
import matplotlib.pyplot as plt
# Build a noisy quadratic data set and take a first look at it.
x = np.random.uniform(-3, 3, size=100)   # 100 samples drawn from [-3, 3)
X = x[:, np.newaxis]                     # column-vector form, shape (100, 1)
X.shape
# Ground truth is y = 0.5*x^2 + x + 2, corrupted with unit Gaussian noise.
noise = np.random.normal(0, 1, size=100)
y = 0.5 * x ** 2 + x + 2 + noise
plt.scatter(x, y)
plt.show()
from sklearn.linear_model import LinearRegression

# Fit a plain straight line to the quadratic data: this underfits, which
# motivates the polynomial features below.
line_reg = LinearRegression().fit(X, y)   # fit() returns the estimator itself
y_predict = line_reg.predict(X)

plt.scatter(x, y)
plt.plot(x, y_predict, color='r')         # the fitted line, in red
plt.show()
# Lift the feature space by hand: append x^2 as a second column next to x.
X2 = np.hstack([X, X ** 2])
lin_reg2 = LinearRegression()
lin_reg2.fit(X2, y)
y_predict2 = lin_reg2.predict(X2)

plt.scatter(x, y)
# x is unsorted, so sort before drawing the curve; x[order] == np.sort(x).
order = np.argsort(x)
plt.plot(x[order], y_predict2[order], color='r')
plt.show()

lin_reg2.coef_       # sample run: array([1.09884169, 0.6036477 ]) — near the true [1, 0.5]
lin_reg2.intercept_  # sample run: 1.6885866140412158 — near the true 2
# Polynomial regression with scikit-learn's PolynomialFeatures
import numpy as np
import matplotlib.pyplot as plt

# Rebuild the same noisy quadratic data set.
x = np.random.uniform(-3, 3, size=100)
X = x.reshape(-1, 1)
y = 0.5 * x ** 2 + x + 2 + np.random.normal(0, 1, size=100)

# Add polynomial columns automatically instead of hstack-ing by hand.
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree=2)  # degree=2: all terms up to the 2nd power
# fit_transform does fit(X) followed by transform(X) in one idiomatic call.
X2 = poly.fit_transform(X)
X2.shape  # (100, 3): bias column x^0, then x, then x^2

from sklearn.linear_model import LinearRegression
lin_reg2 = LinearRegression()
lin_reg2.fit(X2, y)
y_predict2 = lin_reg2.predict(X2)

plt.scatter(x, y)
# Compute the sort order once and reuse it: x[order] == np.sort(x),
# avoiding the double sort of np.sort(x) plus np.argsort(x).
order = np.argsort(x)
plt.plot(x[order], y_predict2[order], color='r')
plt.show()
# About PolynomialFeatures
# Pipeline: chain polynomial feature generation, scaling, and regression
# into a single estimator.
# Fresh noisy quadratic data set for the Pipeline demo.
x = np.random.uniform(-3, 3, size=100)
X = x[:, np.newaxis]                      # shape (100, 1), same as reshape(-1, 1)
y = 0.5 * x ** 2 + x + 2 + np.random.normal(0, 1, 100)
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
poly_reg = Pipeline([
('poly',PolynomialFeature