1 scikit-learn中的多项式回归与pipeline
import numpy as np
import matplotlib.pyplot as plt
# Draw 100 sample points uniformly from the interval [-3, 3).
x = np.random.uniform(low=-3, high=3, size=100)
print(x.shape)
# Column-vector view of x with shape (100, 1); this 2-D form is what gets
# passed to PolynomialFeatures further down.
x1 = x[:, np.newaxis]
print(x1.shape)
# Quadratic ground truth 0.5 * x^2 + 2, perturbed by standard-normal noise.
noise = np.random.normal(loc=0, scale=1, size=100)
y = 0.5 * x ** 2 + 2 + noise
print(y.shape)
>>>(100,)
>>>(100, 1)
>>>(100,)
# Scatter-plot the noisy samples (x on the horizontal axis, y on the
# vertical axis) and display the figure.
plt.scatter(x, y)
plt.show()
输出:
# Use PolynomialFeatures to add polynomial terms to the data.
from sklearn.preprocessing import PolynomialFeatures
# degree=2 means polynomial features up to and including the 2nd power.
poly = PolynomialFeatures(degree=2)
# Fit the transformer on x1 and build the expanded feature matrix in one call.
x2 = poly.fit_transform(x1)
print(x2.shape)
>>>(100, 3)
x2[:5, :]
# Peek at the first five rows: column 0 is x to the 0th power (constant 1),
# column 1 is x itself, and column 2 is x^2.
>>>array([[ 1. , 1.81232389, 3.28451788],
[ 1. , -2.56381436, 6.5731441 ],
[ 1. , 2.8426554 , 8.08068973],
[ 1. , -0.79656011, 0.63450801],
[ 1. , 2.93915965, 8.63865947]])
from sklearn.linear_model import LinearRegression
# Fit an ordinary linear regression on the expanded features in x2
# (columns 1, x, x^2), which yields a quadratic fit in terms of x.
lin_reg = LinearRegression()
lin_reg.fit(x2, y)
y_predict = lin_reg.predict(x2)
plt.scatter(x, y)
# A line plot connects points in the order given, so the samples must be
# ordered by x. Compute the sorting permutation once and apply it to both
# arrays; this keeps x and the predictions aligned and avoids hard-coding
# the number of samples.
order = np.argsort(x)
plt.plot(x[order], y_predict[order], color='r')
plt.show()
输出:
# Inspect the learned weights of the model fitted on x2 (one weight per
# column of x2: constant, x, x^2).
lin_reg.coef_
>>>array([[0. , 0.05280184, 0.45891719]])
# Inspect the learned intercept (bias term) of the fitted model.
lin_reg.intercept_
>>>array([2.25953634])
关于PolynomialFeatures:
# Small 5x2 demo matrix holding the integers 1..10 in row-major order.
# Note: this rebinds the name x that held the random samples earlier.
x = np.reshape(np.arange(1, 11), (5, 2))