import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression

# Training set: pizza diameters (feature column) and the matching prices
# (target column), each given as an n-by-1 nested list.
x_train = [[6], [8], [10], [14], [18]]
y_train = [[7], [9], [13], [17.5], [18]]

# Baseline model: plain linear regression on the raw feature.
regressor = LinearRegression().fit(x_train, y_train)

# 100 evenly spaced sample points on [0, 26]. np.linspace returns a 1-D array,
# so it is reshaped into a 100-row, 1-column array before being passed to
# predict().
xx = np.linspace(0, 26, 100).reshape(-1, 1)
yy = regressor.predict(xx)
from sklearn.preprocessing import PolynomialFeatures

# Degree-2 model: a polynomial feature generator expands the single input
# column, and an ordinary linear regression is trained on the expanded columns.
poly2 = PolynomialFeatures(degree=2).fit(x_train)
x_train_poly2 = poly2.transform(x_train)
regressor_poly2 = LinearRegression().fit(x_train_poly2, y_train)

# The plotting grid goes through the same fitted generator before prediction.
xx_poly2 = poly2.transform(xx)
yy_poly2 = regressor_poly2.predict(xx_poly2)
# Degree-4 model: same procedure as the lower-degree fits, with the feature
# generator configured for polynomial terms up to degree 4.
poly4 = PolynomialFeatures(degree=4).fit(x_train)
x_train_poly4 = poly4.transform(x_train)

# Train the regression on the degree-4 expansion of the training inputs.
regressor_poly4 = LinearRegression().fit(x_train_poly4, y_train)

# Expand the plotting grid with the fitted generator, then predict on it.
xx_poly4 = poly4.transform(xx)
yy_poly4 = regressor_poly4.predict(xx_poly4)
# Draw the training points together with the three fitted curves.
plt.scatter(x_train, y_train)

# plt.plot returns a list of line objects; the trailing-comma unpacking pulls
# out the single line from each call so it can be passed to the legend.
plt1, = plt.plot(xx, yy, label='Degree=1')
plt2, = plt.plot(xx, yy_poly2, label='Degree=2')
plt4, = plt.plot(xx, yy_poly4, label='Degree=4')

# Fix the view to x in [0, 25] and y in [0, 25].
plt.axis([0, 25, 0, 25])
plt.xlabel('Diameter of Pizza')
# Label text corrected: it previously read 'Pirce of Pizza'.
plt.ylabel('Price of Pizza')
plt.legend(handles=[plt1, plt2, plt4])
plt.show()
# Report the R-squared value of each of the three models on the training set.
# Every model is scored on the feature representation it was trained on: the
# raw inputs for the linear fit, the polynomial expansions for the others.
# The message is built in one place so its wording (previously misspelled
# 'Polynomonal') stays consistent across the three lines.
for degree, model, train_features in (
    (1, regressor, x_train),
    (2, regressor_poly2, x_train_poly2),
    (4, regressor_poly4, x_train_poly4),
):
    print(
        'The R-squared value of Polynomial Regressor(Degree={}) '
        'performing on the training data is'.format(degree),
        model.score(train_features, y_train),
    )
# Evaluate the three regression models on a separate test set.
x_test = [[6], [8], [11], [16]]
y_test = [[8], [12], [15], [18]]

# The polynomial models are scored on test inputs expanded by the same fitted
# feature generators that were applied to the training inputs.
x_test_poly2 = poly2.transform(x_test)
x_test_poly4 = poly4.transform(x_test)

# Printed in order: linear model, degree-2 model, degree-4 model.
print("线性模型性能评估:", regressor.score(x_test, y_test))
print("2次多项式回归模型的性能评估:", regressor_poly2.score(x_test_poly2, y_test))
print("4次多项式回归模型的性能评估:", regressor_poly4.score(x_test_poly4, y_test))
# Output of a sample run:
# The R-squared value of Polynomonal Regressor(Degree=1) performing on the training data is 0.910001596424
# The R-squared value of Polynomonal Regressor(Degree=2) performing on the training data is 0.98164216396
# The R-squared value of Polynomonal Regressor(Degree=4) performing on the training data is 1.0
# 线性模型性能评估: 0.809726797708
# 2次多项式回归模型的性能评估: 0.867544365635
# 4次多项式回归模型的性能评估: 0.809588079578
# Final plot: (the figure is displayed by plt.show())