本文使用 scikit-learn 的 datasets 模块自带的房价数据集和线性回归算法,构建一个房价预测模型。每个样本包含 13 个房价特征。分别用一阶、二阶和三阶多项式线性回归训练模型,并对它们的性能进行比较。
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
# Load the data.
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, so the
# plain import raises ImportError on current releases. When that happens, fall
# back to rebuilding the same arrays from the original StatLib file, following
# the recipe published in scikit-learn's deprecation notice.
try:
    from sklearn.datasets import load_boston
except ImportError:

    def load_boston():
        """Rebuild the Boston housing dataset from the original StatLib file.

        Returns a Bunch exposing ``data`` (506 x 13), ``target`` and
        ``feature_names``, i.e. the attributes read in this part of the
        notebook. Other attributes of the historical Bunch (e.g. ``DESCR``)
        are not provided. Requires pandas and network access; note that the
        source URL is plain HTTP.
        """
        import pandas as pd
        from sklearn.utils import Bunch

        raw = pd.read_csv(
            "http://lib.stat.cmu.edu/datasets/boston",
            sep=r"\s+",
            skiprows=22,  # skip the free-text header that precedes the data
            header=None,
        ).values
        # Each record spans two physical lines: the first holds 11 features,
        # the second holds the remaining 2 features followed by the target.
        return Bunch(
            data=np.hstack([raw[::2, :], raw[1::2, :2]]),
            target=raw[1::2, 2],
            feature_names=np.array(
                ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',
                 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']
            ),
        )


boston = load_boston()
X = boston.data    # feature matrix
Y = boston.target  # regression target
# Dimensions of the feature matrix; the tuple on the next line appears to be the
# recorded notebook output (506 rows, 13 columns).
X.shape
(506, 13)
# First row of the feature matrix, followed by what appears to be its recorded output.
X[0]
array([6.320e-03, 1.800e+01, 2.310e+00, 0.000e+00, 5.380e-01, 6.575e+00,
6.520e+01, 4.090e+00, 1.000e+00, 2.960e+02, 1.530e+01, 3.969e+02,
4.980e+00])
# Names of the 13 feature columns, followed by what appears to be the recorded output.
boston.feature_names
array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
'TAX', 'PTRATIO', 'B', 'LSTAT'], dtype='<U7')
# Model training
# Split the data into a training set and a test set, holding out 20% of the
# samples for testing. random_state is fixed at 3, as in the original cell.
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=3,
)
#训练模型
import time
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures