# Load the Boston house-price data.
from sklearn.datasets import load_boston
from random import shuffle

# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn release to run.
boston = load_boston()

# Optional reproducible shuffle of the samples, left disabled. The live code
# below uses the data in its original order.
#seed(0) # Creates a replicable shuffling
#new_index = range(boston.data.shape[0])
#shuffle(new_index) # shuffling the index
#X, y = boston.data[new_index], boston.target[new_index]
X, y = boston.data, boston.target

# Show the feature matrix shape, the target shape and the feature names.
print(X.shape, y.shape, boston.feature_names)
# Scatter plot to judge the relationship between a feature and the target.
import pandas as pd

# Put the features in a DataFrame keyed by feature name, with the target
# appended as an extra column so both can be plotted from one frame.
df = pd.DataFrame(X, columns=boston.feature_names)
df['target'] = y

# LSTAT on the x axis against the target on the y axis, drawn in red.
scatter = df.plot(kind='scatter', x='LSTAT', y='target', c='r')
# 从图中可以看出，LSTAT 可尝试 log 变换
import numpy as np
# f_regression is imported from the public sklearn.feature_selection package;
# the private univariate_selection module path is no longer importable in
# current scikit-learn releases.
from sklearn.feature_selection import f_regression

# Take the LSTAT column as a 2-D array with a single column (one row per
# sample) via reshape(-1, 1).
a = df['LSTAT'].values.reshape(-1, 1)
y = boston.target