#%%
#我还是使用sklearn吧。。
import numpy as np
from sklearn import datasets
from sklearn. model_selection import train_test_split
# Load the Boston housing data bundled with scikit-learn.
# NOTE(review): load_boston was removed in scikit-learn 1.2, so this cell
# needs an older release -- confirm the pinned version.
boston = datasets.load_boston()
X, y = boston.data, boston.target

# Keep only the samples whose target is below 50; both right-hand sides are
# evaluated against the unfiltered y before either name is rebound.
X, y = X[y < 50], y[y < 50]

# Default split with no fixed random_state, so the partition differs per run.
X_train, X_test, y_train, y_test = train_test_split(X, y)
from sklearn. linear_model import LinearRegression
lin_reg1 = LinearRegression( )
% time lin_reg1. fit( X_train, y_train)
print ( lin_reg1. score( X_test, y_test) )
print ( lin_reg1. coef_)
Wall time: 998 µs
0.7938748117566617
[-9.73603259e-02  3.07679209e-02 -2.40146783e-03  7.11942254e-01
 -1.17404300e+01  4.09422424e+00 -3.01307879e-02 -1.12181269e+00
  2.56969258e-01 -1.47222778e-02 -7.81371111e-01  6.58684222e-03
 -3.26238047e-01]
数据标准化
from sklearn. preprocessing import StandardScaler
# Fit the scaler on the training split only; fit() returns the scaler itself,
# so construction and fitting can be chained.
standardScaler = StandardScaler().fit(X_train)

# Apply the fitted scaling to the training features.
X_train_standard = standardScaler.transform(X_train)

# Show the first ten scaled rows as a quick sanity check.
print(X_train_standard[:10])
lin_reg2 = LinearRegression( )
% time lin_reg2. fit( X_train_standard, y_train)
Wall time: 986 µs
# Scale the test features with the scaler that was fitted on the training
# split, so both splits go through the same transformation.
X_test_standard = standardScaler.transform(X_test)

# Score of the model trained on standardized features, on the held-out split.
print(lin_reg2.score(X_test_standard, y_test))
0.7938748117566619