Implementing Linear Regression with scikit-learn
Straight to the code:
import numpy as np
import matplotlib.pyplot as plt
# Note: load_boston was removed in scikit-learn 1.2; on newer versions
# an alternative dataset such as fetch_california_housing is needed.
from sklearn.datasets import load_boston
# train_test_split now lives in sklearn.model_selection; the old
# sklearn.cross_validation module no longer exists.
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
# Linear regression
from sklearn.linear_model import LinearRegression
# Load the dataset once instead of calling load_boston() twice
boston = load_boston()
X = boston.data
y = boston.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=33)
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)
y_pre = lin_reg.predict(X_test)
# For a regressor, .score() returns the R^2 on the test set
print(lin_reg.score(X_test, y_test))
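The three metrics imported at the top are never actually used in the snippet above; as a quick sketch, they can score the same predictions from different angles (for a regressor, .score() already returns R², so r2_score should match the value just printed):

# R^2, mean absolute error and mean squared error on the test set;
# r2_score(y_test, y_pre) equals lin_reg.score(X_test, y_test).
print(r2_score(y_test, y_pre))
print(mean_absolute_error(y_test, y_pre))
print(mean_squared_error(y_test, y_pre))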
# Index positions for plotting predictions against true values
x = np.arange(y_test.size)
plt.plot(x, y_pre, color='r', label='predicted')
plt.plot(x, y_test, color='blue', label='actual')
plt.axis([0, x.size, 0, 40])
plt.legend()
plt.show()
Result plot:
Since we're on a roll, let's bring in KNN regression for comparison. Here it is:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV
param_grid = [
    {
        'weights': ['uniform'],
        'n_neighbors': [i for i in range(1, 11)]
    },
    {
        'weights': ['distance'],
        'n_neighbors': [i for i in range(1, 11)],
        'p': [1, 2]
    }
]
knn_reg = KNeighborsRegressor()
grid_search = GridSearchCV(knn_reg, param_grid, verbose=1)
grid_search.fit(X_train, y_train)
KNN has quite a few hyperparameters, so a grid search over them is a good way to find the best combination.
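As a hedged aside, GridSearchCV also accepts cv and n_jobs (both part of its documented signature) to control the number of cross-validation folds and the parallelism; grid_search_cv5 below is a hypothetical name used only for illustration:

# A variant search: 5-fold cross-validation, parallelized across all cores
grid_search_cv5 = GridSearchCV(knn_reg, param_grid, cv=5, n_jobs=-1, verbose=1)
grid_search_cv5.fit(X_train, y_train)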
# In a script the bare expression prints nothing, so wrap it in print();
# here it yields {'n_neighbors': 6, 'p': 1, 'weights': 'distance'}
print(grid_search.best_params_)
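Besides best_params_, GridSearchCV exposes best_score_ (the mean cross-validated score of the winning combination) and best_estimator_ (the model already refit on the full training set with those parameters, since refit=True by default), which makes the manual re-instantiation below optional; a minimal sketch:

# Cross-validated score of the best parameter combination
print(grid_search.best_score_)
# Already refit with the best parameters; can predict directly
best_knn = grid_search.best_estimator_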
# Rebuild the regressor with the best parameters found by the search
knn_reg = KNeighborsRegressor(n_neighbors=6, p=1, weights='distance')
knn_reg.fit(X_train, y_train)
y_pre = knn_reg.predict(X_test)
print(knn_reg.score(X_test, y_test))
# Index positions for plotting (same as before)
x = np.arange(y_test.size)
plt.plot(x, y_pre, color='r', label='predicted')
plt.plot(x, y_test, color='blue', label='actual')
plt.axis([0, x.size, 0, 40])
plt.legend()
plt.show()
Result plot:
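To wrap up, here is a small sketch that puts the two models side by side; it assumes lin_reg and knn_reg from the snippets above are still in scope:

# Compare both regressors on the same held-out test set (R^2)
for name, model in [('LinearRegression', lin_reg), ('KNeighborsRegressor', knn_reg)]:
    print(name, model.score(X_test, y_test))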