1.线性回归
1.1简单例子
from sklearn import linear_model
# Toy training set: three two-feature samples; each target equals the
# (identical) feature values of its sample.
x = [[0, 0], [1, 1], [2, 2]]
y = [0, 1, 2]

# fit() returns the fitted estimator, so creation and training are chained.
reg = linear_model.LinearRegression().fit(x, y)

# Predict the target of a sample that was not part of the training data.
print(reg.predict([[3, 3]]))
1.2糖尿病数据集
import matplotlib.pyplot as plt
import numpy as np
#导入数据集包与回归模型需要的包
from sklearn import datasets,linear_model
#导入度量结果的包
from sklearn.metrics import mean_squared_error,r2_score
# Load the diabetes dataset that ships with scikit-learn.
diabetes = datasets.load_diabetes()

# Keep only the feature stored at column index 2 for a single-variable
# regression. The 2:3 slice (rather than a plain index) keeps the result
# two-dimensional with exactly one column, as the estimator expects.
diabetes_X = diabetes.data[:, 2:3]
print(diabetes_X)
数据集
# Split the data into a training part and a test part.
# The number of trailing samples held out for testing is defined once so the
# feature split and the label split can never get out of step.
N_TEST_SAMPLES = 20

# Split the features.
diabetes_x_train = diabetes_X[:-N_TEST_SAMPLES]
diabetes_X_test = diabetes_X[-N_TEST_SAMPLES:]

# Split the labels with exactly the same boundary.
diabetes_y_train = diabetes.target[:-N_TEST_SAMPLES]
diabetes_y_test = diabetes.target[-N_TEST_SAMPLES:]

# Create the model object.
regr = linear_model.LinearRegression()

# Train the model on the training set.
regr.fit(diabetes_x_train, diabetes_y_train)

# Predict on the held-out test set.
diabetes_y_pred = regr.predict(diabetes_X_test)

# Show the fitted regression coefficient(s).
print(' Coefficients: \n', regr.coef_)

# Mean squared error between the true and the predicted test targets.
print("Mean squared error: %.2f"
      % mean_squared_error(diabetes_y_test, diabetes_y_pred))

# r2_score is the coefficient of determination (R^2); it is labelled as such
# because "variance score" would suggest the separate explained-variance metric.
print('Coefficient of determination: %.2f'
      % r2_score(diabetes_y_test, diabetes_y_pred))

# Plot the test samples as points and the fitted regression line on top.
plt.scatter(diabetes_X_test, diabetes_y_test)
plt.plot(diabetes_X_test, diabetes_y_pred, 'b', linewidth=3)

# Empty tuples remove the tick marks from both axes.
plt.xticks(())
plt.yticks(())
plt.show()
模型结果与评价
2.回归树
2.1简单例子
from sklearn import tree
# Toy training set: three two-feature samples and their targets.
X = [[0, 0], [1, 1], [2, 2]]
y = [0, 1, 2]

# Create a regression tree with default settings and train it.
dt_reg_test = tree.DecisionTreeRegressor()
dt_reg_test = dt_reg_test.fit(X, y)

# Print the prediction: as a bare expression the result is silently discarded
# when this runs as a script (only an interactive session would echo it).
print(dt_reg_test.predict([[1, 1]]))
2.2生成随机的数据集
import numpy as np
from sklearn.tree import DecisionTreeRegressor
import matplotlib.pyplot as plt
# Build the dataset: 80 random inputs scaled by 5, sorted ascending, with the
# sine of each input as its target. The fixed seed makes every run identical.
rng = np.random.RandomState(1)
X = 5 * rng.rand(80, 1)
X.sort(axis=0)
y = np.sin(X).ravel()

# Add random error to every fifth target (16 of the 80 samples).
y[::5] += 3 * (0.5 - rng.rand(16))

# Fit two trees that differ only in their maximum depth.
dt_regr_1 = DecisionTreeRegressor(max_depth=2).fit(X, y)
dt_regr_2 = DecisionTreeRegressor(max_depth=5).fit(X, y)

# Predict on an evenly spaced grid (step 0.01) shaped as a single column.
X_test = np.arange(0.0, 5.0, 0.01).reshape(-1, 1)
y_1 = dt_regr_1.predict(X_test)
y_2 = dt_regr_2.predict(X_test)

# Draw the training samples and both prediction curves in one figure.
plt.figure()
plt.scatter(X, y, s=20, edgecolor="black", label=" data")
plt.plot(
    X_test,
    y_1,
    color="cornflowerblue",
    label="max_depth=2",
    linewidth=2,
)
plt.plot(
    X_test,
    y_2,
    color="yellowgreen",
    label="max_depth=5",
    linewidth=2,
)
plt.xlabel("data")
plt.ylabel("target")
plt.title("Decision Tree Regression")
plt.legend()
plt.show()