# -*- coding: utf-8 -*-
from __future__ import print_function

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn import linear_model
# Load the diabetes regression dataset that ships with scikit-learn.
diabetes = datasets.load_diabetes()

# Keep a single feature (column index 2). Indexing with np.newaxis keeps the
# result two-dimensional, shape (n_samples, 1), which is the layout passed to
# fit()/predict() below.
diabetes_x_temp = diabetes.data[:, np.newaxis, 2]

# Hold out the last 20 samples for evaluation; train on everything before them.
diabetes_x_train = diabetes_x_temp[:-20]  # training features
diabetes_x_test = diabetes_x_temp[-20:]  # test features (last 20 rows)
diabetes_y_train = diabetes.target[:-20]  # training targets
diabetes_y_test = diabetes.target[-20:]  # targets the predictions are compared to

# Fit a linear regression model on the training split.
clf = linear_model.LinearRegression()
clf.fit(diabetes_x_train, diabetes_y_train)

# Predict once and reuse the result for printing, the error metric and the
# plot instead of calling predict() three separate times.
diabetes_y_pred = clf.predict(diabetes_x_test)

# Show the held-out inputs and the corresponding predictions.
# print() is called as a function so the script also runs on Python 3.
print('验证集', diabetes_x_test)
print('预测结果', diabetes_y_pred)

# Fitted coefficient, mean of the squared residuals on the test split, and
# the model's score() on the test split.
print('系数:', clf.coef_)
print('残差平方:%.2f' % np.mean((diabetes_y_pred - diabetes_y_test) ** 2))
print('方差得分: %.2f' % clf.score(diabetes_x_test, diabetes_y_test))

# Plot the true test points (black dots) against the fitted line (blue).
plt.title('LinearRegression Diabetes')
plt.xlabel('attributes')
plt.ylabel('measure of diabetes')
plt.scatter(diabetes_x_test, diabetes_y_test, color='black')
plt.plot(diabetes_x_test, diabetes_y_pred, color='blue', linewidth=3)
plt.show()
# 结果 (sample output from running the script above)
#
# 验证集 [[ 0.07786339]
# [-0.03961813]
# [ 0.01103904]
# [-0.04069594]
# [-0.03422907]
# [ 0.00564998]
# [ 0.08864151]
# [-0.03315126]
# [-0.05686312]
# [-0.03099563]
# [ 0.05522933]
# [-0.06009656]
# [ 0.00133873]
# [-0.02345095]
# [-0.07410811]
# [ 0.01966154]
# [-0.01590626]
# [-0.01590626]
# [ 0.03906215]
# [-0.0730303 ]]
# 预测结果 [ 225.9732401 115.74763374 163.27610621 114.73638965 120.80385422
# 158.21988574 236.08568105 121.81509832 99.56772822 123.83758651
# 204.73711411 96.53399594 154.17490936 130.91629517 83.3878227
# 171.36605897 137.99500384 137.99500384 189.56845268 84.3990668 ]
# 系数: [ 938.23786125]
# 残差平方:2548.07
# 方差得分: 0.47