import pandas as pd
import sklearn.metrics as metircs
# Load the data set from a CSV file in the current working directory.
data = pd.read_csv('007-day.csv', encoding='utf-8')
# Drop the columns that are not fed to the models.
data.drop(['dteday', 'instant', 'casual', 'registered'], axis=1, inplace=True)
# NOTE: the original called data.as_matrix() here and discarded the result.
# DataFrame.as_matrix() was removed in pandas 1.0 (AttributeError on current
# versions); the conversion to arrays is done below via `.values`.
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows for evaluation. No random_state is given, so the
# split (and every score printed later) differs from run to run.
trainSet, testSet = train_test_split(data, test_size=0.3)
# The last remaining column is used as the target; all others are features.
train_x, train_y = trainSet.iloc[:, :-1].values, trainSet.iloc[:, -1].values
test_x, test_y = testSet.iloc[:, :-1].values, testSet.iloc[:, -1].values
from sklearn.tree import DecisionTreeRegressor
# Baseline: a single decision tree with default hyper-parameters.
dtReg = DecisionTreeRegressor()
dtReg.fit(train_x, train_y)
y_pre = dtReg.predict(test_x)
# The value reported is the mean absolute error, so it is labelled "mae";
# the original label said "mse" although no squared error is computed.
baseline_mae = metircs.mean_absolute_error(y_true=test_y, y_pred=y_pre)
print('无优化mae:{}'.format(baseline_mae))
from sklearn.ensemble import AdaBoostRegressor
# Boosted model: 600 AdaBoost rounds over decision trees capped at depth 20.
# The base estimator is passed positionally so the call does not depend on
# the keyword name used by the installed scikit-learn version.
dtReg = DecisionTreeRegressor(max_depth=20)
adaboost = AdaBoostRegressor(dtReg, n_estimators=600)
adaboost.fit(train_x, train_y)
y_pre = adaboost.predict(test_x)
# The value reported is the mean absolute error, so it is labelled "mae";
# the original label said "mse" although no squared error is computed.
boosted_mae = metircs.mean_absolute_error(y_true=test_y, y_pred=y_pre)
print('优化后mae:{}'.format(boosted_mae))
print('解释方差分R2:{}'.format(metircs.r2_score(y_true=test_y, y_pred=y_pre)))
# NOTE(review): per the scikit-learn docs a regressor's score() reports R^2,
# so this line is expected to repeat the value above -- confirm.
print('查看模型得分:{}'.format(adaboost.score(test_x, test_y)))
# sklearn.externals.joblib was removed from scikit-learn (0.23+); use the
# standalone joblib package and fall back to the old location only on
# legacy installations where it is not importable on its own.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
# Placeholder output location (the literal reads "address plus file name");
# replace it with a real path before running.
path = '地址加文件名.txt'
# Persist the fitted AdaBoost model. The original passed an undefined
# placeholder name here, which raised NameError.
joblib.dump(adaboost, path)
# Reload the model and predict on the training features as a round-trip check.
# joblib files are pickle-based: only load files from a trusted source.
linear_re_loaded = joblib.load(path)
y_pre_train = linear_re_loaded.predict(train_x)
print(y_pre_train)
# 往期推荐 (blog footer text, "recommended past articles"; not part of the script)