# 1. Build the training model with a decision tree (利用决策树建立训练模型)
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction import DictVectorizer #特征转换器
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
from sklearn import tree
# Load the training data.
# A raw string is used for the Windows path: the earlier literal mixed escaped
# backslashes with bare ones ("\D", "\M"), which are invalid escape sequences
# and trigger a SyntaxWarning on recent Python versions. The resulting path
# value is unchanged.
dataSet = pd.read_csv(r'C:\User\Desktop\ML\train.csv')

# Features: X is a DataFrame, i.e. a two-dimensional table with one column per
# selected feature.
X = dataSet[[
    'Elevation',
    'Aspect',
    'Slope',
    'Horizontal_Distance_To_Hydrology',
    'Vertical_Distance_To_Hydrology',
    'Horizontal_Distance_To_Roadways',
    'Hillshade_9am',
    'Hillshade_Noon',
    'Hillshade_3pm',
    'Horizontal_Distance_To_Fire_Points',
    'Wilderness_Area1',
]]

# Labels: the class column to predict.
y = dataSet['Cover_Type']

# Preprocessing: split into training and test sets. 10% of the rows are held
# out for testing; the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=33
)
vec=DictVectorizer(sparse