# Decision tree: predicting survival on the Titanic
#!/usr/bin/python
# -*- coding:utf-8 -*-
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction import DictVectorizer
from sklearn.tree import DecisionTreeClassifier
def mydesiontree():
    """Train a decision tree on the Titanic passenger data and report accuracy.

    Steps performed:

    1. Download the passenger table with ``pd.read_csv``.
    2. Use the ``pclass``, ``age`` and ``sex`` columns as features and
       ``survived`` as the target.
    3. Replace missing ``age`` values with the mean age of the whole table.
    4. Split into train/test sets (25% test, ``random_state=34``).
    5. Vectorize the per-row feature dicts with ``DictVectorizer`` (fitted on
       the training split only) and fit a ``DecisionTreeClassifier``.
    6. Print the imputed ages, the vectorizer's feature names, the test
       predictions and the test accuracy.

    :return: None
    """
    data = pd.read_csv("http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/titanic.txt")
    # print(data.columns)

    # Take an explicit copy so the imputation below works on an independent
    # frame; filling a column of a slice in place is chained assignment, which
    # triggers SettingWithCopyWarning and is a no-op under copy-on-write.
    x = data[['pclass', 'age', 'sex']].copy()
    y = data['survived']

    x['age'] = x['age'].fillna(x['age'].mean())
    print(x['age'])

    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.25, random_state=34
    )

    dictver = DictVectorizer(sparse=False)
    x_train = dictver.fit_transform(x_train.to_dict(orient="records"))

    # get_feature_names() was removed in scikit-learn 1.2; prefer the
    # replacement and fall back to the old name on older releases.
    if hasattr(dictver, "get_feature_names_out"):
        print(dictver.get_feature_names_out().tolist())
    else:
        print(dictver.get_feature_names())

    # Only transform the test split: re-fitting here would rebuild the
    # vocabulary from the test rows, so its columns could differ from the
    # ones the classifier was trained on.
    x_test = dictver.transform(x_test.to_dict(orient="records"))

    decisiontree = DecisionTreeClassifier()
    decisiontree.fit(x_train, y_train)

    y_predict = decisiontree.predict(x_test)
    print("预测结果:", y_predict)
    print("预测准确率:", decisiontree.score(x_test, y_test))
if __name__ == "__main__":
    # Script entry point: print a greeting, then run the Titanic demo.
    print("hello")
    mydesiontree()