# 导入pandas读取数据
import pandas as pd
# 导入字典类特征抽取的类
from sklearn.feature_extraction import DictVectorizer
# 导入分割数据集的类
from sklearn.model_selection import train_test_split
# 导入决策树的类
from sklearn.tree import DecisionTreeClassifier
# 导入保存模型的包
import joblib
def tree():
    """
    Train a decision tree on the Titanic passenger data and save it if it scores well.

    Downloads the Titanic data set, uses the ``pclass``, ``age`` and ``sex``
    columns as features and ``survived`` as the target, fills missing ages
    with the mean age, encodes the rows with ``DictVectorizer``, fits a
    ``DecisionTreeClassifier(max_depth=5)`` on a 70/30 train/test split and
    prints the accuracy on the test part. The fitted model is written to
    ``./Dtree.pkl`` only when that accuracy is above 0.8.

    :return: None
    """
    # 1. Load the data set.
    taitan = pd.read_csv("https://biostat.app.vumc.org/wiki/pub/Main/DataSets/titanic.txt")

    # 2. Pick the feature columns and the target column.
    # Take an explicit copy so the age column can be modified below without
    # touching (or depending on a view of) the original frame.
    x = taitan[["pclass", "age", "sex"]].copy()
    y = taitan["survived"]
    # Example feature rows:
    #   pclass  age      sex
    #   1st     29.0000  female
    #   1st      2.0000  female
    #   1st     30.0000  male
    #   1st     25.0000  female
    #   1st      0.9167  male
    print(x)

    # 3. Fill missing ages with the mean age.
    # Assign the result back instead of calling fillna(inplace=True) on the
    # selected column: the in-place form on a chained selection triggers
    # pandas chained-assignment warnings and does not update the frame when
    # Copy-on-Write is active.
    x["age"] = x["age"].fillna(x["age"].mean())

    # Turn each row into a dict and let DictVectorizer build a dense numeric
    # matrix (string-valued features become one-hot columns).
    dic = DictVectorizer(sparse=False)
    x = dic.fit_transform(x.to_dict(orient="records"))

    # 4. Split into training and test sets (30% held out for testing).
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

    # 5./6. Create the classifier and fit it on the training data.
    Dtree = DecisionTreeClassifier(max_depth=5)
    Dtree.fit(x_train, y_train)

    # 7. Evaluate on the held-out data; persist the model only if it is good enough.
    score = Dtree.score(x_test, y_test)
    if score > 0.8:
        joblib.dump(Dtree, "./Dtree.pkl")
        print("保存成功")
    else:
        print("模型不合格")
    print("score : ", score)
    return None
# Uncomment to (re)train the model; it is saved to ./Dtree.pkl only when the score is above 0.8.
# tree()
def predict():
    """
    Load the saved decision tree and print its prediction for one sample.

    The model is read from ``./Dtree.pkl`` and applied to a single
    hard-coded, already-encoded feature row.

    :return: None
    """
    # One sample with six feature columns.
    # NOTE(review): presumably ordered as
    # [age, pclass=1st, pclass=2nd, pclass=3rd, sex=female, sex=male]
    # (DictVectorizer's sorted feature names) - confirm against the
    # vectorizer used for training.
    sample = [[18, 1, 0, 0, 0, 1]]

    # Load the persisted classifier and print the predicted label(s).
    model = joblib.load("./Dtree.pkl")
    print(model.predict(sample))
# Run a prediction with the previously saved model; ./Dtree.pkl must already exist.
predict()
# Decision-tree prediction example on the Titanic data set
# (source page note: latest recommended article published 2024-10-09 00:00:00)