决策树预测泰坦尼克号案例

最新推荐文章于 2024-10-09 00:00:00 发布

小徐的记事本

最新推荐文章于 2024-10-09 00:00:00 发布

阅读量69

点赞数

分类专栏：Python　文章标签：决策树、机器学习、python（Powered by 金山文档）

本文链接：https://blog.csdn.net/weixin_51332399/article/details/129181405

版权

Python 专栏收录该内容

61 篇文章 0 订阅

订阅专栏

# 导入pandas读取数据
import pandas as pd
# 导入字典类特征抽取的类
from sklearn.feature_extraction import DictVectorizer
# 导入分割数据集的类
from sklearn.model_selection import train_test_split
# 导入决策树的类
from sklearn.tree import DecisionTreeClassifier
# 导入保存模型的包
import joblib


def tree():
    """
    Train a decision tree on the Titanic passenger data and save it if it scores well.

    The dataset is downloaded from a fixed URL. The ``pclass``, ``age`` and
    ``sex`` columns are used as features and ``survived`` as the target.
    Missing ages are replaced by the mean age, the features are vectorized
    with ``DictVectorizer``, and a ``DecisionTreeClassifier(max_depth=5)`` is
    fitted on 70% of the rows. The remaining 30% are used to compute the
    accuracy, which is printed. The fitted model is written to ``./Dtree.pkl``
    only when the accuracy is above 0.8.

    No ``random_state`` is passed to the split or the classifier, so the
    score (and therefore whether the model is saved) can differ between runs.

    :return: None
    """

    # 1. Load the dataset.
    taitan = pd.read_csv("https://biostat.app.vumc.org/wiki/pub/Main/DataSets/titanic.txt")

    # 2. Pick the feature columns and the target column.
    # An explicit copy is taken so the age imputation below modifies this
    # frame itself. Filling a column of a slice in place is chained
    # assignment: it warns on classic pandas and does nothing under
    # copy-on-write, which would leave NaN ages in the features.
    x = taitan[["pclass", "age", "sex"]].copy()
    y = taitan["survived"]
    # Sample feature rows, kept from the original notes:
    #   pclass      age             sex
    #   1st         29.0000         female
    #   1st         2.0000          female
    #   1st         30.0000         male
    #   1st         25.0000         female
    #   1st         0.9167          male
    # i.e. records such as [[1st, 29, female], [1st, 2, female]]
    print(x)

    # 3. Impute missing ages with the mean age, then vectorize the records.
    # DictVectorizer one-hot encodes string-valued fields and passes numeric
    # fields through unchanged.
    x["age"] = x["age"].fillna(x["age"].mean())
    dic = DictVectorizer(sparse=False)
    x = dic.fit_transform(x.to_dict(orient="records"))

    # 4. Split into training and test sets (30% held out for testing).
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

    # 5. Create the classifier; the depth limit keeps the tree small.
    model = DecisionTreeClassifier(max_depth=5)

    # 6. Fit on the training data.
    model.fit(x_train, y_train)

    # 7. Measure accuracy on the held-out data.
    score = model.score(x_test, y_test)

    # Persist the model only when it clears the accuracy threshold.
    if score > 0.8:
        joblib.dump(model, "./Dtree.pkl")
        print("保存成功")
    else:
        print("模型不合格")

    print("score : ", score)
    return None

# tree()

def predict():
    """
    Load the saved classifier from ``./Dtree.pkl`` and print its prediction
    for one hard-coded sample.

    :return: None
    """
    # Restore the model previously written with joblib.
    model = joblib.load("./Dtree.pkl")

    # One sample with six feature values.
    # NOTE(review): presumably in the column order DictVectorizer produced
    # during training (age, then one-hot pclass and sex columns) — confirm
    # against the fitted vectorizer's feature names.
    sample = [[18, 1, 0, 0, 0, 1]]

    print(model.predict(sample))

# Script entry point: runs the prediction whenever this file is executed or
# imported. predict() loads ./Dtree.pkl, so that file must already exist in
# the working directory.
predict()