首先回顾一下本次美赛的F题:
问题1的解答思路如下:
Python 示例代码:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd
from sklearn.datasets import make_classification
# Generate a synthetic two-class dataset: 1000 samples, 5 features.
X, y = make_classification(n_samples=1000, n_features=5, n_classes=2, random_state=42)

# Split the dataset into a training set and a test set.
# NOTE(review): the original post elides this step ("see the full version").
# The 80/20 split and the fixed seed are a conventional reconstruction so that
# X_train / X_test / y_train / y_test exist for the code below - adjust as needed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


def train_decision_tree(X_train, y_train):
    """Fit a decision-tree classifier on the training data and return it.

    NOTE(review): this helper is called later in the script but its definition
    was omitted from the original post; default hyper-parameters are assumed.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels, aligned row-wise with ``X_train``.

    Returns:
        The fitted ``DecisionTreeClassifier``.
    """
    model = DecisionTreeClassifier(random_state=42)
    model.fit(X_train, y_train)
    return model
# 预测测试集
def predict(model, X_test):
    """Return the model's predictions for the given samples.

    Args:
        model: Any object exposing a ``predict(X)`` method (in this script,
            the trained decision tree).
        X_test: Samples to predict on; handed to ``model.predict`` unchanged.

    Returns:
        Whatever ``model.predict(X_test)`` returns.
    """
    return model.predict(X_test)
# 评估模型性能
def evaluate_performance(y_true, y_pred):
    """Score predicted labels against the ground-truth labels.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned element-wise with ``y_true``.

    Returns:
        tuple: ``(accuracy, report)`` - the result of ``accuracy_score`` and
        the result of ``classification_report`` for the same inputs.
    """
    accuracy = accuracy_score(y_true, y_pred)
    report = classification_report(y_true, y_pred)
    return accuracy, report
# Train the model on the training split
trained_model = train_decision_tree(X_train, y_train)
# Predict labels for the test split
y_pred = predict(trained_model, X_test)
# Evaluate the predictions against the true test labels
accuracy, report = evaluate_performance(y_test, y_pred)
# Print the model's performance
print(f'Accuracy: {accuracy}')
print(f'Classification Report:\n{report}')
# 可视化决策树
def visualize_tree(model, feature_names, class_names):
    """Render a decision tree with Graphviz and return the graph object.

    As a side effect, the graph is rendered to disk under the base name
    ``decision_tree`` in the current working directory.

    Args:
        model: The decision-tree estimator to export.
        feature_names: Names of the features, one per input column.
        class_names: Names of the target classes.

    Returns:
        The ``graphviz.Source`` object built from the exported DOT text.
    """
    # Imported locally: the file's import block provides neither name, so the
    # original raised NameError on first call.
    import graphviz
    from sklearn.tree import export_graphviz

    dot_data = export_graphviz(
        model,
        out_file=None,  # return the DOT source as a string instead of writing a file
        feature_names=feature_names,
        class_names=class_names,
        filled=True,
        rounded=True,
        special_characters=True,
    )
    graph = graphviz.Source(dot_data)
    graph.render("decision_tree")  # save as a PDF file
    return graph
# Collect feature and class names for the plot.
# X has no ``.columns`` when it is a NumPy array (which is what
# make_classification returns), so fall back to generated names in that case.
if hasattr(X, "columns"):
    feature_names = list(X.columns)
else:
    feature_names = [f"feature_{i}" for i in range(X.shape[1])]
# Use the actual labels in ascending order rather than assuming they are 0..k-1.
class_names = [str(label) for label in sorted(set(y))]

# Visualize the decision tree
tree_graph = visualize_tree(trained_model, feature_names, class_names)

# Display the decision tree as a PNG image.
# NOTE(review): ``Image`` was never imported in the original; it is presumably
# IPython's display helper, and the bare expression below only shows the image
# when it is the last statement of a notebook cell.
from IPython.display import Image

Image(tree_graph.render("decision_tree", format="png"))
查看完整思路如下:
【腾讯文档】2024美赛全题目深度解析(建模过程+代码实现+论文指导)
https://docs.qq.com/doc/DSG1LQWtOQ3lFWHNj