import pandas as pd
import pdpbox
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from pdpbox.info_plots import PredictPlot # 导入PredictPlot类
# Load the iris dataset and put it in a DataFrame, one column per feature.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
# Append the class label column (0-2: setosa, versicolor, virginica).
df['target'] = iris.target

# Feature columns and the name of the target column.
features = iris.feature_names  # sepal length/width, petal length/width
target = 'target'

# Hold out 20% of the rows as a test split; the seed is fixed so the
# split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    df[features], df[target], test_size=0.2, random_state=42
)

# Fit the classifier on the training split only.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Feature to analyse, and the label shown for it in the figure.
feature = 'petal length (cm)'
feature_name = feature

# Build the prediction plot for `feature`. Note that the full DataFrame
# (training and test rows, plus the 'target' column) is passed in;
# `model_features` names the columns that are fed to the model.
predict_plot = PredictPlot(
    df=df,
    feature=feature,
    feature_name=feature_name,
    model=model,
    grid_type='percentile',
    model_features=features,
)

fig, axes, summary_df = predict_plot.plot(
    which_classes=None,    # plot every class (0, 1, 2)
    show_percentile=True,  # display the percentile information
    engine='plotly',
    template='plotly_white',
)

# Resize the figure and set a centred title. The title names the plot
# type actually produced (a PredictPlot, not a TargetPlot).
fig.update_layout(
    width=1200,   # figure width in pixels
    height=1600,  # figure height in pixels
    title=dict(text=f'Predict Plot: {feature_name}', x=0.5),  # x=0.5 centres it
)
fig.show()
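# NOTE: stray web-page residue (post dates and view counts) that followed the
# script was removed here; it was not Python code and broke parsing of the file.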
