背景
在机器学习和数据科学领域,构建高效的预测模型只是第一步,如何将模型成果落地应用至关重要,借助在线部署工具,可以实现模型的实时预测与可视化交互,为用户提供直观的分析支持。本次实践以Stacking回归模型为核心,结合SHAP值分析特征重要性,并通过Streamlit搭建交互式Web应用关注微信公众号:Python机器学习AI
代码实现
模型构建
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams['axes.unicode_minus'] = False
df = pd.read_excel('2024-11-27公众号Python机器学习AI.xlsx')
from sklearn.model_selection import train_test_split, KFold
X = df.drop(['Y'],axis=1)
y = df['Y']
# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
random_state=42)
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, StackingRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from sklearn.linear_model import LinearRegression
# 定义一级学习器
base_learners = [
("RF", RandomForestRegressor(n_estimators=100, random_state=42)),
("XGB", XGBRegressor(n_estimators=100, random_state=42, verbosity=0)),
("LGBM", LGBMRegressor(n_estimators=100, random_state=42, verbose=-1)),
("GBM", GradientBoostingRegressor(n_estimators=100, random_state=42)),
("AdaBoost", AdaBoostRegressor(n_estimators=100, random_state=42)),
("CatBoost", CatBoostRegressor(n_estimator