分别利用XGBoost和CatBoost算法做特征重要性排序
XGBoost
# -*- coding: utf-8 -*-
import pandas as pd
from sklearn.model_selection import train_test_split
import xgboost as xgb
from matplotlib import pyplot as plt
from sklearn.metrics import confusion_matrix
import warnings
# Global script configuration: silence warnings and set up matplotlib.
# The 'ignore' action is installed without a category or message filter.
warnings.filterwarnings('ignore')
plt.rcParams['font.family']='Microsoft YaHei' # font used so Chinese labels are displayed
plt.style.use ('ggplot') # set the plotting style
import seaborn as sns
# Load the raw table, replace missing values with 0, discard the
# 'distinct_id' column and relabel the remaining columns with the Chinese
# display names used downstream.
# NOTE(review): '***.csv' looks like a redacted placeholder path — confirm
# the real file name before running.
df = (
    pd.read_csv('***.csv')
    .fillna(0)
    .drop(columns=['distinct_id'])
    .rename(
        columns={
            'is_stay': '是否留存',  # retained or not
            'max(is_login)': '是否登录',  # logged in or not
            'start_numbers': '启动次数',  # number of launches
            'is_play': '是否播放视频',  # played a video or not
            'is_action': '是否评论/收藏/点赞/分享',  # commented/favorited/liked/shared or not
            'show_numbers': '视频曝光数',  # video impressions
            'video_play_numbers': '视频播放数',  # video plays
            'real_video_play_numbers': '视频有效播放数',  # valid video plays
            'video_play_time': '视频播放总时长',  # total video play time
            'video_play_rate': '视频播放率',  # video play rate
            'video_real_play_rate': '视频有效播放率',  # valid video play rate
            'play_time_per_play': '每播放播放时长',  # play time per play
            'play_time_per_show': '每曝光播放时长',  # play time per impression
        }
    )
)

# Feature matrix: every column except the retention column '是否留存'.
X = df.drop(columns=['是否留存'])
y = df