1、相关系数热力图
# Annotated heatmap of the pairwise correlations between the columns of `data`.
f, ax = plt.subplots(figsize=(15, 15))
ax = sns.heatmap(
    data.corr(),
    annot=True,        # write each coefficient inside its cell ...
    fmt=".2f",         # ... formatted with two decimals
    cmap='viridis',
    linewidths=.1,     # thin grey separators between cells
    linecolor='grey',
)
ax.set_title("Correlation")
plt.show()
效果:
2、离散变量柱状图(带文字)
def plot_discrete_features(data, feature, label, pos_value='Yes', neg_value='No'):
    """Plot class counts per category of a discrete feature plus the positive rate.

    A grouped bar chart shows, for every category of ``feature``, how many
    rows carry ``neg_value`` and ``pos_value`` in the ``label`` column (left
    y-axis). A dashed line on a secondary y-axis fixed to 0-100 shows the
    percentage of ``pos_value`` rows in each category, and the percentage is
    printed above each marker. The figure is displayed with ``plt.show()``.

    Args:
        data: DataFrame holding the ``feature`` and ``label`` columns.
        feature: Name of the discrete column placed on the x-axis.
        label: Name of the two-class target column.
        pos_value: Value of ``label`` treated as the positive class.
        neg_value: Value of ``label`` treated as the negative class.

    Raises:
        KeyError: If ``feature`` or ``label`` is not a column of ``data``, or
            if ``pos_value`` / ``neg_value`` never occurs in ``label``.
    """
    _, ax1 = plt.subplots(figsize=(10, 6))

    # Rows: categories of `feature`; columns: values of `label`.
    counts = pd.crosstab(data[feature], data[label])
    positive = counts[pos_value]
    negative = counts[neg_value]
    ratio = (positive / (positive + negative) * 100).round(2)
    categories = counts.index.astype('str')

    # Draw the bars first: labels and legend are configured afterwards so that
    # the pandas plotting call cannot overwrite them, and so that the legend
    # is built from artists that already exist.
    counts[[neg_value, pos_value]].plot.bar(ax=ax1, alpha=0.5, rot=0, fontsize=12)
    ax1.set_ylabel('Count', fontsize=12)
    ax1.set_xlabel(feature, fontsize=12)
    ax1.set_title('{0} : Count + % Ratio'.format(feature), fontsize=12)
    ax1.legend(loc='upper left', title=label)

    # Secondary axis for the percentage line; a percentage lives in 0-100.
    ax2 = ax1.twinx()
    ax2.set_ylabel('% Ratio', fontsize=12)
    ax2.set_ylim([0, 100])
    ax2.plot(categories, ratio.values, 'k--o', alpha=0.5, linewidth=2,
             markersize=8, label='% Ratio')

    # Annotate every marker with its percentage value.
    for category, pct in zip(categories, ratio.values):
        ax2.text(category, pct, pct, ha='center', va='bottom', fontsize=12)

    # The line now carries a label, so this legend is no longer empty.
    ax2.legend(loc='best')
    plt.yticks(fontsize=12)
    plt.show()
# Example: per-gender counts of each 'Churn' class and the per-gender 'Yes' rate.
plot_discrete_features(data=df, feature='gender', label='Churn')
效果:
3、饼图(分类问题的标签)
# Pie chart of the class balance of the "Churn" label.
# The wedge labels are taken from the value_counts() index so that every wedge
# is named after the class it actually counts. Series.unique() returns values
# in order of first appearance, which does not have to match the
# frequency-sorted order of value_counts(), so it must not be used as labels.
churn_counts = df["Churn"].value_counts()
churn_counts.plot.pie(
    labels=churn_counts.index,
    autopct='%.2f%%',
    fontsize=20,
    figsize=(6, 6),
)
效果:
4、概率密度图(针对连续变量)
def plot_numerical_features(data, feature, label):
    """Show kernel-density curves of a numeric column, one per label value.

    Builds a seaborn ``FacetGrid`` coloured by ``label``, maps
    ``sns.kdeplot`` over ``feature``, limits the x-axis to
    ``[0, data[feature].max()]``, adds a legend, axis labels and a title,
    then displays the figure with ``plt.show()``.

    Args:
        data: DataFrame holding the ``feature`` and ``label`` columns.
        feature: Name of the numeric column whose density is drawn.
        label: Name of the column used for the hue; the palette only defines
            colours for the values 'No' and 'Yes'.
    """
    sns.set_style("ticks")

    class_colors = {'No': 'lightblue', 'Yes': 'gold'}
    grid = sns.FacetGrid(data, hue=label, aspect=2.5, palette=class_colors)

    # NOTE(review): newer seaborn releases deprecate `shade` in favour of
    # `fill` -- confirm against the installed version before changing it.
    grid.map(sns.kdeplot, feature, shade=True, alpha=0.8)

    upper_bound = data[feature].max()
    grid.set(xlim=(0, upper_bound))
    grid.add_legend()
    grid.set_axis_labels(feature, 'proportion')
    grid.fig.suptitle(feature)
    plt.show()
# Example: density of 'tenure', one curve per 'Churn' value.
plot_numerical_features(data=df, feature='tenure', label='Churn')
效果:
# Distribution plot of the 'tenure' column over all rows.
# NOTE(review): `distplot` is marked deprecated in newer seaborn releases --
# confirm against the installed version.
sns.distplot(a=df['tenure'])
效果:
5、箱线图(查看异常数据)
# Loop over every column of `data` except the first and draw one box plot per
# column, grouped on the x-axis by the value of "SeriousDlqin2yrs".
for column in data.columns[1:]:
    ax = sns.boxplot(data=data, x="SeriousDlqin2yrs", y=column)
    plt.title(column)
    plt.show()
效果:
# Box plot of the single column 'R' of df_rfm.
df_rfm['R'].plot(kind='box')
6、ROC曲线
import scikitplot as skplt
# ROC curve: class probabilities predicted by xgb_base_sk for X_test are
# plotted against y_test; the micro- and macro-averaged curves are disabled.
vali_proba_df = pd.DataFrame(xgb_base_sk.predict_proba(X_test))
skplt.metrics.plot_roc(
    y_test,
    vali_proba_df,
    figsize=(6, 6),
    plot_micro=False,
    plot_macro=False,
)
效果:
7、混淆矩阵热力图
# Confusion matrix of the gbdt_base predictions for X_test: the raw counts are
# printed, then a heatmap is drawn in which every row is divided by its own
# total so that each row sums to 1.
sns.set()
labels = [0, 1]
y_predict_gbd = gbdt_base.predict(X_test)
cm = confusion_matrix(y_test, y_predict_gbd, labels=labels)
print("混淆矩阵:\n{0}".format(cm))

# keepdims=True keeps the row totals as a column vector so the division
# broadcasts across each row.
cm_normalized = cm / cm.sum(axis=1, keepdims=True)
sns.heatmap(cm_normalized, annot=True)
plt.xlabel('predict label')
plt.ylabel('true label')
效果: