以下代码未附详细说明;如对用到的函数有疑问,可自行搜索(例如百度)或查阅相应库的官方文档。
# Print the summary produced by `data.info()` (presumably `data` is a pandas
# DataFrame loaded earlier -- confirm against the preceding cells).
data.info()

# Count the missing values in every column of `data`.
missing = data.isna().sum()
missing = pd.DataFrame(data={'特征': missing.index, '缺失值个数': missing.values})
# Keep only the columns that actually contain missing values.  The explicit
# .copy() makes the filtered result an independent frame, so the column
# assignment below cannot be treated by pandas as a write to a slice of the
# unfiltered frame.
missing = missing[missing['缺失值个数'] != 0].copy()
# Missing ratio = number of missing values / total number of rows.
missing['缺失比例'] = missing['缺失值个数'] / data.shape[0]
# Sort so that the columns with the highest missing ratio come first.
missing = missing.sort_values(by='缺失比例', ascending=False)
# Bare expression: shows the table when run as a notebook cell.
missing
# Bar chart of the missing ratio of each feature, drawn in the row order of
# the `missing` summary frame.
sns.set(style="whitegrid", color_codes=True)
fig = plt.figure(figsize=(20, 10))

bar_positions = np.arange(missing.shape[0])
missing_rates = list(missing['缺失比例'].values)

# x/y must be passed by keyword: seaborn >= 0.12 accepts only `data`
# positionally and raises a TypeError for positional x/y.
sns.barplot(x=bar_positions, y=missing_rates)
plt.title('Histogram of missing value of variables', fontsize=18)
plt.xlabel('variables names', fontsize=18)
plt.ylabel('missing rate', fontsize=18)

# Label each bar with its feature name.  The labels are left horizontal; add
# rotation=90 to the xticks call below if long names overlap.
plt.xticks(bar_positions, list(missing['特征']))

# Print the ratio as a percentage slightly above each bar, written vertically.
for x, y in enumerate(missing_rates):
    plt.text(x, y + 0.02, f'{y:.2%}', ha='center', rotation=90, size=14)

# Ratios lie in [0, 1]; the extra headroom keeps the top of the axis clear of
# the tallest bar.
plt.ylim([0, 1.05])
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.show()
missingno 库(用于缺失值可视化)
安装(在终端/命令行中执行以下命令;在 Notebook 单元格中需在命令前加 `!`):
pip install -i https://pypi.doubanio.com/simple/ --trusted-host pypi.doubanio.com missingno
# missingno provides ready-made plots for inspecting missing data; the
# descriptions below follow the missingno documentation.
import missingno as msno
# Nullity matrix: shows, row by row, where values are present or missing in
# each column of `data`.
msno.matrix(data)
# Bar chart of how complete each column is, drawn in dark grey
# (RGB 0.25, 0.25, 0.25).
msno.bar(data, color=(0.25, 0.25, 0.25))
# Dendrogram grouping columns whose missingness patterns are similar.
msno.dendrogram(data)
# Heatmap of the nullity correlation between pairs of columns.
msno.heatmap(data)