# 分析:统计出造假和非造假的个数并作图
# 正负样本可视化
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Inspect the balance between the 0 and 1 values of the fraud flag.
data = pd.read_csv(r'C:\Users\44933\Desktop\数据挖掘材料\全部数据\makeflag.csv', encoding='gbk')

# Count the rows per flag value and order the result by flag value (not by
# frequency). Series.value_counts is used because the top-level
# pd.value_counts function is deprecated in recent pandas releases.
count_classes = data['是否在当年造假'].value_counts().sort_index()

count_classes.plot(kind='bar')  # bar chart, one bar per flag value

# Print each count just above its bar. The bars are drawn at x = 0, 1, ...
# in index order, so enumerate() gives the x position of every bar. Looping
# (instead of hard-coding labels 0 and 1) avoids a KeyError when a class is
# missing and also labels any additional class that may appear.
for position, count in enumerate(count_classes):
    plt.text(position, count, '%d' % count, ha='center', va='bottom')

plt.title("FLAG ZERO/ONE")  # chart title
plt.xlabel("FLAG")  # x axis label
plt.ylabel("Number")  # y axis label
plt.savefig('制造业正负样本flag统计.png')