python中的数据统计,使用matplotlib画直方图,条状图,柱状图,饼图

一.统计文本中每种实体的个数

1.ann

2.代码

import os
import re
import pandas as pd
import numpy as np
import random
import math
from datetime import datetime
from matplotlib import pyplot as plt

# Relative filesystem paths (resolved against the current working directory).
train_dir = "../../siriyang/中医药命名实体识别/dataset/train"
test_dir = "../../siriyang/中医药命名实体识别/dataset/chusai_xuanshou"
prepare_dir = "./prepare"
def get_entitie(dir):
    """Count how often each entity label occurs in the ``.ann`` files of a folder.

    Each annotation line is expected to look like
    ``T1<TAB>DRUG_EFFICACY 1 5<TAB>text``: the label is the first
    space-separated token of the second tab-separated field.

    Args:
        dir: Path of the directory to scan. The parameter name is kept for
            backward compatibility even though it shadows the builtin.

    Returns:
        A plain ``dict`` mapping label -> number of occurrences. Files are
        read in sorted-name order, so the key order is the same on every run.
    """
    entities = {}
    # Only files that really are annotation files are opened; deriving the
    # name from a ``.txt`` file would fail when no matching ``.ann`` exists.
    ann_files = sorted(f for f in os.listdir(dir) if f.endswith('.ann'))
    for file_name in ann_files:
        path = os.path.join(dir, file_name)
        with open(path, 'r', encoding='utf8') as f:
            for line in f:
                fields = line.rstrip('\r\n').split('\t')
                if len(fields) < 2:
                    # Blank or malformed line: there is no label field to count.
                    continue
                name = fields[1].split(' ')[0]
                entities[name] = entities.get(name, 0) + 1
    return entities
# Tally the entity labels found under train_dir and print the result.
count = get_entitie(train_dir)
print(count)
print(list(count.keys()))    # the labels
print(list(count.values()))  # the counts, in the same order as the labels

3.结果

二.绘出柱状图(条形图)

def autolabel(rects):
    """Write each bar's height above it and set the x-axis tick labels.

    Args:
        rects: Iterable of bar patches (e.g. the return value of ``plt.bar``);
            each must provide ``get_x()``, ``get_width()`` and ``get_height()``.

    Note:
        The tick labels are read from the module-level ``name_list`` and
        ``num_list``, which must be defined before this function is called.
    """
    for rect in rects:
        height = rect.get_height()
        # Centre the text horizontally on the bar and rest it on the bar's top.
        plt.text(rect.get_x() + rect.get_width() / 2, height, height,
                 ha='center', va='bottom')
    # The tick labels do not depend on the individual bar, so they are set
    # once after the loop instead of once per bar. rotation=80 tilts them.
    plt.xticks(range(len(num_list)), name_list, rotation=80)
        
# Bar chart of how many times each entity category occurs.
name_list = list(count.keys())
num_list = list(count.values())

plt.figure(figsize=(10, 5))  # canvas size
plt.title('Category statistics of entities', fontsize=13)  # fontsize sets the text size
plt.xlabel('category', fontsize=13)  # x-axis caption
plt.ylabel('quantity', fontsize=13)  # y-axis caption

# edgecolor is the bar outline colour; lw is the outline width.
bars = plt.bar(range(len(num_list)), num_list, width=0.8, edgecolor='darkblue', lw=1)
autolabel(bars)

# Keep a handle to the current figure, display it, then write it to disk.
fig = plt.gcf()
plt.show()
fig.savefig('./Category statistics of entities.png')  # saved in the current directory

三.绘出饼状图

import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

def draw_pie(labels, quants):
    """Draw a pie chart, show it, and save it as ``./pie.jpg``.

    Args:
        labels: One label per wedge.
        quants: Wedge sizes, in the same order as ``labels``.
    """
    quants = list(quants)
    plt.figure(1, figsize=(10, 10))
    # One explode offset per wedge; all zeros means no wedge is pulled out.
    # Sizing the list from the data replaces a fixed 13-entry list, which only
    # matched inputs with exactly 13 wedges.
    expl = [0] * len(quants)
    # Fewer colours than wedges is fine: the colours are reused cyclically.
    colors = ["blue", "red", "coral", "green", "yellow", "orange"]
    # autopct is the format used for the percentage printed on each wedge.
    plt.pie(quants, explode=expl, colors=colors, labels=labels,
            autopct='%1.1f%%', pctdistance=0.8, shadow=True)
    plt.title('length of the entity', bbox={'facecolor': '0.8', 'pad': 5})
    # Keep a handle to the figure, display it, save it, then close it.
    fig = plt.gcf()
    plt.show()
    fig.savefig("./pie.jpg")
    plt.close()
    
# NOTE(review): `dic` is not defined anywhere in the visible code — confirm
# which mapping (label -> size) is meant before running this.
labels = list(dic.keys())
quants = list(dic.values())
draw_pie(labels, quants)

直方图博客链接分享

饼状图链接分享

 

  • 1
    点赞
  • 12
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值