排列3统计：0到9每个数字总共出现过几次，以及每个数字在个位、十位、百位分别出现过几次

本文链接：https://blog.csdn.net/lmhopen/article/details/114649418
接前一篇文章：用网络爬虫抓完全部排列3历史数据后，写段代码进行统计分析，主要统计0到9每个数字总共出现过几次，以及每个数字在个位、十位、百位分别出现过几次；得到数据后就可以用图表显示出来。之前抓取的数据保存在pl3.xls文件里。
from pyecharts.charts import Bar
import numpy
numpy.__version__

import xlrd

import pandas as pd
import string
# Module-level result table, initially empty. excel_one_line_to_list()
# declares it `global` and rebinds it to the per-digit statistics.
ling_9cishu= pd.DataFrame()

# Bar-chart demo with hard-coded categories and values; it does not use the
# lottery data that is read further down in this file.
bar = Bar()
bar.add_xaxis(["衬衫", "羊毛衫", "雪纺衫", "裤子", "高跟鞋", "袜子"])
bar.add_yaxis("商家A", [5, 20, 36, 10, 75, 90])
# render() generates a local HTML file; by default it is written as
# render.html in the current directory.
# A path can also be passed, e.g. bar.render("mycharts.html")
bar.render()

# Earlier variant, kept commented out:
#L = list(range(10))
L=[0]*10  # a list holding ten zeros
L  # bare expression; has no effect when the file runs as a script
# NOTE: the sample output on the next line matches the commented-out
# range(10) variant above, not the current L (ten zeros).
# [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
print(type(L[0]))
# int
print(L)

""" 打开excel表格"""
# Walk-through of the xlrd read API against pl3.xls: open the workbook, then
# print sheet-, row-, column- and cell-level information.
# NOTE(review): the trailing "e.g." comments are sample outputs carried over
# from the original write-up. They look like they came from a small demo
# workbook (5 rows / 4 columns) rather than from pl3.xls, which is read at
# row index 5776 below -- treat them as illustrative only; TODO confirm.
workbook = xlrd.open_workbook("pl3.xls")
print("打开EXCEL文件后返回值",workbook)       # e.g. <xlrd.book.Book object at 0x000000000291B128>
# --- Get the names of all sheets ---
""" 获取所有sheet名称"""
sheet_names = workbook.sheet_names()
print("所有表的名称",sheet_names)     # e.g. ['Sheet1', 'Sheet2']
# --- Get all sheet objects, or a single one ---
""" 获取所有或某个sheet对象"""
# Get all sheet objects
sheets_object = workbook.sheets()
print("所有表对象",sheets_object)    # e.g. [<xlrd.sheet.Sheet object at 0x0000000002956710>, <xlrd.sheet.Sheet object at 0x0000000002956AC8>]
# Get the first sheet object by index
sheet1_object = workbook.sheet_by_index(0)
print("第一个表对象",sheet1_object)    # e.g. <xlrd.sheet.Sheet object at 0x0000000002956710>
# Get the sheet named "pl3"; this rebinds sheet1_object, which every call
# below operates on
sheet1_object = workbook.sheet_by_name(sheet_name="pl3")
print("用表名pl3获取第一个表对象",sheet1_object)    # e.g. <xlrd.sheet.Sheet object at 0x0000000002956710>
# --- Check whether a sheet has been loaded ---
""" 判断某个sheet是否已导入"""
# Check by index whether the sheet is loaded
sheet1_is_load = workbook.sheet_loaded(sheet_name_or_index=0)
print("用索引判断表1pl3是否已经导入",sheet1_is_load)    # e.g. True
# Check by sheet name whether the sheet is loaded
sheet1_is_load = workbook.sheet_loaded(sheet_name_or_index="pl3")
print("用表名称pl3判断表是否已经导入",sheet1_is_load)    # e.g. True
# --- Row operations on the sheet object ---
""" 对sheet对象中的行执行操作 """
# Number of rows in the sheet
nrows = sheet1_object.nrows
print("获取表有多少行",nrows)        # e.g. 5
# Values of the row at rowx=2 (the print label calls it the third row)
all_row_values = sheet1_object.row_values(rowx=2)
print("获取第三行的数据",all_row_values)      # e.g. [3.0, 'b', 1, '']
# Same row, limited by start_colx=1 and end_colx=3
row_values = sheet1_object.row_values(rowx=2, start_colx=1, end_colx=3)
print("更精确的指定数据",row_values)        # e.g. ['b', 1]
# Cell objects of the row at rowx=2
row_object = sheet1_object.row(rowx=2)
print("第3行的单元对象",row_object)        # e.g. [number:3.0, text:'b', bool:1, empty:'']
# Cells of the row at rowx=2, via row_slice
row_slice = sheet1_object.row_slice(rowx=2)
print("第3行的单元",row_slice)        # e.g. [number:3.0, text:'b', bool:1, empty:'']
# Cell types of the row at rowx=2
row_types = sheet1_object.row_types(rowx=2)
print("第3行单元类型",row_types)        # e.g. array('B', [2, 1, 4, 0])
# Length of the row at rowx=2
row_len = sheet1_object.row_len(rowx=2)
print("第3行的长度",row_len)         # e.g. 4
# Generator over all rows of the sheet
rows_generator = sheet1_object.get_rows()
print("所有行的生成器",rows_generator)      # e.g. <generator object Sheet.get_rows.<locals>.<genexpr> at 0x00000000028D8BA0>

# --- Column operations on the sheet object ---
""" 对sheet对象中的列执行操作 """
# Number of columns in the sheet
ncols = sheet1_object.ncols
print("有效列数",ncols)        # e.g. 4
# Values of the column at colx=1
col_values = sheet1_object.col_values(colx=1)
print("第colx=1列的数据",col_values)      # e.g. ['test', 'a', 'b', 'c', 'd']
# Same column, limited by the positional arguments (1, 1, 3)
col_values1 = sheet1_object.col_values(1, 1, 3)
print("更精确的列数据",col_values1)     # e.g. ['a', 'b']
# Cells of the column at colx=1 (the print label calls it the second column)
col_slice = sheet1_object.col_slice(colx=1)
print("第2列的单元",col_slice)      # e.g. [text:'test', text:'a', text:'b', text:'c', text:'d']
# Cell types of the column at colx=1
col_types = sheet1_object.col_types(colx=1)
print("第2列的单元类型",col_types)      # e.g. [1, 1, 1, 1, 1]

# --- Cell operations on the sheet object ---
"""对sheet对象中的单元执行操作"""
# Cell object at rowx=5776, colx=1 (the print label calls it row 5777,
# column 2).
# NOTE(review): hard-coded row index; presumably fails on a sheet with fewer
# rows -- confirm.
cell_info = sheet1_object.cell(rowx=5776, colx=1)
print("第5777行第2列的单元对象",cell_info)      # e.g. text:'m'
print("类型",type(cell_info))   # e.g. <class 'xlrd.sheet.Cell'>
# Value of the cell at rowx=1, colx=2
cell_value = sheet1_object.cell_value(rowx=1, colx=2)
print("第1行第2列单元值",cell_value)      # e.g. m
# Type code of the cell at rowx=1, colx=2
cell_type = sheet1_object.cell_type(rowx=1, colx=2)
print("第1行第2列单元类型",cell_type)      # e.g. 1

# Cell type codes (ctype): empty=0, string=1, number=2, date=3, boolean=4, error=5

def excel_one_line_to_list():
    """Count how often each digit 0-9 occurs in the winning numbers of pl3.xls.

    Reads the column selected by ``usecols=[1]`` from ``pl3.xls`` (it must
    carry the header ``中奖号码``), splits every value into its first, second
    and third character (stored as the columns ``百位数``, ``十位数`` and
    ``个位数``) and, for each digit 0-9, counts the occurrences per position
    plus their total (``总次数``).

    Side effects:
        * prints every intermediate result to stdout;
        * writes the input column plus the three position columns to
          ``zjhm.xlsx``;
        * writes the digit-by-position pivot table to ``df3.xlsx``;
        * rebinds the module globals ``ling_9cishu`` (the long-format result
          table, with its former index exposed as the column ``index``) and
          ``df4`` (the table built for the last digit, 9).

    The result table is rebuilt from scratch on every call, so calling the
    function more than once yields the same table each time.

    Returns:
        None.
    """
    global ling_9cishu, df4

    df = pd.read_excel("pl3.xls", usecols=[1],
                       names=None, dtype=object)  # single column, read as objects
    result = [row[0] for row in df.values.tolist()]
    print(result)
    df2 = df.value_counts()  # occurrences of every distinct winning number
    print('每个三位数出现次数', df2)

    # NOTE(review): the .str slicing assumes the cells hold three-character
    # strings (e.g. '012'); numeric cells would not work -- confirm the
    # format produced by the scraper.
    df['百位数'] = df['中奖号码'].str[0:1]
    df['十位数'] = df['中奖号码'].str[1:2]
    df['个位数'] = df['中奖号码'].str[2:3]

    # One small frame per digit; they are concatenated once after the loop.
    # DataFrame.append was removed in pandas 2.0, so pd.concat is used.
    per_digit_frames = []
    for i in range(10):
        print('当前统计的数字是', i)
        # Per-column count of cells equal to this digit.
        df3 = df.isin([str(i)]).sum(axis=0)
        print('统计0到9每个数出现几次', df3)
        zongcishu = df3[df3 > 0].sum()  # total over all positions
        print('个十百位分别统计出来的数加到一起', zongcishu)
        df4 = df3.to_frame()  # Series -> DataFrame
        df4.insert(0, '统计的数字', i)  # record which digit the counts belong to
        df4.rename(columns={0: '统计结果'}, inplace=True)
        print('Series转换成DataFrame后加入一列', df4)
        df4.loc['总次数'] = [i, zongcishu]  # extra row holding the total
        # The row for the full-number column carries no positional count.
        df4 = df4.drop(index=['中奖号码'])
        print('删掉无用行,又加一行', df4)
        per_digit_frames.append(df4)
    ling_9cishu = pd.concat(per_digit_frames)
    print('最终结果', ling_9cishu)
    print('列名', ling_9cishu.columns.values.tolist())
    df.to_excel('zjhm.xlsx')

    print('索引还原为列', ling_9cishu.reset_index(drop=False))
    # The index (position name / total) is needed as the pivot's column key,
    # so turn it back into a regular column named 'index'.
    ling_9cishu = ling_9cishu.reset_index(drop=False)
    print('列名', ling_9cishu.columns.values.tolist())
    # Reshape to one row per digit and one column per position / total.
    df6 = ling_9cishu.pivot(index='统计的数字', values='统计结果', columns='index')
    print('重新排列', df6)
    df6.to_excel('df3.xlsx')
    


# Run the digit statistics only when this file is executed as a script.
if __name__ == '__main__':
    excel_one_line_to_list()