Excel数据分析的代码（包括列表的操作）

东方-教育技术博主

已于 2022-08-11 14:24:24 修改

阅读量933

点赞数

分类专栏： python 文章标签：数据分析 pandas python

于 2022-08-11 12:59:02 首次发布

本文链接：https://blog.csdn.net/qq_41517071/article/details/126278750

版权

python 专栏收录该内容

63 篇文章 3 订阅

订阅专栏

import xlrd

xlsx = xlrd.open_workbook('chengji.xls')


# Look up a sheet by name: xlsx.sheet_by_name("sheet1")
# Look up a sheet by position: xlsx.sheet_by_index(3)
table = xlsx.sheet_by_index(0)

# Read a single cell: (2, 1) addresses row 3, column 2 (both zero-based).
# value = table.cell_value(2, 1)
# print("第3行2列值为", value)

# Number of rows in the sheet.
nrows = table.nrows
print("表格一共有", nrows, "行")

# Tally of how often each code category occurs, plus the number of coded rows.
all_count = 0
code_dict = {"1": 0, "2": 0, "3": 0, "4": 0, "5": 0, "6": 0, "7": 0, "8": 0}
# Row 0 is skipped -- presumably a header row; confirm against the workbook.
for i in range(1, nrows):
    cell = table.cell_value(i, 2)
    # xlrd hands numeric cells back as float, so a code typed as 1 arrives as
    # 1.0 and str() would yield "1.0", which is not a key of code_dict.
    if isinstance(cell, float) and cell.is_integer():
        cell = int(cell)
    # Strip before the lookup as well, so " 1" is counted instead of raising.
    s = str(cell).strip()
    # Only rows that actually carry a code are counted.
    if s != '':
        code_dict[s] += 1
        all_count += 1

改用了pandas，
但是编码的映射关系没变

python列表
用列表可以查看每个元素出现的次数。

注意避开两个坑

1 字典的 key 不要加双引号（读取到的编码是整数，key 也要用整数）
2 为防止数字被读成浮点数，读取时要加参数 dtype=object
源代码

import  pandas  as pd

# Approach 1: read_excel with no sheet argument loads the workbook's first sheet.
# dtype=object is passed so numeric cells are not converted to floats on read.
df = pd.read_excel('data/刘老师课堂文本切分的语句3.xlsx', dtype=object)

# Pull the three code columns (positions 1-3) out of one sample row.
i = 12
code_1, code_2, code_3 = df.iat[i, 1], df.iat[i, 2], df.iat[i, 3]
print(code_1, code_2, code_3, sep='\n')

# Counter per code category (integer keys 1-11), plus a running total.
all_count = 0
code_dict = dict.fromkeys(range(1, 12), 0)

# Show the counter for the sampled code before and after one increment.
print(code_dict[code_1])
code_dict[code_1] = code_dict[code_1] + 1
print(code_dict[code_1])

#这种是加双引号的遍历

注意最后的结果

1 python 拼接格式化打印
2 python 转成百分比的形式显示

# The '.2%' format spec multiplies by 100, keeps two decimals and appends '%'.
print('percent: ' + format(42 / 50, '.2%'))

# The formatted result is a plain str, not a number.
i = format(1 / 20, '.2%')
print(type(i))
print(i)

整个项目完整代码和运行截图

在这里插入图片描述

import  pandas  as pd

# Approach 1: read_excel with no sheet argument loads the workbook's first sheet.
# dtype=object is passed so numeric cells are not converted to floats on read.
df = pd.read_excel('data/刘老师课堂文本切分的语句3.xlsx', dtype=object)

# Weight given to each code on a row, keyed by how many codes the row carries:
# a single code counts fully, two codes split 0.7/0.3, three split 0.5/0.3/0.2.
code_weights = {1: (1,), 2: (0.7, 0.3), 3: (0.5, 0.3, 0.2)}
# Label printed for each of the three cases while iterating.
case_labels = {1: '第一种情况', 2: '第二种情况', 3: '第三种情况'}

# Weighted tally per code category (keys 1-11).
code_dict = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: 0, 9: 0, 10: 0, 11: 0}
# Every code in the order it is encountered, so the coding sequence is kept.
all_code = []
# Number of rows that carry at least one code; the denominator for the ratios.
# Equals len(df) whenever every row is coded.
all_count = 0

# iat is positional, so iterate over row positions rather than index labels.
for i in range(len(df)):
    print(i)
    code_1, code_2, code_3 = df.iat[i, 1], df.iat[i, 2], df.iat[i, 3]
    print(code_1, code_2, code_3)

    # Keep only the cells that are filled in, preserving column order.
    # NOTE(review): a row with a gap (e.g. columns 1 and 3 filled, 2 blank) is
    # weighted by the number of codes present -- confirm this is the intent.
    codes = [int(code) for code in (code_1, code_2, code_3) if not pd.isna(code)]
    if not codes:
        # Blank row: int(NaN) would raise, and the row must not dilute ratios.
        continue

    all_count += 1
    print(case_labels[len(codes)])
    for code, weight in zip(codes, code_weights[len(codes)]):
        code_dict[code] += weight
        all_code.append(code)

# Print each category with its weighted count and its share of coded rows.
for key, value in code_dict.items():
    # Guard the division so an empty sheet reports 0.00% instead of crashing.
    ratio = value / all_count if all_count else 0
    # '{:.2%}' already yields e.g. '12.34%'; print it whole instead of cutting
    # it to four characters, which mangled any share of 10% or more.
    # round() hides float noise from summing 0.7/0.3/0.2 (2.0999999999999996).
    print("类别:%-10s 次数:%-8s 比例：%s" % (key, round(value, 2), '{:.2%}'.format(ratio)))

print("列表长度为： ", len(all_code))