import pandas as pd
df = pd.read_excel("cnbirdegree.xls", index_col=None, header=None)
Order = ''
Family = ''
Order_EN = ''
Family_EN = ''
birds = []
for i in range(0, len(df)):
item = df.iloc[i]
if pd.isna(item[2]) and pd.isna(item[3]):
if item[0].find("目") != -1:
Order = item[0].strip().replace("#","")
Order_EN = item[1].strip()
if item[0].find("科") != -1:
Family = item[0].strip().replace("#","")
Family_EN = item[1].strip()
else:
levle = item[3] if pd.isna(item[2]) else item[2]
bird = {'Species': item[0].strip(), 'Order': Order, 'Family': Family, 'Order_EN': Order_EN, 'Family_EN': Family_EN, 'levle': levle}
#bird = [item[0].strip(), Order, Family, Order_EN, Family_EN, levle]
birds.append(bird)
birds = pd.DataFrame(birds)
birds.to_excel('data.xlsx', index=False)
实例2:
import pandas as pd
'''
read_excel主要参数说明
path 表明文件系统位置的字符串、URL 或文件型对象
header 用作列名的行号,默认是0(第一行),如果没有列名的话,应该为None
index_col 用作结果中行索引的列号或列名,可以是一个单一的名称/数字也可以是一个分层索引
names 结果的列名列表,和 header=None 一起用
skiprows 从文件开头处起,需要跳过的行数或行号列表
na_values 需要用NA替换的值序列
'''
#方法1
data = pd.read_excel("abun2023.xls", index_col=None, header=None, skiprows=1)
birds = {}
df = data.values.tolist()
for temp in df:
item = temp[0]
bird = {}
bird["name"] = item.split(';')[1]
bird["number"] = item.split(';')[2]
bird["frequence"] = 1
if len(birds) == 0:
birds[bird["name"]] = bird
else:
if birds.get(bird["name"]) is None:
birds[bird["name"]] = bird
else:
birds[bird["name"]]["number"] = int(birds[bird["name"]]["number"]) + int(bird["number"])
birds[bird["name"]]["frequence"] = int(birds[bird["name"]]["frequence"]) + 1
#方法2
# data = pd.read_excel("abun2023.xls", index_col=None, header=None, skiprows=1)
# birds = {}
# for index, row in data.iteritems():
# for item in row.tolist():
# bird = {}
# bird["name"] = item.split(';')[1]
# bird["number"] = item.split(';')[2]
# bird["frequence"] = 1
# if len(birds) == 0:
# birds[bird["name"]] = bird
# else:
# if birds.get(bird["name"]) is None:
# birds[bird["name"]] = bird
# else:
# birds[bird["name"]]["number"] = int(birds[bird["name"]]["number"]) + int(bird["number"])
# birds[bird["name"]]["frequence"] = int(birds[bird["name"]]["frequence"]) + 1
# print(birds)
# print(len(birds))
# birds = pd.DataFrame(birds.values()) #将去所有的值
# print(birds)
# birds.to_excel("2025.xlsx",)