描述:当某个excel文件过大时,需要将其拆分成多个文件来保存。
数据格式如下:(假如下图中数据非常多)
示例代码1:
import openpyxl
# 读取数据
load_data = openpyxl.load_workbook("data/体检表.xlsx")
# 获取sheet0中数据
table_data = load_data[load_data.sheetnames[0]]
# print(table_data)
# 循环读取每行数据
row_data_list = []
for row in table_data.iter_rows():
# 判断行为空跳过循环
if not row:
continue
# 读取每一行数据
data = [row[i].value for i in range(len(row))]
# print(data)
row_data_list.append(data)
print(row_data_list)
# 将数据拆分为多个子数据列表,此时默认每个excel表格中3条数据
sub_data_list = []
sub_datas_list = []
for i in range(len(row_data_list)):
if i % 3 == 0 and i != 0:
sub_datas_list.append(sub_data_list)
sub_data_list = []
sub_data_list.append(row_data_list[i])
sub_datas_list.append(sub_data_list)
print(sub_datas_list)
# 将拆分的子列表分别保存在excel文件中
num = 1
for sub_data in sub_datas_list:
# 创建新的excel对象
new_excel = openpyxl.Workbook()
sheet = new_excel.active
for i in range(1, len(sub_data) + 1):
for j in range(1, 6):
sheet.cell(row=i, column=j, value=sub_data[i-1][j-1])
new_excel.save(f'data/体检表_{num}.xlsx')
num += 1
运行结果:
上述结果有时候不是我们想要的结果,从第个excel表格中开始就没有表头了,继续优化代码,如下所示。
示例代码2:
import openpyxl
# 读取数据
load_data = openpyxl.load_workbook("data/体检表.xlsx")
# 获取sheet0中数据
table_data = load_data[load_data.sheetnames[0]]
# print(table_data)
# 循环读取每行数据
row_data_list = []
count = 0
for row in table_data.iter_rows():
count += 1
if count == 1:
# 保存表头
title = [row[i].value for i in range(len(row))]
continue
# 判断行为空跳过循环
if not row:
continue
# 读取每一行数据
data = [row[i].value for i in range(len(row))]
# print(data)
row_data_list.append(data)
# print(row_data_list)
# 将数据拆分为多个子数据列表,此时默认每个excel表格中3条数据
sub_data_list = []
sub_datas_list = []
for i in range(len(row_data_list)):
if i % 3 == 0 and i != 0:
sub_datas_list.append(sub_data_list)
sub_data_list = []
sub_data_list.append(row_data_list[i])
sub_datas_list.append(sub_data_list)
# print(sub_datas_list)
# 将表头放到每个子列表中的第一个位置
sub_datas_list_title = []
for sub_data_list in sub_datas_list:
sub_data_list.insert(0, title)
sub_datas_list_title.append(sub_data_list)
# print(sub_datas_list_title)
# 将拆分的子列表分别保存在excel文件中
num = 1
for sub_data in sub_datas_list_title:
# 创建新的excel对象
new_excel = openpyxl.Workbook()
sheet = new_excel.active
for i in range(1, len(sub_data) + 1):
for j in range(1, 6):
sheet.cell(row=i, column=j, value=sub_data[i-1][j-1])
new_excel.save(f'data/体检表_{num}.xlsx')
num += 1
运行结果:
完善上述代码,封装成函数。
示例代码3:
import openpyxl
def load_data(file_path):
# 读取数据
load_data = openpyxl.load_workbook(file_path)
# 获取sheet0中数据
table_data = load_data[load_data.sheetnames[0]]
# print(table_data)
# 循环读取每行数据
row_data_list = []
count = 0
for row in table_data.iter_rows():
count += 1
if count == 1:
# 保存表头
title = [row[i].value for i in range(len(row))]
continue
# 判断行为空跳过循环
if not row:
continue
# 读取每一行数据
data = [row[i].value for i in range(len(row))]
# print(data)
row_data_list.append(data)
# print(row_data_list)
return title, row_data_list
def split_data(title, row_data_list, amount):
# 将数据拆分为多个子数据列表
sub_data_list = []
sub_datas_list = []
for i in range(len(row_data_list)):
if i % amount == 0 and i != 0:
sub_datas_list.append(sub_data_list)
sub_data_list = []
sub_data_list.append(row_data_list[i])
sub_datas_list.append(sub_data_list)
# print(sub_datas_list)
# 将表头放到每个子列表中的第一个位置
sub_datas_list_title = []
for sub_data_list in sub_datas_list:
sub_data_list.insert(0, title)
sub_datas_list_title.append(sub_data_list)
print(sub_datas_list_title)
return sub_datas_list_title
def save_data(sub_datas_list_title):
# 将拆分的子列表分别保存在excel文件中
# 创建新的excel对象
num = 1
for sub_data in sub_datas_list_title:
new_excel = openpyxl.Workbook() # 注意:这个要放在循环内,每循环一次都要新建,要不然下一次循环是覆盖上次文档操作
sheet = new_excel.active
print(sub_data)
for i in range(1, len(sub_data) + 1):
for j in range(1, 6):
sheet.cell(row=i, column=j, value=sub_data[i-1][j-1])
new_excel.save(f'data/体检表{num}.xlsx')
num += 1
if __name__ == '__main__':
file_path = "data/体检表.xlsx"
# 读取数据
title, row_data_list = load_data(file_path)
# 拆分数据
amount = 6 # 每个excel表格中放几条数据
sub_datas_list_title = split_data(title, row_data_list, amount)
# 保存数据到文件中
save_data(sub_datas_list_title)