import openpyxl
from openpyxl.cell.cell import MergedCell
import pandas as pd
from datetime import datetime
import sys
def parse_old_excel():
# 先做成格式化数据
file = openpyxl.load_workbook(file_name) # 读文件
sheet_name = file.sheetnames
ws = file[sheet_name[0]]
# 获取sheet的最大行数和列数
rows = ws.max_row
cols = ws.max_column
data_list = [[0 for col in range(cols - 1)] for row in range(rows + 1)]
for c in range(1, cols):
last = None
for r in range(title_line, rows + 1):
cell = ws.cell(r, c)
val = cell.value
if val is None:
# 检测是否是合并单元格
if isinstance(cell, MergedCell):
val = last
else:
val = 0.0
else:
last = val
data_list[r][c - 1] = val
return data_list
# 指定标题行
title_line = 2
# 指定要读取的excel文件
file_name = '原始1.xlsx'
# 指定要输出的excel文件
out_file_name = '输出.csv'
data_list = parse_old_excel()
# for line in data_list:
# print(line)
month = data_list[title_line][6:]
print(month)
mon_dict = {}
for mon in month:
left = str(mon)[:4]
if left not in mon_dict:
mon_dict[left] = [mon]
else:
mon_dict[left].append(mon)
print(mon_dict)
df = pd.DataFrame(data_list[title_line + 1:], columns=data_list[title_line])
df.to_excel('格式化结果.xlsx')
# 获取当前时间
now = int(datetime.now().strftime('%Y%m'))
# 逐月处理
# for mon in month:
# # 根据PID和Item分组,然后选取月
# two_level_index_series = df.set_index(['PID', 'Item'])[mon, 202108]
# # 列转行
# new_df = two_level_index_series.unstack()
# new_df = new_df.reset_index()
# print(new_df)
# break
file_n = file_name.split('/')[-1]
# nan的填充为0,防止运算出错
df = df.fillna(0)
# 根据型号和PID分组
df_group = df.groupby(['品类', '型号', 'PID'])
out_data = []
head = ['file_name', 'Creat time', 'year_month', 'product_id', 'requirement', 'production_plan',
'actual_production', 'cumulative_requirement', 'cumulative_production', 'cumulative_gqp']
# print(head)
for key in df_group.groups:
data = df_group.get_group(key)
# print(key)
# print(type(key))
key = list(key)
for mon_left in mon_dict:
# 统计结果
count = 0
cum_req = 0
cum_pro = 0
month = mon_dict[mon_left]
# 按时间分别取三行对应的数据
for mon in month:
# print(data)
month_val = data[mon].values
# print(mon, month_val)
try:
int(mon)
except Exception as e:
print(e)
continue
try:
mv0 = int(month_val[0])
except Exception:
mv0 = 0
try:
mv1 = int(month_val[1])
except Exception:
mv1 = 0
try:
mv2 = int(month_val[2])
except Exception:
mv2 = 0
if int(mon) <= now:
# 过去的算法
line = [file_n, '', mon, key[2], mv0, mv1, mv2]
count += mv1
cum_req += mv0
cum_pro += mv2
line.extend([cum_req, cum_pro, cum_req - cum_pro])
else:
# 未来的算法
line = [file_n, '', mon, key[2], mv0, mv1, mv2]
count += mv1
cum_req += mv0
cum_pro = count
line.extend([cum_req, cum_pro, cum_pro - cum_req])
print(line)
out_data.append(line)
# print(now)
to_df = pd.DataFrame(out_data)
to_df.to_csv(out_file_name, header=head, encoding='gbk', index=False)
print('over')
# df.to_excel('格式化结果.xlsx', index=None)
excel处理运算
最新推荐文章于 2024-11-18 12:05:16 发布