excel处理运算

import openpyxl
from openpyxl.cell.cell import MergedCell
import pandas as pd
from datetime import datetime
import sys

def parse_old_excel():
    # 先做成格式化数据
    file = openpyxl.load_workbook(file_name)  # 读文件

    sheet_name = file.sheetnames
    ws = file[sheet_name[0]]

    # 获取sheet的最大行数和列数
    rows = ws.max_row
    cols = ws.max_column

    data_list = [[0 for col in range(cols - 1)] for row in range(rows + 1)]
    for c in range(1, cols):
        last = None
        for r in range(title_line, rows + 1):
            cell = ws.cell(r, c)
            val = cell.value
            if val is None:
                # 检测是否是合并单元格
                if isinstance(cell, MergedCell):
                    val = last
                else:
                    val = 0.0
            else:
                last = val
            data_list[r][c - 1] = val
    return data_list


# 指定标题行
title_line = 2
# 指定要读取的excel文件
file_name = '原始1.xlsx'
# 指定要输出的excel文件
out_file_name = '输出.csv'
data_list = parse_old_excel()
# for line in data_list:
#     print(line)
month = data_list[title_line][6:]
print(month)

mon_dict = {}
for mon in month:
    left = str(mon)[:4]
    if left not in mon_dict:
        mon_dict[left] = [mon]
    else:
        mon_dict[left].append(mon)
print(mon_dict)
df = pd.DataFrame(data_list[title_line + 1:], columns=data_list[title_line])
df.to_excel('格式化结果.xlsx')
# 获取当前时间
now = int(datetime.now().strftime('%Y%m'))

# 逐月处理
# for mon in month:
#     # 根据PID和Item分组,然后选取月
#     two_level_index_series = df.set_index(['PID', 'Item'])[mon, 202108]
#     # 列转行
#     new_df = two_level_index_series.unstack()
#     new_df = new_df.reset_index()
#     print(new_df)
#     break

file_n = file_name.split('/')[-1]
# nan的填充为0,防止运算出错
df = df.fillna(0)
# 根据型号和PID分组
df_group = df.groupby(['品类', '型号', 'PID'])

out_data = []
head = ['file_name', 'Creat time', 'year_month', 'product_id', 'requirement', 'production_plan',
        'actual_production', 'cumulative_requirement', 'cumulative_production', 'cumulative_gqp']
# print(head)
for key in df_group.groups:
    data = df_group.get_group(key)
    # print(key)
    # print(type(key))
    key = list(key)

    for mon_left in mon_dict:
        # 统计结果
        count = 0
        cum_req = 0
        cum_pro = 0
        month = mon_dict[mon_left]
        # 按时间分别取三行对应的数据
        for mon in month:
            # print(data)
            month_val = data[mon].values
            # print(mon, month_val)
            try:
                int(mon)
            except Exception as e:
                print(e)
                continue
            try:
                mv0 = int(month_val[0])
            except Exception:
                mv0 = 0
            try:
                mv1 = int(month_val[1])
            except Exception:
                mv1 = 0
            try:
                mv2 = int(month_val[2])
            except Exception:
                mv2 = 0
            if int(mon) <= now:
                # 过去的算法
                line = [file_n, '', mon, key[2], mv0, mv1, mv2]
                count += mv1

                cum_req += mv0
                cum_pro += mv2
                line.extend([cum_req, cum_pro, cum_req - cum_pro])
            else:
                # 未来的算法
                line = [file_n, '', mon, key[2], mv0, mv1, mv2]
                count += mv1

                cum_req += mv0
                cum_pro = count
                line.extend([cum_req, cum_pro, cum_pro - cum_req])
            print(line)
            out_data.append(line)

# print(now)
to_df = pd.DataFrame(out_data)
to_df.to_csv(out_file_name, header=head, encoding='gbk', index=False)
print('over')
# df.to_excel('格式化结果.xlsx', index=None)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值