excel批量处理

最新推荐文章于 2024-11-15 10:17:44 发布
小小权
最新推荐文章于 2024-11-15 10:17:44 发布
阅读量985
点赞数
文章标签： python excel
本文链接：https://blog.csdn.net/u012366798/article/details/121932446
版权
# coding=utf-8
import pandas as pd
import os
import csv
import shutil


def sel_line(res_data):
    res_header = ['file', 'ending_time', 'part_id', 'lot_id', 'part_no', 'program', 'file_group', 'tempreature',
                  'retest', 'nt', 'value', 't_time', 'soft_bin', 'lots']
    df = pd.DataFrame(res_data, columns=res_header)
    df_group = df.groupby('part_id')
    for key in df_group.groups:
        pro_data = df_group.get_group(key)
        for idx, row in pro_data.iterrows():
            out_line = row
            line_3 = [out_line['file'], out_line['ending_time'], out_line['part_id'], out_line['lot_id'],
                      out_line['lot_id'],
                      out_line['part_no'], 0, 0, '', '', out_line['program'], out_line['file_group'],
                      out_line['tempreature'],
                      out_line['lots'], 0, 0, '', 'TF', 0, 0]

            line_1 = [out_line['part_id'], 0, 0, out_line['retest'], out_line['nt'], out_line['value'],
                      out_line['program'],
                      out_line['lot_id'], out_line['ending_time']]
            line_2 = [out_line['part_id'], 0, 0, out_line['retest'], out_line['t_time'], out_line['soft_bin'],
                      out_line['soft_bin'], out_line['lot_id'], '', 1, out_line['program']]

            prefix_dict_1[out_line['file_group']].append(line_1)
            if line_2 not in prefix_dict_2[out_line['file_group']]:
                prefix_dict_2[out_line['file_group']].append(line_2)
            all_ft.append(line_3)


def parse_excel(df, xlsx_file, file, program, lot_id, ending_time):
    print('当前文件：', file)
    file_group = file.replace('__', '_').split('_')[1]
    if file_group not in prefix_dict_1:
        prefix_dict_1[file_group] = []
        prefix_dict_2[file_group] = []
    lots = lot_id.split('_')
    if len(lots) < 2:
        print('\t', '错误跳过')
        shutil.move(xlsx_file, os.path.join(error_dir, file))
        return
    if 'P1' == lots[-1]:
        retest = 0
    elif 'R1' == lots[-1] or '1' == lots[-1]:
        retest = 1
    elif 'R2' == lots[-1]:
        retest = 2
    elif 'R3' == lots[-1]:
        retest = 3
    else:
        print('\t', '错误跳过')
        shutil.move(xlsx_file, os.path.join(error_dir, file))
        return
    try:
        part_no = program.split('\\')[-2].split('_')[0]
    except Exception:
        print('\t', '错误跳过')
        shutil.move(xlsx_file, os.path.join(error_dir, file))
        return
    if 'FT1' == file_group:
        tempreature = 150
    elif 'FT2' == file_group or 'FT3' == file_group:
        tempreature = 25
    else:
        tempreature = 0

    res_data = []

    # 当part id相同的时候,取soft bin为1的,当soft bin都为1或者都不为1时,取t time最大的
    df_group = df.groupby('PART_ID')
    for key in df_group.groups:
        pro_data = df_group.get_group(key)
        now_row = None
        for idx, row in pro_data.iterrows():
            if key is None or len(key.strip()) == 0:
                continue
            if now_row is None:
                now_row = row
            else:
                sb = row['soft_bin']
                rt = row['t_time']
                ols = now_row['soft_bin']
                olt = now_row['t_time']
                if sb == 1:
                    if ols == sb:
                        if rt > olt:
                            now_row = row
                    else:
                        now_row = row
        if now_row is None:
            continue
        for x in range(len(title)):
            # print(title)
            nt = title[x]
            line = [file, ending_time, key, lots[0], part_no, program, file_group, tempreature, retest, nt,
                    now_row[nt],
                    now_row['T_TIME'], now_row['SOFT_BIN'], lots[-1]]
            res_data.append(line)
    sel_line(res_data)

    for idx, rows in df.iterrows():
        part_id = rows['PART_ID']
        passfg = rows['PASSFG']
        # print(passfg)

        test_num = rows['TEST_NUM']
        if idx < 1:
            global parameter_id
            if parameter_id >= 50:
                is_chart = 0
            else:
                is_chart = 1
            for x in range(len(title)):
                # print(title)
                nt = title[x]
                module_line = ['Module', part_no, file_group, nt, limit_l[x], limit_u[x],
                               unit[x], test_num, program, parameter_id, unit[x], is_chart]
                parameter_id += 1
                # print(module_line)
                module_excel.append(module_line)


# 初始化,start_line 为数据开始行
start_line = 'SITE_NUM'
# 列头
header = ['File_name', 'Ending_time', 'Wafer_id', 'lot_id', 'C_lot', 'Part_no', 'Records', 'Insert_num',
          'Update_num', 'Machine_Name', 'Program', 'Step', 'Tempreature', 'flow', 'PARA_RECORDS', 'PARA_INSERT',
          'PARA_UPDATE',
          'VENDOR', 'RAW_WAFERID', 'wafer_id']
prefix_header_1 = ['Wafer_id', 'Locate_X', 'Locate_Y', 'Retest', 'Test_Name', 'Value', 'Program', 'Lot_id',
                   'Ending_time']
prefix_header_2 = ['Wafer_id', 'Locate_X', 'Locate_Y', 'Retest', 'T_Time', 'Soft_Bin', 'Hard_Bin', 'Lot_id', 'part_id',
                   'site_num', 'Program']

new_header = ['列1', '列2', '列3', '列4', '列5', '列6', '列7', '列8']
module_header = ['PROJECT_TYPE', 'PART_NO', 'PROCESS', 'PARAMETER',
                 'LIMIT_L', 'LIMIT_U', 'UNIT', 'TEST_NUM', 'TEST_PROGRAM', 'PARAMETER_ID', 'DISPLAY_UNIT', 'IS_CHART']
new_excel = []
module_excel = []

# 指定输出的文件夹
out_file_dir = 'out'
# 指定ft输出总文件名
out_file_name = 'Module_FT_Devices_Fixed_Item.csv'
# 错误的路径
error_dir = 'error'
# 指定要读取excel的目录
file_dir = r'../12_24/test'
files = os.listdir(file_dir)
prefix_dict_1 = {}
prefix_dict_2 = {}
all_ft = []
for file in files:
    if '.csv' not in file or '~$' in file or 'result' in file:
        continue
    xlsx_file = os.path.join(file_dir, file)
    print('当前处理', xlsx_file)
    read_data = []
    head = None
    lot_id = None
    program = ''
    ending_time = ''
    file_group = file.replace('__', '_').split('_')[1]

    parameter_id = 1
    with open(xlsx_file, encoding='gbk') as f:
        row = csv.reader(f, delimiter=',')
        for idx, r in enumerate(row):
            if len(r) <= 0:
                continue
            if head is not None:
                read_data.append(r)
            first = r[0]
            if 'Program' in first:
                program = first.lstrip('Program:')
            elif 'Ending Time' in first:
                ending_time = first.lstrip('Ending Time:')
            elif 'Lot Id' in first:
                lot_id = first.split(':')[-1].strip()
            elif start_line in first:
                head = r
            elif 'SBin' in first:
                l1 = first.split(']')[0].replace('SBin[', '')
                if 'Fail_Default' in first:
                    l2 = 'Fail_Default'
                elif 'Pass_Default' in first:
                    l2 = 'Pass_Default'
                else:
                    l2 = first.split()[0].split(']')[-1].replace('__AllFail', '')
                try:
                    part_no = program.split('\\')[-2].split('_')[0]
                except Exception:
                    print('\t', '错误跳过')
                    shutil.move(xlsx_file, os.path.join(error_dir, file))
                    exit(0)
                new_excel.append([l1, l2, file[0], part_no, file_group, program, '', ''])
    if lot_id is None:
        print('\t', '错误跳过')
        shutil.move(xlsx_file, os.path.join(error_dir, file))
        continue
    print('\t', '当前开始行：', head)
    df = pd.DataFrame(read_data, columns=head, dtype=object)
    title = list(df.columns.values[6:])
    tmp = []
    for x in new_excel:
        if x[1] in title:
            tmp.append(x)
    new_excel = tmp
    # print(title)
    unit = df.iloc[0, 6:].values
    # print(unit)
    limit_l = df.iloc[1, 6:].values
    # print(limit_l)
    limit_u = df.iloc[2, 6:].values
    # print(limit_u)
    try:
        parse_excel(df, xlsx_file, file, program, lot_id, ending_time)
    except Exception:
        print('\t', '异常跳过')
        shutil.move(xlsx_file, os.path.join(error_dir, file))

to_df = pd.DataFrame(all_ft)
to_df.to_csv(os.path.join(out_file_dir, out_file_name),
             header=header, encoding='gbk', index=False)

# to_df = pd.DataFrame(new_excel)
# to_df.to_csv(os.path.join(out_file_dir, 'GLOBAL_MAP_COLOR_V2.csv'),
#              header=new_header, encoding='gbk', index=False)

to_df = pd.DataFrame(module_excel)
to_df.to_csv(os.path.join(out_file_dir, 'Module_run.csv'),
             header=module_header, encoding='gbk', index=False)

for key in prefix_dict_1:
    to_df = pd.DataFrame(prefix_dict_1[key])
    to_df.to_csv(os.path.join(out_file_dir, 'Module_' + key + '_Devices_Other_Items.csv'),
                 header=prefix_header_1, encoding='gbk', index=False)

for key in prefix_dict_2:
    to_df = pd.DataFrame(prefix_dict_2[key])
    to_df.to_csv(os.path.join(out_file_dir, 'Module_' + key + '_Devices.csv'),
                 header=prefix_header_2, encoding='gbk', index=False)