读取时增加对 x 和 y 坐标重复情况的处理：去除重复项

小小权

已于 2022-04-11 16:25:15 修改

阅读量137

点赞数

文章标签： python excel

于 2022-04-06 11:48:52 首次发布

本文链接：https://blog.csdn.net/u012366798/article/details/123986472

版权

import pandas as pd

# Excel workbook to read and process.
read_file = 'R158745W03-Q2341233(1).xlsx'

# Names of the workbooks this script writes.
color_out_file = 'Color_V2.xlsx'
devices_out_file = 'Devices_V2.xlsx'
fixed_out_file = 'CP_Devices_Fixed_Item_V2.xlsx'
other_out_file = 'CP_Devices_Other_Item_V2.xlsx'
parameter_out_file = 'Parameter_V2.xlsx'

# Initial values: every header field starts out as an empty string and is
# overwritten when the matching header row is found in the input sheet.
program = lot_id = temperature = date = computer = ''
"""
原始txt处理成标准行列格式
"""
data_line = []  # never referenced again in the visible script

# Load the raw sheet and split it into three parts by row index:
#   idx 0-5   header rows carrying Program / Temperature / LotID / Date / Computer
#   idx 7     the column labels of the measurement table
#   idx 8+    table rows (idx 8-11 kept as-is, idx >= 12 de-duplicated)
df = pd.read_excel(read_file)
read_data = []
read_column = []
for idx, rows in df.iterrows():
    # Index the row positionally through its ndarray. ``rows[n]`` on a Series
    # with non-integer labels only works through a deprecated positional
    # fallback of ``Series.__getitem__``.
    cells = rows.values
    if idx == 7:
        read_column = cells
    elif 7 < idx < 12:
        # Kept unconditionally; the first three of these rows are read later
        # as lower limits, upper limits and units.
        read_data.append(cells)
    elif idx >= 12:
        # A row whose values in columns 3 and 4 both equal those of an
        # already stored row replaces that stored row: the old one is removed
        # and the new one is appended at the end.
        for i, line in enumerate(read_data):
            if cells[3] == line[3] and cells[4] == line[4]:
                print(cells[3], line[3], cells[4], line[4], line)
                # Safe although we are iterating: we leave the loop at once.
                del read_data[i]
                break
        read_data.append(cells)
    elif idx > 5:
        # Only idx == 6 reaches this branch; that row is ignored.
        continue
    else:
        # Header rows. Empty cells come back as NaN (a float), on which the
        # ``in`` test would raise TypeError, so only strings are inspected.
        label_b = cells[1]
        label_c = cells[2]
        c_is_text = isinstance(label_c, str)
        b_is_text = isinstance(label_b, str)
        if c_is_text and 'Program' in label_c:
            program = cells[3]
        elif c_is_text and 'Temperature' in label_c:
            temperature = cells[3]
        elif b_is_text and 'LotID:' in label_b:
            lot_id = cells[2]
        elif b_is_text and 'Date:' in label_b:
            date = cells[2]
        elif b_is_text and 'Computer:' in label_b:
            computer = cells[2]

print(read_column)
read_data_tmp = []  # not referenced anywhere else in the visible script

# Debug output: dump every row that survived the read loop.
for kept_row in read_data:
    print(kept_row)

# Turn the collected rows into a DataFrame labelled with the header row.
df = pd.DataFrame(read_data, columns=read_column)

# Rows 0, 1 and 2, from column 5 onward, provide the lower limits, the upper
# limits and the units; the labels of those same columns are the test names.
limit_l, limit_h, units = (df.iloc[row_no, 5:].values for row_no in range(3))
test_nums = df.columns[5:]

# Wafer id: first nine characters of the input file name, with every
# lowercase 'w' turned into '#'.
# NOTE(review): for the configured name this yields 'R158745W0' -- the 'W' is
# uppercase so nothing is replaced, and the slice stops inside the wafer
# number. Confirm the intended wafer-id format before relying on this value.
name_prefix = read_file[:9]
wafer_id = name_prefix.replace('w', '#')

"""
处理parameter
"""
# One parameter record per test column. Field order matches the columns used
# when the parameter workbook is written: PROJECT_TYPE, PART_NO, PROCESS,
# PARAMETER, LIMIT_L, LIMIT_H, UNIT, TEST_PROGRAM, PARAMETER_ID,
# DISPLAY_UNIT, IS_CHART.
to_parameter_datas = []
PARAMETER_ID = 1  # running id, starts at 1 and grows by one per test column
for test_name, low, high, unit in zip(test_nums, limit_l, limit_h, units):
    to_parameter_datas.append(
        ['CP', lot_id, 'CP1', test_name, low, high, unit,
         program, PARAMETER_ID, '', 1]
    )
    PARAMETER_ID += 1

"""
处理color
"""
# One colour record per test column. The column label is split on '(':
# the text before the first '(' becomes BIN_NAME, and the piece after it, with
# trailing ')' characters stripped, becomes SOFT_BIN. A label without any '('
# raises IndexError here.
to_color_datas = []
for label in test_nums:
    pieces = label.split('(')
    bin_name = pieces[0]
    soft_bin = pieces[1].rstrip(')')
    to_color_datas.append(
        [soft_bin, bin_name, 'CP', lot_id, 'CP', program, '', '']
    )

"""
处理devices
"""
# One device record per data row. Field order matches the columns used when
# the devices workbook is written: Wafer_id, Locate_X, Locate_Y, Retest,
# T_Time, Soft_Bin, Hard_Bin, lot_id, part_id, site_num, Program, Ending_Time.
to_devices_datas = []
# Rows 0-3 of the table are not device data (the first three are the limit
# and unit rows), so iteration starts at row 4. ``df`` is built from a plain
# list, so its row labels equal the row positions.
for _, row in df.iloc[4:].iterrows():
    # ``.iloc`` makes the positional access explicit: ``row[n]`` on a
    # label-indexed Series depends on a deprecated positional fallback, and
    # would turn into a label lookup if the header cells were integers.
    soft_bin = row.iloc[1]
    t_time = row.iloc[2]
    locate_x = row.iloc[3]
    locate_y = row.iloc[4]
    # Soft_Bin is written to both the Soft_Bin and Hard_Bin fields, and
    # lot_id to both lot_id and part_id, exactly as before.
    to_devices_datas.append(
        [wafer_id, locate_x, locate_y, 'P1', t_time, soft_bin, soft_bin,
         lot_id, lot_id, '', program, date]
    )

"""
处理fixed
"""
# The single "fixed item" record. Each field is annotated with the column it
# lands in when the fixed-item workbook is written.
fixed_line = [
    read_file,    # File_name
    date,         # Ending_time
    wafer_id,     # Wafer_id
    lot_id,       # lot_id
    lot_id,       # C_lot
    lot_id,       # Part_no
    'CP',         # Records
    '',           # Insert_num
    '',           # Update_num
    computer,     # Machine_Name
    program,      # Program
    '',           # Step
    temperature,  # Tempreature (spelling as in the output header)
    'P1',         # FLOW
    'NULL',       # PARA_RECORDS
    'NULL',       # PARA_INSERT
    'NULL',       # PARA_UPDATE
    'F3',         # VENDOR
    '',           # RAW_WAFERID
    '',           # PASS_DIE
]
to_fixed_datas = [fixed_line]

"""
处理other
"""
# One record per (data row, test column) pair. Field order matches the
# columns used when the "other item" workbook is written: wafer_id, Locate_X,
# Locate_Y, Retest, Test_Name, Value, Program, Lot_id, Ending_time.
to_other_datas = []
# Skip rows 0-3 (the limit/unit rows and the row after them); ``df`` is built
# from a plain list, so its row labels equal the row positions.
for _, row in df.iloc[4:].iterrows():
    # Explicit positional access; ``row[3]`` on a label-indexed Series relies
    # on a deprecated positional fallback.
    locate_x = row.iloc[3]
    locate_y = row.iloc[4]
    # The measured value is looked up by column label.
    to_other_datas.extend(
        [wafer_id, locate_x, locate_y, 'P1', test_name, row[test_name],
         program, lot_id, date]
        for test_name in test_nums
    )

"""
输出到文件中
"""
# Every output workbook is described by (target file, collected rows, column
# headers); the workbooks are written in this order.
export_jobs = (
    (color_out_file, to_color_datas,
     ['SOFT_BIN', 'BIN_NAME', 'PROJECT_TYPE', 'PART_NO', 'PROCESS',
      'TEST_PROGRAM', 'COLOR', 'CREATE_DATE']),
    (devices_out_file, to_devices_datas,
     ['Wafer_id', 'Locate_X', 'Locate_Y', 'Retest', 'T_Time', 'Soft_Bin',
      'Hard_Bin', 'lot_id', 'part_id', 'site_num', 'Program', 'Ending_Time']),
    (fixed_out_file, to_fixed_datas,
     ['File_name', 'Ending_time', 'Wafer_id', 'lot_id', 'C_lot', 'Part_no',
      'Records', 'Insert_num', 'Update_num', 'Machine_Name', 'Program',
      'Step', 'Tempreature', 'FLOW', 'PARA_RECORDS', 'PARA_INSERT',
      'PARA_UPDATE', 'VENDOR', 'RAW_WAFERID', 'PASS_DIE']),
    (other_out_file, to_other_datas,
     ['wafer_id', 'Locate_X', 'Locate_Y', 'Retest', 'Test_Name', 'Value',
      'Program', 'Lot_id', 'Ending_time']),
    (parameter_out_file, to_parameter_datas,
     ['PROJECT_TYPE', 'PART_NO', 'PROCESS', 'PARAMETER', 'LIMIT_L',
      'LIMIT_H', 'UNIT', 'TEST_PROGRAM', 'PARAMETER_ID', 'DISPLAY_UNIT',
      'IS_CHART']),
)

for out_path, out_rows, out_columns in export_jobs:
    # ``index`` is documented as a bool; False keeps the row index out of the
    # sheet, which is what the previous ``index=None`` achieved by being falsy.
    pd.DataFrame(out_rows, columns=out_columns).to_excel(out_path, index=False)