new333

import re
import shutil
from pathlib import Path

import pandas as pd
import sys
import os
def clean(paths):
    """Extract (name, value, line number) triples from a DataName/DataValue file.

    The file is scanned line by line. Every line containing ``DataName``
    starts a new group, and every line containing ``DataValue`` is added to
    the current group. Each kept line is prefixed with its 0-based line
    number and has its spaces replaced by commas, so splitting on ``,``
    yields ``[line_no, tag, cell, cell, ...]``.

    Within a group the first row supplies the field names. Rows from the
    third one onwards are paired with those names column by column, starting
    at column 2 (after the line number and the tag); empty cells are skipped.

    Args:
        paths: Path of the file to read. It is opened in text mode with the
            platform default encoding.

    Returns:
        A list of ``[data_name, data_value, line_no]`` lists; every item is
        a string.
    """
    with open(paths, "r") as f:
        text = f.read()

    groups = []
    current = []
    for line_no, line in enumerate(text.split("\n")):
        if "DataName" in line:
            if current:
                groups.append(current)
            # Header lines collapse double spaces first, then single spaces.
            line = f"{line_no}," + line.replace("  ", ",").replace(" ", ",")
            current = [line]
        # Tested against the (possibly rewritten) line, so a header line that
        # also mentions "DataValue" is recorded a second time as a value row.
        if "DataValue" in line:
            current.append(f"{line_no}," + line.replace(" ", ","))
    if current:
        groups.append(current)

    return_data = []
    for group in groups:
        rows = [entry.split(",") for entry in group]
        width = max(len(row) for row in rows)
        # Pad ragged rows with "" so every row can be indexed by the header's
        # column positions; the padding cells are dropped by the empty check.
        rows = [row + [""] * (width - len(row)) for row in rows]
        names = rows[0]
        # NOTE(review): rows[1], the first row after the header, is never
        # emitted. This looks like a possible off-by-one -- confirm against
        # a real input file before changing it.
        for row in rows[2:]:
            for col in range(2, width):
                if row[col]:
                    return_data.append([names[col], row[col], row[0]])
    return return_data

# Patterns for file names shaped like
# "ICES5V [A213911020-T1D1(17) ; 11_7_2021 10_31_26 PM].csv".
_DEVICE_TIME_RE = re.compile(r";(.*?)]")
_WAFER_ID_RE = re.compile(r"\[(.*?)-")
_TEST_PIPELINE_RE = re.compile(r"-(.*?)\(")


def _first_group(pattern, text):
    """Return group 1 of the first match of *pattern* in *text*, or raise ValueError."""
    match = pattern.search(text)
    if match is None:
        raise ValueError(f"{text!r} does not match {pattern.pattern!r}")
    return match.group(1)


def _quarantine(path, errordir):
    """Move *path* into *errordir* when that directory exists; report, never raise."""
    if not os.path.isdir(errordir):
        return
    try:
        shutil.move(path, errordir)
    except OSError as exc:  # shutil.Error is a subclass of OSError
        print(f"could not move {path} to {errordir}: {exc}", file=sys.stderr)


def _file_records(parent, path, filename):
    """Build one output record per value that ``clean`` extracts from *path*."""
    devices_time = (
        _first_group(_DEVICE_TIME_RE, filename).replace("_", "/", 2).replace("_", ":")
    )
    test_name = filename.split(" ")[0]
    wafer_id = _first_group(_WAFER_ID_RE, filename)
    test_pipeline = _first_group(_TEST_PIPELINE_RE, filename)

    rows = clean(paths=path)
    if not rows:
        # Nothing to emit, so the folder name is not inspected at all.
        return []

    # Raises ValueError when the folder name has fewer than seven "_" parts.
    (project_type, product, product_version, lot,
     test_item, test_node, factory) = parent.split("_")[:7]

    return [
        {
            "PATH": parent,
            "FILE_NAME": filename,
            "DATANAME": data_name,
            "DATAVALUE": data_value,
            "PROJECT_TYPE": project_type,
            "PRODUCT": product,
            "PRODUCT_VERSION": product_version,
            "LOT": lot,
            "TEST_ITEM": test_item,
            "TEST_NODE": test_node,
            "FACTORY": factory,
            "DEVICES_TIME": devices_time,
            "TEST_NAME": test_name,
            "WAFER_ID": wafer_id,
            "TEST_PIPELINE": test_pipeline,
            "LINE_NO": line_no,
        }
        for data_name, data_value, line_no in rows
    ]


def fileclean(inputdirs, outputfile, errordir):
    """Collect the values of every ``.csv`` file under *inputdirs* into one CSV.

    Each ``.csv`` file is parsed with ``clean``. Metadata is taken from the
    name of the folder that contains the file (seven ``_``-separated parts:
    project type, product, product version, lot, test item, test node,
    factory) and from the file name (test name, wafer id, test pipeline and
    device time).

    Files that are not ``.csv``, and ``.csv`` files that cannot be processed,
    are moved into *errordir* when that directory exists. Failures are
    reported on stderr and the run continues.

    Args:
        inputdirs: Root directory to walk recursively.
        outputfile: Path of the CSV to write. Nothing is written when no
            record was collected.
        errordir: Directory receiving rejected files. It is skipped by the
            walk when it lies inside *inputdirs*.
    """
    error_root = os.path.normcase(os.path.abspath(errordir))
    datas = []
    for folderName, subfolders, filenames in os.walk(inputdirs):
        # Prune the error directory in place so already-rejected files are
        # not walked again and moved onto themselves.
        subfolders[:] = [
            name for name in subfolders
            if os.path.normcase(os.path.abspath(os.path.join(folderName, name)))
            != error_root
        ]
        # basename/normpath handles both separators, unlike split("\\").
        parent = os.path.basename(os.path.normpath(folderName))
        for filename in filenames:
            path = os.path.join(folderName, filename)
            if not filename.endswith(".csv"):
                _quarantine(path, errordir)
                continue
            try:
                datas.extend(_file_records(parent, path, filename))
            except Exception as exc:
                # Best effort: one bad file must not stop the whole run.
                print(f"skipping {path}: {exc!r}", file=sys.stderr)
                _quarantine(path, errordir)

    # Written once after the walk instead of once per visited directory.
    if datas:
        table = pd.DataFrame(datas)
        # Parse the device time strings into datetimes.
        table['DEVICES_TIME'] = pd.to_datetime(table['DEVICES_TIME'])
        table.to_csv(outputfile, index=False)
if __name__ == '__main__':
    # Usage: python clean_data.py <input folder> <output file.csv> <error folder>
    # Example call: fileclean("data", "data2.csv", "error")
    # The third argument is a directory: fileclean only moves rejected files
    # there when os.path.isdir() is true for it.
    argv = sys.argv
    print(argv)
    if len(argv) < 4:
        # Exit with a usage line instead of an IndexError traceback.
        sys.exit(f"usage: {argv[0]} INPUT_DIR OUTPUT_CSV ERROR_DIR")
    fileclean(argv[1], argv[2], argv[3])

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

qq_32888845

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值