# Data processing utilities (for personal use)

import json
import shutil
import os



# Format the images to predict into the JSON request layout.
def load_test(input_dir, label=True, out_json='out_json.json'):
    """Group image files by defect id and write them out as a JSON file.

    A file's defect id is its name with the last ``_``-separated component
    removed (``2_1_1.jpg`` -> ``2_1``); a name without ``_`` is its own id.

    Args:
        input_dir: Directory to scan.
        label: When true, ``input_dir`` is expected to hold one sub-directory
            per class; every non-hidden sub-directory is scanned on its own
            (in sorted order) and the class name is not written to the
            output. When false, the entries of ``input_dir`` itself are
            grouped and nothing is filtered out.
        out_json: Path of the JSON file to write (UTF-8, 4-space indent).

    Returns:
        None. The result is written to ``out_json`` with this layout::

            {
                "REF_WAFER_ID": "<fixed id hard-coded below>",
                "IMAGE_INFO": [
                    {"REF_DEFECT_ID": "2_1",
                     "IMAGE_PATH": ["2_1_1.jpg", "2_1_2.jpg"]},
                    ...
                ]
            }
    """
    if label:
        # Sorted so the entry order in the output does not depend on the
        # filesystem's directory listing order.
        scan_dirs = [
            os.path.join(input_dir, name)
            for name in sorted(os.listdir(input_dir))
            if not name.startswith('.')
            and os.path.isdir(os.path.join(input_dir, name))
        ]
    else:
        scan_dirs = [input_dir]

    image_info = []
    for scan_dir in scan_dirs:
        # Groups are kept per scanned directory: the same defect id found in
        # two class folders produces two separate entries.
        defect_groups = {}
        for img_filename in sorted(os.listdir(scan_dir)):
            # rsplit leaves a name without '_' unchanged, so no special case.
            defect_name = img_filename.rsplit('_', 1)[0]
            defect_groups.setdefault(defect_name, []).append(img_filename)
        image_info.extend(
            {"REF_DEFECT_ID": defect_name, "IMAGE_PATH": filenames}
            for defect_name, filenames in defect_groups.items()
        )

    data = {
        "REF_WAFER_ID": "4aa9edf8c2aecc60ec438bd11ac3d056475d9432",
        "IMAGE_INFO": image_info,
    }
    with open(out_json, "w", encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

# Pick the N-th image(s) of every defect from the source folder and copy
# them into a new folder for later use.
def select_img(input_dir, output_dir, img_num=(0, 1)):
    """Copy selected images of every defect into a mirrored class folder.

    ``input_dir`` is scanned for non-hidden class sub-directories. Inside
    each one, files are grouped by defect id (the file name with its last
    ``_``-separated component removed), each group is sorted by name, and the
    files at the positions listed in ``img_num`` are copied to
    ``output_dir/<class>/``.

    Args:
        input_dir: Directory holding one sub-directory per class.
        output_dir: Destination root; created if missing.
        img_num: Positions (into the sorted file list of each defect) of the
            images to copy. Negative positions count from the end. Positions
            a defect does not have are skipped instead of raising IndexError.
    """
    os.makedirs(output_dir, exist_ok=True)
    for label in sorted(os.listdir(input_dir)):
        label_dir = os.path.join(input_dir, label)
        # Check before creating anything, so stray files and hidden entries
        # do not leave empty folders in the output tree.
        if label.startswith('.') or not os.path.isdir(label_dir):
            continue
        out_label_dir = os.path.join(output_dir, label)
        os.makedirs(out_label_dir, exist_ok=True)

        defect_groups = {}
        for img_filename in os.listdir(label_dir):
            # rsplit leaves a name without '_' unchanged, so no special case.
            defect_name = img_filename.rsplit('_', 1)[0]
            defect_groups.setdefault(defect_name, []).append(img_filename)

        for filenames in defect_groups.values():
            filenames.sort()
            for index in img_num:
                if not -len(filenames) <= index < len(filenames):
                    continue  # this defect has no image at that position
                selected = filenames[index]
                shutil.copy2(
                    os.path.join(label_dir, selected),
                    os.path.join(out_label_dir, selected),
                )

# Build the image file names that belong to a defect id.
def img_path(REF_DEFECT_ID, id=(1, 2)):
    """Return the ``.jpg`` file names of a defect for the given image indices.

    Args:
        REF_DEFECT_ID: Defect id used as the file-name prefix.
        id: Iterable of image indices appended after ``_``. The default is an
            immutable tuple so it cannot be modified between calls. The name
            shadows the ``id`` builtin but is kept for keyword callers.

    Returns:
        A list of names of the form ``"<REF_DEFECT_ID>_<index>.jpg"``, in the
        order of ``id``.
    """
    return [f"{REF_DEFECT_ID}_{i}.jpg" for i in id]

# Copy each predicted defect's images into a folder named after its class.
def response_parser(response_txt,input_dir, output_dir = "out_img"):
    """
    Sort source images into per-class folders based on a prediction response.

    The response file is read as text; its first 10 and last 2 characters
    are dropped and every backslash is removed before the remainder is parsed
    as JSON (presumably this unwraps an escaped JSON payload - verify against
    the actual response format). For every item of ``PREDICT_INFO`` the
    defect id is printed, the image names are obtained from ``img_path`` with
    its default indices, and those files are copied from ``input_dir`` to
    ``output_dir/<REJECT value>/``.

    Args:
        response_txt: Path of the saved response text.
        input_dir: Directory holding the source images.
        output_dir: Destination root; created if missing.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    with open(response_txt,"r") as f:
        raw_text = f.read()
    payload = raw_text[10:-2].replace("\\",'')
    json_data = json.loads(payload)

    for prediction in json_data["PREDICT_INFO"]:
        defect_id = prediction["REF_DEFECT_ID"]
        print(defect_id)
        img_list = img_path(defect_id)
        code = prediction["REJECT"]  # classification result
        code_path = os.path.join(output_dir, code)
        if not os.path.exists(code_path):
            os.makedirs(code_path)
        for img in img_list:
            shutil.copy2(
                os.path.join(input_dir, img),
                os.path.join(code_path, img),
            )

if __name__ == "__main__":
    # Format the data to be predicted into a JSON file.
    # Case 1: images already sorted into per-class sub-folders.
    # input_dir = "/home/zcr/demo/Multiscale/mul/adc/data_adc"
    # load_test(input_dir,out_json='out_json.json')
    # Case 2: images not yet classified (single flat folder).
    input_dir = "/home/zcr/demo/Multiscale/mul/adc/labs/test_img"
    load_test(input_dir, label=False, out_json="out_json.json")

    # Sort the prediction results into per-class folders.
    # response_txt = "response.txt"
    # input_dir = "test_img"
    # response_parser(response_txt,input_dir, output_dir = "out_img")

    # Copy the selected image of every defect into a new folder.
    # input_dir = "/home/zcr/demo/Multiscale/mul/adc/data_adc"
    # output_dir = "out_img"
    # select_img(input_dir,output_dir,img_num = [1])

import json
import shutil
import os

# Format the images to predict into the JSON request layout.
def load_test(input_dir, label=True, out_json='out_json.json'):
    """Group image files by defect id and write them out as a JSON file.

    A file's defect id is its name with the last ``_``-separated component
    removed (``2_1_1.jpg`` -> ``2_1``); a name without ``_`` is its own id.

    Args:
        input_dir: Directory to scan.
        label: When true, ``input_dir`` is expected to hold one sub-directory
            per class; every non-hidden sub-directory is scanned on its own
            (in sorted order) and the class name is not written to the
            output. When false, the entries of ``input_dir`` itself are
            grouped and nothing is filtered out.
        out_json: Path of the JSON file to write (UTF-8, 4-space indent).

    Returns:
        None. The result is written to ``out_json`` with this layout::

            {
                "REF_WAFER_ID": "<fixed id hard-coded below>",
                "IMAGE_INFO": [
                    {"REF_DEFECT_ID": "2_1",
                     "IMAGE_PATH": ["2_1_1.jpg", "2_1_2.jpg"]},
                    ...
                ]
            }
    """
    if label:
        # Sorted so the entry order in the output does not depend on the
        # filesystem's directory listing order.
        scan_dirs = [
            os.path.join(input_dir, name)
            for name in sorted(os.listdir(input_dir))
            if not name.startswith('.')
            and os.path.isdir(os.path.join(input_dir, name))
        ]
    else:
        scan_dirs = [input_dir]

    image_info = []
    for scan_dir in scan_dirs:
        # Groups are kept per scanned directory: the same defect id found in
        # two class folders produces two separate entries.
        defect_groups = {}
        for img_filename in sorted(os.listdir(scan_dir)):
            # rsplit leaves a name without '_' unchanged, so no special case.
            defect_name = img_filename.rsplit('_', 1)[0]
            defect_groups.setdefault(defect_name, []).append(img_filename)
        image_info.extend(
            {"REF_DEFECT_ID": defect_name, "IMAGE_PATH": filenames}
            for defect_name, filenames in defect_groups.items()
        )

    data = {
        "REF_WAFER_ID": "4aa9edf8c2aecc60ec438bd11ac3d056475d9432",
        "IMAGE_INFO": image_info,
    }
    with open(out_json, "w", encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

# Pick the N-th image(s) of every defect from the source folder and copy
# them into a new folder for later use.
def select_img(input_dir, output_dir, img_num=(0, 1)):
    """Copy selected images of every defect into a mirrored class folder.

    ``input_dir`` is scanned for non-hidden class sub-directories. Inside
    each one, files are grouped by defect id (the file name with its last
    ``_``-separated component removed), each group is sorted by name, and the
    files at the positions listed in ``img_num`` are copied to
    ``output_dir/<class>/``.

    Args:
        input_dir: Directory holding one sub-directory per class.
        output_dir: Destination root; created if missing.
        img_num: Positions (into the sorted file list of each defect) of the
            images to copy. Negative positions count from the end. Positions
            a defect does not have are skipped instead of raising IndexError.
    """
    os.makedirs(output_dir, exist_ok=True)
    for label in sorted(os.listdir(input_dir)):
        label_dir = os.path.join(input_dir, label)
        # Check before creating anything, so stray files and hidden entries
        # do not leave empty folders in the output tree.
        if label.startswith('.') or not os.path.isdir(label_dir):
            continue
        out_label_dir = os.path.join(output_dir, label)
        os.makedirs(out_label_dir, exist_ok=True)

        defect_groups = {}
        for img_filename in os.listdir(label_dir):
            # rsplit leaves a name without '_' unchanged, so no special case.
            defect_name = img_filename.rsplit('_', 1)[0]
            defect_groups.setdefault(defect_name, []).append(img_filename)

        for filenames in defect_groups.values():
            filenames.sort()
            for index in img_num:
                if not -len(filenames) <= index < len(filenames):
                    continue  # this defect has no image at that position
                selected = filenames[index]
                shutil.copy2(
                    os.path.join(label_dir, selected),
                    os.path.join(out_label_dir, selected),
                )

# Build the image file names that belong to a defect id.
def img_path(REF_DEFECT_ID, id=(1, 2)):
    """Return the ``.jpg`` file names of a defect for the given image indices.

    Args:
        REF_DEFECT_ID: Defect id used as the file-name prefix.
        id: Iterable of image indices appended after ``_``. The default is an
            immutable tuple so it cannot be modified between calls. The name
            shadows the ``id`` builtin but is kept for keyword callers.

    Returns:
        A list of names of the form ``"<REF_DEFECT_ID>_<index>.jpg"``, in the
        order of ``id``.
    """
    return [f"{REF_DEFECT_ID}_{i}.jpg" for i in id]

# Copy each predicted defect's images into a folder named after its class.
def response_parser(response_txt,input_dir, output_dir = "out_img"):
    """
    Sort source images into per-class folders based on a prediction response.

    The response file is read as text; its first 10 and last 2 characters
    are dropped and every backslash is removed before the remainder is parsed
    as JSON (presumably this unwraps an escaped JSON payload - verify against
    the actual response format). For every item of ``PREDICT_INFO`` the
    defect id is printed, the image names are obtained from ``img_path`` with
    its default indices, and those files are copied from ``input_dir`` to
    ``output_dir/<REJECT value>/``.

    Args:
        response_txt: Path of the saved response text.
        input_dir: Directory holding the source images.
        output_dir: Destination root; created if missing.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    with open(response_txt,"r") as f:
        raw_text = f.read()
    payload = raw_text[10:-2].replace("\\",'')
    json_data = json.loads(payload)

    for prediction in json_data["PREDICT_INFO"]:
        defect_id = prediction["REF_DEFECT_ID"]
        print(defect_id)
        img_list = img_path(defect_id)
        code = prediction["REJECT"]  # classification result
        code_path = os.path.join(output_dir, code)
        if not os.path.exists(code_path):
            os.makedirs(code_path)
        for img in img_list:
            shutil.copy2(
                os.path.join(input_dir, img),
                os.path.join(code_path, img),
            )

if __name__ == "__main__":
    # Format the data to be predicted into a JSON file.
    # Case 1: images already sorted into per-class sub-folders.
    # input_dir = "/home/zcr/demo/Multiscale/mul/adc/data_adc"
    # load_test(input_dir,out_json='out_json.json')
    # Case 2: images not yet classified (single flat folder).
    input_dir = "/home/zcr/demo/Multiscale/mul/adc/labs/test_img"
    load_test(input_dir, label=False, out_json="out_json.json")

    # Sort the prediction results into per-class folders.
    # response_txt = "response.txt"
    # input_dir = "test_img"
    # response_parser(response_txt,input_dir, output_dir = "out_img")

    # Copy the selected image of every defect into a new folder.
    # input_dir = "/home/zcr/demo/Multiscale/mul/adc/data_adc"
    # output_dir = "out_img"
    # select_img(input_dir,output_dir,img_num = [1])
 

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值