筛选数据的工具

最新推荐文章于 2020-03-26 16:17:12 发布

冉冉升起的码农

最新推荐文章于 2020-03-26 16:17:12 发布

阅读量1.2k

点赞数 2

分类专栏： c++ 和 python

本文链接：https://blog.csdn.net/jishuqianjin/article/details/84951976

版权

c++ 和 python 专栏收录该内容

7 篇文章 0 订阅

订阅专栏

一、利用 shutil.copy 进行处理

# -*- coding: utf-8 -*-
import os
import glob
import cv2
import json
import shutil

def move_src_accord_filter(filter_path, src_path, dataset_path, max_count=16):
    """Copy the source image/annotation pairs selected by a filter directory.

    Every ``*.png`` found in ``filter_path`` names one sample.  For each of
    them the image with the same file name and the ``.json`` file with the
    same stem are looked up in ``src_path`` and copied (not moved, despite
    the function name) into ``dataset_path``.

    Args:
        filter_path: Directory whose ``*.png`` file names select the samples.
        src_path: Directory holding the original images and their JSON files.
        dataset_path: Existing directory that receives the copies.
        max_count: Maximum number of image/JSON pairs to copy.  Defaults to
            16, the limit that used to be hard-coded.

    Returns:
        None.
    """
    # glob.glob() yields entries in arbitrary order; sorting makes the subset
    # kept under ``max_count`` reproducible between runs.
    selected = sorted(glob.glob(os.path.join(filter_path, '*.png')))
    copied = 0
    for image_path in selected:
        if copied >= max_count:
            return
        image_name = os.path.basename(image_path)
        src_image_path = os.path.join(src_path, image_name)
        if not os.path.exists(src_image_path):
            continue

        json_name = os.path.splitext(image_name)[0] + ".json"
        json_image_path = os.path.join(src_path, json_name)
        print("json_image_path is: ", json_image_path)
        if not os.path.exists(json_image_path):
            # Skip incomplete samples instead of copying the image and then
            # failing on the missing annotation file.
            continue
        shutil.copy(src_image_path, dataset_path)
        shutil.copy(json_image_path, dataset_path)
        copied += 1



if __name__ == "__main__":
    # Alternative input set, kept for reference:
    # filter_path = "/media/Data/wangjunjie/active_learning/test_lcw"
    # src_path = "/home/user/lcw/RFBnet_Data/obqFind_wanli/verify_data"
    # dataset_path ="/media/Data/wangjunjie/active_learning/test_lcw_src"
    filter_path = "/media/newData/paperData/opqData/data_28_filter/verify_result"
    src_path = "/media/newData/paperData/opqData/data_28_filter/verify_src"
    dataset_path = "/home/user/lcw/RFBnet_Data/obqFind_wanli/dataset_lcw"
    # os.makedirs also creates missing parent directories, which os.mkdir
    # does not, so a fresh machine without the parent folder still works.
    if not os.path.exists(dataset_path):
        os.makedirs(dataset_path)
    move_src_accord_filter(filter_path, src_path, dataset_path)

二、利用 MD5 校验进行增量拷贝（cp_custom.py）及失败样本筛选脚本

import os
import shutil
import sys
def get_MD5(file_path):
    """Return the MD5 hex digest of the file at ``file_path``.

    The digest is computed in-process with ``hashlib`` instead of shelling
    out to the BSD-only ``md5`` command.  That command is not available on
    Linux, where its empty output made every file compare as identical, and
    the unquoted path broke on names containing spaces or shell
    metacharacters.

    Args:
        file_path: Path of the file to hash.

    Returns:
        The 32-character lowercase hexadecimal MD5 digest.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Function-scope import keeps this block self-contained; the surrounding
    # script does not import hashlib at module level.
    import hashlib

    digest = hashlib.md5()
    with open(file_path, 'rb') as stream:
        # Read in 1 MiB chunks so large files are never loaded whole.
        for chunk in iter(lambda: stream.read(1 << 20), b''):
            digest.update(chunk)
    return digest.hexdigest()

def main(path, out):
    """Recursively copy new or changed files from ``path`` into ``out``.

    A file is copied when it does not exist yet under ``out`` or when its
    MD5 digest (via ``get_MD5``) differs from the existing copy.  The
    directory structure below ``path`` is mirrored, including empty
    directories.

    Args:
        path: Source directory to walk.
        out: Destination directory; created (with parents) when missing.

    Returns:
        None.
    """
    # Create the destination up front: previously only sub-directories were
    # created, so copying into a non-existent top-level ``out`` failed.
    if not os.path.isdir(out):
        os.makedirs(out)
    for entry in os.listdir(path):
        name = os.path.join(path, entry)
        back_name = os.path.join(out, entry)
        if os.path.isfile(name):
            # Hash only when a previous copy exists; otherwise copy directly.
            if not os.path.isfile(back_name) or get_MD5(name) != get_MD5(back_name):
                shutil.copy(name, back_name)
        elif os.path.isdir(name):
            main(name, back_name)
        else:
            # Neither a regular file nor a directory (e.g. a broken symlink):
            # recursing into it would make os.listdir raise, so skip it.
            continue
if __name__ == '__main__':
    # Expected invocation: python <this script> <source_dir> <backup_dir>
    # Exit with a usage message instead of an IndexError when arguments are
    # missing.
    if len(sys.argv) < 3:
        sys.exit("usage: %s <source_dir> <backup_dir>" % sys.argv[0])
    A = sys.argv[1]
    B = sys.argv[2]
    print("A is: %s, B is: %s" %(A, B))
    main(A, B)

import os
import glob
import cv2
import json
def get_id(img_dir):
    """Collect the sample ids encoded in the ``*_1.png`` files of a folder.

    The id of a file is the part of its name before the first underscore,
    e.g. ``12_a_1.png`` yields ``"12"``.

    Args:
        img_dir: Directory that is searched (non-recursively) for files
            matching ``*_1.png``.

    Returns:
        A sorted list of unique, non-empty id strings.  Sorting and
        de-duplication keep the callers' processing order reproducible and
        avoid handling the same id twice when several files share a prefix.
    """
    ids = set()
    for png_path in glob.glob(os.path.join(img_dir, '*_1.png')):
        file_name = os.path.basename(png_path)
        prefix = file_name.partition("_")[0]
        # A file literally named "_1.png" has an empty prefix; joining an
        # empty id onto a base path would address the base directory itself.
        if prefix:
            ids.add(prefix)
    return sorted(ids)


def save_detect_img(id_list, save_path="/home/user/lcw/project_new/data1/data_failure", base_path = "/home/user/lcw/project_new/data1/data_automodel_error"):
    """Export grayscale images and their detection results for the given ids.

    For every id, the ``*.png`` and ``*.txt`` files in
    ``<base_path>/<id>/src`` are sorted and paired by position.  Each image
    is re-read as grayscale and written to ``save_path`` under its original
    file name; the JSON content of the paired ``.txt`` file is written next
    to it as ``<image stem>.json``.

    Args:
        id_list: Iterable of id strings (sub-directory names of ``base_path``).
        save_path: Output directory; created with parents when missing.
        base_path: Directory containing one ``<id>/src`` folder per id.

    Returns:
        None.
    """
    if not os.path.exists(save_path):
        # makedirs (unlike mkdir) also creates missing parent directories.
        os.makedirs(save_path)
    for sample_id in id_list:
        img_path = os.path.join(base_path, sample_id, "src")
        img_files = sorted(glob.glob(img_path + '/*.png'))
        txt_files = sorted(glob.glob(img_path + '/*.txt'))
        if len(img_files) != len(txt_files):
            # Positional pairing is only reliable for equal counts; warn and
            # pair as many as possible instead of raising IndexError.
            print("warning: %d png but %d txt files in %s" % (len(img_files), len(txt_files), img_path))
        for img_file, txt_file in zip(img_files, txt_files):
            print("img_file is: %s, txt_file is: %s" %(img_file, txt_file))
            file_name = os.path.basename(img_file)
            json_name = os.path.splitext(file_name)[0] + ".json"
            print("json_name is: ", json_name)
            # Flag 0 makes OpenCV load the image as single-channel grayscale.
            img = cv2.imread(img_file, 0)
            if img is None:
                # cv2.imread signals an unreadable file by returning None;
                # passing that to cv2.imwrite would raise.
                print("warning: cannot read image %s, skipped" % img_file)
                continue
            cv2.imwrite(os.path.join(save_path, file_name), img)
            with open(txt_file, "r") as json_f:
                detect_result = json.load(json_f)
            with open(os.path.join(save_path, json_name), "w") as json_f_write:
                json.dump(detect_result, json_f_write)


def fiter_error_data(id_list, save_path="/home/user/lcw/project_new/data1/data_failure_auto_model", base_path ="/home/user/lcw/project_new/data1/data"):
    """Copy the data folder of every id by running ``cp_custom.py``.

    For each id the command ``python cp_custom.py <base_path>/<id>
    <save_path>/<id>`` is executed, resolved relative to the current working
    directory.

    Args:
        id_list: Iterable of id strings (sub-directory names of ``base_path``).
        save_path: Directory under which the per-id copies are written.
        base_path: Directory containing one sub-directory per id.

    Returns:
        None.
    """
    # Function-scope import keeps this block self-contained; the surrounding
    # script does not import subprocess at module level.
    import subprocess

    for sample_id in id_list:
        save_path_id = os.path.join(save_path, sample_id)
        base_path_id = os.path.join(base_path, sample_id)
        print("save_path_id is: %s, base_path_id is: %s" %(save_path_id, base_path_id))
        # An argument list bypasses the shell, so paths containing spaces or
        # shell metacharacters are passed through unchanged.
        cmd = ["python", "cp_custom.py", base_path_id, save_path_id]
        try:
            status = subprocess.call(cmd)
        except OSError as err:
            # Interpreter not found / not executable: report and keep going,
            # matching the best-effort behaviour of the former os.system call.
            print("warning: could not run %s: %s" % (" ".join(cmd), err))
            continue
        if status != 0:
            print("warning: cp_custom.py exited with status %s for id %s" % (status, sample_id))


if __name__ == "__main__":
    # Ids are taken from the *_1.png files found in ./failure.
    failed_ids = get_id("./failure")
    # Optional export step, currently disabled:
    # save_detect_img(failed_ids)
    fiter_error_data(failed_ids)

import os
import glob
import cv2
import json
def get_id(img_dir):
    """Collect the sample ids encoded in the ``*_1.png`` files of a folder.

    The id of a file is the part of its name before the first underscore,
    e.g. ``12_a_1.png`` yields ``"12"``.

    Args:
        img_dir: Directory that is searched (non-recursively) for files
            matching ``*_1.png``.

    Returns:
        A sorted list of unique, non-empty id strings.  Sorting and
        de-duplication keep the callers' processing order reproducible and
        avoid handling the same id twice when several files share a prefix.
    """
    ids = set()
    for png_path in glob.glob(os.path.join(img_dir, '*_1.png')):
        file_name = os.path.basename(png_path)
        prefix = file_name.partition("_")[0]
        # A file literally named "_1.png" has an empty prefix; joining an
        # empty id onto a base path would address the base directory itself.
        if prefix:
            ids.add(prefix)
    return sorted(ids)


def save_detect_img(id_list, save_path="/home/user/lcw/project_new/data1/data_failure", base_path = "/home/user/lcw/project_new/data1/data"):
    """Export grayscale images and their detection results for the given ids.

    For every id, the ``*.png`` and ``*.txt`` files in
    ``<base_path>/<id>/src`` are sorted and paired by position.  Each image
    is re-read as grayscale and written to ``save_path`` under its original
    file name; the JSON content of the paired ``.txt`` file is written next
    to it as ``<image stem>.json``.

    Args:
        id_list: Iterable of id strings (sub-directory names of ``base_path``).
        save_path: Output directory; created with parents when missing.
        base_path: Directory containing one ``<id>/src`` folder per id.

    Returns:
        None.
    """
    if not os.path.exists(save_path):
        # makedirs (unlike mkdir) also creates missing parent directories.
        os.makedirs(save_path)
    for sample_id in id_list:
        img_path = os.path.join(base_path, sample_id, "src")
        img_files = sorted(glob.glob(img_path + '/*.png'))
        txt_files = sorted(glob.glob(img_path + '/*.txt'))
        if len(img_files) != len(txt_files):
            # Positional pairing is only reliable for equal counts; warn and
            # pair as many as possible instead of raising IndexError.
            print("warning: %d png but %d txt files in %s" % (len(img_files), len(txt_files), img_path))
        for img_file, txt_file in zip(img_files, txt_files):
            print("img_file is: %s, txt_file is: %s" %(img_file, txt_file))
            file_name = os.path.basename(img_file)
            json_name = os.path.splitext(file_name)[0] + ".json"
            print("json_name is: ", json_name)
            # Flag 0 makes OpenCV load the image as single-channel grayscale.
            img = cv2.imread(img_file, 0)
            if img is None:
                # cv2.imread signals an unreadable file by returning None;
                # passing that to cv2.imwrite would raise.
                print("warning: cannot read image %s, skipped" % img_file)
                continue
            cv2.imwrite(os.path.join(save_path, file_name), img)
            with open(txt_file, "r") as json_f:
                detect_result = json.load(json_f)
            with open(os.path.join(save_path, json_name), "w") as json_f_write:
                json.dump(detect_result, json_f_write)


  


if __name__ == "__main__":
    # Ids are taken from the *_1.png files found in ./failure; their images
    # and detection results are then exported with the default paths.
    failed_ids = get_id("./failure")
    save_detect_img(failed_ids)

冉冉升起的码农

关注

2
点赞
踩
1

收藏

觉得还不错? 一键收藏
1
评论
筛选数据的工具

一、利用 shutil.copy 进行处理# -*- coding: utf-8 -*-import osimport globimport cv2import jsonimport shutildef move_src_accord_filter(filter_path, src_path, dataset_path): files = glob.glob(fi...
复制链接

扫一扫

专栏目录