【Python】一些实用的文件操作函数:批量移动、删除、重命名、随机抽取等

import os, random, shutil
import tqdm as tdqm

这些文件操作函数在处理训练集、验证集、测试集以及标签文件时都能用到,非常实用,建议收藏!

def splitDataset(fileDir, trainFile, valFile, rate=0.9):
    """
    Randomly split the entries of ``fileDir`` into a training and a validation list.

    Each entry name is written without its extension, one per line. The
    "extension" is everything from the first ``.`` onward, so ``a.b.jpg`` is
    written as ``a``.

    Args:
        fileDir: Folder whose entry names are split.
        trainFile: Output text file (UTF-8) receiving the sampled names.
        valFile: Output text file (UTF-8) receiving all remaining names.
        rate: Fraction of entries written to ``trainFile`` (default 0.9).

    Raises:
        ValueError: If ``rate`` is outside ``[0, 1]``.
    """
    if not 0 <= rate <= 1:
        raise ValueError("rate must be within [0, 1], got %r" % (rate,))

    names = os.listdir(fileDir)
    total = len(names)
    print(total)
    pick_count = int(total * rate)
    print(pick_count)
    train_names = random.sample(names, pick_count)
    # Set membership keeps the validation pass linear; testing against the
    # sampled list would make it quadratic in the number of files.
    train_set = set(train_names)

    with open(trainFile, "w", encoding='utf-8') as f:
        f.writelines(name.split(".")[0] + "\n" for name in train_names)
    print(trainFile, " Done!")

    with open(valFile, "w", encoding='utf-8') as f:
        f.writelines(
            name.split(".")[0] + "\n" for name in names if name not in train_set
        )
    print(valFile, " Done!")
    return
def rename(label_path):
    """
    Batch-rename the files listed in ``label_path``, replacing spaces with underscores.

    ``label_path`` is a text file with one file path per line. Every path is
    echoed to stdout first; paths that do not exist are then skipped silently.
    The replacement is applied to the whole path string, not only to the
    file name.

    Args:
        label_path: Text file listing the paths to rename.
    """
    # The module-level import binds the tqdm *module* under the alias ``tdqm``,
    # so the bare name ``tqdm`` is resolved here instead. The progress bar is
    # cosmetic: fall back to plain iteration when tqdm is not installed.
    try:
        from tqdm import tqdm
    except ImportError:
        tqdm = iter

    with open(label_path, "r") as f:
        file_list = [line.rstrip("\n") for line in f]

    for file in file_list:
        print(file)

    for file in tqdm(file_list):
        if not os.path.exists(file):
            continue
        os.rename(file, file.replace(" ", "_"))
def remove_file(file_path,target_path):
    """
    Delete from ``target_path`` every file whose name also appears in ``file_path``.

    Names listed in ``file_path`` that have no counterpart in ``target_path``
    are reported on stdout and skipped.

    Args:
        file_path: Folder whose entry names select what gets deleted.
        target_path: Folder the matching files are deleted from. It is created
            (empty) when it does not exist yet.

    Returns:
        Always 0.
    """
    # The module-level import binds the tqdm *module* under the alias ``tdqm``,
    # so the bare name ``tqdm`` is resolved here instead. The progress bar is
    # cosmetic: fall back to plain iteration when tqdm is not installed.
    try:
        from tqdm import tqdm
    except ImportError:
        tqdm = iter

    if not os.path.exists(target_path):
        print("target folder does not exist!", target_path)
        os.makedirs(target_path)

    for file in tqdm(os.listdir(file_path)):
        path = os.path.join(target_path, file)
        if not os.path.exists(path):
            print("removing file does not exist!", path)
            continue
        os.remove(path)
    print(target_path, "Removing finished!!")
    return 0
def move_file_to_target(file_path, source_path, target_path):
    """
    Move files from ``source_path`` to ``target_path``, selected by the names found in ``file_path``.

    Names listed in ``file_path`` that do not exist in ``source_path`` are
    reported on stdout and skipped.

    Args:
        file_path: Folder whose entry names select what gets moved.
        source_path: Folder the files are moved out of.
        target_path: Destination folder; created when missing.

    Returns:
        Always 0.
    """
    # The module-level import binds the tqdm *module* under the alias ``tdqm``,
    # so the bare name ``tqdm`` is resolved here instead. The progress bar is
    # cosmetic: fall back to plain iteration when tqdm is not installed.
    try:
        from tqdm import tqdm
    except ImportError:
        tqdm = iter

    if not os.path.exists(target_path):
        print("target folder does not exist!", target_path)
        os.makedirs(target_path)

    for file in tqdm(os.listdir(file_path)):
        path = os.path.join(source_path, file)
        if not os.path.exists(path):
            print("moving file does not exist!", path)
            continue
        shutil.move(path, os.path.join(target_path, file))
    print(source_path, target_path, "Moving finished!!")
    return 0
def move_file_to_target_by_pathfile(file_path, target_path):
    """
    Copy the files listed in the label file ``file_path`` into ``target_path``.

    Each line of the label file is comma-separated; only the first field, the
    path of the file, is used. Despite the function name the files are copied
    (``shutil.copy``), so the originals stay in place. Paths that do not exist
    are reported on stdout and skipped.

    Args:
        file_path: Label file with one ``path[,other fields]`` entry per line.
        target_path: Destination folder; created when missing.

    Returns:
        Always 0.
    """
    # The module-level import binds the tqdm *module* under the alias ``tdqm``,
    # so the bare name ``tqdm`` is resolved here instead. The progress bar is
    # cosmetic: fall back to plain iteration when tqdm is not installed.
    try:
        from tqdm import tqdm
    except ImportError:
        tqdm = iter

    if not os.path.exists(target_path):
        print("target folder does not exist!", target_path)
        os.makedirs(target_path)

    with open(file_path, "r") as f:
        all_lines = f.readlines()

    for line in tqdm(all_lines):
        path = line.strip().split(",")[0]
        if not os.path.exists(path):
            print("moving file does not exist!", path)
            continue
        # basename() handles the platform's own separator as well as "/".
        shutil.copy(path, os.path.join(target_path, os.path.basename(path)))
    print(target_path, "Moving finished!!")
    return 0
def move_file_to_target_from_2source(file_path, source_path1, source_path2, target_path):
    """
    Copy files named in ``file_path`` from one of two source folders into ``target_path``.

    For every entry name in ``file_path`` the file is looked up in
    ``source_path1`` first and in ``source_path2`` second; the first hit is
    copied (the originals stay in place). Names found in neither folder are
    reported on stdout and skipped.

    Args:
        file_path: Folder whose entry names select what gets copied.
        source_path1: Preferred source folder.
        source_path2: Fallback source folder.
        target_path: Destination folder; created when missing.

    Returns:
        Always 0.
    """
    # The module-level import binds the tqdm *module* under the alias ``tdqm``,
    # so the bare name ``tqdm`` is resolved here instead. The progress bar is
    # cosmetic: fall back to plain iteration when tqdm is not installed.
    try:
        from tqdm import tqdm
    except ImportError:
        tqdm = iter

    if not os.path.exists(target_path):
        print("target folder does not exist!", target_path)
        os.makedirs(target_path)

    # os.listdir() yields bare entry names, so no path splitting is needed.
    for file in tqdm(os.listdir(file_path)):
        candidates = (
            os.path.join(source_path1, file),
            os.path.join(source_path2, file),
        )
        found = next((c for c in candidates if os.path.exists(c)), None)
        if found is None:
            print("moving file does not exist!", file)
            continue
        shutil.copy(found, os.path.join(target_path, file))
    print(target_path, "Moving finished!!")
    return 0
def move_file_by_pathfile_2target(file_path, source_path, target_path1, target_path2, rate=0.9):
    """
    Copy files listed in ``file_path`` from ``source_path``, split by ratio between two folders.

    Each non-blank line of the label file is treated as a path; only its base
    name is used to locate the file inside ``source_path``. A random ``rate``
    fraction of the entries is assigned to ``target_path1`` and the remainder
    to ``target_path2``. Files are copied, so the originals stay in place.
    Entries whose file is missing from ``source_path`` are reported on stdout
    and skipped (they still count towards the split).

    NOTE(review): the previous body referenced an undefined ``target_path`` and
    never used ``target_path1``/``target_path2``, so it could not run. The
    random split and the 0.9 default mirror ``splitDataset`` in this file —
    confirm this matches the intended ratio.

    Args:
        file_path: Label file with one path per line.
        source_path: Folder the files are copied from.
        target_path1: Destination for the sampled ``rate`` fraction.
        target_path2: Destination for the remaining entries.
        rate: Fraction of entries assigned to ``target_path1`` (default 0.9).

    Returns:
        Always 0.

    Raises:
        ValueError: If ``rate`` is outside ``[0, 1]``.
    """
    # The module-level import binds the tqdm *module* under the alias ``tdqm``,
    # so the bare name ``tqdm`` is resolved here instead. The progress bar is
    # cosmetic: fall back to plain iteration when tqdm is not installed.
    try:
        from tqdm import tqdm
    except ImportError:
        tqdm = iter

    if not 0 <= rate <= 1:
        raise ValueError("rate must be within [0, 1], got %r" % (rate,))

    for folder in (target_path1, target_path2):
        if not os.path.exists(folder):
            print("target folder does not exist!", folder)
            os.makedirs(folder)

    with open(file_path, "r") as f:
        # Blank lines are dropped: an empty name would resolve to source_path
        # itself, which exists, and shutil.copy() would fail on a directory.
        img_list = [line.strip() for line in f if line.strip()]

    # Sample positions rather than values so duplicate lines are handled.
    first_count = int(len(img_list) * rate)
    first_indices = set(random.sample(range(len(img_list)), first_count))

    for index, entry in enumerate(tqdm(img_list)):
        name = os.path.basename(entry)
        file = os.path.join(source_path, name)
        if not os.path.exists(file):
            print("moving file does not exist!", file)
            continue
        target_path = target_path1 if index in first_indices else target_path2
        shutil.copy(file, os.path.join(target_path, name))
    print(target_path1, target_path2, "Moving finished!!")
    return 0
def move_file_by_label_2target(file_path, target_path1, target_path2):
    """
    Copy the files listed in the label file ``file_path`` into one of two folders by label.

    Each line has the form ``path,label``. Files labelled ``1`` are copied to
    ``target_path1`` and files labelled ``2`` to ``target_path2``; the
    originals stay in place. Lines are skipped, with a message on stdout, when
    the line has no label field, the path does not exist, the label is
    neither 1 nor 2, or a file with the same name already exists in the
    destination. Blank lines are skipped silently.

    Args:
        file_path: Label file with one ``path,label`` entry per line.
        target_path1: Destination for label 1; created when missing.
        target_path2: Destination for label 2; created when missing.
    """
    # The module-level import binds the tqdm *module* under the alias ``tdqm``,
    # so the bare name ``tqdm`` is resolved here instead. The progress bar is
    # cosmetic: fall back to plain iteration when tqdm is not installed.
    try:
        from tqdm import tqdm
    except ImportError:
        tqdm = iter

    # Check both folders independently; an if/elif chain would leave the
    # second one uncreated whenever the first was missing too.
    for folder in (target_path1, target_path2):
        if not os.path.exists(folder):
            print("target folder does not exist!", folder)
            os.makedirs(folder)

    with open(file_path, "r") as f:
        all_lines = f.readlines()

    folder_by_label = {1: target_path1, 2: target_path2}
    for line in tqdm(all_lines):
        text = line.strip()
        if not text:
            continue
        issues = text.split(",")
        if len(issues) < 2:
            print("wrong label!", text)
            continue
        path, label = issues[0], issues[1]
        if not os.path.exists(path):
            print("moving file does not exist!", path)
            continue

        try:
            label_id = int(label)
        except ValueError:
            label_id = None
        target_path = folder_by_label.get(label_id)
        if target_path is None:
            # Skip instead of falling through with the previous line's folder.
            print("wrong label!", label)
            continue

        target = os.path.join(target_path, os.path.basename(path))
        if os.path.exists(target):
            print("target file is existed!", target)
            continue
        shutil.copy(path, target)
    print(file_path, "Moving finished!!")
def move_file_to_target_by_pathfile_1source(file_path, source_path, target_path):
    """
    Copy files listed in the label file ``file_path`` from ``source_path`` into ``target_path``.

    Each non-blank line of the label file is treated as a path in full; only
    its base name is used to locate the file inside ``source_path``. Files are
    copied, so the originals stay in place. Names missing from ``source_path``
    are reported on stdout and skipped.

    Args:
        file_path: Label file with one path per line.
        source_path: Folder the files are copied from.
        target_path: Destination folder; created when missing.

    Returns:
        Always 0.
    """
    # The module-level import binds the tqdm *module* under the alias ``tdqm``,
    # so the bare name ``tqdm`` is resolved here instead. The progress bar is
    # cosmetic: fall back to plain iteration when tqdm is not installed.
    try:
        from tqdm import tqdm
    except ImportError:
        tqdm = iter

    if not os.path.exists(target_path):
        print("target folder does not exist!", target_path)
        os.makedirs(target_path)

    with open(file_path, "r") as f:
        # Blank lines are dropped: an empty name would resolve to source_path
        # itself, which exists, and shutil.copy() would fail on a directory.
        img_list = [line.strip() for line in f if line.strip()]

    for entry in tqdm(img_list):
        name = os.path.basename(entry)
        file = os.path.join(source_path, name)
        if not os.path.exists(file):
            print("moving file does not exist!", file)
            continue
        shutil.copy(file, os.path.join(target_path, name))
    print(target_path, "Moving finished!!")
    return 0
def moveFile(fileDir, tarDir, rate=0.1):
    """
    Move a random fraction of the entries in ``fileDir`` to ``tarDir``.

    The total entry count and the number of entries picked are printed before
    anything is moved.

    Args:
        fileDir: Folder to sample entries from.
        tarDir: Destination folder; created when missing.
        rate: Fraction of entries to move (default 0.1).

    Raises:
        ValueError: If ``rate`` is outside ``[0, 1]``.
    """
    if not 0 <= rate <= 1:
        raise ValueError("rate must be within [0, 1], got %r" % (rate,))

    if not os.path.exists(tarDir):
        os.makedirs(tarDir)

    names = os.listdir(fileDir)
    total = len(names)
    print(total)
    pick_count = int(total * rate)
    print(pick_count)

    for name in random.sample(names, pick_count):
        shutil.move(os.path.join(fileDir, name), os.path.join(tarDir, name))
    print(fileDir, tarDir, "Done!")
    return
def create_samplefile_from_pathfile(label_path, sample_result_path, rate=0.5):
    """
    Write a random fraction of the lines of ``label_path`` to ``sample_result_path``.

    Lines are stripped of surrounding whitespace before sampling; the sampled
    lines are written one per line, UTF-8 encoded, in the order returned by
    ``random.sample``. The total line count and the number of lines picked
    are printed first.

    Args:
        label_path: Input label file.
        sample_result_path: Output label file; overwritten if it exists.
        rate: Fraction of lines to keep (default 0.5).

    Raises:
        ValueError: If ``rate`` is outside ``[0, 1]``.
    """
    if not 0 <= rate <= 1:
        raise ValueError("rate must be within [0, 1], got %r" % (rate,))

    with open(label_path, "r") as f:
        entries = [line.strip() for line in f]

    total = len(entries)
    print(total)
    pick_count = int(total * rate)
    print(pick_count)
    sample = random.sample(entries, pick_count)

    # The context manager closes the output even if a write fails.
    with open(sample_result_path, "w", encoding='utf-8') as f:
        f.writelines(entry + "\n" for entry in sample)
    print(label_path, sample_result_path, "Done!")
    return
def batch_resize(img_dir, save_dir, output_size=512):
    """
    Resize every image in ``img_dir`` to a square and save it under the same name in ``save_dir``.

    Entries for which ``cv2.imread`` returns ``None`` are reported on stdout
    and skipped.

    Args:
        img_dir: Folder containing the input images.
        save_dir: Output folder; created when missing.
        output_size: Width and height, in pixels, of the resized images
            (default 512).
    """
    # cv2 is used below but is not imported at module level in this file, so
    # it is imported here to keep the function self-contained.
    import cv2

    # The module-level import binds the tqdm *module* under the alias ``tdqm``,
    # so the bare name ``tqdm`` is resolved here instead. The progress bar is
    # cosmetic: fall back to plain iteration when tqdm is not installed.
    try:
        from tqdm import tqdm
    except ImportError:
        tqdm = iter

    img_list = os.listdir(img_dir)

    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    for img_name in tqdm(img_list):
        path = os.path.join(img_dir, img_name)
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        if img is None:
            print("This image is empty! Image path:" + path)
            continue
        resized_img = cv2.resize(img, (output_size, output_size))
        cv2.imwrite(os.path.join(save_dir, img_name), resized_img)

 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值