import os
import random
import shutil

import tqdm as tdqm
from tqdm import tqdm
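# These file operations are useful when preparing training, validation and test
# sets and their label files. Handy utilities worth keeping around.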
def splitDataset(fileDir, trainFile, valFile, rate=0.9):
    """Randomly split the entries of a directory into train and validation lists.

    Every entry name is written without its final extension, one per line.

    Args:
        fileDir: Directory whose entries (as returned by ``os.listdir``) are split.
        trainFile: Output text file (UTF-8) receiving the randomly sampled names.
        valFile: Output text file (UTF-8) receiving every remaining name, in
            ``os.listdir`` order.
        rate: Fraction of entries written to ``trainFile``; the number picked is
            ``int(len(entries) * rate)``. Defaults to 0.9.

    Raises:
        ValueError: Propagated from ``random.sample`` when ``rate`` yields a
            sample size that is negative or larger than the number of entries.
    """
    names = os.listdir(fileDir)
    total = len(names)
    print(total)
    pick_count = int(total * rate)
    print(pick_count)
    picked = random.sample(names, pick_count)
    # A set keeps the validation pass linear instead of quadratic.
    picked_set = set(picked)

    with open(trainFile, "w", encoding='utf-8') as f:
        for name in picked:
            # splitext strips only the last extension, so "a.b.jpg" -> "a.b".
            f.write(os.path.splitext(name)[0] + "\n")
    print(trainFile, " Done!")

    with open(valFile, "w", encoding='utf-8') as f:
        for name in names:
            if name not in picked_set:
                f.write(os.path.splitext(name)[0] + "\n")
    print(valFile, " Done!")
    return
def rename(label_path):
    """Batch-rename listed files so that their file names contain no spaces.

    Reads one path per line from ``label_path``, echoes each line, and renames
    every path that exists by replacing spaces in its final component with
    underscores. Directory components are left untouched so the renamed file
    stays in the same folder.

    Args:
        label_path: UTF-8 text file listing one path per line.
    """
    file_list = []
    with open(label_path, "r", encoding='utf-8') as f:
        for line in f:
            line = line.rstrip("\n")
            file_list.append(line)
            print(line)
    for file in tqdm(file_list):
        if not os.path.exists(file):
            continue
        folder, base = os.path.split(file)
        new_file = os.path.join(folder, base.replace(" ", "_"))
        if new_file == file:
            # Nothing to change; avoid a pointless rename call.
            continue
        if os.path.exists(new_file):
            # os.rename may silently overwrite an existing file; skip instead.
            print("target file is existed!", new_file)
            continue
        os.rename(file, new_file)
def remove_file(file_path, target_path):
    """Delete from ``target_path`` every file whose name appears in ``file_path``.

    The entry names of the folder ``file_path`` are looked up inside
    ``target_path``; matches are removed and misses are reported.

    Args:
        file_path: Folder whose entry names select what to delete.
        target_path: Folder to delete from; created if it does not exist.

    Returns:
        Always 0.
    """
    target_missing = not os.path.exists(target_path)
    if target_missing:
        print("target folder does not exist!", target_path)
        os.makedirs(target_path)
    for entry in tqdm(os.listdir(file_path)):
        candidate = os.path.join(target_path, entry)
        if os.path.exists(candidate):
            os.remove(candidate)
        else:
            print("removing file does not exist!", candidate)
    print(target_path, "Removing finished!!")
    return 0
def move_file_to_target(file_path, source_path, target_path):
    """Move files named after the entries of ``file_path`` into ``target_path``.

    Each entry name of the folder ``file_path`` is looked up in ``source_path``;
    matches are moved to ``target_path`` and misses are reported.

    Args:
        file_path: Folder whose entry names select what to move.
        source_path: Folder the files are taken from.
        target_path: Destination folder; created if it does not exist.

    Returns:
        Always 0.
    """
    target_missing = not os.path.exists(target_path)
    if target_missing:
        print("target folder does not exist!", target_path)
        os.makedirs(target_path)
    for entry in tqdm(os.listdir(file_path)):
        origin = os.path.join(source_path, entry)
        if os.path.exists(origin):
            shutil.move(origin, os.path.join(target_path, entry))
        else:
            print("moving file does not exist!", origin)
    print(source_path, target_path, "Moving finished!!")
    return 0
def move_file_to_target_by_pathfile(file_path, target_path):
    """Copy every file listed in a label file into ``target_path``.

    Each non-blank line of ``file_path`` is split on commas and the first
    field is taken as the path of the file to copy. Despite the function
    name, files are copied with ``shutil.copy``; the originals stay in place.

    Args:
        file_path: UTF-8 label file with one comma-separated record per line.
        target_path: Destination folder; created if it does not exist.

    Returns:
        Always 0.
    """
    if not os.path.exists(target_path):
        print("target folder does not exist!", target_path)
        os.makedirs(target_path)
    with open(file_path, "r", encoding='utf-8') as f:
        all_lines = f.readlines()
    for line in tqdm(all_lines):
        path = line.strip().split(",")[0]
        if not path:
            # Blank line or empty first field: nothing to copy.
            continue
        if not os.path.exists(path):
            print("moving file does not exist!", path)
            continue
        # basename understands the platform's path separators.
        shutil.copy(path, os.path.join(target_path, os.path.basename(path)))
    print(target_path, "Moving finished!!")
    return 0
def move_file_to_target_from_2source(file_path, source_path1, source_path2, target_path):
    """Copy files named after the entries of ``file_path`` from one of two folders.

    Each entry name is looked up first in ``source_path1`` and then in
    ``source_path2``; the first existing match is copied into ``target_path``.
    Names found in neither folder are reported.

    Args:
        file_path: Folder whose entry names select what to copy.
        source_path1: Preferred source folder.
        source_path2: Fallback source folder.
        target_path: Destination folder; created if it does not exist.

    Returns:
        Always 0.
    """
    target_missing = not os.path.exists(target_path)
    if target_missing:
        print("target folder does not exist!", target_path)
        os.makedirs(target_path)
    for file in tqdm(os.listdir(file_path)):
        name = file.rsplit('/', 1)[-1]
        first = os.path.join(source_path1, name)
        second = os.path.join(source_path2, name)
        if os.path.exists(first):
            chosen = first
        elif os.path.exists(second):
            chosen = second
        else:
            print("moving file does not exist!", file)
            continue
        shutil.copy(chosen, os.path.join(target_path, name))
    print(target_path, "Moving finished!!")
    return 0
def move_file_by_pathfile_2target(file_path, source_path, target_path1, target_path2, rate=0.5):
    """Copy files listed in a label file from one folder into two folders by ratio.

    The file name of every non-blank line in ``file_path`` is looked up in
    ``source_path``. A random ``rate`` fraction of the listed entries is
    assigned to ``target_path1`` and the rest to ``target_path2``. Files are
    copied with ``shutil.copy``; the originals stay in place.

    Args:
        file_path: UTF-8 label file with one path per line.
        source_path: Folder the files are taken from.
        target_path1: Destination for the randomly sampled share; created if
            missing.
        target_path2: Destination for the remaining entries; created if missing.
        rate: Fraction of listed entries assigned to ``target_path1``; the count
            is ``int(len(entries) * rate)``. Defaults to 0.5.

    Returns:
        Always 0.

    Raises:
        ValueError: Propagated from ``random.sample`` when ``rate`` yields a
            sample size that is negative or larger than the number of entries.
    """
    for folder in (target_path1, target_path2):
        if not os.path.exists(folder):
            print("target folder does not exist!", folder)
            os.makedirs(folder)
    with open(file_path, "r", encoding='utf-8') as f:
        # Blank lines would otherwise resolve to the source folder itself.
        img_list = [line.strip() for line in f if line.strip()]
    picknumber = int(len(img_list) * rate)
    # Sample positions rather than values so duplicate lines are handled.
    first_group = set(random.sample(range(len(img_list)), picknumber))
    for index, file in enumerate(tqdm(img_list)):
        name = os.path.basename(file)
        src = os.path.join(source_path, name)
        if not name or not os.path.exists(src):
            print("moving file does not exist!", src)
            continue
        target_path = target_path1 if index in first_group else target_path2
        shutil.copy(src, os.path.join(target_path, name))
    print(target_path1, target_path2, "Moving finished!!")
    return 0
def move_file_by_label_2target(file_path, target_path1, target_path2):
    """Copy files listed in a label file into one of two folders by label.

    Each line of ``file_path`` is split on commas: the first field is the path
    of the file and the second its integer label. Label 1 goes to
    ``target_path1`` and label 2 to ``target_path2``. Lines with any other
    label, a non-integer label, or no label field are reported and skipped.
    Files that already exist at the destination are not overwritten.

    Args:
        file_path: UTF-8 label file with ``path,label`` records.
        target_path1: Destination for label 1; created if missing.
        target_path2: Destination for label 2; created if missing.
    """
    # Check both folders independently; either one may be missing.
    for folder in (target_path1, target_path2):
        if not os.path.exists(folder):
            print("target folder does not exist!", folder)
            os.makedirs(folder)
    with open(file_path, "r", encoding='utf-8') as f:
        all_lines = f.readlines()
    for line in tqdm(all_lines):
        record = line.strip()
        if not record:
            continue
        issues = record.split(",")
        if len(issues) < 2:
            print("wrong label!", record)
            continue
        path, label = issues[0], issues[1]
        if not os.path.exists(path):
            print("moving file does not exist!", path)
            continue
        try:
            label_id = int(label)
        except ValueError:
            label_id = None
        if label_id == 1:
            target_path = target_path1
        elif label_id == 2:
            target_path = target_path2
        else:
            print("wrong label!", label)
            # Skip the line instead of reusing the previous line's target.
            continue
        target = os.path.join(target_path, os.path.basename(path))
        if os.path.exists(target):
            print("target file is existed!", target)
            continue
        shutil.copy(path, target)
    print(file_path, "Moving finished!!")
def move_file_to_target_by_pathfile_1source(file_path, source_path, target_path):
    """Copy files listed in a label file from ``source_path`` into ``target_path``.

    Only the file name of each non-blank line in ``file_path`` is used; it is
    looked up in ``source_path``. Despite the function name, files are copied
    with ``shutil.copy``; the originals stay in place.

    Args:
        file_path: UTF-8 label file with one path per line.
        source_path: Folder the files are taken from.
        target_path: Destination folder; created if it does not exist.

    Returns:
        Always 0.
    """
    if not os.path.exists(target_path):
        print("target folder does not exist!", target_path)
        os.makedirs(target_path)
    with open(file_path, "r", encoding='utf-8') as f:
        # Blank lines would otherwise resolve to the source folder itself
        # and make shutil.copy fail on a directory.
        img_list = [line.strip() for line in f if line.strip()]
    for file in tqdm(img_list):
        name = os.path.basename(file)
        src = os.path.join(source_path, name)
        if not name or not os.path.exists(src):
            print("moving file does not exist!", src)
            continue
        shutil.copy(src, os.path.join(target_path, name))
    print(target_path, "Moving finished!!")
    return 0
def moveFile(fileDir, tarDir, rate=0.1):
    """Move a random fraction of the entries of ``fileDir`` into ``tarDir``.

    Args:
        fileDir: Folder whose entries (as returned by ``os.listdir``) are sampled.
        tarDir: Destination folder; created if it does not exist.
        rate: Fraction of entries to move; the number moved is
            ``int(len(entries) * rate)``. Defaults to 0.1.

    Raises:
        ValueError: Propagated from ``random.sample`` when ``rate`` yields a
            sample size that is negative or larger than the number of entries.
    """
    os.makedirs(tarDir, exist_ok=True)
    names = os.listdir(fileDir)
    total = len(names)
    print(total)
    pick_count = int(total * rate)
    print(pick_count)
    for name in random.sample(names, pick_count):
        shutil.move(os.path.join(fileDir, name), os.path.join(tarDir, name))
    print(fileDir, tarDir, "Done!")
    return
def create_samplefile_from_pathfile(label_path, sample_result_path, rate=0.5):
    """Write a random sample of the lines of a label file to a new label file.

    Blank lines in ``label_path`` are ignored, so they are neither counted
    nor emitted as empty records.

    Args:
        label_path: UTF-8 label file to sample from, one record per line.
        sample_result_path: Output label file (UTF-8), one sampled record per
            line.
        rate: Fraction of records to keep; the number kept is
            ``int(len(records) * rate)``. Defaults to 0.5.

    Raises:
        ValueError: Propagated from ``random.sample`` when ``rate`` yields a
            sample size that is negative or larger than the number of records.
    """
    with open(label_path, "r", encoding='utf-8') as f:
        records = [line.strip() for line in f if line.strip()]
    total = len(records)
    print(total)
    pick_count = int(total * rate)
    print(pick_count)
    picked = random.sample(records, pick_count)
    with open(sample_result_path, "w", encoding='utf-8') as f:
        f.writelines(record + '\n' for record in picked)
    print(label_path, sample_result_path, "Done!")
    return
def batch_resize(img_dir, save_dir, output_size=512):
    """Resize every image in ``img_dir`` to a square and save it under ``save_dir``.

    Each entry of ``img_dir`` is loaded with ``cv2.imread`` in colour mode,
    resized to ``output_size`` x ``output_size`` and written to ``save_dir``
    under the same file name. Entries for which ``cv2.imread`` returns
    ``None`` are reported and skipped.

    Args:
        img_dir: Folder containing the input images.
        save_dir: Output folder; created if it does not exist.
        output_size: Width and height of the resized images in pixels.
            Defaults to 512.
    """
    # Imported here because the module never imports cv2 at file level;
    # this also keeps the other helpers usable without OpenCV installed.
    import cv2

    img_list = os.listdir(img_dir)
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)
    for img_name in tqdm(img_list):
        path = os.path.join(img_dir, img_name)
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        if img is None:
            print("This image is empty! Image path:" + path)
            continue
        resized_img = cv2.resize(img, (output_size, output_size))
        cv2.imwrite(os.path.join(save_dir, img_name), resized_img)