已有包含所有图像名称的文件file.txt,现按比例对其进行划分。
按照 train:test:val = 8:1:1 的比例,对file.txt文件中的图片名称进行随机划分。
先获取file.txt文件的图像名称保存为list,再对list进行比例随机划分(先随机打乱顺序,再按比例划分),得到保存有3个不同集合的子list。最后返回并保存写入到对应的txt文件中。
# train/val/test = 8/1/1
# encoding: utf-8
import os
import random
def ran_split(full_list, shuffle=False, ratio1=0.8, ratio2=0.1):
    """Split a list into three consecutive sublists by ratio.

    Args:
        full_list: Items to split. The list itself is never modified.
        shuffle: When True, the items are randomly reordered (with
            ``random.shuffle`` on a copy) before being split.
        ratio1: Fraction of the items placed in the first sublist.
        ratio2: Fraction of the items placed in the second sublist.

    Returns:
        A list ``[sublist_1, sublist_2, sublist_3]``. The sizes of the first
        two are ``int(len(full_list) * ratio)`` (truncated); the third
        sublist receives everything that is left. Empty inputs, or inputs
        too small to fill the first sublist, still yield three lists.
    """
    # Work on a copy so shuffling does not reorder the caller's list.
    items = list(full_list)
    if shuffle:
        random.shuffle(items)

    n_total = len(items)
    offset1 = int(n_total * ratio1)
    offset2 = int(n_total * ratio2) + offset1

    # Slicing copes with empty and tiny inputs on its own, so the result
    # always has exactly three sublists and callers can index it safely.
    return [items[:offset1], items[offset1:offset2], items[offset2:]]
def read_file(filepath):
    """Read a text file and return its non-blank lines as a list.

    Args:
        filepath: Path of the text file holding one entry per line.

    Returns:
        A list with one string per line, line terminators removed. Blank
        and whitespace-only lines are skipped so they do not end up as
        empty entries in the result.
    """
    # NOTE(review): the file is opened with the platform default encoding,
    # exactly as before -- confirm the encoding of the list file.
    with open(filepath, 'r') as fr:
        content = fr.read()
    return [line for line in content.splitlines() if line.strip()]
def write_file(dst1, txt):
    """Write every item of ``txt`` to the file ``dst1``, one per line.

    Args:
        dst1: Path of the output file; it is opened in ``'w'`` mode, so an
            existing file is overwritten.
        txt: Iterable of items; each one is converted with ``str()`` and
            followed by a newline.
    """
    # The context manager closes the file, so buffered data is flushed
    # and the handle is released even if a write raises.
    with open(dst1, 'w') as fo:
        fo.writelines(str(item) + '\n' for item in txt)
if __name__ == "__main__":
    # Directory that holds file.txt; the split files are written into it too.
    root_path = r'F:\all_date\WHU'
    from_txt = 'file.txt'
    # Output file names, in the same order as the sublists from ran_split.
    txts = ['train.txt', 'test.txt', 'val.txt']

    txt_list = read_file(os.path.join(root_path, from_txt))
    sublists = ran_split(txt_list, shuffle=True, ratio1=0.8, ratio2=0.1)

    # Note: the number of sublists must match the number of output names.
    for i, txt_name in enumerate(txts):
        write_file(os.path.join(root_path, txt_name), sublists[i])