# -*- coding: utf-8 -*-
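"""
Created on Thu Jul 8 15:23:08 2021
@author: haijiao

Randomly pick a subset of the lines in a dataset list file (train.txt) and
write them to val.txt as the validation (test) set.

file_path    -- directory that contains train.txt
path_merges  -- directory the output file is written to
out_file_path -- full path of the generated val.txt
k            -- number of lines sampled (1/30 of the dataset by default);
                change it to adjust the share of the validation set
"""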
import os
import random
# Path configuration: directory holding train.txt and directory for val.txt.
file_path = "./"
path_merges = "./"
in_file_path = os.path.join(file_path, 'train.txt')
out_file_path = os.path.join(path_merges, 'val.txt')

# One out of every VAL_DIVISOR lines of train.txt is sampled into val.txt.
VAL_DIVISOR = 30

# Make sure the output directory exists (creating missing parents as well)
# and discard any val.txt left over from a previous run.
os.makedirs(path_merges, exist_ok=True)
if os.path.exists(out_file_path):
    os.remove(out_file_path)

# Load every line of train.txt, line terminators included.
with open(in_file_path, 'r') as f:
    train_list = f.readlines()

# A file without a trailing newline yields a last entry with no terminator.
# If that entry were sampled, sorting could move it to the middle of val.txt
# where it would fuse with the following line, so terminate it up front.
if train_list and not train_list[-1].endswith('\n'):
    train_list[-1] += '\n'

# Draw the validation entries without replacement, then sort them so the
# output file has a stable, readable order.
k = len(train_list) // VAL_DIVISOR
val_list = random.sample(train_list, k)
val_list.sort()

# Write the sampled lines to val.txt; each already carries its newline.
with open(out_file_path, 'w') as f1:
    f1.writelines(val_list)

print(f"训练集总数量:{len(train_list)}")
print(f"测试集总数量:{k}")
12-26
03-16
660
03-02
4064
01-04
2075
11-14
4598
03-19
3489