# 将一个大的 txt 文档按比例随机分割为两个 txt 文件，用于把一个文件划分为训练集和测试集。一般的方法会比较慢，这种方法更快。
import random
# Input file and the two output files of the split.
SOURCE_PATH = '/h/verification_code/train_txt/train.txt'
TRAIN_PATH = '/home/ation_code/train_txt/1.txt'
VAL_PATH = '/homcation_code/train_txt/2.txt'
# Fraction of the lines that goes into the first (training) file.
TRAIN_RATIO = 0.9


def split_lines(lines, train_ratio=TRAIN_RATIO):
    """Randomly split *lines* into a training part and a validation part.

    Args:
        lines: Sequence of text lines (as returned by ``readlines()``).
            The sequence itself is not modified.
        train_ratio: Fraction of the lines placed in the training part.
            The training size is ``int(len(lines) * train_ratio)``.

    Returns:
        A ``(train_list, val_list)`` tuple of lists. Together they contain
        every input line exactly once, in shuffled order, and every line
        ends with a newline.
    """
    # A file whose last line lacks a trailing newline would otherwise have
    # that line fused with its successor once shuffling moves it away from
    # the end, so make sure every line is newline-terminated first.
    shuffled = [
        line if line.endswith('\n') else line + '\n' for line in lines
    ]
    random.shuffle(shuffled)
    train_len = int(len(shuffled) * train_ratio)
    return shuffled[:train_len], shuffled[train_len:]


def main():
    """Read the source file, split it, and write the two result files."""
    # Read with the same encoding that is used for writing below.
    with open(SOURCE_PATH, encoding='utf-8') as f:
        lines = f.readlines()
    train_list, val_list = split_lines(lines)
    # Save the results.
    with open(TRAIN_PATH, 'w', encoding='utf-8') as f:
        f.writelines(train_list)
    with open(VAL_PATH, 'w', encoding='utf-8') as f:
        f.writelines(val_list)


if __name__ == '__main__':
    main()