# Split your training text (into train.txt / test.txt / vaild.txt).
# Path of the text file to split; replace with the real file name.
SOURCE_PATH = r"要处理的原文本.txt"

# Split boundaries, counted in lines from the start of the source file
# (1-based, inclusive).  Both MUST be filled in before running the script:
#   TRAIN_END - lines 1..TRAIN_END go to train.txt
#   TEST_END  - lines TRAIN_END+1..TEST_END go to test.txt.  This is a
#               cumulative line number, NOT the size of the second part:
#               for 1000 train lines and 200 test lines use 1000 and 1200.
# Every remaining line goes to vaild.txt.
TRAIN_END = None  # number: line count of the first part
TEST_END = None   # number: last line number belonging to the second part


def split_lines(source_path: str, train_end: int, test_end: int,
                train_path: str = r"train.txt",
                test_path: str = r"test.txt",
                valid_path: str = r"vaild.txt") -> None:
    """Split the lines of a UTF-8 text file into three consecutive parts.

    Lines are numbered from 1.  Lines ``1..train_end`` are appended to
    *train_path*, lines ``train_end+1..test_end`` to *test_path*, and all
    remaining lines to *valid_path*.

    Output files are opened in append mode, so running the split twice
    against the same outputs duplicates their content.  An output file is
    only opened (and therefore only created) when at least one line is
    routed to it.

    Args:
        source_path: Text file to read; decoded as UTF-8.
        train_end: Last line number (inclusive) of the first part.
        test_end: Last line number (inclusive) of the second part.  When it
            is not greater than *train_end* the second part is empty.
        train_path: Destination of the first part.
        test_path: Destination of the second part.
        valid_path: Destination of the remaining lines.
            NOTE(review): "vaild.txt" looks like a misspelling of
            "valid.txt"; the name is kept in case other tooling expects
            it - confirm before renaming.

    Raises:
        OSError: If the source cannot be read or an output cannot be written.
        UnicodeDecodeError: If the source is not valid UTF-8.
    """
    with open(source_path, 'r', encoding='utf-8') as source:
        lines = source.readlines()

    # Clamp the cut points so that negative or out-of-order boundaries give
    # the same result as the chained comparisons
    # ``0 <= n <= train_end`` / ``train_end < n <= test_end`` would.
    first_cut = max(train_end, 0)
    second_cut = max(test_end, first_cut)

    parts = (
        (train_path, lines[:first_cut]),
        (test_path, lines[first_cut:second_cut]),
        (valid_path, lines[second_cut:]),
    )
    for path, chunk in parts:
        if not chunk:
            # Nothing routed here: do not create an empty file.
            continue
        # Open each destination once instead of once per line.
        with open(path, 'a', encoding='utf-8') as out:
            out.writelines(chunk)


if __name__ == "__main__":
    if TRAIN_END is None or TEST_END is None:
        raise SystemExit(
            "Set TRAIN_END and TEST_END at the top of this script "
            "before running it."
        )
    split_lines(SOURCE_PATH, TRAIN_END, TEST_END)