import pandas as pd
import os
import json
# Load the raw title/reply pairs from the source file.
file_data = pd.read_csv('文件.txt')
# NOTE: outside an interactive notebook this expression has no visible effect;
# it is kept only so the cell-style preview still works when run in a notebook.
file_data.head()
# Report how many missing values each column has before cleaning.
print(file_data.isnull().sum())
# A row is unusable if either the prompt ('title') or the answer ('reply') is missing.
file_data.dropna(subset=['title', 'reply'], how='any', inplace=True)

# Shuffle all remaining rows, then split 98% / 2% into train / test.
shuffled = file_data.sample(frac=1)
cut = int(file_data.shape[0] * .98)
# The original script never defined train_data (NameError in the loop below);
# it is the complement of the test slice.
train_data = shuffled[:cut]
test_data = shuffled[cut:]

# Write the training split as JSON Lines: one {"input", "target"} object per line.
with open('train.jsonl', mode='w', encoding='utf-8') as file:
    for _, row in train_data.iterrows():
        line = {"input": row['title'], "target": row['reply']}
        # ensure_ascii=False keeps non-ASCII text readable instead of \u-escaped.
        # The result is bound to a name that does not shadow the builtin `str`.
        json_line = json.dumps(line, ensure_ascii=False)
        file.write(json_line + '\n')