1、首先将json格式转换成jsonl格式文件,代码如下:
import json

import jsonlines

# Parse the complete source document in a single call.
with open('poet.song.1000.json', 'r', encoding='utf-8') as src:
    document = json.load(src)

# The parsed document is emitted as ONE record, so the output file consists
# of a single line holding everything that was loaded above.
with jsonlines.open('outputgusi.jsonl', 'w') as dst:
    dst.write_all([document])
import json

import jsonlines

# Parse the complete source document in a single call.
with open('poet.song.1000.json', 'r', encoding='utf-8') as src:
    document = json.load(src)

# The parsed document is emitted as ONE record, so the output file consists
# of a single line holding everything that was loaded above.
with jsonlines.open('outputgusi.jsonl', 'w') as dst:
    dst.write_all([document])
2、读取jsonl格式文件
根据文本内容分析,title为古诗的题目,paragraphs为古诗内容。其中部分数据的title以“句”字开头,并不是古诗的名字,需要剔除。代码如下:
import json

import jsonlines

# Prompt text attached to every generated record.
INSTRUCTION = '请根据古诗词的题目,回复古诗内容,要求对仗工整,押韵'

# Every line of the input is parsed, but only the most recently parsed value
# is kept; that value is then iterated as a sequence of poem objects.
poems = []
with open('outputgusi.jsonl', 'r', encoding='utf-8') as src:
    for line in src:
        poems = json.loads(line)
print(len(poems))

records = []
for poem in poems:
    if 'title' not in poem:
        continue
    title = poem['title']
    # Entries whose title begins with '句' are not real poem titles and are
    # dropped. Empty titles are dropped as well: indexing title[0] used to
    # raise IndexError on them, and an empty title is useless as a prompt.
    if not title or title.startswith('句'):
        continue
    record = {'instruction': INSTRUCTION, 'input': title}
    # 'output' is only set when the poem actually carries its text.
    if 'paragraphs' in poem:
        record['output'] = poem['paragraphs']
    records.append(record)

with jsonlines.open('outputgusi111.jsonl', 'w') as dst:
    dst.write_all(records)
import json

import jsonlines

# Prompt text attached to every generated record.
INSTRUCTION = '请根据古诗词的题目,回复古诗内容,要求对仗工整,押韵'

# Every line of the input is parsed, but only the most recently parsed value
# is kept; that value is then iterated as a sequence of poem objects.
poems = []
with open('outputgusi.jsonl', 'r', encoding='utf-8') as src:
    for line in src:
        poems = json.loads(line)
print(len(poems))

records = []
for poem in poems:
    if 'title' not in poem:
        continue
    title = poem['title']
    # Entries whose title begins with '句' are not real poem titles and are
    # dropped. Empty titles are dropped as well: indexing title[0] used to
    # raise IndexError on them, and an empty title is useless as a prompt.
    if not title or title.startswith('句'):
        continue
    record = {'instruction': INSTRUCTION, 'input': title}
    # 'output' is only set when the poem actually carries its text.
    if 'paragraphs' in poem:
        record['output'] = poem['paragraphs']
    records.append(record)

with jsonlines.open('outputgusi111.jsonl', 'w') as dst:
    dst.write_all(records)