# -*- coding: utf-8 -*-
import json
# Plain-text source files, one example per line.
path_adv = './test_set_1/AdvertiseGen/adv.src'
path_qg = './test_set_1/DuReaderQG/qg.src'
path_lcsts = './test_set_1/LCSTS_new/LCSTS.src'
# JSON-lines destination files, one JSON object per source line.
path_adv_js = './test_set_1/AdvertiseGen/test.json'
path_qg_js = './test_set_1/DuReaderQG/test.json'
path_lcsts_js = './test_set_1/LCSTS_new/test.json'
# Names of the conversions to run; valid names: 'adv', 'qg', 'LCSTS_new'.
select = {'adv', 'qg', 'LCSTS_new'}


def _write_jsonl(src_path, dst_path, make_record):
    """Write one JSON object per line of ``src_path`` into ``dst_path``.

    Args:
        src_path: UTF-8 text file to read, one example per line.
        dst_path: UTF-8 file to (over)write with JSON lines.
        make_record: Callable ``(index, line) -> dict`` producing the object
            written for the zero-based line ``index`` with raw text ``line``.

    Raises:
        OSError: If either file cannot be opened.
    """
    # The source is opened first so that a missing source raises before the
    # destination is truncated; both handles are closed even on error.
    with open(src_path, 'r', encoding='utf-8') as src, \
            open(dst_path, 'w', encoding='utf-8') as dst:
        # Iterating the handle streams lines instead of loading them all.
        for index, line in enumerate(src):
            # ensure_ascii=False keeps non-ASCII text readable in the output.
            json.dump(make_record(index, line), dst, ensure_ascii=False)
            dst.write('\n')


def convert_adv(src_path, dst_path):
    """Convert an AdvertiseGen source file to JSON lines.

    Each line becomes ``{"content": <stripped line>, "summary": ""}``.
    """
    def make_record(index, line):
        return {'content': line.strip(), 'summary': ''}

    _write_jsonl(src_path, dst_path, make_record)


def convert_qg(src_path, dst_path):
    """Convert a DuReaderQG source file to JSON lines.

    Each line is split on whitespace; the first field becomes ``context``,
    the second becomes ``answer`` and any further fields are ignored.
    ``question`` is left empty and ``id`` is the zero-based line number.

    Raises:
        IndexError: If a line has fewer than two whitespace-separated fields.
    """
    def make_record(index, line):
        fields = line.split()  # split once, reuse both fields
        return {
            'context': fields[0],
            'answer': fields[1],
            'question': '',
            'id': index,
        }

    _write_jsonl(src_path, dst_path, make_record)


def convert_lcsts(src_path, dst_path):
    """Convert an LCSTS source file to JSON lines.

    Each line becomes
    ``{"id": <line number>, "summary": "", "content": <stripped line>}``.
    """
    def make_record(index, line):
        return {'id': index, 'summary': '', 'content': line.strip()}

    _write_jsonl(src_path, dst_path, make_record)


def main():
    """Run every conversion named in ``select``: adv, qg, then LCSTS_new."""
    jobs = (
        ('adv', convert_adv, path_adv, path_adv_js),
        ('qg', convert_qg, path_qg, path_qg_js),
        ('LCSTS_new', convert_lcsts, path_lcsts, path_lcsts_js),
    )
    for name, convert, src_path, dst_path in jobs:
        if name in select:
            convert(src_path, dst_path)


if __name__ == '__main__':
    main()
# Save the files as JSON files.
# (Web page footer: latest recommended article published 2024-04-03 14:24:35.)