使用 Python 脚本完成以下步骤:
通过 csv 模块读取对联数据
上联和下联分别从各自的 CSV 文件中读取
按行一一对应,合并上下联数据
转换成 JSONL 格式
"""Build an instruction-tuning JSONL file from two paired couplet CSV files.

The first column of ``IN_PATH`` is read as the first lines of the couplets
and the first column of ``OUT_PATH`` as the matching second lines.  ASCII
spaces are removed from every cell, rows are paired by position, and at most
``MAX_PAIRS`` pairs are written to ``OUTPUT_PATH`` -- one JSON object per
line with the keys ``instruction``, ``input`` and ``output``.
"""

# Import the required libraries.
import csv
import os
import sys
from itertools import islice

# NOTE(review): pandas is not used by this script; the import is retained only
# because the original listed it -- confirm nothing else needs it and drop it.
import pandas as pd

# Prompt stored verbatim in the "instruction" field of every record.
INSTRUCTION = '请根据上联,回复下联,要求对仗工整'

# Maximum number of couplet pairs written to the output file.
MAX_PAIRS = 4000

# os.path.join keeps the original Windows paths working and also makes the
# script usable on platforms whose separator is not a backslash.
IN_PATH = os.path.join('duilian', '1fixed_couplets_in-1.csv')
OUT_PATH = os.path.join('duilian', '1fixed_couplets_out-1.csv')
OUTPUT_PATH = 'duilian.jsonl'


def first_column(rows, limit=MAX_PAIRS):
    """Return the space-stripped first cell of up to *limit* non-empty rows.

    Args:
        rows: Iterable of rows, each a sequence of strings (for example a
            ``csv.reader``).
        limit: Maximum number of cells to return.

    Returns:
        A list of strings with every ASCII space removed.  Empty rows (which
        ``csv.reader`` yields for blank lines) are skipped instead of raising
        ``IndexError``.
    """
    cells = (row[0].replace(' ', '') for row in rows if row)
    # islice stops consuming the source as soon as *limit* cells were taken.
    return list(islice(cells, limit))


def read_first_column(path, limit=MAX_PAIRS):
    """Read up to *limit* first-column cells from the UTF-8 CSV file at *path*.

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline='' is what the csv module expects for files handed to a reader.
    with open(path, 'r', encoding='utf-8', newline='') as handle:
        return first_column(csv.reader(handle), limit)


def build_records(first_lines, second_lines, instruction=INSTRUCTION):
    """Pair the two sequences by position into instruction-tuning records.

    Pairing stops at the end of the shorter sequence, so inputs of unequal
    length never raise ``IndexError``.

    Returns:
        A list of dicts with the keys ``instruction``, ``input``, ``output``.
    """
    return [
        {'instruction': instruction, 'input': upper, 'output': lower}
        for upper, lower in zip(first_lines, second_lines)
    ]


def main():
    """Read both CSV files, merge them and write ``OUTPUT_PATH``."""
    first_lines = read_first_column(IN_PATH)
    second_lines = read_first_column(OUT_PATH)
    if len(first_lines) != len(second_lines):
        # Surface a likely data problem instead of silently truncating.
        print(
            f'warning: read {len(first_lines)} first lines but '
            f'{len(second_lines)} second lines; extra rows are ignored',
            file=sys.stderr,
        )
    records = build_records(first_lines, second_lines)

    # Imported here so the pure helpers above stay importable without the
    # third-party jsonlines package installed.
    import jsonlines

    with jsonlines.open(OUTPUT_PATH, mode='w') as writer:
        writer.write_all(records)


if __name__ == '__main__':
    main()