转换数据格式以finetuning glm时,发现对json的load、loads、dump、dumps不太熟悉,特此记录
json.load() 需传入已打开的文件对象(而不是文件路径),用于将整个文件内容一次性解析为python对象
import json

# The source file stores a single JSON array whose items are objects, so
# json.load() yields a list of dicts.
src_json = '/disk3/data/CMB/CMB-Clin/CMB-Clin-qa.json'

# json.load() takes an open file object, not a path. The encoding is given
# explicitly so that non-ASCII (Chinese) text is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open(src_json, 'r', encoding='utf-8') as f:
    obj = json.load(f)  # obj is a list
json.loads()需传入字符串,用于将字符串转换为python对象
import json

# json.loads() parses one JSON document held in a str.
# NOTE(review): parsing line by line assumes the file holds one complete JSON
# value per line (JSON Lines layout) -- confirm for the file being read.
# src_json is the path defined in the json.load() example.
with open(src_json, 'r', encoding='utf-8') as f:
    # Iterate the file lazily instead of materializing it with readlines().
    for line in f:
        line = line.strip()
        if not line:
            # An empty string is not valid JSON; skip blank/trailing lines
            # rather than letting json.loads() raise JSONDecodeError.
            continue
        obj = json.loads(line)  # rebound on every iteration
json.dump()需传入python对象和已打开的文件对象,将该对象序列化后一次性写入json文件
import json

# Save obj (a list of dicts) to tmp.json in a single call.
# ensure_ascii=False writes non-ASCII characters (e.g. Chinese) verbatim
# instead of as \uXXXX escapes; this applies on every OS, not only Linux.
# Because raw non-ASCII text is written, the file encoding is set explicitly
# so the output is valid UTF-8 regardless of the platform default.
with open('tmp.json', 'w', encoding='utf-8') as f:
    json.dump(obj, f, ensure_ascii=False)
json.dumps()将python对象序列化为json字符串并返回(不会写入文件,需自行写入)
import json

dest_json = '/disk2/glm-4-9b-chat/data/cmb_clin_qa_glm.json'

# new_line_lst is a list of dicts (built elsewhere); each dict is written as
# one JSON document per line.
# The encoding is explicit because ensure_ascii=False emits raw non-ASCII
# characters, which must not depend on the platform's default encoding.
with open(dest_json, 'w', encoding='utf-8') as f:
    for item in new_line_lst:
        # ensure_ascii=False keeps Chinese characters readable instead of
        # escaping them as \uXXXX; this is independent of the operating system.
        item_str = json.dumps(item, ensure_ascii=False)
        f.write(item_str + '\n')
本文由mdnice多平台发布