def read_json(file_path):
    """Print every record of a JSON Lines file.

    The file is expected to hold one JSON document per line. Each non-blank
    line is parsed with ``json.loads`` and printed, followed by a
    ``++++++++++++`` separator line.

    :param file_path: path of the UTF-8 encoded file to read
    :return: None
    :raises json.JSONDecodeError: if a non-blank line is not valid JSON
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        # Iterate the file object directly so the whole file is never held
        # in memory at once.
        for line in f:
            # Blank lines (e.g. a trailing empty line) are not JSON
            # documents and would make json.loads raise; skip them.
            if not line.strip():
                continue
            line_dict = json.loads(line)
            print(line_dict)
            print('++++++++++++')
def save_json_result(dir_path, person_feat, person_index):
    """Write per-person features and label indices as JSON Lines.

    Creates (or overwrites) ``text_person.json`` inside ``dir_path``. Each
    line of the file is one JSON object with the keys ``data_id``,
    ``feat_value`` and ``text_LabelIndex``.

    :param dir_path: directory that receives ``text_person.json``; a trailing
        path separator is optional
    :param person_feat: mapping from id to feature value; values must be
        JSON-serialisable
    :param person_index: mapping from id to label index; must contain every
        key of ``person_feat``
    :return: None
    :raises KeyError: if an id of ``person_feat`` is missing in
        ``person_index``
    """
    # os.path.join instead of string concatenation, so a dir_path without a
    # trailing separator does not end up as e.g. "outtext_person.json".
    out_path = os.path.join(dir_path, 'text_person.json')
    with open(out_path, 'w', encoding='utf-8') as f:
        for pid, feat in person_feat.items():
            out = {
                'data_id': pid,
                'feat_value': feat,
                'text_LabelIndex': person_index[pid],
            }
            # ensure_ascii=False keeps non-ASCII text readable in the file.
            json.dump(out, f, ensure_ascii=False)
            f.write('\n')
def make_path(params):
    """Make folders for training and evaluation.

    Ensures that ``params.result_path``, ``params.ckpt_path`` and a ``log``
    directory (relative to the current working directory) exist.

    :param params: object exposing ``result_path`` and ``ckpt_path``
        attributes
    :return: None
    :raises FileExistsError: if one of the paths exists but is not a
        directory
    """
    # exist_ok=True avoids the check-then-create race of
    # "if not isdir: makedirs" when several processes start together.
    for path in (params.result_path, params.ckpt_path, "log"):
        os.makedirs(path, exist_ok=True)
def writefile(filename, list, save_dir, sep=' '):
    """Save processed annotation data to a file.

    Every item of ``list`` becomes one output line: an item equal to
    ``'\\n'`` is written as an empty line, any other item is joined with
    ``sep`` and terminated with a newline.

    :param filename: name of the file to write
    :param list: items to save; each is either ``'\\n'`` or an iterable of
        strings
    :param save_dir: directory in which ``filename`` is created
    :param sep: separator between a character and its tag
    :return: None
    """
    save_file = os.path.join(save_dir, filename)
    with open(save_file, 'w', encoding='utf-8') as f:
        for item in list:
            if item == '\n':
                f.write('\n')
            else:
                f.write(sep.join(item) + '\n')


# Read files 00 and 81 to form train.txt; 82 is used as test.txt.
def concate_file(dir, files, filename, save_path, sep=' '):
    """Concatenate several annotation files into one output file.

    Reads ``<dir>/<name>.txt`` for every ``name`` in ``files``, splits each
    line on a single space and collects the token lists. After every line
    whose first two tokens are ``'。'`` and ``'O'`` a blank line is inserted.
    The collected lines are written to ``save_path/filename`` through
    ``writefile``.

    :param dir: directory containing the input ``.txt`` files
    :param files: file names without the ``.txt`` extension
    :param filename: name of the output file
    :param save_path: directory that receives the output file
    :param sep: currently unused; kept so existing callers keep working
    :return: None
    """
    sentences = []
    for name in files:
        t_file = os.path.join(dir, name + '.txt')
        # A context manager closes the handle, which the bare iteration over
        # codecs.open() never did.
        with open(t_file, 'r', encoding='utf8') as src:
            for raw_line in src:
                # Strip LF as well as CRLF: removing only '\r\n' left a
                # newline glued to the last token of LF-terminated files,
                # which hid the sentence boundary and doubled line breaks.
                tokens = raw_line.rstrip('\r\n').split(' ')
                sentences.append(tokens)
                # Slice comparison cannot raise IndexError on a line that
                # holds only a single token.
                if tokens[:2] == ['。', 'O']:
                    sentences.append('\n')
    writefile(filename, sentences, save_path)