if 元素 not in 字典名.keys():
    字典名[元素] = []
字典名[元素].append(值)
========================================================
import json
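"""
Deduplicate the full word dictionary and store it as two JSON files.
Then deduplicate it again, keeping only multi-character words (no single
characters), and store that as two more JSON files.
"""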
def _dedupe_neighbours(chains):
    """Return a copy of *chains* with every value list de-duplicated and sorted."""
    # Sorting keeps the JSON output reproducible; the iteration order of a
    # set of strings is not stable from one interpreter run to the next.
    return {word: sorted(set(neighbours)) for word, neighbours in chains.items()}


def _multi_char_only(chains):
    """Return *chains* restricted to multi-character neighbours.

    Keys of length 1 are skipped, neighbours of length <= 1 are discarded,
    and keys left with no neighbours are omitted. The surviving neighbours
    are de-duplicated and sorted.
    """
    filtered = {}
    for word, neighbours in chains.items():
        if len(word) == 1:
            continue
        long_neighbours = {n for n in neighbours if len(n) > 1}
        if long_neighbours:
            filtered[word] = sorted(long_neighbours)
    return filtered


def _write_json(path, data, **dumps_options):
    """Write *data* to *path* as UTF-8 JSON, keeping non-ASCII text unescaped."""
    with open(path, 'w', encoding='utf-8') as f:
        f.write(json.dumps(data, ensure_ascii=False, **dumps_options))


def len_any(cidui):
    """Build forward/backward neighbour tables from word triples and save them.

    Each item of *cidui* is one text line. Everything after the first space
    is ignored; the part before it must consist of exactly three
    tab-separated words ``w1<TAB>w2<TAB>w3``. Lines that do not split into
    three fields are skipped.

    For every accepted line, ``w2`` is recorded as a follower of ``w1`` and
    ``w3`` as a follower of ``w2`` (forward table); ``w2`` is recorded as a
    predecessor of ``w3`` and ``w1`` as a predecessor of ``w2`` (backward
    table).

    Four files are written to the current working directory:

    * ``front_after_res.json`` / ``after_front_res.json`` -- the forward and
      backward tables with duplicate neighbours removed.
    * ``lentwo_front_after.json`` / ``lentwo_after_front.json`` -- the same
      tables restricted to multi-character neighbours, skipping
      single-character keys.

    The filtered forward table is also printed to stdout.

    Args:
        cidui: Iterable of ``str`` lines, e.g. an open text file.

    Returns:
        None.
    """
    front_after = {}
    after_front = {}
    for line in cidui:
        # Strip the line terminator first: when no space follows the third
        # field, the newline would otherwise stay glued to the last word.
        fields = line.rstrip('\r\n').split(' ')[0].split('\t')
        if len(fields) != 3:
            # Malformed line (wrong number of fields): skip it.
            continue
        first, middle, last = fields
        front_after.setdefault(first, []).append(middle)
        front_after.setdefault(middle, []).append(last)
        after_front.setdefault(last, []).append(middle)
        after_front.setdefault(middle, []).append(first)

    # Deduplicate the followers / predecessors of every word.
    _write_json('front_after_res.json', _dedupe_neighbours(front_after))
    _write_json('after_front_res.json', _dedupe_neighbours(after_front))

    # Same tables, keeping only multi-character neighbours.
    lentwo_front_after = _multi_char_only(front_after)
    lentwo_after_front = _multi_char_only(after_front)
    _write_json('lentwo_front_after.json', lentwo_front_after, skipkeys=True)
    _write_json('lentwo_after_front.json', lentwo_after_front, skipkeys=True)
    print(lentwo_front_after)
# Feed the word-triple corpus to len_any; the context manager closes the
# file once processing has finished.
with open('cidui0505.txt', 'r', encoding='utf-8') as cidui:
    len_any(cidui)