import codecs
import re

import jieba.analyse
import pandas as pd


def word_replace(xianbingshi, hospital1, output_path=None):
    """Tag every hospital name in a text file with ``[hospital]``.

    The vocabulary file ``hospital1`` is read as UTF-8, one term per line.
    Terms are stripped, de-duplicated and applied longest-first so that a
    short term cannot break up a longer term that contains it.  Every line
    of ``xianbingshi`` gets the replacements applied, is stripped of
    surrounding whitespace, echoed to stdout and finally written to the
    output file (UTF-8, one line per input line).

    Blank vocabulary lines are ignored: ``str.replace('', tag)`` would
    otherwise insert the tag between every character of the text.

    Args:
        xianbingshi: Path of the source text file (read only).
        hospital1: Path of the hospital vocabulary file.
        output_path: Where to write the tagged text.  When omitted, the
            original hard-coded location is used, so existing callers are
            unaffected.
    """
    if output_path is None:
        output_path = r'C:\Users\Administrator.SC-201812211013\PycharmProjects\词表工作代码\yiwoqu\code\xianbingshi_write_sub.txt'

    # Load the vocabulary: strip, drop blanks, de-duplicate (first occurrence
    # wins).  The set makes the duplicate check O(1) per term.
    hospital = []
    seen = set()
    with codecs.open(hospital1, 'r', 'utf8') as f:
        for raw in f:
            term = raw.strip()
            if term and term not in seen:
                seen.add(term)
                hospital.append(term)
    # Longest terms first, so a full name is tagged before any of its parts.
    hospital.sort(key=len, reverse=True)

    # Tagging priority noted by the original author:
    # hospital, operation, test, symptom, disease, organ, time.
    data = []
    with codecs.open(xianbingshi, 'r', 'utf8') as f:
        for line in f:
            for ho in hospital:
                # replace() is a no-op when the term is absent, so no
                # separate membership check is needed.
                line = line.replace(ho, '[hospital]')
            line = line.strip()
            data.append(line)
            print(line)

    with codecs.open(output_path, 'w', 'utf8') as f:
        f.writelines(line + '\n' for line in data)
def word_replace3(xianbingshi2, operation1):
    """Tag every operation term in a text file with ``[operation]``.

    The vocabulary file ``operation1`` is read as UTF-8, one term per line.
    Terms are stripped, de-duplicated and applied longest-first so that a
    short term cannot break up a longer term that contains it.  Every line
    of ``xianbingshi2`` gets the replacements applied, is stripped of
    surrounding whitespace and echoed to stdout; the file is then
    overwritten in place with the tagged lines.

    Blank vocabulary lines are ignored: ``str.replace('', tag)`` would
    otherwise insert the tag between every character of the text.

    Args:
        xianbingshi2: Path of the text file to tag; it is overwritten.
        operation1: Path of the operation vocabulary file.
    """
    # Load the vocabulary: strip, drop blanks, de-duplicate (first occurrence
    # wins).  The set makes the duplicate check O(1) per term.
    operation = []
    seen = set()
    with codecs.open(operation1, 'r', 'utf8') as f:
        for raw in f:
            term = raw.strip()
            if term and term not in seen:
                seen.add(term)
                operation.append(term)
    # Longest terms first, so a full name is tagged before any of its parts.
    operation.sort(key=len, reverse=True)

    # Tagging priority noted by the original author:
    # hospital, operation, test, symptom, disease, organ, time.
    data = []
    with codecs.open(xianbingshi2, 'r', 'utf8') as f:
        for line in f:
            for op in operation:
                # replace() is a no-op when the term is absent.
                line = line.replace(op, '[operation]')
            line = line.strip()
            data.append(line)
            print(line)

    # The whole input was buffered above, so rewriting the same path is safe.
    with codecs.open(xianbingshi2, 'w', 'utf8') as f:
        f.writelines(line + '\n' for line in data)
def word_replace1(xianbingshi2, disease1):
    """Tag multi-character disease terms in a text file with ``[disease]``.

    The vocabulary file ``disease1`` is read as UTF-8, one term per line.
    Terms are stripped and de-duplicated; only terms longer than one
    character are used in this pass.  They are applied longest-first so that
    a short term cannot break up a longer term that contains it.  Every line
    of ``xianbingshi2`` gets the replacements applied, is stripped of
    surrounding whitespace and echoed to stdout; the file is then
    overwritten in place with the tagged lines.

    Args:
        xianbingshi2: Path of the text file to tag; it is overwritten.
        disease1: Path of the disease vocabulary file.
    """
    # Load the vocabulary once, filtering at load time instead of re-checking
    # the length for every (line, term) pair.  The set makes the duplicate
    # check O(1) per term.
    disease = []
    seen = set()
    with codecs.open(disease1, 'r', 'utf8') as f:
        for raw in f:
            term = raw.strip()
            if len(term) > 1 and term not in seen:
                seen.add(term)
                disease.append(term)
    # Longest terms first, so a full name is tagged before any of its parts.
    disease.sort(key=len, reverse=True)

    # Tagging priority noted by the original author:
    # hospital, operation, test, symptom, disease, organ, time.
    data = []
    with codecs.open(xianbingshi2, 'r', 'utf8') as f:
        for line in f:
            for di in disease:
                # replace() is a no-op when the term is absent.
                line = line.replace(di, '[disease]')
            line = line.strip()
            data.append(line)
            print(line)

    # The whole input was buffered above, so rewriting the same path is safe.
    with codecs.open(xianbingshi2, 'w', 'utf8') as f:
        f.writelines(line + '\n' for line in data)
def word_replace2(xianbingshi2, symptom1):
    """Tag multi-character symptom terms in a text file with ``[symptom]``.

    The vocabulary file ``symptom1`` is read as UTF-8, one term per line.
    Terms are stripped and de-duplicated; only terms longer than one
    character are used in this pass.  They are applied longest-first so that
    a short term cannot break up a longer term that contains it.  Every line
    of ``xianbingshi2`` gets the replacements applied, is stripped of
    surrounding whitespace and echoed to stdout; the file is then
    overwritten in place with the tagged lines.

    Args:
        xianbingshi2: Path of the text file to tag; it is overwritten.
        symptom1: Path of the symptom vocabulary file.
    """
    # Load the vocabulary once, filtering at load time instead of re-checking
    # the length for every (line, term) pair.  The set makes the duplicate
    # check O(1) per term.
    symptom = []
    seen = set()
    with codecs.open(symptom1, 'r', 'utf8') as f:
        for raw in f:
            term = raw.strip()
            if len(term) > 1 and term not in seen:
                seen.add(term)
                symptom.append(term)
    # Longest terms first, so a full name is tagged before any of its parts.
    symptom.sort(key=len, reverse=True)

    # Tagging priority noted by the original author:
    # hospital, operation, test, symptom, disease, organ, time.
    data = []
    with codecs.open(xianbingshi2, 'r', 'utf8') as f:
        for line in f:
            for sy in symptom:
                # replace() is a no-op when the term is absent.
                line = line.replace(sy, '[symptom]')
            line = line.strip()
            data.append(line)
            print(line)

    # The whole input was buffered above, so rewriting the same path is safe.
    with codecs.open(xianbingshi2, 'w', 'utf8') as f:
        f.writelines(line + '\n' for line in data)
def word_replace4(xianbingshi2, test1):
    """Tag every examination/test term in a text file with ``[test]``.

    The vocabulary file ``test1`` is read as UTF-8, one term per line.
    Terms are stripped, de-duplicated and applied longest-first so that a
    short term cannot break up a longer term that contains it.  Every line
    of ``xianbingshi2`` gets the replacements applied, is stripped of
    surrounding whitespace and echoed to stdout; the file is then
    overwritten in place with the tagged lines.

    Blank vocabulary lines are ignored: ``str.replace('', tag)`` would
    otherwise insert the tag between every character of the text.

    Args:
        xianbingshi2: Path of the text file to tag; it is overwritten.
        test1: Path of the examination/test vocabulary file.
    """
    # Load the vocabulary: strip, drop blanks, de-duplicate (first occurrence
    # wins).  The set makes the duplicate check O(1) per term.
    terms = []
    seen = set()
    with codecs.open(test1, 'r', 'utf8') as f:
        for raw in f:
            term = raw.strip()
            if term and term not in seen:
                seen.add(term)
                terms.append(term)
    # Longest terms first, so a full name is tagged before any of its parts.
    terms.sort(key=len, reverse=True)

    # Tagging priority noted by the original author:
    # hospital, operation, test, symptom, disease, organ, time.
    data = []
    with codecs.open(xianbingshi2, 'r', 'utf8') as f:
        for line in f:
            for te in terms:
                # replace() is a no-op when the term is absent.
                line = line.replace(te, '[test]')
            line = line.strip()
            data.append(line)
            print(line)

    # The whole input was buffered above, so rewriting the same path is safe.
    with codecs.open(xianbingshi2, 'w', 'utf8') as f:
        f.writelines(line + '\n' for line in data)
def word_replace5(xianbingshi2, time1):
    """Tag every time expression in a text file with ``[time]``.

    The vocabulary file ``time1`` is read as UTF-8, one term per line.
    Terms are stripped, de-duplicated and applied longest-first so that a
    short term cannot break up a longer term that contains it.  Every line
    of ``xianbingshi2`` gets the replacements applied, is stripped of
    surrounding whitespace and echoed to stdout; the file is then
    overwritten in place with the tagged lines.

    Blank vocabulary lines are ignored: ``str.replace('', tag)`` would
    otherwise insert the tag between every character of the text.

    Args:
        xianbingshi2: Path of the text file to tag; it is overwritten.
        time1: Path of the time-expression vocabulary file.
    """
    # Load the vocabulary: strip, drop blanks, de-duplicate (first occurrence
    # wins).  Named ``time_terms`` rather than ``time`` so the local list
    # does not read like the standard-library module.
    time_terms = []
    seen = set()
    with codecs.open(time1, 'r', 'utf8') as f:
        for raw in f:
            term = raw.strip()
            if term and term not in seen:
                seen.add(term)
                time_terms.append(term)
    # Longest terms first, so a full expression is tagged before its parts.
    time_terms.sort(key=len, reverse=True)

    # Tagging priority noted by the original author:
    # hospital, operation, test, symptom, disease, organ, time.
    data = []
    with codecs.open(xianbingshi2, 'r', 'utf8') as f:
        for line in f:
            for t in time_terms:
                # replace() is a no-op when the term is absent.
                line = line.replace(t, '[time]')
            line = line.strip()
            data.append(line)
            print(line)

    # The whole input was buffered above, so rewriting the same path is safe.
    with codecs.open(xianbingshi2, 'w', 'utf8') as f:
        f.writelines(line + '\n' for line in data)
def word_replace6(xianbingshi2, organ1):
    """Tag multi-character organ terms in a text file with ``[organ]``.

    The vocabulary file ``organ1`` is read as UTF-8, one term per line.
    Terms are stripped and de-duplicated; only terms longer than one
    character are used in this pass.  They are applied longest-first so that
    a short term cannot break up a longer term that contains it.  Every line
    of ``xianbingshi2`` gets the replacements applied, is stripped of
    surrounding whitespace and echoed to stdout; the file is then
    overwritten in place with the tagged lines.

    Args:
        xianbingshi2: Path of the text file to tag; it is overwritten.
        organ1: Path of the organ vocabulary file.
    """
    # Load the vocabulary once, filtering at load time instead of re-checking
    # the length for every (line, term) pair.  The set makes the duplicate
    # check O(1) per term.
    organ = []
    seen = set()
    with codecs.open(organ1, 'r', 'utf8') as f:
        for raw in f:
            term = raw.strip()
            if len(term) > 1 and term not in seen:
                seen.add(term)
                organ.append(term)
    # Longest terms first, so a full name is tagged before any of its parts.
    organ.sort(key=len, reverse=True)

    # Tagging priority noted by the original author:
    # hospital, operation, test, symptom, disease, organ, time.
    data = []
    with codecs.open(xianbingshi2, 'r', 'utf8') as f:
        for line in f:
            for o in organ:
                # replace() is a no-op when the term is absent.
                line = line.replace(o, '[organ]')
            line = line.strip()
            data.append(line)
            print(line)

    # The whole input was buffered above, so rewriting the same path is safe.
    with codecs.open(xianbingshi2, 'w', 'utf8') as f:
        f.writelines(line + '\n' for line in data)
def word_replace7(xianbingshi2, symptom1):
    """Tag single-character symptom terms in a text file with ``[symptom]``.

    The vocabulary file ``symptom1`` is read as UTF-8, one term per line.
    Only terms that are exactly one character long after stripping are used
    in this pass; each accepted term is echoed to stdout as it is loaded.
    Every line of ``xianbingshi2`` gets the replacements applied, is
    stripped of surrounding whitespace and echoed to stdout; the file is
    then overwritten in place with the tagged lines.

    Args:
        xianbingshi2: Path of the text file to tag; it is overwritten.
        symptom1: Path of the symptom vocabulary file.
    """
    # Collect the distinct one-character terms in first-seen order.  No
    # length sort is needed: every term has the same length.  The set makes
    # the duplicate check O(1) per term.
    symptom = []
    seen = set()
    with codecs.open(symptom1, 'r', 'utf8') as f:
        for raw in f:
            term = raw.strip()
            if len(term) == 1 and term not in seen:
                seen.add(term)
                symptom.append(term)
                print(term)

    # Tagging priority noted by the original author:
    # hospital, operation, test, symptom, disease, organ, time.
    data = []
    with codecs.open(xianbingshi2, 'r', 'utf8') as f:
        for line in f:
            for sy in symptom:
                line = line.replace(sy, '[symptom]')
            line = line.strip()
            data.append(line)
            print(line)

    # The whole input was buffered above, so rewriting the same path is safe.
    with codecs.open(xianbingshi2, 'w', 'utf8') as f:
        f.writelines(line + '\n' for line in data)
def word_replace8(xianbingshi2, disease1):
    """Tag single-character disease terms in a text file with ``[disease]``.

    The vocabulary file ``disease1`` is read as UTF-8, one term per line.
    Only terms that are exactly one character long after stripping are used
    in this pass.  Every line of ``xianbingshi2`` gets the replacements
    applied, is stripped of surrounding whitespace and echoed to stdout; the
    file is then overwritten in place with the tagged lines.

    Args:
        xianbingshi2: Path of the text file to tag; it is overwritten.
        disease1: Path of the disease vocabulary file.
    """
    # Collect the distinct one-character terms in first-seen order.  The
    # filter must test the term's *length*: comparing the string itself to
    # the integer 1 is always False and would leave the vocabulary empty.
    # No length sort is needed because every term has the same length.
    disease = []
    seen = set()
    with codecs.open(disease1, 'r', 'utf8') as f:
        for raw in f:
            term = raw.strip()
            if len(term) == 1 and term not in seen:
                seen.add(term)
                disease.append(term)

    # Tagging priority noted by the original author:
    # hospital, operation, test, symptom, disease, organ, time.
    data = []
    with codecs.open(xianbingshi2, 'r', 'utf8') as f:
        for line in f:
            for di in disease:
                line = line.replace(di, '[disease]')
            line = line.strip()
            data.append(line)
            print(line)

    # The whole input was buffered above, so rewriting the same path is safe.
    with codecs.open(xianbingshi2, 'w', 'utf8') as f:
        f.writelines(line + '\n' for line in data)
def word_replace9(xianbingshi2, organ1):
    """Tag single-character organ terms in a text file with ``[organ]``.

    The vocabulary file ``organ1`` is read as UTF-8, one term per line.
    Only terms that are exactly one character long after stripping are used
    in this pass.  Every line of ``xianbingshi2`` gets the replacements
    applied, is stripped of surrounding whitespace and echoed to stdout; the
    file is then overwritten in place with the tagged lines.

    Args:
        xianbingshi2: Path of the text file to tag; it is overwritten.
        organ1: Path of the organ vocabulary file.
    """
    # Collect the distinct one-character terms in first-seen order.  The
    # filter must test the term's *length*: comparing the string itself to
    # the integer 1 is always False and would leave the vocabulary empty.
    # No length sort is needed because every term has the same length.
    organ = []
    seen = set()
    with codecs.open(organ1, 'r', 'utf8') as f:
        for raw in f:
            term = raw.strip()
            if len(term) == 1 and term not in seen:
                seen.add(term)
                organ.append(term)

    # Tagging priority noted by the original author:
    # hospital, operation, test, symptom, disease, organ, time.
    data = []
    with codecs.open(xianbingshi2, 'r', 'utf8') as f:
        for line in f:
            for o in organ:
                line = line.replace(o, '[organ]')
            line = line.strip()
            data.append(line)
            print(line)

    # The whole input was buffered above, so rewriting the same path is safe.
    with codecs.open(xianbingshi2, 'w', 'utf8') as f:
        f.writelines(line + '\n' for line in data)
if __name__ == '__main__':
    # Vocabulary files, one term per line.
    disease1 = r'C:\Users\Administrator.SC-201812211013\PycharmProjects\词表工作代码\yiwoqu\TXT\disease_0903.txt'
    organ1 = r"C:\Users\Administrator.SC-201812211013\PycharmProjects\词表工作代码\yiwoqu\TXT\organ_0903.txt"
    test1 = r"C:\Users\Administrator.SC-201812211013\PycharmProjects\词表工作代码\yiwoqu\TXT\test_0903.txt"
    time1 = r"C:\Users\Administrator.SC-201812211013\PycharmProjects\词表工作代码\yiwoqu\time1.txt"
    operation1 = r"C:\Users\Administrator.SC-201812211013\PycharmProjects\词表工作代码\yiwoqu\TXT\operation_0903.txt"
    symptom1 = r"C:\Users\Administrator.SC-201812211013\PycharmProjects\词表工作代码\yiwoqu\code\症状.txt"
    hospital1 = r'C:\Users\Administrator.SC-201812211013\PycharmProjects\词表工作代码\yiwoqu\TXT\hospital_0903.txt'

    # Source text, and the working copy that every later pass rewrites.
    xianbingshi = r'C:\Users\Administrator.SC-201812211013\PycharmProjects\词表工作代码\yiwoqu\code\xianbingshi_write.txt'
    xianbingshi2 = r'C:\Users\Administrator.SC-201812211013\PycharmProjects\词表工作代码\yiwoqu\code\xianbingshi_write_sub.txt'

    # First pass: tag hospitals while reading the source text.
    word_replace(xianbingshi, hospital1)

    # Remaining passes run in this order against the working copy.
    # NOTE(review): this order (operation, disease, symptom, test, organ,
    # then the single-character passes) differs from the priority listed in
    # the function comments -- confirm which one is intended.
    pipeline = (
        (word_replace3, operation1),
        (word_replace1, disease1),
        (word_replace2, symptom1),
        (word_replace4, test1),
        # (word_replace5, time1),  # time tagging is currently disabled
        (word_replace6, organ1),
        (word_replace7, symptom1),
        (word_replace8, disease1),
        (word_replace9, organ1),
    )
    for step, vocabulary in pipeline:
        step(xianbingshi2, vocabulary)