# Word-segmentation (jieba) demo: character-name frequency count for
# "Romance of the Three Kingdoms".
# Note: this code was shared by a teacher; contact the author for removal
# if it infringes any rights.
import jieba
#from scipy.misc import imread # 这是一个处理图像的函数
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# Stop words: generic terms (titles, pronouns, narrative fillers) to exclude
# from the character-frequency ranking.
# NOTE(review): the visible code never applies this set to `counts`; the
# deletion of these keys presumably happens later in the file — confirm.
# Fixes: replaced fullwidth CJK quotes (a Python syntax error) with ASCII
# quotes, and dropped the duplicate "一面" entry.
excludes = {
    "将军", "却说", "荆州", "二人", "不可", "不能", "如此",
    "左右", "一人", "不敢", "一面", "大叫", "不知",
}
# Read the full novel text and segment it into a list of word tokens.
# Fixes: fullwidth CJK quotes around the filename/mode/encoding were a
# Python syntax error; restored the indentation of the with-block body.
with open("threekingdoms.txt", "r", encoding="utf-8") as fp:
    text = fp.read()
# jieba.lcut returns the segmentation as a plain list of strings.
words = jieba.lcut(text)
# Tally word frequencies, folding the main characters' aliases into one
# canonical name each (e.g. "孟德" and "丞相" both count toward "曹操").
# Fixes: restored the lost loop/branch indentation; replaced the fullwidth
# CJK quotes on the 孔明 branch (a syntax error) with ASCII quotes; replaced
# the if/elif alias chain with a lookup table.
counts = {}
# NOTE(review): `c` is never used in this chunk; it may be consumed later
# in the file, so it is kept — confirm before removing.
c = []
# Alias -> canonical character name. Self-mapping for "孔明" preserved from
# the original branch (word == "孔明" also normalized to "孔明").
_aliases = {
    "孔明曰": "孔明", "孔明": "孔明",
    "孟德": "曹操", "丞相": "曹操",
    "关公": "关羽", "云长": "关羽",
    "玄德": "刘备", "玄德曰": "刘备",
}
for word in words:
    # Skip single-character tokens: mostly particles and function words
    # that carry no character-name information.
    if len(word) == 1:
        continue
    rword = _aliases.get(word, word)
    counts[rword] = counts.get(rword, 0) + 1