# Chinese word-frequency statistics
def gettingci(path=r"E:\DLNL\homework4\cn_stopwords.txt"):
    """Load the Chinese stop-word list, one word per line.

    Args:
        path: stop-word file path (UTF-8, one word per line).
              Defaults to the original hard-coded location.

    Returns:
        list[str]: all stop words, stripped, with empty lines dropped.
    """
    # Bug fix: the original used readline(), which returned only the FIRST
    # line of the file; the caller then iterated that string character by
    # character, so almost no stop words were actually removed.  Read every
    # line instead, and close the file deterministically with `with`.
    with open(path, 'r', encoding='utf-8') as f:
        return [line.strip() for line in f if line.strip()]
def getText(path=r"E:\DLNL\shediao.txt"):
    """Read the novel and remove every stop word from its text.

    Args:
        path: path to the novel file.  Defaults to the original
              hard-coded location.

    Returns:
        str: the full text with all stop words deleted.
    """
    # 'ANSI' is a Windows-only encoding alias for the active code page
    # (cp936/GBK on Chinese Windows); 'gbk' is the portable spelling of
    # the same encoding for this corpus.  `with` closes the file that the
    # original left open.
    with open(path, 'r', encoding='gbk') as f:
        txt = f.read()
    # Delete each stop word outright; with the fixed gettingci() this now
    # iterates whole words rather than the characters of one line.
    for stopword in gettingci():
        txt = txt.replace(stopword, '')
    return txt
# --- Word-frequency statistics -------------------------------------------
# Bug fix: jieba was used below but never imported anywhere in the file.
import jieba                      # third-party: Chinese word segmentation
from collections import Counter

shediao = getText()
words = jieba.lcut(shediao)

# Count only words of length >= 2: single characters are mostly particles
# that carry little meaning on their own (same filter as the original loop).
counts = Counter(word for word in words if len(word) > 1)

# Keep the `items` name so the word-cloud code below keeps working.
items = sorted(counts.items(), key=lambda kv: kv[1], reverse=True)

# Slicing avoids the IndexError the original `for i in range(20)` raised
# whenever fewer than 20 distinct words were found.
for item in items[:20]:
    print(item)
# --- Word-cloud generation ------------------------------------------------
# Bug fix: `wc` was used below but never imported anywhere in the file.
import wordcloud as wc            # third-party: word-cloud rendering

w1 = wc.WordCloud(width=1000, height=700, background_color='white',
                  max_words=20, font_path='msyh.ttc')
# Bug fix: the original called generate(str(items)), which rendered the
# Python repr of a list of tuples (brackets, quotes and counts included)
# instead of the words themselves.  Feed the real frequency table; the
# max_words=20 setting keeps only the top 20 words.
w1.generate_from_frequencies(counts)
w1.to_file('ccc.png')
# Chinese word-frequency statistics with word-cloud generation.
# (Blog-scrape residue removed: a bare "latest recommended article,
# published 2024-09-18" footer line was not Python and broke the script.)