The code is as follows:
import jieba  # jieba handles Chinese word segmentation
import wordcloud
import imageio.v2 as imageio

txt = open(r'C:\pycharm\项目\红楼梦文本\红楼梦.txt', 'r', encoding='utf-8').read()
rp_str = ', 。 ! :;、‘’ “” ? 《》 () —— \n'  # punctuation characters to strip
for i in rp_str:
    txt = txt.replace(i, '')
txt = ''.join(txt.split())  # remove any remaining whitespace from the text
jieba.load_userdict(r'C:\pycharm\项目\venv\Lib\site-packages\jieba\dict.txt')
words = jieba.lcut(txt)
stopwords = open(r'C:\pycharm\项目\停用词文本\stop_words.txt', 'r', encoding='utf-8').read()
stopwords_list = stopwords.splitlines()  # one stopword per line; list(stopwords) would split it into single characters
remove_words = ['什么', '一个', '我们', '笑道', '说道', '知道', '你们', '这里', '那里', '他们', '自己', '两个', '怎么']  # extra filler words to drop
words_dic = {}  # empty dict for the word-frequency counts
# iterate over the segmented words and tally frequencies
for i in words:
    if len(i) == 1:  # skip single characters
        continue
    elif i in remove_words or i in stopwords_list:  # skip filler words and stopwords
        continue
    else:
        # completion of the truncated loop: count each remaining word (standard tally)
        words_dic[i] = words_dic.get(i, 0) + 1
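The wordcloud and imageio imports point at the rendering step the snippet never reaches. Below is a minimal sketch of that step, assuming a hypothetical mask image path, a hypothetical output filename, and the Microsoft YaHei font ('msyh.ttc') for CJK glyphs; none of these names appear in the source.

# Rendering sketch: mask path, font, and output name are assumptions, not from the source
mask = imageio.imread(r'C:\pycharm\项目\mask.png')  # hypothetical mask image giving the cloud its shape
wc = wordcloud.WordCloud(
    font_path='msyh.ttc',     # a CJK-capable font is required, or the Chinese words render as boxes
    background_color='white',
    mask=mask,                # words are drawn only in the non-white regions of the mask
)
wc.generate_from_frequencies(words_dic)  # build the cloud from the word -> count dict above
wc.to_file('hongloumeng_wordcloud.png')  # hypothetical output file

generate_from_frequencies is used rather than generate because the frequencies were already counted by hand above; passing the raw text to generate would redo the tokenization with wordcloud's own (non-Chinese-aware) regex.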