大家好,我是天空之城。
以下部分内容来自公众号“数据分析与统计学之美”,号主很牛叉,需要的赶紧加他。
import numpy as np
import pandas as pd
import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from imageio import imread
import warnings
# Ignore every warning raised for the rest of the script.
warnings.filterwarnings("ignore")

# Read the whole source text and break it into whitespace-separated chunks.
with open(r"非暴力沟通.txt", 'r', encoding="utf-8") as f:
    txt = f.read()
txt = txt.split()

# NOTE: to keep specific terms (e.g. '非暴力', '沟通') from being split apart,
# they can be registered with jieba.add_word() before segmenting.

# Segment each chunk into a list of words; data_cut is a list of word lists.
data_cut = [jieba.lcut(x) for x in txt]
print(data_cut[:20])

# Flatten the per-chunk word lists into one list of tokens (stop words are
# still included at this point).
all_words = [word for words in data_cut for word in words]
print(all_words)

# Load the stop-word file and prepend a few extra stop words.
with open("stoplist.txt", encoding="utf-8") as f:
    stop = f.read()
# A set keeps the membership test below O(1) per token instead of scanning a
# list of stop words for every token.
stop = set([" ", "道", "说道", "说"] + stop.split())

# Rebuild the token list without stop words, then count how often each
# remaining word occurs (value_counts sorts by descending frequency).
all_words = [word for words in data_cut for word in words if word not in stop]
word_count = pd.Series(all_words).value_counts()
print(word_count[:10])
# Step 1: read the background image that is passed to WordCloud as its mask.
mask_image = imread("aixin.jpg")

# Step 2: collect the word cloud settings and build the generator.
cloud_options = dict(
    font_path="C:/Windows/Fonts/simfang.ttf",
    mask=mask_image,
    background_color="white",
    max_words=2000,
    max_font_size=200,
    random_state=42,
)
wc = WordCloud(**cloud_options)
rendered_cloud = wc.fit_words(word_count)

# Step 3: draw the word cloud on a 16x8 figure with the axes hidden, show it,
# and finally write the image to disk.
plt.figure(figsize=(16, 8))
plt.imshow(rendered_cloud)
plt.axis("off")
plt.show()
wc.to_file("ciyun100.png")