数据处理
import numpy as np
import pandas as pd
# Load the CSV export; the path is relative to the current working directory.
df = pd.read_csv("../../pandas/weibo_2020.csv", encoding='utf-8')
# sort_values returns a new DataFrame, so the result has to be assigned back;
# otherwise the descending-by-searchCount order is silently discarded.
df = df.sort_values(by='searchCount', ascending=False)
df.head()
![在这里插入图片描述](https://i-blog.csdnimg.cn/blog_migrate/71a8fe04a6e67b99344fe4998a7e346f.png)
按月提取热搜 Top 50 并用 jieba 分词
# Parse the date column once and derive the calendar parts from that result.
dates = pd.to_datetime(df["date"])
df["year"] = dates.dt.year
df["month"] = dates.dt.month
df["day"] = dates.dt.day
df.head()

# For every month 1..12 keep the 50 rows with the highest searchCount.
# DataFrame.append was removed in pandas 2.0, so the monthly slices are
# collected in a list and concatenated in a single pd.concat call.
monthly_top = [
    df[df['month'] == month]
    .sort_values(by='searchCount', ascending=False)
    .head(50)
    for month in range(1, 13)
]
search_df = pd.concat(monthly_top)
import jieba
# Tokenise every keyword with jieba and keep only tokens longer than one
# character (this also excludes a lone " ", so no separate check is needed).
cut_list = []
# Iterate the column directly instead of calling tolist() on every iteration,
# which rebuilt the full keyword list once per row.
for keyword in search_df['keyword']:
    cut_list.extend(w for w in jieba.cut(keyword) if len(w) > 1)
np.array(cut_list)
![在这里插入图片描述](https://i-blog.csdnimg.cn/blog_migrate/c55d1fbfc0b914db637b422e78070d5f.png)
方法1:stylecloud
import stylecloud
# Render the tokens as an icon-shaped word cloud and write it to 词云.png.
stylecloud.gen_stylecloud(
    text=' '.join(cut_list),  # space-separated tokens
    size=1200,
    font_path=r'C:/Windows/Fonts/msyh.ttc',  # font file used for rendering the words
    output_name='词云.png',
    icon_name='fas fa-angry',
    palette='colorbrewer.sequential.Blues_4',
    # Must be a colour string; the bare name `black` is undefined and
    # raised NameError.
    background_color='black',
)
![词云.png](https://i-blog.csdnimg.cn/blog_migrate/cb817ed97e9f9e72b2e418a78a3ec595.png)
方法2:WordCloud
import jieba
from wordcloud import WordCloud
import pandas as pd
import matplotlib.pyplot as plt
def remove_stop_words(words, stop_words):
    """Return the items of *words* that are not in *stop_words*.

    The relative order of the kept items is preserved, and duplicates are
    kept so that later frequency counting still works.
    """
    blocked = set(stop_words)  # set membership instead of a list scan per word
    return [word for word in words if word not in blocked]


if __name__ == "__main__":
    # NOTE(review): `url` is not used anywhere below — presumably the page
    # the data originally came from; confirm before removing.
    url = "https://s.weibo.com/top/summary?cate=realtimehot"
    all_words = cut_list
    stop = ['的', '你', '了', '将', '为', '例', ' ', '多', '再', '有', '是', '等', '天', '次']
    words_cut = remove_stop_words(all_words, stop)

    # Frequency of each remaining token, most frequent first.
    word_count = pd.Series(words_cut).value_counts()

    # `imread` was never imported (NameError); matplotlib's reader is
    # already in scope as plt.imread and returns the image as an array.
    back_ground = plt.imread(r"C:\Users\fqp18\Pictures\123.jpg")
    wc = WordCloud(
        font_path="C:\\Windows\\Fonts\\simhei.ttf",
        background_color="white",
        max_words=1000,
        mask=back_ground,
        max_font_size=200,
        random_state=50,
    )
    print(word_count)

    # fit_words is documented to take a {word: frequency} dict, so hand it
    # a plain dict rather than the pandas Series.
    wc1 = wc.fit_words(word_count.to_dict())

    plt.figure()
    plt.imshow(wc1)
    plt.axis("off")
    plt.show()
    wc.to_file("ciyun.png")
![ciyun.png](https://i-blog.csdnimg.cn/blog_migrate/16bcb18e2baf365415ec7cf5d660bcf6.png)