python - 词云

最新推荐文章于 2024-05-08 03:08:29 发布

Floy_p

最新推荐文章于 2024-05-08 03:08:29 发布

阅读量908

点赞数

分类专栏： python 文章标签： python 机器学习数据挖掘

本文链接：https://blog.csdn.net/qq_41752985/article/details/122334189

版权

python 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

python 做词云图的两种方式

数据处理

import numpy as np
import pandas as pd

# Load the hot-search CSV (presumably a 2020 Weibo export, judging by the
# file name). Later steps read the 'searchCount', 'date' and 'keyword' columns.
df = pd.read_csv("../../pandas/weibo_2020.csv", encoding='utf-8')
# sort_values returns a new DataFrame rather than sorting in place, so the
# result has to be assigned back for the ordering to take effect.
df = df.sort_values(by='searchCount', ascending=False)
df.head()

在这里插入图片描述

jieba分词

# Parse the date column once and derive the calendar parts from it.
dates = pd.to_datetime(df["date"])
df["year"] = dates.dt.year
df["month"] = dates.dt.month
df["day"] = dates.dt.day
df.head()

# Keep the 50 rows with the highest searchCount for each month (1-12).
# DataFrame.append was removed in pandas 2.0, so the monthly slices are
# collected in a list and concatenated in a single call instead.
monthly_top = [
    df[df['month'] == month]
    .sort_values(by='searchCount', ascending=False)
    .head(50)
    for month in range(1, 13)
]
search_df = pd.concat(monthly_top)

import jieba

# Segment every keyword and keep only tokens longer than one character
# (this also discards single-space tokens). The keyword column is turned
# into a list once instead of on every loop iteration.
cut_list = []
for keyword in search_df['keyword'].tolist():
    cut_list.extend(w for w in jieba.cut(keyword) if len(w) > 1)
np.array(cut_list)

在这里插入图片描述

方法1：stylecloud

# Method 1: stylecloud, using one of the predefined Font Awesome icon shapes.
import stylecloud
stylecloud.gen_stylecloud(
    text=' '.join(cut_list),  # space-separated tokens from the jieba step
    size=1200,
    font_path=r'C:/Windows/Fonts/msyh.ttc',  # font file able to render Chinese glyphs
    output_name='词云.png',
    # Shape of the cloud, named as 'fas fa-xxx'; the default is 'fas fa-flag'.
    # Icon gallery: https://fontawesome.com/v5.15/icons?d=gallery&p=2
    icon_name='fas fa-angry',
    palette='colorbrewer.sequential.Blues_4',
    # Must be a colour string; the bare name `black` raised NameError.
    background_color='black',
)

词云.png

方法2：WordCloud

# 方法2：使用WordCloud 可以自定义图片形状
import jieba
from wordcloud import WordCloud
import pandas as pd
import matplotlib.pyplot as plt


if __name__ == "__main__":
    # The words come from the jieba segmentation result (cut_list); a live
    # scrape of the Weibo hot-search page could be substituted here instead.
    all_words = cut_list

    # Stop words to exclude from the cloud (a set gives O(1) membership tests).
    stop = {'的', '你', '了', '将', '为', '例', ' ', '多', '再', '有', '是', '等', '天', '次'}
    words_cut = [word for word in all_words if word not in stop]

    # Word -> frequency, most frequent first.
    word_count = pd.Series(words_cut).value_counts()

    # Image whose shape the cloud should take. `imread` was never imported in
    # the original; matplotlib's reader is already in scope via `plt`.
    back_ground = plt.imread(r"C:\Users\fqp18\Pictures\123.jpg")
    wc = WordCloud(
        font_path="C:\\Windows\\Fonts\\simhei.ttf",  # font able to render Chinese glyphs
        background_color="white",  # canvas background colour
        max_words=1000,  # upper bound on the number of words drawn
        mask=back_ground,  # image used as the cloud's mask
        max_font_size=200,  # largest font size used
        random_state=50,  # random seed, so layout and colours are reproducible
    )
    print(word_count)
    wc1 = wc.fit_words(word_count)  # build the cloud from the frequencies
    plt.figure()
    plt.imshow(wc1)
    plt.axis("off")
    plt.show()
    wc.to_file("ciyun.png")

Floy_p

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python - 词云

python 做词云图的两种方式数据处理import numpy as npimport pandas as pddf = pd.read_csv("../../pandas/weibo_2020.csv",encoding='utf-8')df.sort_values(by = 'searchCount',ascending=False)df.head()jieba分词df["year"] = pd.to_datetime(df["date"]).dt.yeardf["month"
复制链接

扫一扫

专栏目录