词云示例python

#!/usr/bin/env python

# -*- coding: utf-8 -*-

import warnings

# Suppress all warnings from here on; the filter is installed before the
# remaining imports run.
warnings.filterwarnings("ignore")

import jieba  # Chinese word segmentation
import numpy  # numerical computing
import codecs  # codecs.open() opens a file with an explicit encoding and decodes to unicode on read
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib

# Default figure size as (width, height) for the plots created below.
matplotlib.rcParams['figure.figsize'] = (10.0, 5.0)
from wordcloud import WordCloud, ImageColorGenerator  # word-cloud rendering

# Load the news table and drop every row that contains a missing value.
df = pd.read_csv("entertainment_news.csv", encoding='utf-8')
df = df.dropna()
print(df.shape)
# Pull the `content` column out as a plain Python list, one entry per row.
content = df.content.values.tolist()
print(content[:1])
# Optional: register a custom dictionary before segmenting, e.g.
#   jieba.load_userdict(u"data/user_dic.txt")

# Tokenise every article with jieba and collect the tokens that are longer
# than one character and are not a bare line break.
segment = []
for line in content:
    try:
        segs = jieba.lcut(line)
    except Exception:
        # Best effort: report the offending row and move on. `Exception`
        # (rather than a bare `except`) lets Ctrl-C and SystemExit through.
        print("jieba exception:", line)
        continue
    for seg in segs:
        if len(seg) > 1 and seg != '\r\n':
            segment.append(seg)
print("segment:", len(segment), segment[:2])

# One row per token, in a single `segment` column.
words_df = pd.DataFrame({'segment': segment})
print("words_df:", words_df.head())
# quoting=3 is csv.QUOTE_NONE: quote characters in the stop-word file are read
# as ordinary text, so words that contain quotes are not mangled.
stopwords = pd.read_csv("stopwords.txt", index_col=False, quoting=3, sep="\t", names=['stopword'], encoding='utf-8')
print("stopwords:", stopwords.head())
# Drop stop words. The mask must be negated: without `~` the frame would keep
# ONLY the stop words and throw away every meaningful token.
words_df = words_df[~words_df.segment.isin(stopwords.stopword)]

# Group identical tokens together so they can be counted.
words_stat_groupby = words_df.groupby(by=['segment'])
print(type(words_stat_groupby))

# Count the rows in each group and expose the result as a one-column frame
# named "计数". `size().to_frame()` replaces the dict-renaming form
# `agg({"计数": numpy.size})`, which pandas >= 1.0 rejects with
# SpecificationError.
words_stat = words_stat_groupby.size().to_frame(name="计数")
print("words_stat.head():")
print(words_stat.head())
# Move `segment` from the index back into a column and order by frequency,
# most frequent token first.
words_stat = words_stat.reset_index().sort_values(by=["计数"], ascending=False)
print("words_stat.head():")
print(words_stat.head())

print("…")

# Earlier image loaders, kept for reference only. `scipy.misc.imread` is no
# longer available in current SciPy releases, and the matplotlib import below
# overrode both of these anyway.
# from scipy.misc import imread
# from scipy.misc.pilutil import imread
from matplotlib.pyplot import imread

# Use a larger square canvas for the word-cloud figure.
matplotlib.rcParams['figure.figsize'] = (15.0, 15.0)
from wordcloud import WordCloud, ImageColorGenerator

# The image is used twice: as the mask passed to WordCloud and as the colour
# source for ImageColorGenerator.
bimg = imread('timg.jpg')
wordcloud = WordCloud(background_color="white", mask=bimg, font_path='data/simhei.ttf', max_font_size=200)
# Map token -> count for the first 1000 rows of words_stat.
word_frequence = {x[0]: x[1] for x in words_stat.head(1000).values}
wordcloud = wordcloud.fit_words(word_frequence)
bimgColors = ImageColorGenerator(bimg)
plt.axis("off")
# Recolour the words using the image-based colour function and show the result.
plt.imshow(wordcloud.recolor(color_func=bimgColors))
plt.show()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

小金子的夏天

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值