# ---- Word cloud (词云) ----
import jieba
from imageio import imread
from wordcloud import WordCloud, ImageColorGenerator
import matplotlib.pyplot as plt

# `numpy.unicode` was a deprecated alias of the builtin `str` and was
# removed in NumPy 1.20+; fall back to `str` so this runs on modern NumPy.
try:
    from numpy import unicode
except ImportError:
    unicode = str

# NOTE(review): this loads the STOP-word list as a jieba user DICTIONARY,
# which makes jieba treat every stop word as a known whole token —
# presumably so stop words segment cleanly before being filtered out in
# stop_words(); confirm this is deliberate and not a mixed-up file name.
jieba.load_userdict("stoplist.txt")

"""seg_list = jieba.cut("我来到北京清华大学", cut_all=True)
print("Full Mode: " + "/ ".join(seg_list)) # 全模式
seg_list = jieba.cut("我来到北京清华大学", cut_all=False)
print("Default Mode: " + "/ ".join(seg_list)) # 精确模式
seg_list = jieba.cut("他来到了网易杭研大厦") # 默认是精确模式
print(", ".join(seg_list))
seg_list = jieba.cut_for_search("小明硕士毕业于中国科学院计算所,后在日本京都大学深造") # 搜索引擎模式
print(", ".join(seg_list))"""
#***************************-------test***************
back_color=imread('girl.jpg') #load the background image (also used as the shape mask)
wc=WordCloud(background_color='white', #canvas background color
max_words=100, #cap on the number of words drawn
mask=back_color, #shape mask; width/height are ignored when a mask is set
max_font_size=100, #largest font size used for the most frequent word
font_path="D:\\pythonProject2\\simhei.ttf", #CJK-capable font so Chinese words render instead of empty boxes
random_state=42, #seed the layout/color RNG so output is reproducible
)
# Read the source corpus for the word cloud. Use a context manager so the
# file handle is closed (the original leaked it), and request UTF-8
# explicitly — the stop list below is read as UTF-8, so the corpus is
# presumably the same encoding rather than the platform default.
with open('data_m_content.txt', encoding='utf-8') as corpus_file:
    text = corpus_file.read()
def stop_words(texts, stopwords_path='stoplist.txt'):
    """Segment *texts* with jieba and remove stop words.

    Parameters
    ----------
    texts : str
        Raw text to segment.
    stopwords_path : str
        Path to a UTF-8 stop-word list, one word per line
        (defaults to the original hard-coded 'stoplist.txt').

    Returns
    -------
    str
        Space-joined surviving tokens, ready for WordCloud.generate().
    """
    # Search-engine mode yields finer-grained tokens than the default mode.
    token_stream = jieba.cut_for_search(texts)

    # Build an exact-match set of stop words. The original tested
    # `word not in <entire file text>`, a substring check that also
    # discarded any token that merely appears INSIDE some stop word.
    # (The `numpy.unicode` call it used is gone from modern NumPy and
    # was a no-op str() anyway.)
    with open(stopwords_path, encoding='utf-8') as f:
        stopwords = {line.strip() for line in f}

    # Drop whitespace-only tokens too (the substring test filtered them
    # implicitly, since '' is "in" every string).
    kept = [tok for tok in token_stream
            if tok.strip() and tok.strip() not in stopwords]
    return ' '.join(kept)
text=stop_words(text) #strip stop words before building the cloud
wc.generate(text) #lay out the word cloud from the space-joined tokens
image_colors=ImageColorGenerator(back_color) #color function sampled from the background image
plt.imshow(wc) #first figure: cloud with default coloring
plt.axis('off') #hide the axes
plt.figure() #second figure for the image-colored version
plt.imshow(wc.recolor(color_func=image_colors))
plt.axis('off')
# NOTE(review): there is no plt.show(), so the two figures are never
# displayed interactively — only the file below is produced. recolor()
# above appears to restyle wc in place, so the saved PNG should be the
# image-colored version; confirm against the wordcloud docs.
wc.to_file('data_m.png') #save the cloud image to disk
# ---- Line chart (线状图) ----
import matplotlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Source CSV for the line chart.
path = "D:\\pythonProject2\\all.csv"
df = pd.read_csv(path)
# dropna() returns a NEW DataFrame; the original discarded the result,
# so rows with missing values were never actually removed. Rebind it.
df = df.dropna()
data1 = []
# The original line was a bare no-op expression (the right-hand side of
# the assignment was missing). Complete it: select a CJK-capable font so
# Chinese tick labels/titles render instead of empty boxes — consistent
# with the SimHei font used by the word-cloud script above.
matplotlib.rcParams['font.sans-serif'] = ['SimHei']