class Sanguospider(scrapy.Spider):
    """Spider that saves the short comments from one Douban book comment page.

    The text of every ``div.comment span.short`` node on the start URL is
    joined with spaces and appended to ``result.txt`` in the current working
    directory.
    """

    # NOTE(review): this class needs ``scrapy`` to be imported at module
    # level; no such import is visible in this part of the file -- confirm.

    name = "threespider"
    allowed_domains = ["book.douban.com"]
    start_urls = ["https://book.douban.com/subject/1019568/comments/"]

    def parse(self, response, **kwargs):
        """Extract the short comments from ``response`` and append them to disk.

        Args:
            response: The downloaded page; queried with a CSS selector.
            **kwargs: Accepted for signature compatibility; not used here.
        """
        comments = response.css("div.comment span.short::text").extract()
        data = " ".join(comments)
        # "a+" appends, so repeated runs accumulate text in the same file.
        # The context manager guarantees the handle is flushed and closed.
        with open("result.txt", "a+", encoding="utf-8") as out_file:
            out_file.write(data)
# Install the required third-party packages (run in a shell, not in Python):
#   pip install wordcloud
#   pip install jieba
from wordcloud import WordCloud
# Minimal word-cloud example: build a cloud from a space-separated string
# using the default WordCloud settings and save it as an image.
text = "dog cat fish bird cat cat dog"
wc = WordCloud()
wc.generate(text)
wc.to_file("1.png")
import jieba
from wordcloud import WordCloud,STOPWORDS
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
# Read the text previously written to result.txt; the context manager
# closes the file as soon as the content has been read.
with open('result.txt', 'r', encoding='utf-8') as comment_file:
    txt = comment_file.read()

# Segment the text into a list of words with jieba.
result = jieba.lcut(txt)

# Join the words into a single string, separated by spaces.
text = ' '.join(result)

# Mask: load the image as an array and pass it to WordCloud below.
color_mask = np.array(Image.open("heart.png"))

# Create the WordCloud object.
# NOTE(review): 'msyh.ttf' is presumably a font with Chinese glyphs so the
# segmented words render correctly -- confirm the file is available.
wc = WordCloud(
    background_color='white',
    font_path='msyh.ttf',
    mask=color_mask,
)

# Generate the cloud from the space-separated words.
wc.generate(text)

# Save the image.
wc.to_file("1.png")

# Display the cloud with matplotlib.
plt.imshow(wc)
# Hide the axes.
plt.axis('off')
plt.show()