import requests
import json
from lxml import etree
import re
import jieba
import matplotlib.pyplot as plt
from wordcloud import WordCloud,ImageColorGenerator
import numpy as np
import PIL.Image as Image
def _fetch_rate_page(item_id, page):
    """Fetch one page of the Taobao rate list for *item_id* as a dict."""
    page_url = ('https://rate.taobao.com/feedRateList.htm?auctionNumId='
                + item_id + '&currentPageNum=' + str(page))
    # A timeout keeps a stalled connection from hanging the script forever.
    res = requests.get(page_url, timeout=10)
    # The response body is JSON wrapped in parentheses; peel them off first.
    return json.loads(res.text.strip().strip('()'))


def getCommodityComments(url):
    """Collect the review texts of a Taobao item.

    Args:
        url: Item page link containing an ``id=<digits>`` query parameter.

    Returns:
        A list with the ``content`` field of every comment retrieved, in
        the order the rate-list endpoint returned them.

    Raises:
        ValueError: If no numeric ``id`` query parameter is present in *url*.
    """
    # Match only a real ``id`` parameter (not e.g. ``ali_trackid=``) and
    # accept any id length instead of slicing at fixed offsets.
    match = re.search(r'(?:^|[?&])id=(\d+)', url)
    if match is None:
        raise ValueError('no item id found in url: %r' % (url,))
    item_id = match.group(1)

    total = _fetch_rate_page(item_id, 1)['total']
    print('该商品共有评论'+str(total)+'条,具体如下: loading...')

    comments = []
    page = 1
    while len(comments) < total:
        batch = _fetch_rate_page(item_id, page).get('comments') or []
        if not batch:
            # The server stopped returning comments before ``total`` was
            # reached; stop instead of requesting pages forever.
            break
        for entry in batch:
            comments.append(entry['content'])
            print(entry['content'])
        page += 1
    return comments
# Script body: fetch the comments of one item, clean them, segment the text
# with jieba and render/save a word cloud shaped and coloured by "1.jpg".
goods=input('宝贝链接:')
comments=getCommodityComments(goods)

# Compiled once, outside the loop; removes the leftover "1f..." tokens and
# the markup characters <, >, / and = from each comment.
rep=re.compile(r'1f\d+\w*|[<>/=]')
siglist=[]
for i in comments:
    # Drop the two placeholder review texts and the words left over from
    # emoji <span> markup before applying the regex clean-up.
    signature=i.strip().replace('评价方未及时做出评价,系统默认好评!','').replace('此用户没有填写评价。','').replace('span','').replace('class','').replace('emoji','')
    signature=rep.sub('',signature)
    siglist.append(signature)
text=''.join(siglist)

# Full-mode segmentation; WordCloud.generate expects space-separated tokens.
wordlist=jieba.cut(text,cut_all=True)
word_space_split=" ".join(wordlist)

# Load the mask image into an array and close the file handle right away.
with Image.open("1.jpg") as mask_image:
    coloring=np.array(mask_image)

my_wordcloud=WordCloud(background_color='white',width=2400,height=2400,max_words=2000,
                       mask=coloring,max_font_size=60,
                       random_state=42,scale=2,
                       font_path="simfang.ttf").generate(
                           word_space_split)

# Recolour the cloud from the mask image, then draw it once.
image_colors=ImageColorGenerator(coloring)
my_wordcloud.recolor(color_func=image_colors)
plt.imshow(my_wordcloud)
plt.axis('off')
# The call parentheses were missing, so the figure was never displayed.
plt.show()

name=input('图片名称:')
my_wordcloud.to_file('%s.png'%name)
# 爬取淘宝评论并生成词云
# 最新推荐文章于 2024-05-22 10:24:02 发布