import pandas as pd
import requests
import re
import time
from snownlp import SnowNLP
import io
import sys
from collections import Counter
import jieba
from matplotlib import pyplot as plt
import wordcloud
import warnings
from gensim import corpora, models
import pyLDAvis.gensim
import pyLDAvis
# ================= Data acquisition =================
# One accumulator list per review field (presumably filled by the crawl
# loop that follows -- the filling code is not part of this span).
nick, quality, date, comment = [], [], [], []
# Each page holds 20 reviews. `page` is the exclusive upper bound of the
# page counter, so set it to (number of pages to crawl) + 1.
page = 3
for i in range(1, page, 1):
print("正在爬取第" + str(i) + "页")
# Leading part of the review-list URL; the page number is concatenated
# directly after "currentPage=" when the full URL is assembled.
# The query parameter must read "&currentPage=": the pasted source had it
# mangled into "¤tPage=" because "&curren" was decoded as the HTML entity
# for the currency sign, which silently dropped the paging parameter.
first = 'https://rate.tmall.com/list_detail_rate.htm?itemId=557510761368&spuId=1067672569&sellerId=2820842454&order=3&currentPage='
last = '&append=0&content=1&tagId=&posi=&picture=&groupId=&ua=098#E1hvCpvUvbpvUpCkvvvvvjiWPLFW1jDnRLqvtjnEPmPpljimRLLhgjtbRFMhzjEVRTOCvvpvvUmmRvhvCvvvvvvRvpvhMMGvvvvCvvOvCvvvphmgvpvIMMGv/qYvvnGvvUjUphvUNQvvvACvpvQovvv2UhCv2CUvvvWiphvWQO9CvvOWvvVvJhTIvpvUvvmvKtQXQv9UvpCWh81Fvva4YExrs8TrEcqvac7Q+ulQbNotlfh0yj6Ofa1l+boJEcqvaNshVBrQpKFZARp7RAYVyO2vqbVQWl4vAWFIRfU6pwet9E7rjv==&needFold=0&_ksTS=1614312455436_436&callback=jsonp437'
url = first + str(i) + last
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:86.0) Gecko/20100101 Firefox/86.0',
'referer': 'https://list.tmall.com/search_product.htm?q=%D6%B2%B4%E5%D0%E3&type=p&spm=a220m.1000858.a2227oh.d100&from=.list.pc_1_searchbutton',
'cookie': 'cna=MQY3F16TIiUCAXj0KLkDrCSr; isg=BAMDf6e9Z11oDhSkH8Y_-OInkceteJe68MstDTXgvWKY9CAWvElzCjQibgS6z--y; tfstk=c7UCBAb24pvCQyTywW1w8hf5io3CZHLI_HMLOkoQGawNk-V1i0YqhJasqdmKSf1..; l=eBPYPbzmOInY98vFKO5Cnurza779fIRV1kPzaNbMiInca1WdNeRHzNCIr97HldtfgtfU-eKzpOD6ydnM-q4LRE_ceTwhKXIpBqp9-; lid=t_1498050231982_0133; enc=pI1QRFc21Y3%2BeedHEHCwaPQQxxXEGuvHw%2BQPROnTlxuv7WPkm%2FsDxEl8DD1HVRvPSq%2BUF6522Tj3eWc3Oe%2BxqQ%3D%3D; sgcookie=E100yGUBtKmZPVt1SUzKwzJN5TgRxYduHpqQ6LEOTNjgevd9Llmpw%2BiCkCNIEFIKzep26emR9fxKMYiZeNBDsRAMnhFLBsy08lT4W%2Fd1g3G%2FCK0cS; lgc=t_1498050231982_0133; _tb_token_=e76dbbf173ee4; cookie2=18ca72ed93207c71af40832a44f69dfd; xlly_s=1; dnk=t_1498050231982_0133; uc1=existShop=false&cookie16=UtASsssmPlP%2Ff1IHDsDaPRu%2BPw%3D%3D&cookie21=U%2BGCWk%2F7p4mBoUyS4E9C&cookie15=Vq8l%2BKCLz3%2F65A%3D%3D&cookie14=Uoe1hgR2h5poXQ%3D%3D&pas=0; csg=c34b003f; _l_g_=Ug%3D%3D; unb=3334488617; cookie1=B0OtIf8tUp
天猫评论爬取&LDA主题聚类实现(全过程代码)
最新推荐文章于 2023-02-08 13:03:31 发布