# 详细教程:天猫和淘宝的评论爬取方法是一样的;淘宝版的教程在我的主页里,这里发一下天猫版。
import requests
import re
import time
#需要修改的参数
#url_1
#a和b
#cookie不能用时重新修改
def set_up():
    """Build the review-endpoint URLs and request headers, then start the crawl.

    Reads the module-level ``page`` (number of review pages to fetch; must be
    convertible with ``int``) and passes the generated URL list together with
    the headers to ``spider``.

    Values to adjust when targeting another product:
      * ``url_1`` - product detail page, sent as the ``Referer`` header.
      * ``a`` / ``b`` - the parts of the review endpoint that come before and
        after the page number.
      * ``cookie`` - replace it when the current one stops working.

    Raises:
        ValueError: if ``page`` cannot be converted to an integer.
    """
    # Product detail page; only used as the Referer of the review requests.
    url_1 = 'https://detail.tmall.com/item.htm?id=548071255763&ali_trackid=2:mm_28347190_2425761_109058700251:1577888365_191_532209995&spm=a231o.7712113/g.1004.1&pvid=200_11.27.33.24_345_1577888359041'

    # Review endpoint, split around the page number.
    # The prefix used to end in 'order=3¤tPage=': the '&curren' of
    # '&currentPage' had been decoded as the HTML entity for '¤', so the page
    # number was never sent as its own query parameter.
    a = 'https://rate.tmall.com/list_detail_rate.htm?itemId=548071255763&spuId=929590581&sellerId=1986899349&order=3&currentPage='
    b = '&append=0&content=1&tagId=&posi=&picture=&groupId=&ua=098%23E1hvMpvRvP%2BvUvCkvvvvvjiPRsS9gjnjPs5ysjrCPmPO0jEmRsd9tjlEPLcZgjlP9phv2nQwwx6rzYswzjqx7u6CvvyvmCvmCvpvRVpEvpCWvVEHvvaSIExrl8TZfvDrQjc6%2BulgEfsUln97%2B3%2B%2BaNLUACAxEcqWafmxdXIaUExrQjZL%2B3%2B%2BafmxfXkKf3qxs4V9%2Bull88oQD76OddyCvmFMMQ2GS6vvtQvvvQCvpvLEvvv2vhCv2UhvvvWvphvWmpvvvQavpv1Cuphvmvvv9bw37srSkphvC9QvvOCzB9hCvvOvUvvvphvPvpvhvv2MMsyCvvpvvvvviQhvCvvv9U8jvpvhvvpvv86CvvyvmvWmlnpvUvGtvpvhvvvvv86CvvyvmW%2BmyXhvORI%3D&needFold=0&_ksTS=1577888442636_496&callback=jsonp497'

    # One URL per requested page. Pages are numbered from 1 so that the first
    # request asks for page 1 rather than page 0.
    # NOTE(review): assumes the endpoint's currentPage is 1-based - confirm
    # against a request captured from the browser.
    url_2 = [a + str(number) + b for number in range(1, int(page) + 1)]

    # Session cookie copied from a logged-in browser; fill in before running.
    cookie = ''
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:71.0) Gecko/20100101 Firefox/71.0',
        'Host': 'rate.tmall.com',
        'Accept': '*/*',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Accept-Encoding': 'gzip, deflate, br',
        'Referer': url_1,
        'Connection': 'keep-alive',
        'Cookie': cookie,
        'TE': 'Trailers',
    }
    spider(url_2, headers)
def spider(url_2, headers):
    """Download each review page and collect the reviews it contains.

    For every URL in ``url_2`` the response body is fetched and scanned with
    two regular expressions: one for the review text (``rateContent``) and one
    for the purchased variant (``auctionSku``). Each pair is appended as a
    dict to the module-level ``Evaluate`` list. After the last page the total
    count and every collected entry are printed.

    Args:
        url_2: sequence of review-endpoint URLs, one per page.
        headers: HTTP headers sent with every request.
    """
    # Compile once; the same patterns are applied to every page.
    content_pattern = re.compile(',"rateContent":"(.*?)"')
    sku_pattern = re.compile('"auctionSku":"(.*?)",')

    for page_number, url in enumerate(url_2, start=1):
        response = requests.get(url, headers=headers, timeout=30).content.decode()
        # First-time review text.
        evaluate = content_pattern.findall(response)
        # Product variant the review belongs to.
        kinds = sku_pattern.findall(response)
        # Pair the two lists with zip: it stops at the shorter one, so a page
        # with fewer variants than reviews no longer raises IndexError and
        # throws away everything collected so far.
        Evaluate.extend(
            {'商品的种类为:': kind, '商品的初次评价为:': text}
            for kind, text in zip(kinds, evaluate)
        )
        print('第'+str(page_number)+'页的评论获取成功')
        # Pause between pages to reduce the chance of being detected.
        time.sleep(5)

    print('一共获取了'+str(len(Evaluate))+'评论')
    for entry in Evaluate:
        print(entry)
if __name__ == '__main__':
    # Module-level list that spider() fills with the collected reviews.
    Evaluate = list()
    # Number of review pages to fetch, kept as the raw input string;
    # set_up() converts it with int().
    page = input('输入想获取多少页的评论:')
    set_up()