爬虫之拉菲红酒评价

  • 版权声明:本文为博主原创文章,未经博主允许不得转载。原文链接:https://blog.csdn.net/kun1280437633/article/details/80655395
# coding:utf-8

import re
import requests
'''
    分析:

    1. 爬取流程

    # 网址:https://detail.tmall.com/item.htm?spm=a220m.1000858.1000725.1.1aa2e182ticMpj&id=566515357894&areaId=310100&user_id=2807304908&cat_id=50072114&is_b=1&rn=5e6f4625e0c8fc377cfe495aee3f12bf

    # 法国拉菲酒的评价 地址url来自于js文件提取出来的请求
    url:https://rate.tmall.com/list_detail_rate.htm?itemId=566515357894&spuId=946656746&sellerId=2807304908&order=3&currentPage=1&append=0&content=1&tagId=&posi=&picture=&ua=098%23E1hvcvvbvnQvUvCkvvvvvjiPPFLwQj3RPsd91jEUPmPygjnmn2SUsjlhPLLyAj3E9phvHnsG2DqOzYswzj6J7%2FafzVFw8DiI3QhvCvmvphvPvpvhvv2MMQhCvvOvChCvvvmivpvUvvCCUdafuueEvpvVvpCmpYFOKphv8vvvphvvvvvvvvCHhQvv9vgvvhZLvvmCvvvvBBWvvvH%2BvvCHhQvvv7QEvpCW9fQ1WB0xdX9fderEd8pBHdoJJZqpgCOqrqpyCW2%2B%2Bfmt%2BeCosR9t%2BFuTWDAviXTAdX3sbSmxdXkOdegmDfesRFoNwyfvTCuwVB3%2BVFyCvvpvvhCviQhvCvvvpZptvpvhvvCvpv%3D%3D&isg=BIOD_0Fh3PRrJJHwUrnUy1iwEkcnBhEj_Wv7VbVg_eJZdKKWPckTiguG6gQ6VG8y&needFold=0&_ksTS=1528703640847_720&callback=jsonp721
    simpleurl:https://rate.tmall.com/list_detail_rate.htm?itemId=566515357894&spuId=946656746&sellerId=2807304908&order=3&currentPage=1
    
'''

class RateSpider:
    """Fetch Tmall review pages for one item and print each review text.

    The rate endpoint answers with a JSONP payload; the review text is pulled
    out of it with a regular expression rather than a JSON parser.
    """

    # Compiled once for all pages.  Captures the value of every "rateContent"
    # field (one or more characters up to the next double quote).
    _RATE_CONTENT_RE = re.compile(r'"rateContent":"([^"]+)"?')

    def __init__(self):
        # The trailing '&' lets run() append 'currentPage=<n>' directly.
        self.base_url = "https://rate.tmall.com/list_detail_rate.htm?itemId=566515357894&spuId=946656746&sellerId=2807304908&order=3&callback=jsonp721&"
        self.info = {}

    @staticmethod
    def _extract_comments(content):
        """Return every non-empty "rateContent" value found in *content*.

        Args:
            content: Response body text to scan.

        Returns:
            A list of review strings in order of appearance; empty when the
            text holds no non-empty "rateContent" field.
        """
        return RateSpider._RATE_CONTENT_RE.findall(content)

    def run(self, pages=range(1, 20), timeout=10):
        """Download each review page and print the reviews it contains.

        Args:
            pages: Iterable of page numbers to request. Defaults to pages
                1 through 19, as in the original script.
            timeout: Seconds to wait for the server on each request, so a
                stalled connection cannot hang the crawl indefinitely.

        Raises:
            requests.RequestException: If a request fails or times out.
        """
        # NOTE(review): the Cookie below looks like a captured login session
        # committed in source; it has probably expired and should be loaded
        # from configuration instead -- confirm before reuse.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
            'Cookie': 'cna=d3pnE7dQMG4CAWVRvy3aSk40; hng=CN%7Czh-CN%7CCNY%7C156; tk_trace=1; t=8f9cc6d9a1c0af2387e31c33ecf4b694; _tb_token_=e1735eb1eb55e; cookie2=13567358448225d441d0eadb8b8c5dc4; _m_h5_tk=d3958dd9cdf03583fbea9e6d5a3fe8f1_1528703266280; _m_h5_tk_enc=d953ce5a3b073817fea24ea16a8baca2; dnk=%5Cu57641280437633; uc1=cookie14=UoTeNmawgQhUpg%3D%3D&lng=zh_CN&cookie16=V32FPkk%2FxXMk5UvIbNtImtMfJQ%3D%3D&existShop=false&cookie21=UIHiLt3xThH8t7YQoFNq&tag=8&cookie15=Vq8l%2BKCLz3%2F65A%3D%3D&pas=0; uc3=nk2=o7lp91dudQD3EdmM&id2=UojWk7em%2F3GueA%3D%3D&vt3=F8dBzr2Pafs8PwPUtXI%3D&lg2=W5iHLLyFOGW7aA%3D%3D; tracknick=%5Cu57641280437633; lid=%E5%9D%A41280437633; _l_g_=Ug%3D%3D; unb=1926861396; lgc=%5Cu57641280437633; cookie1=U%2BNrg3z7z0ezRdp1%2B7SkQdXLlDogAw6QGRt7lOXQyDc%3D; login=true; cookie17=UojWk7em%2F3GueA%3D%3D; _nk_=%5Cu57641280437633; sg=36b; csg=0b11d3d0; enc=YQrKOSM0khhrP6SosUBFkGI9OAC2DSRct8udG5cA9kyhuqldWYT7YS8xnAbd4z6OBYIglYjmlStldSSngNBJlA%3D%3D; otherx=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0; x=__ll%3D-1%26_ato%3D0; whl=-1%260%260%260; JSESSIONID=7D34D3E3F060EBCFE67E273E3CA85DA7; isg=BGVlWlMbUnr5nrdWINOKFQo6dCGVXB8tz7Fds2dKBByrfoTwL_OdBJl4DOII_jHs',
            'Connection': 'keep-alive',
        }

        for page in pages:
            detail_url = self.base_url + 'currentPage=' + str(page)
            response = requests.get(detail_url, headers=headers, timeout=timeout)
            # A page without reviews yields an empty list and prints nothing.
            for comment in self._extract_comments(response.text):
                print(comment)

if __name__ == '__main__':
    # Script entry point: build the spider and crawl with its defaults.
    RateSpider().run()
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额:3.43 前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值