python3.6 爬取淘宝评论(大衣+内衣)

'''
Created on 2017年12月3日

@author: Geng
'''
import requests
import time
import random
import re
# 内衣
# url = "https://rate.tmall.com/list_detail_rate.htm?itemId=547773818796&spuId=842179060&sellerId=907782288&order=3&append=0&content=1&tagId=&posi=&picture=&ua=098%23E1hvB9vnvPOvUvCkvvvvvjiPPLqWzjY8RLs9sj3mPmPWljl8RLzvljtWRFqWAjlW9phvHnQGNVinzYswzv5b7MJgzRjw9HuCdphvmpvUG9U4V9v1agwCvvpvCvvv2QhvCvvvMMGCvpvVvmvvvhCvmphvLvA4dQvjEGLIAXZTKFEw9Exrs8TJEcqUAj7Q%2Bul1occ63Wv7rjlEgnLv%2B2Kz8Z0vQRAn%2BbyDCwFIAXZTKFEw9Exr08TJnDeDyO2vHd8tvpvIvvvvvhCvvvvvvUEpphvvs9vv9DCvpvQovvmmZhCv2jhvvUEpphvWw4yCvv9vvUvQORQH1UyCvvOUvvVvayptvpvhvvvvv8wCvvpvvUmmdphvmpvWrUpGPvC1nLyCvvpvvvvv&isg=AurqQavURICRWchqI2pb1fXnO1CGWGXUUQpYDnSi0z2Kp4lhXeg-xXOVQeVA&needFold=0"  
# 大衣
url = "https://rate.tmall.com/list_detail_rate.htm?itemId=538581707711&spuId=700193432&sellerId=761456278&order=3&append=0&content=1&tagId=&posi=&picture=&ua=098%23E1hvwvvWvRyvUvCkvvvvvjiPPLqUtjtnRsMvgjEUPmP9AjECR2sO6jrPPFsW1jnm3QhvCvmvphm5vpvhvvCCBvhCvvOvChCvvvvEvpCW9a8ByBzhV4g7%2B3%2BuAj7JVXu4X9nr1CuKHdUf8rCl5F%2FAdcH2afmAdX9XjomxfBeKhqUf8rClHd8rejpiYPeAdX9XjLVxfXeKHs9lBdyCvm9vvhCvvvvvvvvvBJZvvUChvvCHtpvv9ZUvvhcDvvmCb9vvBJZvvUhKuphvmvvvpoH%2BZgApkphvC9hvpyPOAvGCvvpvvPMMRphvCvvvphmrvpvEvvV%2Busyv9X6c9phvHHiaTHk9zHi4c4uOts1N7rH4NYGBRphvCvvvphv%3D&isg=AlJSCV-sXM5zGqAyy2IzjS1voxg-YF2cubLwdhyrjoXnL_YpBPDWDbkN6b3o&needFold=0"

# Browser-like User-Agent header; it is identical for every request, so it is
# built once instead of on each iteration.
kv = {'User-Agent': 'Mozilla/5.0'}

# One pattern per review field; the capture group is the quoted value that
# follows the key in the response text.
sku_pattern = re.compile(r'"auctionSku":"(.*?)"')
content_pattern = re.compile(r'"rateContent":"(.*?)"')
date_pattern = re.compile(r'"rateDate":"(.*?)"')

# Request review pages 1..98 and print, for each page, a list of
# (auctionSku, rateContent, rateDate) tuples extracted from the response.
for page in range(1, 99):
    # Millisecond timestamp split at the decimal point: t[0] is the integer
    # part, t[1] the fractional digits.
    t = str(time.time() * 1000).split('.')
    param = {
        'currentPage': page,
        '_ksTS': '{}_{}'.format(t[0], t[1]),
        'callback': 'jsonp{}'.format(int(t[1]) + 1),
    }
    # Random pause of up to one second between requests.
    time.sleep(random.random())
    # The timeout keeps a single stalled connection from hanging the run.
    response = requests.get(url, params=param, headers=kv, timeout=10)
    data = response.text

    # zip() stops at the shortest list, so a page where the three fields do
    # not occur the same number of times yields fewer tuples instead of
    # raising IndexError.
    text = list(zip(
        sku_pattern.findall(data),
        content_pattern.findall(data),
        date_pattern.findall(data),
    ))
    print(text)

 

转载于:https://my.oschina.net/gain/blog/1584571

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值