python 爬取京东商品评论

#!/usr/bin/python
# -*- coding: UTF-8 -*-
import requests
import re
import json
import time
import xlwt
import random

#
#
#配置表格
#不需要明白是干啥的
#有下面4行代码就可以往表格写中文了
#
# Font/style objects. Per the original author's note, this setup is meant to
# allow Chinese text in the sheet.
# NOTE(review): no ws.write() call visible in this file passes `style` --
# confirm whether it is still needed.
style = xlwt.XFStyle()
font = xlwt.Font()
font.name = 'SimSun'
style.font = font

# One workbook holding one sheet; cells may be overwritten after being written.
w = xlwt.Workbook(encoding='utf-8')
ws = w.add_sheet('sheet 1', cell_overwrite_ok=True)

# Index of the next data row to fill (row 0 is the header row).
row = 1

# Header row: one column per comment field, left to right.
for _col, _title in enumerate((
    'content',
    'userClientShow',
    'creationTime',
    'userLevelName',
    'productColor',
    'userLevelId',
    'score',
    'referenceName',
    'referenceTime',
    'isMobile',
    'nickname',
)):
    ws.write(0, _col, _title)

#
#接受一个json对象
#将内容写进表格
#一次一页评论
#
def write_json_to_xls(dat):
    """Append one page of comments to the module-level worksheet ``ws``.

    Each item of ``dat['comments']`` is written as one row, with the fields
    laid out in the same column order as the header row. The module-level
    ``row`` counter is advanced by one per comment so that successive calls
    keep appending below the previous page.

    Args:
        dat: Decoded JSON object for one page. ``dat['comments']`` must be an
            iterable of dicts.

    Raises:
        KeyError: If ``dat`` has no ``'comments'`` entry.
    """
    global row
    # Column order must match the header row written at module level.
    fields = (
        'content',
        'userClientShow',
        'creationTime',
        'userLevelName',
        'productColor',
        'userLevelId',
        'score',
        'referenceName',
        'referenceTime',
        'isMobile',
        'nickname',
    )
    for comment in dat['comments']:
        for col, field in enumerate(fields):
            # A comment lacking a field gets an empty cell. Previously a
            # missing key raised KeyError mid-row, leaving a partial row and
            # dropping every remaining comment of the page.
            ws.write(row, col, comment.get(field, ''))
        row += 1

#
#
# 循环获取数据
#
#
# Request headers sent with every comment-page fetch.
hearders = {
    "Referer": "https://item.jd.hk/2990360.html",
    # The original value ended in a stray "'" (typo inside the string); removed.
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36",
}
# Counter used to build a fresh "jQuery<N>" JSONP callback name per request.
auto_jquery = 8809536
# Endpoint is the same for every page; only the query parameters change.
url = 'https://sclub.jd.com/comment/productPageComments.action'


def _extract_jsonp_payload(text):
    """Return the text between the first '(' and the last ')' of *text*.

    The response body has the form ``callback(<json>)``. Splitting on every
    parenthesis (as the code used to) cut the JSON short as soon as any
    comment text contained '(' or ')'; slicing between the outermost pair
    keeps the payload intact.

    Raises:
        ValueError: If *text* contains no '(' or no ')'.
    """
    start = text.index('(') + 1
    end = text.rindex(')')
    return text[start:end]


# Fetch pages 1..10, one request per page, appending each page to the sheet.
for i in range(1, 10 + 1):
    try:
        auto_jquery = auto_jquery + 1
        jquery = 'jQuery%d' % auto_jquery
        times = random.randint(100, 999)
        print(times)
        auto_tims = int(time.time())
        # Epoch seconds followed by three random digits, sent as the "_"
        # parameter -- presumably emulating a millisecond timestamp.
        true_string = '%d%d' % (auto_tims, times)
        print(true_string)

        params = {
            'callback': jquery,
            'productId': '2990360',
            'sortType': '5',
            'page': i,
            'pageSize': '10',
            'pin': 'null',
            '_': true_string,
            'score': '2',
        }
        # Without a timeout a stalled connection would block the script
        # forever; 10 seconds is an arbitrary but generous bound.
        json_req = requests.get(url, params=params, headers=hearders, timeout=10)
        print('11111111111111111111111')
        print(json_req.url)
        print(json_req.text)
        print('22222222222222222222222')

        payload = _extract_jsonp_payload(json_req.text)
        print(payload)
        print('666666666666666666')
        json_flag = json.loads(payload)
        print(json_flag['comments'])
        write_json_to_xls(json_flag)
        print(u'写入第%d页数据' % i)
    except Exception as e:
        # Best-effort: report the failed page and carry on with the next one.
        # Formatted into a single string so the message prints as text
        # rather than as a tuple repr.
        print(u'获取数据失败数据 %r' % (e,))
    # Short pause between requests.
    time.sleep(0.5)


# Persist everything collected so far to disk.
w.save('result.xls')

转载于:https://my.oschina.net/u/588516/blog/3075440

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值