python爬取商品评论_python 爬取京东商品评论

#!/usr/bin/python

# -*- coding: UTF-8 -*-

import requests

import re

import json

import time

import xlwt

import random

#

#

# Spreadsheet setup: a SimSun font wrapped in an XFStyle, the workbook,
# and the single sheet that receives every comment.
# NOTE(review): `style` is built here but is not passed to any write call
# in the code visible here.
font = xlwt.Font()
font.name = 'SimSun'
style = xlwt.XFStyle()
style.font = font

# Create the workbook and add one sheet; cells may be overwritten.
w = xlwt.Workbook(encoding='utf-8')
ws = w.add_sheet('sheet 1', cell_overwrite_ok=True)

# Header row (row 0): one column per comment field, in output order.
for _col, _title in enumerate((
        'content',
        'userClientShow',
        'creationTime',
        'userLevelName',
        'productColor',
        'userLevelId',
        'score',
        'referenceName',
        'referenceTime',
        'isMobile',
        'nickname',
)):
    ws.write(0, _col, _title)

# Index of the next sheet row to fill; row 0 holds the header.
row = 1

#

# Takes one parsed page of comments and appends it to the sheet,
# one spreadsheet row per comment.
def write_json_to_xls(dat):
    """Append every comment in ``dat['comments']`` to the worksheet.

    Each comment becomes one row, written at the module-level ``row``
    index, which is advanced by one per comment. Columns follow the same
    order as the header row.

    Args:
        dat: Mapping with a ``'comments'`` entry holding a list of comment
            mappings. A ``None`` or empty list writes nothing.

    Raises:
        KeyError: If ``dat`` has no ``'comments'`` key at all.
    """
    global row

    fields = (
        'content',
        'userClientShow',
        'creationTime',
        'userLevelName',
        'productColor',
        'userLevelId',
        'score',
        'referenceName',
        'referenceTime',
        'isMobile',
        'nickname',
    )

    for comment in dat['comments'] or ():
        for col, field in enumerate(fields):
            # A field absent from this comment becomes a blank cell instead
            # of raising KeyError, which would abandon the row half-written
            # and lose the remaining comments of the page.
            ws.write(row, col, comment.get(field, ''))
        row += 1

#

#

# Fetch the comment pages one at a time, append each page to the sheet,
# then save the workbook.


def _extract_jsonp_payload(text):
    """Return the JSON text wrapped by a JSONP response ``callback(...)``.

    The payload is everything between the first ``(`` and the last ``)``,
    so parentheses inside the JSON (e.g. in comment text) do not cut it
    short.

    Raises:
        ValueError: If ``text`` has no ``(`` ... ``)`` wrapper.
    """
    start = text.find('(')
    end = text.rfind(')')
    if start == -1 or end <= start:
        raise ValueError('response is not a JSONP callback')
    return text[start + 1:end]


hearders = {
    "Referer": "https://item.jd.hk/2990360.html",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36",
}

# Counter used to build a fresh `jQuery<N>` callback name for every request.
auto_jquery = 8809536

# Example of a full request URL captured earlier:
# https://sclub.jd.com/comment/productPageComments.action?callback=jQuery2663266&productId=2990360&score=2&sortType=5&page=1&pageSize=10&pin=null&_=1563330030798
url = 'https://sclub.jd.com/comment/productPageComments.action'

for i in range(1, 10 + 1):
    try:
        auto_jquery = auto_jquery + 1
        jquery = 'jQuery%d' % auto_jquery

        # `_` is the current epoch seconds followed by three random digits,
        # giving a 13-digit value shaped like the `_=1563330030798` in the
        # captured URL above (presumably a millisecond timestamp).
        times = random.randint(100, 999)
        print(times)
        auto_tims = int(time.time())
        true_string = '%d%d' % (auto_tims, times)
        print(true_string)

        params = {
            'callback': jquery,
            'productId': '2990360',
            'sortType': '5',
            'page': i,
            'pageSize': '10',
            'pin': 'null',
            '_': true_string,
            'score': '2',
        }
        # The timeout keeps one stalled connection from hanging the run;
        # a timeout is reported by the except clause below like any other
        # failed page.
        json_req = requests.get(url, params=params, headers=hearders, timeout=10)
        print('11111111111111111111111')
        print(json_req.url)
        print(json_req.text)
        print('22222222222222222222222')

        payload = _extract_jsonp_payload(json_req.text)
        print(payload)
        print('666666666666666666')

        json_flag = json.loads(payload)
        print(json_flag['comments'])
        write_json_to_xls(json_flag)
        print(u'写入第%d页数据' % i)
    except Exception as e:
        # Best effort: report the failed page and carry on with the next.
        print(u'获取数据失败数据', e)
    # Short pause between page requests.
    time.sleep(0.5)

# Save the collected rows to disk.
w.save('result.xls')

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值