# Scrape the user reviews of a phone product on JD.com (京东).
import urllib.request
import urllib.parse
import re
import json
import jsonpath
import time
# Module-level accumulator: main() appends one dict per scraped comment, and
# the script entry point serialises the whole list to JD.txt.
item_list = []
def _unwrap_jsonp(text):
    """Return the JSON payload of a JSONP reply such as ``callback({...});``.

    Only the outermost ``callback( ... )`` wrapper is removed, so parentheses
    and semicolons that appear inside the comment text itself are preserved.
    """
    body = text.strip()
    if body.startswith(('{', '[')):
        # Already bare JSON; there is no wrapper to remove.
        return body
    opening = body.find('(')
    closing = body.rfind(')')
    if opening == -1 or closing < opening:
        # Not JSONP-shaped; hand it back so json.loads reports the problem.
        return body
    return body[opening + 1:closing]


def _extract_item(comment):
    """Build the output record for one entry of the reply's ``comments`` list."""
    # NOTE(review): jsonpath.jsonpath is expected to return False (not an
    # empty list) when nothing matches - confirm against the installed
    # jsonpath version. Those False values are stored as-is for the list
    # fields, exactly as before.
    content_matches = jsonpath.jsonpath(comment, '$..content')
    return {
        # User avatar
        '用户头像': jsonpath.jsonpath(comment, '$..userImage'),
        # User name
        '用户名': jsonpath.jsonpath(comment, '$..nickname'),
        # Images attached to the comment
        '评论图片': jsonpath.jsonpath(comment, '$..imgUrl'),
        # Videos attached to the comment
        '评论视频': jsonpath.jsonpath(comment, '$.videos..remark'),
        # Comment text: first match only; None instead of crashing on no match
        '评论': content_matches[0] if content_matches else None,
        # Product (phone) description
        '手机信息': jsonpath.jsonpath(comment, '$..referenceName'),
    }


def main():
    """Scrape JD.com comment pages for one product into ``item_list``.

    Prompts on stdin for the first and last page number. Both bounds are
    inclusive, so the page entered as the end page is fetched as well. Each
    page of the JSONP comment feed is downloaded, unwrapped and parsed, and
    one dict per comment is appended to the module-level ``item_list``.

    Raises:
        ValueError: if a prompt answer is not an integer, or the reply is not
            valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
        urllib.error.URLError: if a page cannot be downloaded.
    """
    start_page = int(input("开始页面:"))
    end_page = int(input("结束页面:"))

    # Request pieces that do not change between pages.
    base_url = 'https://sclub.jd.com/comment/productPageComments.action?'
    header = {'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64;'
                            ' zh-CN; rv:1.9.2.10) Gecko/20100922'
                            ' Ubuntu/10.10 (maverick) Firefox/3.6.10'
              }

    for page in range(start_page, end_page + 1):
        # A list of pairs keeps the query parameters in their original order.
        query = urllib.parse.urlencode([
            ('callback', 'fetchJSON_comment98vv34153'),
            ('productId', '100000177760'),
            ('score', 0),
            ('sortType', 5),
            ('page', page),
            ('pageSize', 10),
            ('isShadowSku', 0),
            ('rid', 0),
            ('fold', 1),
        ])
        request = urllib.request.Request(url=base_url + query, headers=header)
        # The context manager closes the HTTP response even if decoding fails.
        with urllib.request.urlopen(request) as response:
            json_text = response.read().decode('gbk')

        obj = json.loads(_unwrap_jsonp(json_text))
        # An error reply may carry no 'comments' key; treat it as an empty page.
        for comment in obj.get('comments') or []:
            item_list.append(_extract_item(comment))

        # Throttle between requests; no need to wait after the final page.
        if page != end_page:
            time.sleep(3)
if __name__ == "__main__":
    # Run the interactive scrape; main() appends its results to item_list.
    main()
    # Serialise before opening the file so JD.txt is only touched once the
    # JSON text exists. ensure_ascii=False writes the Chinese text as-is
    # rather than as \uXXXX escapes.
    serialized = json.dumps(item_list, ensure_ascii=False)
    with open('JD.txt', mode='w', encoding='utf8') as out_file:
        out_file.write(serialized)