python爬虫 - 京东评论

最新推荐文章于 2024-08-24 17:03:21 发布

kyle-fang

最新推荐文章于 2024-08-24 17:03:21 发布

阅读量390

点赞数 2

分类专栏： Python爬虫 python 文章标签： python 产品设计

本文链接：https://blog.csdn.net/fangweijiex/article/details/103748404

版权

python 同时被 2 个专栏收录

72 篇文章 6 订阅

订阅专栏

Python爬虫

12 篇文章 1 订阅

订阅专栏

爬取京东某手机的评论

import urllib.request
import urllib.parse
import re
import json
import jsonpath
import time

# Module-level accumulator: main() appends one dict per scraped comment and
# the script entry point serialises the whole list once crawling is done.
item_list = list()

def _strip_jsonp(text):
    """Return the JSON document wrapped inside a JSONP response.

    The endpoint answers with ``callback({...});``. Only the outermost
    wrapper is removed: everything between the first ``(`` and the last
    ``)`` is returned, so parentheses and semicolons that occur inside the
    comment text itself are left untouched. Text that already starts with
    ``{`` or ``[`` (no wrapper) is returned stripped of whitespace.
    """
    stripped = text.strip()
    if stripped.startswith(('{', '[')):
        return stripped
    start = stripped.find('(')
    end = stripped.rfind(')')
    if start == -1 or end < start:
        return stripped
    return stripped[start + 1:end]


def main():
    """Crawl JD.com product comments page by page into ``item_list``.

    Prompts on stdin for a start page and an end page, downloads every page
    in ``range(start_page, end_page)`` (the end page itself is NOT fetched),
    and appends one dict per comment to the module-level ``item_list``.
    Sleeps 3 seconds after each page.

    Each appended dict holds the raw ``jsonpath`` results for avatar,
    nickname, comment images, comment videos and product name (a list of
    matches, or ``False`` when nothing matched) plus the first matched
    comment text (``False`` when there is none).

    Raises:
        ValueError: if a typed page number is not an integer, or a response
            body is not valid JSON (``json.JSONDecodeError`` is a subclass).
        urllib.error.URLError: if a page cannot be downloaded.
    """
    start_page = int(input("开始页面："))
    end_page = int(input("结束页面："))

    # Loop-invariant request pieces, built once instead of on every page.
    url_prefix = ('https://sclub.jd.com/comment/productPageComments.action'
                  '?callback=fetchJSON_comment98vv34153'
                  '&productId=100000177760&score=0&sortType=5&page=')
    url_suffix = '&pageSize=10&isShadowSku=0&rid=0&fold=1'
    header = {'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64;'
                            ' zh-CN; rv:1.9.2.10) Gecko/20100922'
                            ' Ubuntu/10.10 (maverick) Firefox/3.6.10'}

    for page in range(start_page, end_page):
        url = url_prefix + str(page) + url_suffix
        request = urllib.request.Request(url=url, headers=header)

        # Close the connection deterministically once the body is read.
        with urllib.request.urlopen(request) as response:
            raw = response.read()

        # Results are only written to disk after the whole crawl finishes,
        # so one undecodable byte must not abort the run: replace it.
        json_text = raw.decode('gbk', errors='replace')

        obj = json.loads(_strip_jsonp(json_text))

        # A missing or null "comments" entry is treated as an empty page.
        comments_list = obj.get('comments') or []
        for comment in comments_list:
            # User avatar
            face = jsonpath.jsonpath(comment, '$..userImage')
            # User nickname
            userName = jsonpath.jsonpath(comment, '$..nickname')
            # Images attached to the comment
            commodityImage = jsonpath.jsonpath(comment, '$..imgUrl')
            # Videos attached to the comment
            commodityVideos = jsonpath.jsonpath(comment, '$.videos..remark')
            # Comment text: jsonpath yields False when nothing matches, so
            # guard before taking the first match.
            content_matches = jsonpath.jsonpath(comment, '$..content')
            content = content_matches[0] if content_matches else False
            # Name of the reviewed product
            phoneInformation = jsonpath.jsonpath(comment, '$..referenceName')

            # Collect the fields of this comment into one record.
            item_list.append({
                '用户头像': face,
                '用户名': userName,
                '评论图片': commodityImage,
                '评论视频': commodityVideos,
                '评论': content,
                '手机信息': phoneInformation,
            })

        # Pause after each page before requesting the next one.
        time.sleep(3)
if __name__ == "__main__":
    # Run the crawl first; it fills the module-level item_list.
    main()
    # ensure_ascii=False keeps the Chinese text readable in the output file
    # instead of emitting \uXXXX escapes.
    serialized = json.dumps(item_list, ensure_ascii=False)
    # Persist everything collected in a single write.
    with open('JD.txt', mode='w', encoding='utf8') as out_file:
        out_file.write(serialized)