先预览下数据
贴上代码
import time
import requests
import re
import json
from jd_comment import db_util
class CommentSpider:
"""
Crawl product comments from JD.com (京东).
"""
# Template for the JD comment-listing endpoint: {0} is filled with the product
# id and {1} with the page number. The fixed query string asks for 10 comments
# per page (pageSize=10) and names the callback fetchJSON_comment98.
commentUrl = (
    "https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98&"
    "productId={0}&score=0&sortType=5&page={1}&pageSize=10&isShadowSku=0&rid=0&fold=1"
)

# Template for the product detail page; {0} is the product id. build_headers
# formats this to produce the Referer header. (The "reffer" spelling is kept
# because other code refers to the attribute by this name.)
origin_reffer = "https://item.jd.com/{0}.html"

# NOTE(review): presumably the pause, in seconds, between requests — the code
# that reads it is not visible here; confirm.
sleep_seconds = 2
# Create a spider bound to a single product.
# productId is the JD product id; it is stored unchanged on the instance, and
# build_headers later formats it into the Referer URL.
def __init__(self, productId):
self.productId = productId
def build_headers(self, next_page_cookie=None):
Referer = self.origin_reffer.format(self.productId)
if next_page_cookie is None:
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36",
"Referer": Referer,
"Host": "club.jd.com",
"Cookie": "unpl=V2_ZzNtbRFfQBJzWxQEfB9UAWJQQF9KBBQVdVhOXHpOXwJkUUBfclRCFnUUR1RnGFQUZAEZXkJcRhZFCEdkeBBVAWMDE1VGZxBFLV0CFSNGF1wjU00zQwBBQHcJFF0uSgwDYgcaDhFTQEJ2XBVQL0oMDDdRFAhyZ0AVRQhHZHsYXgdgBhRVSlBzJXI4dmR9HlsCYQEiXHJWc1chVE9UeR1fBioDE19AUEYTfQBBZHopXw%3d%3d; __jdv=76161171|baidu-pinzhuan|t_288551095_baidupinzhuan|cpc|0f3d30c8dba7459bb52f2eb5eba8ac7d_0_b9267bca67954bc39bf11a990f262cc3|1610205703260; __jdu=1033647996; areaId=2; PCSYCityID=CN_310000_310100_0; shshshfpb=wVjD8v2Dr7inEEgCOGiQ9kQ%3D%3D; shshshfpa=6792afdc-4156-d1ed-8c5a-86d979144193-1591804178; __jda=122270672.1033647996.1610205703.161020