1. 爬虫
JD.py
import requests
from urllib.parse import quote
from urllib.parse import urlencode
from lxml import etree
import logging
import json
import time
class JDSpider:
# Spider implementation class: construct an instance with a product category (e.g. phones, computers), then call getData to crawl the data.
def __init__(self, categlory):
self.startUrl = "https://search.jd.com/Search?keyword=%s&enc=utf-8" % (quote(categlory)) # jD起始搜索页面
self.commentBaseUrl = "https://club.jd.com/comment/productPageComments.action?"
self.headers = {
"User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"
}
self.productsId = self.getId()
self.comtype = {0: "nagetive", 1: "medium