import requests
from lxml import etree
from concurrent import futures
import json
import pandas as pd
class CrawlDog:
# HTTP headers used when requesting the product-comment endpoint.
# NOTE(review): the Referer is hard-coded to product id 12615065 even though
# the crawler searches by keyword — presumably JD only checks that *a*
# plausible item Referer is present; verify against the comment API's behavior.
comment_headers = {
'Referer': 'https://item.jd.com/%s.html' % 12615065,
'Accept-Charset': 'utf-8',
'accept-language': 'zh,en-US;q=0.9,en;q=0.8,zh-TW;q=0.7,zh-CN;q=0.6',
# Desktop Chrome 74 user agent, split across two implicitly-concatenated literals.
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/74.0.3729.169 Safari/537.36'
}
def __init__(self, keyword):
    """Set up the crawler for one search term.

    :param keyword: the search keyword whose result pages will be crawled
    """
    # Accumulator for scraped records; starts out empty and is
    # (presumably) appended to by the crawl methods.
    self.data = pd.DataFrame()
    # Remember the term so page URLs can be built later.
    self.keyword = keyword
def crawl_message(self, page):
"""
从搜索页获取相应信息并存入数据库
:param page: 搜索页的页码
"""
url = 'https://search.jd.com/Search?keyword={}&enc=utf-8&page={}&s={}'.format(self.keyword, page, (page-1)*30+1)
index_headers = {
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,'
'application/si