import scrapy
class JDCommentsSpider(scrapy.Spider):
    """Spider that fetches product comments from JD.com's comment JSON API."""

    # Unique spider name (must not collide with other spiders); used to launch the crawl.
    name = 'JDCommentsSpider'
    # Domains the spider is allowed to crawl.
    allowed_domains = ['jd.com']

    def start_requests(self):
        """Manually issue the initial comment-page requests.

        Bug fix: the original URL hard-coded ``page=1``, so
        ``url_format.format(page=page)`` had no placeholder to fill and
        every request fetched the exact same page.  The URL now carries a
        ``{page}`` placeholder that the loop substitutes.
        """
        url_format = ('https://club.jd.com/comment/productPageComments.action'
                      '?callback=fetchJSON_comment98&productId=100008348542'
                      '&score=0&sortType=5&page={page}&pageSize=10'
                      '&isShadowSku=0&rid=0&fold=1')
        # NOTE(review): range(1) yields only page 0 — the API's first page.
        # Widen the range to crawl additional pages.
        for page in range(1):
            url = url_format.format(page=page)
            # Issue the request manually and route the response to my_parse.
            yield scrapy.Request(url=url, callback=self.my_parse)

    def my_parse(self, response):
        """Parse one comment-page response (currently a stub)."""
        # print(response.text)
        pass
from scrapy.cmdline import execute

if __name__ == '__main__':
    # Launch the spider by name through Scrapy's CLI entry point,
    # exactly as `scrapy crawl JDCommentsSpider` would from a shell.
    execute(['scrapy', 'crawl', 'JDCommentsSpider'])
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import scrapy
class ScrapydemoItem(scrapy.Item):
    """Auto-generated placeholder item with no fields defined yet."""

    # Declare item fields here, e.g.: name = scrapy.Field()
    pass
# 明确爬取的目标结构
class JDCommentItem(scrapy.Item):
    """Target structure of one scraped JD product comment.

    Bug fix: the field factory is ``scrapy.Field`` (capital F);
    ``scrapy.field`` does not exist, so the original class raised
    ``AttributeError`` the moment the module was imported.
    """

    id = scrapy.Field()            # comment id from the API payload
    nickname = scrapy.Field()      # commenter's display name
    score = scrapy.Field()         # star rating
    content = scrapy.Field()       # comment body text
    creationTime = scrapy.Field()  # creation-time string from the API