Scrapy 的基本结构、爬取流程、定义随机请求头以及抓取异步网页的方法,请参考:《scrapy框架–基础结构加爬取异步加载数据的网址项目完整实例》
- items.py
class BooksItem(scrapy.Item):
    """Item holding the fields collected for a single book.

    Every attribute is declared as a ``scrapy.Field()``; the values are
    filled in by whichever spider populates the item.
    """

    book_name = scrapy.Field()  # book title
    book_star = scrapy.Field()  # book rating
    book_pl = scrapy.Field()  # number of reviews/comments
    book_author = scrapy.Field()  # author
    book_publish = scrapy.Field()  # publisher
    book_date = scrapy.Field()  # publication date
    book_price = scrapy.Field()  # price
- spider.py
import scrapy
from scrapy import Selector
from books.items import BooksItem
class BookspiderSpider(scrapy.Spider):
name = 'bookspider'
allowed_domains = ['douban.com']
start_urls = ['https://book.douban.com/tag/%E7%BB%8F%E6%B5%8E%E5%AD%A6']
# 自定义start_requests方法 多页爬取
def