catalog
- 获取所爬取页面的 HTML 源码（字符串）
response.text
- 在items.py中定义爬取内容的数据结构
class QuotesItem(scrapy.Item):
    """Container for the data scraped from a single quote.

    Declares three fields: ``text``, ``author`` and ``tags``.
    """

    # Each scrapy.Field() declares one key that can be set on the item.
    text = scrapy.Field()
    author = scrapy.Field()
    tags = scrapy.Field()
- css选择器用法
quotes = response.css('.quote')
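- 选取标签中的文本节点（`::text` 返回的是 SelectorList，需再调用 extract()/extract_first() 才能得到字符串；在单个 quote 内，名言正文位于 `.text` 标签，选择器应写作 `.text::text`）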
text = quote.css('.quote::text')
- 取匹配结果中的第一个值（没有匹配时返回 None）
author = quote.css('.author::text').extract_first()
- 取所有匹配结果，返回字符串列表
tags = quote.css('.tags .tag::text').extract()
- 使用 scrapy shell 交互式调试页面
scrapy shell quotes.toscrape.com