import scrapy
class SpiderHouseSpider(scrapy.Spider):
    """Spider that prints recipe summaries from a douguo.com listing page.

    The single start URL is the percent-encoded "家常菜" category page. For
    every ``<li>`` under the ``cook-list`` ``<ul>`` the spider prints the
    title, ingredients, rating and author text it finds.
    """

    name = 'spider_house'
    allowed_domains = ['douguo.com']
    start_urls = ['https://www.douguo.com/caipu/%E5%AE%B6%E5%B8%B8%E8%8F%9C']

    def parse(self, response, **kwargs):
        """Print one line of extracted fields per list entry in *response*.

        Args:
            response: The downloaded page; only its ``xpath`` method is used.
            **kwargs: Accepted for callback compatibility; not used here.
        """
        for entry in response.xpath('//ul[@class="cook-list"]//li'):
            title = entry.xpath("./div[1]/a/text()").extract_first()
            # extract_first() yields None when the XPath matches nothing;
            # only strip the marker character when a title was found, so a
            # list entry without a title link no longer aborts the callback.
            if title is not None:
                title = title.replace('㊙', '')
            ingredients = entry.xpath("./div[1]/p/text()").extract_first()
            rating = entry.xpath("./div[1]/div[1]/span//text()").extract_first()
            author = entry.xpath("./div[1]/div[2]/a[1]/text()").extract_first()
            print(title, ingredients, rating, author)
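# To run: open a command prompt (cmd), cd into the project directory, and run
# `scrapy crawl spider_house`. The argument is the spider's `name` attribute
# (usually the same as the code file name; note: do NOT append `.py`).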