# Usage:
#   1. Save this code into a .py file.
#   2. Install the third-party dependency:
#          pip install scrapy
#   3. Start the crawler with: python <filename>.py
import scrapy
class DirectorySpider(scrapy.Spider):
    """Spider that visits the start page and emits every anchor href it finds."""

    name = 'directory_spider'
    start_urls = ['http://www.xiankabao.com']

    def parse(self, response):
        """Yield one ``{'link': href}`` item per hyperlink in *response*."""
        # Pull the href attribute of every <a> tag on the page.
        for href in response.css('a::attr(href)').getall():
            yield {'link': href}
# Run the spider as a standalone script (no `scrapy crawl` CLI needed).
if __name__ == "__main__":
    from scrapy.crawler import CrawlerProcess

    crawler = CrawlerProcess()
    crawler.crawl(DirectorySpider)
    # Blocks until the crawl finishes and the reactor stops.
    crawler.start()