#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2018-03-01 21:53:16
# Project: caipu
from pyspider.libs.base_handler import *
class Handler(BaseHandler):
    """pyspider handler for the "caipu" (recipe) project.

    Crawl flow: ``on_start`` seeds one listing page, ``index_page`` follows
    the links found in that listing, and ``detail_page`` extracts the
    fields of each linked page.
    """

    # No project-wide crawl options are overridden.
    crawl_config = {
    }

    @every(minutes=24 * 60)
    def on_start(self):
        """Seed the crawl with the listing page.

        Decorated with ``@every(minutes=24 * 60)``, i.e. a 24-hour interval.
        """
        self.crawl('http://www.ttmeishi.com/CaiXi/JiaChangCai/', callback=self.index_page)

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        """Queue every link inside ``.cx_liebiao`` for ``detail_page``.

        Args:
            response: the fetched listing page; ``response.doc`` is queried
                with a CSS selector.
        """
        for each in response.doc('.cx_liebiao a').items():
            href = each.attr.href
            # Anchors without an href carry no URL to follow.
            if href:
                self.crawl(href, callback=self.detail_page)

    @config(priority=2)
    def detail_page(self, response):
        """Print the page's paired text/image entries and return its fields.

        The text of each ``.c_bz_neirong`` element is printed, followed by
        the ``src`` of the next ``.c_bz_img img`` element.  Printing stops
        as soon as either selection runs out.

        Args:
            response: the fetched detail page.

        Returns:
            dict: the text of ``.content h1`` under ``"title"``, of
            ``.c_leibie_a`` under ``"biaoqian"`` and of ``.c_leibie_sc``
            under ``"cailiao"`` (presumably title, tags and ingredients --
            confirm against the site markup).
        """
        steps = response.doc('.c_bz_neirong').items()
        images = response.doc('.c_bz_img img').items()
        for step in steps:
            print(step.text())
            # The built-in next() with a default replaces the direct
            # __next__() call and the StopIteration handler.
            image = next(images, None)
            if image is None:
                break
            print(image.attr.src)
        return {
            "title": response.doc('.content h1').text(),
            "biaoqian": response.doc('.c_leibie_a').text(),
            "cailiao": response.doc('.c_leibie_sc').text(),
        }
Python 爬虫实战(二):使用 pyspider 爬取菜谱及图片
最新推荐文章于 2024-02-20 21:31:20 发布