数据来源:
爬取动漫花园时,需要在 Scrapy 项目的 settings.py 中设置 ROBOTSTXT_OBEY = False;否则 Scrapy 会遵守该站点的 robots.txt,相关请求可能被过滤。
items.py
import scrapy
class AnimeItem(scrapy.Item):
    """Scrapy item holding the fields collected for one anime entry."""

    # Title of the entry.
    name = scrapy.Field()
    # Secondary title -- presumably the alternate/original-language name;
    # TODO confirm against the spider that populates it.
    sub_name = scrapy.Field()
bangumi_spider.py
import scrapy
from Bangumi.items import *
class BangumiSpider(scrapy.Spider):
name = 'BangumiAll'
allowed_domains = ['bangumi.tv']
start_urls = [
'http://bangumi.tv/anime/browser/tv/airtime/2017?sort=date',
'http://bangumi.tv/anime/browser/ova/airtime/2017?sort=date',
'http://bangumi.tv/anime/browser/movie/airtime/2017?sort=date'
]
def parse(self, response):
for li in response.xpath('//ul[@id="browserItemList&#