import scrapy
import json
from douyu.items import DouyuItem
class DySpider(scrapy.Spider):
    """Page through the Douyu vertical-room API, yielding one item per room.

    The spider starts at offset 0 and, after each non-empty page, requests
    the next page by advancing the offset by ``page_size``. Crawling stops
    when the API returns an empty ``data`` list.
    """

    name = 'dy'
    allowed_domains = ['douyu.com']
    # Rooms requested per page; also the step by which the offset advances,
    # so the two can never drift apart.
    page_size = 20
    base_url = (
        'http://capi.douyucdn.cn/api/v1/getVerticalRoom?limit=%d&offset='
        % page_size
    )
    # Offset of the page to request next (attribute name kept as-is,
    # including its spelling, for backward compatibility).
    offsert = 0
    # URL the crawl starts from.
    start_urls = [base_url + str(offsert)]

    def parse(self, response):
        """Extract room items from one API page and schedule the next page.

        Args:
            response: Scrapy response whose body is a JSON document with a
                ``data`` list; each entry is read for ``nickname`` and
                ``vertical_src``.

        Yields:
            One ``DouyuItem`` per entry, followed by a single
            ``scrapy.Request`` for the next page. Nothing is yielded when
            the ``data`` list is empty, which ends the crawl.
        """
        data_list = json.loads(response.body)['data']
        if not data_list:
            # An empty page means we have run past the last room.
            return

        for data in data_list:
            item = DouyuItem()
            # NOTE(review): on Python 3 this stores ``bytes`` rather than
            # ``str``; confirm that the item pipeline expects encoded bytes.
            item['name'] = data['nickname'].encode('utf-8')
            item['img'] = data['vertical_src']
            self.logger.debug('%s', item)
            yield item

        # Pagination: advance to the next page. The offset must be
        # incremented, not reset, otherwise the same page is fetched forever.
        self.offsert += self.page_size
        url = self.base_url + str(self.offsert)
        # dont_filter=True: the API host is not covered by allowed_domains,
        # so the request would otherwise be dropped as offsite.
        yield scrapy.Request(url, callback=self.parse, dont_filter=True)