# wzry.py
import scrapy
from LearnScrapy.items import HeroItem
class WzrySpider(scrapy.Spider):
name = 'wzry'
allowed_domains = ['pvp.qq.com']
start_urls = ['https://pvp.qq.com/web201605/herolist.shtml']
hero_detail_base_url = "https://pvp.qq.com/web201605/"
def parse(self, response):
# print(response)
hero_list = response.xpath("//div[contains(@class, 'herolist-content')]/ul[contains(@class, 'herolist')]/li/a/@href").extract()
# print(hero_list)
# for hero_detail in hero_list:
# yield scrapy.Request(url=self.hero_detail_base_url + hero_detail, callback=self.parse_hero_detail, meta={"msg": "ok"})
# yield scrapy.Request(url=response.urljoin(hero_detail), callback=self.parse_hero_detail, meta={"msg": "ok"})
# yield response.follow(url=hero_detail, callback=self.parse_hero_detail, meta={"msg": "ok"})
requests = response.follow_all(urls=hero_list, callback=self.parse_hero_detail, meta={
"msg": "ok"}