看了几天的小甲鱼视频学python基础,居然说要用scrapy才能做成目前想爬的...
换了个详细的scrapy视频教程看(点击打开链接)。
1、爬取网页url数据返回301、302报错
class ModianSpider(scrapy.Spider):
name = 'modian'
allowed_domains = ['modian.com']
start_urls = ['https://zhongchou.modian.com/search?key=%E9%BB%84%E5%A9%B7%E5%A9%B7']
def parse(self, response):
    """Schedule a request for every project link on the search-results page.

    Args:
        response: The downloaded search-results page.

    Yields:
        scrapy.Request: One request per extracted link, handled by
        ``parse_pro_page``. The raw ``href`` attribute value is forwarded
        unchanged in ``meta['href']``.
    """
    hrefs = response.xpath('//div[@class="myproject clearfix"]/ul/li/a/@href').extract()
    for href in hrefs:
        # scrapy.Request rejects URLs that lack a scheme, so resolve relative
        # or protocol-relative links against the page URL first. Absolute
        # links come back from urljoin unchanged.
        yield scrapy.Request(
            response.urljoin(href),
            meta={'href': href},
            callback=self.parse_pro_page,
        )
def parse_pro_page(self, response):
item = ModianItem()
item['id'] = response.xpath('//div/@data-pro_id').extract()
item['title'] = response.xpath('//h3[@class="ti