# -*- coding: utf-8 -*-
import scrapy
from copy import deepcopy
class SuSpider(scrapy.Spider):
    """Spider for book.suning.com (Suning books).

    Walks the category menu on the start page, follows each mid-level
    category link, and yields books that carry a specific promotion tag.
    """
    name = 'su'
    allowed_domains = ['suning.com']
    start_urls = ['http://book.suning.com/']

    def parse(self, response):
        """Pair each top-level menu entry with its submenu and follow links.

        The page renders top-level categories (``menu-item``) and their
        submenus (``submenu-left``) as parallel lists, so they are matched
        positionally with ``zip`` (the original nested enumerate/index
        comparison did the same pairing in O(n^2)).
        """
        div_list = response.xpath('//div[@class="menu-list"]/div[@class="menu-item"]')
        a_list = response.xpath('//div[@class="menu-sub"]/div[@class="submenu-left"]')
        for div, submenu in zip(div_list, a_list):
            # Top-level ("big") category name shared by all its sub-links.
            b_case = div.xpath('./dl//a/text()').extract_first()
            for m in submenu.xpath('./p/a'):
                item = {
                    'b_case': b_case,
                    'm_case': m.xpath('./text()').extract_first(),
                    'm_url': m.xpath('./@href').extract_first(),
                }
                if not item['m_url']:
                    # Guard: an <a> without href would otherwise crash
                    # scrapy.Request with url=None.
                    continue
                # urljoin makes the request robust to relative hrefs;
                # deepcopy so each request carries its own meta dict.
                yield scrapy.Request(url=response.urljoin(item['m_url']),
                                     callback=self.parse_m_case,
                                     meta={'item': deepcopy(item)})

    def parse_m_case(self, response):
        """Extract books on a category page that carry the target promotion.

        Only list items whose sale tag equals the hard-coded promotion
        text '满125用100' ("spend 125, use a 100 coupon") are kept.
        Items are yielded (not just printed) so pipelines/exports see them.
        """
        item = response.meta.get('item')
        for li in response.xpath('//ul[@class="clearfix"]/li'):
            sale = li.xpath('.//div[@class=" "]//em/text()').extract_first()
            if sale == '满125用100':
                # Yield a fresh dict per match so yielded items don't alias
                # the shared meta dict mutated across iterations.
                yield dict(
                    item,
                    s_title=li.xpath('.//p[@class="com-cnt"]/a/@title').extract_first(),
                    s_url=li.xpath('.//p[@class="com-cnt"]/a/@href').extract_first(),
                    s_money=li.xpath('.//p[@class="prive-tag"]/em//text()').extract_first(),
                )
# 爬取苏宁图书 (blog title residue: "Crawling Suning books")
# 最新推荐文章于 2021-05-16 01:25:55 发布 (blog metadata residue: "latest recommended article published 2021-05-16 01:25:55")