# -*- coding: utf-8 -*-
import scrapy
from copy import deepcopy
import re
# https://ds.suning.com/ds/generalForTile/000000011541217219__2_0070937195,000000010567515166__2_0070096274,000000012045814624__2_0070875390,000000000648587472_,000000010567523701__2_0070221744,000000010546818691_,000000000648618455_,000000000649307424_-010-2-0070167435-1--ds000000000450.jsonp?callback=ds000000000450
# https://ds.suning.com/ds/generalForTile/000000000649307424_-010-2-0070167435-1--ds.jsonp
# https://pas.suning.com/nsenArrivalPrice_000000011541217219_0070937195_010_0100100_1_24.60_24.60_000000011541217219_9999_0_0__01_______0_0___0_1_1-2__.html?callback=arrivalPrice&_=1595664107117
# "满减" (full-reduction / spend-X-save-Y promotion) endpoint example:
# https://ds.suning.com/ds/generalForTile/000000011669848725_-010-2-0071038629-1--.jsonp
class SuSpider(scrapy.Spider):
    """Crawl book.suning.com category menus, follow each mid-category
    listing, and report books that carry a full-reduction ("满减")
    promotion via Suning's ds/generalForTile JSONP endpoint.
    """
    name = 'su'
    allowed_domains = ['suning.com']
    start_urls = ['http://book.suning.com/']

    def parse(self, response):
        """Parse the front-page category menu.

        The n-th ``menu-item`` block corresponds positionally to the n-th
        ``submenu-left`` block, so the two node lists are paired with
        ``zip`` (replaces the original O(n^2) index-matching double loop).
        Yields one Request per mid-category link.
        """
        div_list = response.xpath('//div[@class="menu-list"]/div[@class="menu-item"]')
        a_list = response.xpath('//div[@class="menu-sub"]/div[@class="submenu-left"]')
        for div, submenu in zip(div_list, a_list):
            item = {}
            item['b_case'] = div.xpath('./dl//a/text()').extract_first()
            for anchor in submenu.xpath('./p/a'):
                item['m_case'] = anchor.xpath('./text()').extract_first()
                item['m_url'] = anchor.xpath('./@href').extract_first()
                if not item['m_url']:
                    continue  # anchor without an href — nothing to follow
                # deepcopy so concurrent callbacks don't share one dict
                yield scrapy.Request(url=item['m_url'],
                                     callback=self.parse_m_case,
                                     meta={'item': deepcopy(item)})

    def parse_m_case(self, response):
        """Parse a mid-category listing page.

        For each book <li>, extract the product URL and the add-to-cart
        JS snippet, then build the ds/generalForTile JSONP URL that
        exposes the book's promotion data.
        """
        item = response.meta.get('item')
        for li in response.xpath('//ul[@class="clearfix"]/li'):
            sale = li.xpath('.//div[@class="res-opt"]/a/@href').extract_first()
            href = li.xpath('.//div[@class="img-block"]/a/@href').extract_first()
            if not sale or not href:
                continue  # incomplete listing entry — skip instead of crashing
            # href is protocol-relative, e.g. //product.suning.com/...
            item['url_1'] = 'https:' + href
            # sale looks like:
            # javascript:addMiniShoppingCart('000000000652743445',652743445,'0070167435');
            match = re.search(r"javascript:addMiniShoppingCart\((.*?)\)", sale)
            if not match:
                continue  # cart link in an unexpected format
            args = match.group(1).split(',')
            # Strip the surrounding quotes textually — never eval() content
            # scraped from a remote page (the original used eval here).
            munb = args[0].strip("'\"")
            num = args[2].strip("'\"")
            url2 = ('https://ds.suning.com/ds/generalForTile/'
                    + munb + '_-010-2-' + num + '-1--.jsonp')
            yield scrapy.Request(
                url=url2,
                callback=self.parse_page,
                meta={'item': deepcopy(item)}
            )

    def parse_page(self, response):
        """Parse the JSONP promotion payload.

        ``"full":"..."`` entries in the response body hold the
        full-reduction promotion text; books that have any are reported.
        """
        item = response.meta.get('item')
        item['youhui'] = re.findall(r'full":"(.*?)"', response.text)
        if item['youhui']:
            print(item['url_1'])
# 获取苏宁有优惠的图书 — "Scrape Suning books that have discounts" (article title, not code)
# "最新推荐文章于 2023-06-07 06:00:00 发布" — blog publish-date footer accidentally pasted in; kept as a comment so the file parses