import scrapy
from wangyipro.items import WangyiproItem
from selenium import webdriver
class WangyiSpider(scrapy.Spider):
# Name string identifying this spider.
name = 'wangyi'
#allowed_domains = ['www.xxx.com']
# Starting URL(s) for the crawl (the 163.com news home page).
start_urls = ['https://news.163.com/']
# Section-page URLs collected by parse(); parse() appends to this list and
# then issues one request per entry.
# NOTE(review): this is a single list object created once here, so every
# access through `self.models_urls` mutates the same list.
models_urls=[]
# Selenium Edge browser driver, created from the 'msedgedriver.exe' binary.
# NOTE(review): the browser is launched as soon as this statement executes,
# and no quit()/close() call is visible in this part of the file -- confirm
# that it is released elsewhere.
# NOTE(review): the `executable_path` keyword is reportedly deprecated in
# Selenium 4 -- confirm against the installed Selenium version.
bro=webdriver.Edge(executable_path ='msedgedriver.exe')
def parse(self, response):
    """Collect section URLs from the home page and request each of them.

    Selects the ``<li>`` nodes of the navigation list, reads the ``href`` of
    the ``<a>`` child for the list positions in ``alist`` (0-based), stores
    every usable URL in ``self.models_urls`` and finally yields one
    ``scrapy.Request`` per stored URL with ``self.parse_model`` as callback.

    Positions that do not exist on the page, and entries without an
    ``href``, are logged and skipped instead of aborting the callback.

    :param response: response for the start URL; must provide ``xpath()``.
    :return: generator of ``scrapy.Request`` objects.
    """
    li_list = response.xpath(
        '//*[@id="index2016_wrap"]/div[1]/div[2]/div[2]/div[2]/div[2]/div/ul/li'
    )
    alist = [3, 4]  # ,6,7
    for index in alist:
        # The page layout may change; avoid an IndexError on a short list.
        if index >= len(li_list):
            self.logger.warning(
                'navigation item %d not found (only %d items on page)',
                index, len(li_list),
            )
            continue
        model_url = li_list[index].xpath('./a/@href').extract_first()
        # extract_first() returns None when the <a>/@href is missing, and
        # scrapy.Request rejects a non-string URL.
        if not model_url:
            self.logger.warning('navigation item %d has no link', index)
            continue
        # models_urls is shared state; do not store the same URL twice if
        # this callback runs more than once.
        if model_url not in self.models_urls:
            self.models_urls.append(model_url)
    for url in self.models_urls:
        yield scrapy.Request(url, callback=self.parse_model)
def parse_model(self,response):
div_list=response.xpath('/html/body/div[1]/div[3]/div[4]/div[1]/div[1]/div/