模拟淘宝搜索
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Simulate a Taobao search with Selenium.
# Create the browser driver object.
driver = webdriver.Firefox()
try:
    driver.get('https://www.taobao.com/')
    # Explicit wait with a 9-second timeout.
    wait = WebDriverWait(driver, 9)
    # Wait until the search input box is present in the DOM.
    # (renamed from `input`, which shadowed the builtin)
    search_input = wait.until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR, '#q')))
    # Wait until the search button is clickable.
    submit = wait.until(
        EC.element_to_be_clickable(
            (By.CSS_SELECTOR, '.btn-search')))
    # Type the query and click the search button.
    search_input.send_keys('Python3网络爬虫')
    submit.click()
finally:
    # Always quit the driver, even if a wait times out,
    # so no browser process is leaked.
    driver.quit()
复制CSS选择器
scrapy对接selenium
- 修改 middlewares.py 中的 DownloaderMiddleware
- 修改配置文件 settings.py（约第 55 行，启用 DOWNLOADER_MIDDLEWARES）
# -*- coding: utf-8 -*-
from scrapy import signals
from scrapy.http import HtmlResponse
from selenium import webdriver
class ADownloaderMiddleware:
    """Downloader middleware that renders pages with Selenium.

    For each request, a Firefox instance loads the URL and the rendered
    page source is returned as an HtmlResponse, so the spider sees the
    JavaScript-rendered DOM instead of the raw HTTP body.
    """

    @classmethod
    def from_crawler(cls, crawler):
        # Standard Scrapy hook: build the middleware and subscribe to signals.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_request(self, request, spider):
        # NOTE(review): launching a fresh browser per request is very slow;
        # consider keeping one driver for the middleware's lifetime and
        # quitting it on the spider_closed signal.
        driver = webdriver.Firefox()
        try:
            driver.get(request.url)
            page_source = driver.page_source
        finally:
            # Quit even if get()/page_source raises, so no browser leaks.
            driver.quit()
        # Returning a Response here short-circuits the normal download handler.
        return HtmlResponse(url=request.url, body=page_source,
                            request=request, encoding='utf-8', status=200)

    def process_response(self, request, response, spider):
        # Pass responses through unchanged.
        return response

    def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name)