python的selenium模块_Python中selenium模块

目录:

介绍安装

基本使用

元素交互操作

项目练习

一、安装介绍

介绍

selenium是一个自动化测试工具,爬虫中使用它主要是为了解决requests无法直接执行JavaScript代码的问题;

selenium本质是通过驱动浏览器,完全模拟浏览器的操作,例如:跳转、输入、点击、下拉等,来拿网页渲染之后的结果,可支持多种浏览器;

安装

pip3 install  selenium

下载chromedriver.exe放到python安装路径的scripts目录中  版本为2.29

验证安装是否成功:

from selenium import webdriver

# Smoke test: launch Chrome through chromedriver, load a page and dump its
# rendered source. A visible browser window is expected to pop up.
driver = webdriver.Chrome()
try:
    driver.get("https://www.baidu.com")
    print(driver.page_source)  # page source after the browser has rendered it
finally:
    # quit() ends the whole session (browser and driver process), even when
    # the page load above raises.
    driver.quit()

selenium3默认支持的webdriver是Firefox,而Firefox需要安装geckodriver

下载链接:https://github.com/mozilla/geckodriver/releases

无界面浏览器Phantomjs

下载Phantomjs , 解压后把phantomjs.exe所在的bin目录放到环境变量

下载链接:http://phantomjs.org/download.html

验证环境

C:\Users\Administrator>phantomjs

phantomjs> console.log('egon gaga')

egon gaga

undefined

验证安装:

from selenium import webdriver

# Smoke test for the headless PhantomJS driver: load a page and dump its
# rendered source without opening a window.
# NOTE(review): PhantomJS support was deprecated in later Selenium 3 releases
# and removed in Selenium 4; headless Chrome/Firefox is the usual replacement.
driver = webdriver.PhantomJS()
try:
    driver.get("https://www.baidu.com")
    print(driver.page_source)  # page source after rendering
finally:
    # Always end the session so the phantomjs process does not linger.
    driver.quit()

二、基本使用

需求:百度搜索美女

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Search Baidu for a keyword and print the rendered page, URL and cookies."""

__author__ = 'tian'
__data__ = '2018/4/16 14:45'

from selenium import webdriver
from selenium.webdriver.common.by import By  # locator strategies (By.ID, ...)
from selenium.webdriver.common.keys import Keys  # keyboard key constants
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait  # explicit waits

browser = webdriver.Chrome()
try:
    browser.get('https://www.baidu.com')

    # 'kw' is the id of the search input on the Baidu home page.
    input_tag = browser.find_element(By.ID, 'kw')
    input_tag.send_keys('美女')
    input_tag.send_keys(Keys.ENTER)  # submit the query with the Enter key

    # Block (up to 10 s) until the result container exists; per the original
    # author, 'content_left' is the div holding the result list.
    wait = WebDriverWait(browser, 10)
    wait.until(EC.presence_of_element_located((By.ID, 'content_left')))

    print(browser.page_source)    # rendered page content
    print(browser.current_url)    # URL of the current page
    print(browser.get_cookies())  # cookies set for the current session
finally:
    # End the session even if the lookup or the wait raises.
    browser.quit()

百度输入美女进行搜索

获取标签属性

from selenium import webdriver

from selenium.webdriver import ActionChains

from selenium.webdriver.common.by import By #按照什么方式查找,By.ID ,By.CSS_SELECTOR

from selenium.webdriver.common.keys import Keys #键盘按键操作

from selenium.webdriver.support import expected_conditions as EC

from selenium.webdriver.support.wait import WebDriverWait #等待页面加载某些元素

import time

# Load the Amazon.cn home page, wait for a banner container, then print
# several properties of the first <img> inside it.
driver = webdriver.Chrome()
try:
    driver.get('https://www.amazon.cn/')

    # Wait up to 10 s for the container to be present in the DOM.
    wait = WebDriverWait(driver, 10)
    wait.until(EC.presence_of_element_located((By.ID, 'cc-lm-tcgShowImgContainer')))

    tag = driver.find_element(By.CSS_SELECTOR, '#cc-lm-tcgShowImgContainer img')

    # An HTML attribute of the element.
    print(tag.get_attribute('src'))

    # Element reference id, position, tag name and rendered size.
    # NOTE: `tag.id` is Selenium's internal element reference, not the HTML
    # `id` attribute.
    print(tag.id)
    print(tag.location)
    print(tag.tag_name)
    print(tag.size)
finally:
    # Release the browser even when the wait times out or the lookup fails.
    driver.quit()

等待元素被加载

selenium只能模拟浏览器的行为,而浏览器解析网页需要时间(执行css,js),一些元素可能需要很长一段时间才能加载出来,为了保证能查找到元素,必须等待。

等待方式分两种:

隐式等待:implicitly_wait()    显式等待某个元素被加载 WebDriverWait(driver,10)

元素交互操作

点击和清空

from selenium import webdriver

from selenium.webdriver import ActionChains

from selenium.webdriver.common.by import By #按照什么方式查找,By.ID ,By.CSS_SELECTOR

from selenium.webdriver.common.keys import Keys #键盘按键操作

from selenium.webdriver.support import expected_conditions as EC

from selenium.webdriver.support.wait import WebDriverWait #等待页面加载某些元素

import time

# Type a query into the Amazon.cn search box, clear it, type a different
# query and submit it with Enter.
driver = webdriver.Chrome()
try:
    driver.get('https://www.amazon.cn/')

    # Wait up to 10 s for the search box and keep the single element that the
    # wait returns; every later step acts on this same input.
    wait = WebDriverWait(driver, 10)
    search_box = wait.until(
        EC.presence_of_element_located((By.ID, 'twotabsearchtextbox'))
    )

    search_box.send_keys('iphone8')
    time.sleep(3)  # pause so the first query is visible before it is cleared

    search_box.clear()
    search_box.send_keys('iphone7plus')
    search_box.send_keys(Keys.ENTER)  # submit the search
finally:
    # Release the browser even when the wait times out.
    driver.quit()

Action Chains 联动

from selenium import webdriver

from selenium.webdriver import ActionChains

from selenium.webdriver.common.by import By #按照什么方式查找,By.ID ,By.CSS_SELECTOR

from selenium.webdriver.common.keys import Keys #键盘按键操作

from selenium.webdriver.support import expected_conditions as EC

from selenium.webdriver.support.wait import WebDriverWait #等待页面加载某些元素

import time

# Drag the jQuery UI demo's "draggable" box onto its "droppable" box.
driver = webdriver.Chrome()
try:
    driver.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')

    # The demo is rendered inside an iframe; element lookups only see the
    # frame that is currently selected.
    driver.switch_to.frame('iframeResult')

    # drag_and_drop(source, target) presses on `source` and releases on
    # `target`, so the element being moved must be the draggable one.
    source = driver.find_element(By.ID, 'draggable')
    target = driver.find_element(By.ID, 'droppable')

    # Approach 1: queue the whole gesture on one chain, then run it.
    actions = ActionChains(driver)
    actions.drag_and_drop(source, target)
    actions.perform()

    # Approach 2: a separate chain per step, moving a little at a time.
    # ActionChains(driver).click_and_hold(source).perform()
    # distance = target.location['x'] - source.location['x']
    # track = 0
    # while track < distance:
    #     ActionChains(driver).move_by_offset(xoffset=2, yoffset=0).perform()
    #     track += 2
    # ActionChains(driver).release().perform()

    time.sleep(10)  # keep the window open long enough to see the result
finally:
    driver.quit()

cookies

from selenium import webdriver

# Load a page and print the cookies the browser holds for it.
driver = webdriver.Chrome()
try:
    driver.get('https://www.zhihu.com/explore')
    # driver.add_cookie({})  # add a cookie to the current session
    print(driver.get_cookies())
finally:
    # End the session even if the page load raises.
    driver.quit()

选项卡:切换选项卡

from selenium import webdriver

import time

# Open a second tab, load a different site in each tab, then close the first.
driver = webdriver.Chrome()
try:
    driver.get('https://www.baidu.com')
    driver.execute_script('window.open()')  # opens a new, empty tab
    print(driver.window_handles)  # handles of all open tabs
    time.sleep(10)

    # Commands go to the selected window only, so switch before navigating.
    driver.switch_to.window(driver.window_handles[1])
    driver.get('https://www.taobao.com')
    time.sleep(10)

    driver.switch_to.window(driver.window_handles[0])
    driver.get('https://www.sina.com.cn')
    driver.close()  # closes only the currently selected (first) tab
    time.sleep(10)  # the second tab stays visible during this pause
finally:
    # Shut down the remaining tab and the driver process.
    driver.quit()

例子:

from selenium import webdriver

import time

# Follow the Baidu login -> "register" link (which opens a new tab), then
# switch back to the first tab and reload the home page.
# NOTE(review): the find_element_by_* helpers are the Selenium 3 API and were
# removed in Selenium 4.3; this snippet does not import `By`, so they are kept.
driver = webdriver.Chrome()
try:
    driver.get('https://www.baidu.com/')
    driver.maximize_window()
    driver.implicitly_wait(3)  # element lookups retry for up to 3 s

    driver.find_element_by_css_selector('#u1 >.lb').click()
    driver.find_element_by_link_text("立即注册").click()

    # Example of window_handles at this point (values from the original run):
    # ['CDwindow-406d8bcd-34ce-4ea3-ab1a-3853a8b4fddf', 'CDwindow-37fc67c6-f3a3-4b02-b887-d0dd1973db5d']
    driver.switch_to.window(driver.window_handles[0])
    driver.get('https://www.baidu.com/')
    time.sleep(3)
finally:
    # Close every tab and end the driver process.
    driver.quit()

异常处理

from selenium import webdriver

from selenium.common.exceptions import TimeoutException,NoSuchFrameException

# Create the driver before entering `try`: if construction itself fails there
# is nothing to clean up, and `finally` must not reference an unbound name.
driver = webdriver.Chrome()
try:
    driver.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
    driver.switch_to.frame('iframeResult')
except TimeoutException as e:
    # Printed rather than re-raised, as in the original example.
    print(e)
except NoSuchFrameException as e:
    # Raised by switch_to.frame() when the named frame does not exist.
    print(e)
finally:
    driver.quit()

项目练习

1、126发送邮件

from selenium import webdriver

from selenium.webdriver import ActionChains

from selenium.webdriver.common.by import By #按照什么方式查找,By.ID ,By.CSS_SELECTOR

from selenium.webdriver.common.keys import Keys #键盘按键操作

from selenium.webdriver.support import expected_conditions as EC

from selenium.webdriver.support.wait import WebDriverWait #等待页面加载某些元素

import time

# Log in to 126.com, open the compose view, fill in recipient, subject and
# body, then click the first toolbar item.
driver = webdriver.Chrome()
try:
    driver.get('https://www.126.com/')
    wait = WebDriverWait(driver, 5)

    # The login form lives in an iframe: wait for it, then switch into it.
    login_frame = wait.until(
        EC.presence_of_element_located((By.CSS_SELECTOR, '#x-URS-iframe'))
    )
    driver.switch_to.frame(login_frame)

    # Wait for the form container inside the frame before typing.
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.m-container')))
    driver.find_element(By.NAME, 'email').send_keys('test_tx')
    driver.find_element(By.NAME, 'password').send_keys('xxxxooossoo')

    # Submit by sending Enter to the login button; a click also works:
    # driver.find_element(By.CSS_SELECTOR, "#dologin").click()
    driver.find_element(By.CSS_SELECTOR, "#dologin").send_keys(Keys.ENTER)

    # After login, the second <li> of the top navigation is clicked to open
    # the compose view.
    wait.until(EC.presence_of_element_located((By.ID, 'dvNavTop')))
    write_msg = driver.find_elements(By.CSS_SELECTOR, "#dvNavTop li")[1]
    write_msg.click()

    # Fill in recipient and subject once the compose form is present.
    wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'tH0')))
    driver.find_element(By.CLASS_NAME, 'nui-editableAddr-ipt').send_keys('352932341@qq.com')
    title = driver.find_element(By.CSS_SELECTOR, '.dG0 .nui-ipt-input')
    title.send_keys('测试啊')

    # The message body is a rich-text editor hosted in its own iframe.
    editor_frame = wait.until(
        EC.presence_of_element_located((By.CLASS_NAME, 'APP-editor-iframe'))
    )
    driver.switch_to.frame(editor_frame)
    driver.find_element(By.CSS_SELECTOR, 'body').send_keys('发送成功,加工资了')

    # Return to the top-level document to reach the toolbar.
    driver.switch_to.default_content()
    # presumably the first toolbar item is "send" - verify against the page
    driver.find_element(By.CLASS_NAME, 'nui-toolbar-item').click()

    time.sleep(100)  # keep the browser open to inspect the result
except Exception as e:
    # Top-level boundary of the demo script: report the failure and fall
    # through to cleanup.
    print(e)
finally:
    driver.quit()

京东商城

from selenium import webdriver

from selenium.webdriver.common.keys import Keys

import time

def get_goods(driver, page_delay=2, max_pages=None):
    """Print each product on the current result page, then follow "next page".

    For every element matching ``.gl-item`` the product name, detail link,
    price and comment count are printed. The function then clicks the
    next-page control (``.pn-next em``) and repeats until that control is not
    found or ``max_pages`` pages have been processed.

    Args:
        driver: Selenium WebDriver that is already showing a result page.
        page_delay: Seconds to sleep after clicking "next page" so the new
            page can load. Defaults to 2.
        max_pages: Optional upper bound on the number of pages to print.
            ``None`` (the default) means no bound.

    Returns:
        None. All output goes to stdout via ``print``.

    NOTE(review): uses the Selenium 3 ``find_element(s)_by_*`` helpers because
    this script does not import ``By``; they were removed in Selenium 4.3.
    """
    if max_pages is not None and max_pages <= 0:
        return

    # Four fields, four placeholders: name, link, price, comment count.
    template = (
        '\n\n商品 :{0}'
        '\n\n链接 : {1}'
        '\n\n价钱 : {2}'
        '\n\n评论 : {3}'
        '\n\n'
    )

    pages_done = 0
    try:
        # Iterate instead of recursing once per page, so long result sets
        # cannot hit the interpreter's recursion limit.
        while True:
            for good in driver.find_elements_by_css_selector('.gl-item'):
                detail_url = good.find_element_by_tag_name('a').get_attribute('href')
                p_name = good.find_element_by_css_selector('.p-name em').text.replace('\n', '')
                price = good.find_element_by_css_selector('.p-price i').text
                p_commit = good.find_element_by_css_selector('.p-commit a').text
                print(template.format(p_name, detail_url, price, p_commit), end='\n\n')

            pages_done += 1
            if max_pages is not None and pages_done >= max_pages:
                break

            # The plural lookup returns an empty list instead of raising when
            # there is no next-page control, which marks the last page.
            next_buttons = driver.find_elements_by_css_selector('.pn-next em')
            if not next_buttons:
                break
            next_buttons[0].click()
            time.sleep(page_delay)
    except Exception as exc:
        # Scraping stays best-effort (a changed page layout must not crash the
        # caller), but the reason for stopping is reported, not hidden.
        print('get_goods stopped after {0} page(s): {1!r}'.format(pages_done, exc))

def spider(url, keyword):
    """Open ``url`` in Chrome, search for ``keyword`` and print the results.

    Args:
        url: Address of the page that contains the search box (``#key``).
        keyword: Text typed into the search box before pressing Enter.

    Returns:
        None. Results are printed by ``get_goods``.

    NOTE(review): uses the Selenium 3 ``find_element_by_*`` helper because this
    script does not import ``By``; it was removed in Selenium 4.3.
    """
    driver = webdriver.Chrome()
    try:
        # The page load is inside `try` so a failed navigation still releases
        # the browser.
        driver.get(url)
        driver.implicitly_wait(3)  # element lookups retry for up to 3 s

        input_tage = driver.find_element_by_css_selector("#key")
        input_tage.send_keys(keyword)
        input_tage.send_keys(Keys.ENTER)  # submit the search

        get_goods(driver)
    finally:
        driver.quit()

# Script entry point: search JD for a sample keyword and print the results.
if __name__ == '__main__':
    spider('https://www.jd.com/', keyword='iPhone8手机')

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值