插件下载
selenium安装:pip install selenium即可自动安装selenium
geckodriver下载:https://github.com/mozilla/geckodriver/releases
主要注意Firefox浏览器、Selenium、geckodriver三者的版本要相互匹配,满足彼此所要求的版本。
几个环境变量
1.全局path下增加firefox的路径
2.下载解压后,将geckodriver.exe放到Python安装目录的Scripts文件夹下,例如
python版本:D:\python\Scripts 。
Anaconda3版本: D:\Program Files\Anaconda3\Scripts
然后再将Python的安装目录及其Scripts文件夹添加到系统环境变量的Path下面,这样系统才能找到geckodriver.exe。
实例
from selenium import webdriver
from selenium.webdriver import Firefox
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
import time
# Example flow: log in through a CAS login page, open the home page, hover a
# menu to reveal a link, follow that link, then list the first-column link
# text of every row of a table that lives inside an iframe.
#
# Firefox is driven through Selenium, which requires Firefox's driver
# (geckodriver) to be installed and discoverable on PATH.
#
# Elements are located with find_element(By.<KIND>, value): the older
# find_element_by_* helpers were removed in Selenium 4.3.
driver = webdriver.Firefox()
try:
    # --- Simulated login ---
    driver.get('https://XX.XXXX.XXX/cas/login')
    # implicitly_wait() does not pause the script; it sets how long element
    # lookups keep retrying before raising NoSuchElementException.
    driver.implicitly_wait(5)

    username_input = driver.find_element(By.ID, 'username')
    username_input.clear()
    username_input.send_keys('XXXXXXXX')

    password_input = driver.find_element(By.ID, 'password')
    password_input.clear()
    password_input.send_keys('XXXXXXXX')

    driver.find_element(By.CLASS_NAME, 'btn-submit').click()
    # Allow slower element lookups on the pages that follow the login.
    driver.implicitly_wait(15)

    # --- Navigate to the home page explicitly ---
    driver.get('https://XX.XXXX.XX/XXX/XXXX/XXX.XXX')
    author = driver.find_element(By.XPATH, '//*[@id="_userName"]').text
    print(author)

    # --- Simulate a mouse hover over the menu entry ---
    menu_xpath = "/html/body/div[1]/div/ul/li[2]/a"
    more_menu = WebDriverWait(driver=driver, timeout=15).until(
        EC.visibility_of_element_located((By.XPATH, menu_xpath))
    )
    ActionChains(driver=driver).move_to_element(more_menu).perform()
    time.sleep(1)  # Only to make the hover effect visible; may be removed.

    # Read the target address of the button in the panel revealed by the
    # hover. (Clicking that element directly would be an alternative to
    # reading its href and navigating manually.)
    href = driver.find_element(
        By.XPATH, '/html/body/div[1]/div/div/div[1]/div/div/a[2]'
    ).get_attribute("href")
    driver.implicitly_wait(5)

    # --- Open the linked page ---
    driver.get(href)
    # For debugging, driver.page_source holds the full HTML of the current
    # page and can be printed or written to a file.

    # The page embeds its content in an iframe, so the table elements cannot
    # be located from the outer document. Take the iframe's src address and
    # load it directly with driver.get instead.
    dosrc = driver.find_element(
        By.XPATH, '/html/body/div[2]/div/iframe'
    ).get_attribute("src")
    driver.get(dosrc)

    # Collect every <tr> of the table; find_elements returns a list.
    included_names = driver.find_elements(
        By.XPATH, '/html/body/div[1]/div[2]/table/tbody/tr'
    )
    print(len(included_names))

    # Walk the rows; a leading "." makes the XPath relative to the row
    # element rather than to the document root.
    for i in included_names:
        title = i.find_element(By.XPATH, './td[1]/a').text
        print(title)
finally:
    # Always close the browser and stop geckodriver, even when a step fails.
    driver.quit()