selenium的使用
安装selenium
pip install selenium
下载浏览器驱动
谷歌:http://npm.taobao.org/mirrors/chromedriver/
1.一些基本操作
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
# Demo of basic browser control: open pages, resize, walk the history,
# refresh, scroll, and finally shut the browser down.
driver = webdriver.Chrome()
driver.get('https://www.baidu.com')
time.sleep(2)
driver.get('https://news.baidu.com')
# driver.set_window_size(800, 480)
driver.maximize_window()
# Go back to the previous page in the history.
time.sleep(2)
driver.back()
# Go forward to the next page in the history.
time.sleep(2)
driver.forward()
# Reload the current page.
time.sleep(2)
driver.refresh()
# Scroll down to the bottom of the page. This has to run while the session
# is still open, so it is placed before quit() rather than after it.
js = "window.scrollTo(20000,document.body.scrollHeight)"
driver.execute_script(js)
# Close every window and end the WebDriver session.
time.sleep(2)
driver.quit()
2.一些选择器
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys
# Demo of the different locator strategies; every lookup below targets the
# Baidu search box, and the last one clicks the search button.
driver = webdriver.Chrome()
driver.get('https://www.baidu.com')
driver.maximize_window()
time.sleep(1)

# Locate by element id.
box_by_id = driver.find_element_by_id('kw')
box_by_id.send_keys('selenium')

# Locate by CSS class name.
box_by_class = driver.find_element_by_class_name("s_ipt")
box_by_class.send_keys('selenium')

# Locate by the "name" attribute.
box_by_name = driver.find_element_by_name("wd")
box_by_name.send_keys('selenium')

# Locate by XPath expression.
box_by_xpath = driver.find_element_by_xpath(r'//*[@id="kw"]')
box_by_xpath.send_keys('selenium')

# find_elements_* returns a list of matches, so the wanted element has to be
# picked out of it by index.
boxes_by_css = driver.find_elements_by_css_selector("#kw")
boxes_by_css[0].send_keys('selenium')

submit_buttons = driver.find_elements_by_css_selector("#su")
submit_buttons[0].click()
3.PhantomJS无头浏览器
从 http://phantomjs.org/ 获取 PhantomJS:点击位于页面中央的 DOWNLOAD 按钮会跳转到 http://phantomjs.org/download 页面,该页面详细列举了 PhantomJS 在各种平台上的下载方法。
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys
# Headless demo: search on Bing with PhantomJS and save a screenshot after
# each step.
driver = webdriver.PhantomJS()
try:
    driver.get('https://cn.bing.com/')
    time.sleep(1)
    title_ = driver.title
    print(title_)
    driver.find_element_by_id('sb_form_q').send_keys("selenium自动化测试")
    driver.save_screenshot("save1.png")
    time.sleep(1)
    driver.find_element_by_class_name("b_searchboxSubmit").click()
    driver.save_screenshot("save2.png")
    time.sleep(1)
    driver.back()
    driver.save_screenshot("save3.png")
finally:
    # Always end the session; otherwise the headless browser process is
    # left running after the script finishes or fails.
    driver.quit()
phantomJS -h
C:\Users\liwan14x>phantomJS -h
Usage:
phantomJS [switchs] [options] [script] [argument [argument [...]]]
Options:
--cookies-file=<val> Sets the file name to store the persistent cookies
--config=<val> Specifies JSON-formatted configuration file
--debug=<val> Prints additional warning and debug message: 'true' or 'false' (default)
--disk-cache=<val> Enables disk cache: 'true' or 'false' (default)
--disk-cache-path=<val> Specifies the location for the disk cache
--ignore-ssl-errors=<val> Ignores SSL errors (expired/self-signed certificate errors): 'true' or 'false' (default)
--load-images=<val> Loads all inlined images: 'true' (default) or 'false'
--local-url-access=<val> Allows use of 'file:///' URLs: 'true' (default) or 'false'
--local-storage-path=<val> Specifies the location for local storage
--local-storage-quota=<val> Sets the maximum size of the local storage (in KB)
--offline-storage-path=<val> Specifies the location for offline storage
--offline-storage-quota=<val> Sets the maximum size of the offline storage (in KB)
--local-to-remote-url-access=<val> Allows local content to access remote URL: 'true' or 'false' (default)
--max-disk-cache-size=<val> Limits the size of the disk cache (in KB)
--output-encoding=<val> Sets the encoding for the terminal output, default is 'utf8'
--remote-debugger-port=<val> Starts the script in a debug harness and listens on the specified port
--remote-debugger-autorun=<val> Runs the script in the debugger immediately: 'true' or 'false' (default)
--proxy=<val> Sets the proxy server, e.g. '--proxy=http://proxy.company.com:8080'
--proxy-auth=<val> Provides authentication information for the proxy, e.g. ''-proxy-auth=username:password'
--proxy-type=<val> Specifies the proxy type, 'http' (default), 'none' (disable completely), or 'socks5'
--script-encoding=<val> Sets the encoding used for the starting script, default is 'utf8'
--script-language=<val> Sets the script language instead of detecting it: 'javascript'
--web-security=<val> Enables web security, 'true' (default) or 'false'
--ssl-protocol=<val> Selects a specific SSL protocol version to offer. Values (case insensitive): TLSv1.2, TLSv1.1, TLSv1.0, TLSv1 (same as v1.0), SSLv3, or ANY. Default is to offer all that Qt thinks are secure (SSLv3 and up). Not all values may be supported, depending on the system OpenSSL library.
--ssl-ciphers=<val> Sets supported TLS/SSL ciphers. Argument is a colon-separated list of OpenSSL cipher names (macros like ALL, kRSA, etc. may not be used). Default matches modern browsers.
--ssl-certificates-path=<val> Sets the location for custom CA certificates (if none set, uses environment variable SSL_CERT_DIR. If none set too, uses system default)
--ssl-client-certificate-file=<val> Sets the location of a client certificate
--ssl-client-key-file=<val> Sets the location of a clients' private key
--ssl-client-key-passphrase=<val> Sets the passphrase for the clients' private key
--webdriver=<val> Starts in 'Remote WebDriver mode' (embedded GhostDriver): '[[<IP>:]<PORT>]' (default '127.0.0.1:8910')
--webdriver-logfile=<val> File where to write the WebDriver's Log (default 'none') (NOTE: needs '--webdriver')
--webdriver-loglevel=<val> WebDriver Logging Level: (supported: 'ERROR', 'WARN', 'INFO', 'DEBUG') (default 'INFO') (NOTE: needs '--webdriver')
--webdriver-selenium-grid-hub=<val> URL to the Selenium Grid HUB: 'URL_TO_HUB' (default 'none') (NOTE: needs '--webdriver')
-w,--wd Equivalent to '--webdriver' option above
-h,--help Shows this message and quits
-v,--version Prints out PhantomJS version
Any of the options that accept boolean values ('true'/'false') can also accept 'yes'/'no'.
Without any argument, PhantomJS will launch in interactive mode (REPL).
Documentation can be found at the web site, http://phantomjs.org.
from selenium import webdriver
import time
# Demo of running JavaScript in the page: change the title, show it in an
# alert, then run a site search and screenshot the first result.
driver = webdriver.Chrome()
try:
    driver.get('https://www.python.org/')
    driver.maximize_window()
    time.sleep(2)
    # driver.save_screenshot('11301.png')
    JS1 = "document.title='python网站'"
    driver.execute_script(JS1)
    time.sleep(1)
    # NOTE(review): this relies on the page exposing jQuery as `$` - confirm.
    JS2 = "alert($(document).attr('title'))"
    driver.execute_script(JS2)
    time.sleep(1)
    # The alert blocks the page; it has to be dismissed before any further
    # element lookup, otherwise the next command fails on the open dialog.
    driver.switch_to.alert.accept()
    driver.find_element_by_id('id-search-field').send_keys("pycon")
    driver.find_element_by_class_name('search-button').click()
    driver.find_elements_by_css_selector('#content > div > section > form > ul > li:nth-child(1) > h3 > a')[0].click()
    driver.save_screenshot('1130s2.png')
    time.sleep(1)
finally:
    # End the session even when one of the steps above raises.
    driver.quit()
4.selenium的等待
分为强制等待 :不管是否加载完成,程序都会等待设置的时间,然后继续加载,执行后面的代码。
显式等待:显式等待只需要判断某个特定的元素出现即可。
隐式等待:设置一个最长等待时间,如果在该时间内加载完成,则立即继续下一步;如果在等待时间内未加载完成,等待时间一到就停止等待,直接继续执行。
# 导入库
from selenium import webdriver
import time
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# 打开浏览器和页面
# Demo of an explicit wait: poll for the "首页" link, print its href, and
# report how long the whole run took.
start_time = time.time()
dr = webdriver.Chrome()
# dr.implicitly_wait(30)  # implicit wait
dr.get('https://www.csdn.net/')
# time.sleep(2)  # forced wait
try:
    # Explicit wait: poll every 0.5 s for up to 20 s. The condition takes a
    # single (By, value) locator tuple, hence the double parentheses.
    home_link = WebDriverWait(dr, 20, 0.5).until(
        EC.presence_of_element_located((By.LINK_TEXT, '首页'))
    )
    # until() returns the located element, so there is no need to look it
    # up a second time.
    print(home_link.get_attribute("href"))
finally:
    # Close the browser whether or not the element showed up in time.
    dr.quit()
end_time = time.time()
total_time = end_time - start_time
print(total_time)
5.对一些不安全的网站链接设置
# Start Chrome so that it accepts insecure (e.g. self-signed) certificates.
# Work on a copy: DesiredCapabilities.CHROME is a dict shared by the whole
# process, and editing it in place would affect every later driver.
capabilities = webdriver.DesiredCapabilities.CHROME.copy()
capabilities['acceptInsecureCerts'] = True
driver = webdriver.Chrome(desired_capabilities=capabilities)
pytest-bdd
feature(需求)
Scenario(场景)
Given(假定条件,预设条件)
when(操作步骤)
then(结果)
Given when then 后面都可以跟一个或多个and
6.字典转换为xml格式
xml类似于html格式,字典转换为xml格式时,字典的属性会变为xml的标签,其中需要用到两个方法,Element以及tostring
from xml.etree.ElementTree import tostring
from xml.etree.ElementTree import Element
dict = {"name": "zhangsan", "age": 23, "gendre": "male", "phone": "12345678"}
def convert(tag_name, dict):
# 建立一个xml
elem = Element(tag_name)
for key, value in dict.items():
print(key, ":", value)
# 为key建立一个标签
xml_elem = Element(key)
# 将key的值赋给标签
xml_elem.text = str(value)
# 添加到xml文件中
elem.append(xml_elem)
return elem
dict_convert = convert("test", dict)
print("---------")
print(dict_convert)
print(tostring(dict_convert))
# 给最外层的标签加一个属性id
dict_convert.set("id", "001")
print(tostring(dict_convert))
xml的解析
from xml.etree import ElementTree as et
# xml_string = b'<test><name>zhangsan</name><age>23</age><gendre>male</gendre><phone>12345678</phone></test>'
xml_string = b'<test id="001"><name>zhangsan</name><age>23</age><gendre>male</gendre><phone>12345678</phone></test>'
root = et.fromstring(xml_string)
print(root.tag)
print(root.attrib)
for i in root:
print(i)
for i in root:
print(i.text)
print("1-----------------------")
for i in list(root):
print(i.tag, i.text)
print(("2-----------------------"))
test_person = root.iter("test")
for i in test_person:
print(i) # <Element 'test' at 0x0000012C7165F278>
for j in i:
print(j.tag, j.text)
print(("3-----------------------"))
test_person3 = root.findall("name")
print(test_person3)
print(test_person3[0]) # <Element 'name' at 0x0000022EEEEAF278>
print(test_person3[0].tag, test_person3[0].text)
print(("4-----------------------"))
test_person4 = root.find("name")
print(test_person4) # <Element 'name' at 0x0000022EEEEAF278>
print(test_person4.tag, test_person4.text)
7.页面下拉实现代码
# Scroll down step by step until the page stops growing, so content that is
# loaded lazily on scroll gets a chance to appear. Uses the `dr` driver and
# the `time` module from the surrounding script.
# Plain expression: no assignment, so no stray global is created in the page.
js = "return document.body.scrollHeight"
# Scroll position reached so far; start at the top.
height = 0
# Current total height of the document.
new_height = dr.execute_script(js)
while height < new_height:
    # Walk down in 100px steps, starting from where the last pass ended.
    for offset in range(height, new_height, 100):
        dr.execute_script('window.scrollTo(0, {})'.format(offset))
        time.sleep(0.5)
    # range() stops short of new_height, so finish at the real bottom.
    dr.execute_script('window.scrollTo(0, {})'.format(new_height))
    height = new_height
    # Give the page time to load more content, then measure again; the loop
    # ends once the height no longer increases.
    time.sleep(2)
    new_height = dr.execute_script(js)
8.获取猫眼电影前100条的数据
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time
import csv
# Scrape the Maoyan top-100 board (10 pages of 10 films) and write one CSV
# row per film: title, cast, release date, synopsis.
# Page offsets 0, 10, ..., 90.
number_list = list(range(0, 100, 10))
url_list = ["https://maoyan.com/board/4?offset=%d" % offset for offset in number_list]
# print("获取到的地址:%s" % url)

dr = webdriver.Chrome()
try:
    dr.maximize_window()
    # The with-block closes (and flushes) the CSV file even if scraping fails.
    with open("writer.csv", "w+", newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        for page_url in url_list:
            dr.get(page_url)
            time.sleep(1)
            print("正在访问链接为%s的网页" % page_url)
            # dd[] indices in the XPath are 1-based and each page lists ten
            # films, so the range has to run from 1 through 10.
            for j in range(1, 11):
                # Film title.
                xpath1 = '//*[@id="app"]/div/div/div[1]/dl/dd[%d]/div/div/div[1]/p[1]/a' % j
                title = dr.find_element_by_xpath(xpath1).text
                # Leading actors.
                xpath2 = '//*[@id="app"]/div/div/div[1]/dl/dd[%d]/div/div/div[1]/p[2]' % j
                actor = dr.find_element_by_xpath(xpath2).text
                # Release date.
                xpath3 = '//*[@id="app"]/div/div/div[1]/dl/dd[%d]/div/div/div[1]/p[3]' % j
                time_ = dr.find_element_by_xpath(xpath3).text
                # The synopsis lives on the detail page, so open it.
                dr.find_element_by_xpath(xpath1).click()
                xpath4 = '//*[@id="app"]/div/div[1]/div/div[3]/div[1]/div[1]/div[2]/span'
                content = dr.find_element_by_xpath(xpath4).text
                # writerow() writes one row; writerows() would write several.
                writer.writerow([title, actor, time_, content])
                print("正在获取第%s条数据" % j)
                # Return to the list page before handling the next film.
                dr.get(page_url)
    print("all_done")
finally:
    # quit() ends the whole session (close() only closes the current
    # window), and runs even when a lookup above raises.
    dr.quit()