# Selenium module notes (Selenium模块)
"""-----------测试自动化------------"""
from selenium import webdriver

# Start a Chrome session. (For Microsoft Edge, use webdriver.Edge() instead.)
brower = webdriver.Chrome()

# Load each page in turn in the same browser window.
for _page in ('https://www.baidu.com', 'https://jd.com', 'https://www.qidian.com'):
    brower.get(_page)

# Close the browser and end the WebDriver session.
brower.quit()
"""-----------元素定位------------"""
from selenium import webdriver
from selenium.webdriver.common.by import By  # locator strategies (newer Selenium API)

brower = webdriver.Chrome()
brower.get('https://www.baidu.com')

"""查找一个元素"""
# --- Locating a single element ---
# The legacy helper shown next to each call is the older spelling of the
# same lookup.

# By id (legacy: find_element_by_id('kw')).
input1 = brower.find_element(by=By.ID, value='kw')

# By class name (legacy: find_element_by_class_name('s_ipt')).
input2 = brower.find_element(by=By.CLASS_NAME, value='s_ipt')

# By XPath (legacy: find_element_by_xpath('//input[@id="kw"]')).
input3 = brower.find_element(by=By.XPATH, value='//input[@id="kw"]')

# By CSS selector (legacy: find_element_by_css_selector('#kw')).
input4 = brower.find_element(by=By.CSS_SELECTOR, value='#kw')

# By the name attribute (legacy: find_element_by_name('wd')).
input5 = brower.find_element(by=By.NAME, value='wd')

""" 查找多个元素 """
# --- Locating several elements at once ---
brower.get('https://www.qidian.com/all/')
li_list = brower.find_elements(
    by=By.XPATH,
    value='//div[@class="book-mid-info"]',
)

"""
总结:
element:查找一个元素 返回webElement
elements:查找多个元素 返回类型list
"""
# Summary: find_element returns a single WebElement; find_elements returns
# a list.

# Close the browser.
brower.quit()
"""-----------智能等待------------"""
import time

from selenium import webdriver
from selenium.webdriver.common.by import By  # locator strategies (newer Selenium API)

brower = webdriver.Chrome()

# Implicit ("smart") wait: with a 30 s limit, a lookup raises only if the
# element is still missing after 30 s and succeeds as soon as it appears.
# The original author left it disabled because it was not needed in their
# environment.
# brower.implicitly_wait(30)

brower.get('https://news.cctv.com/')

# Find the link by its exact visible text and show the WebElement.
a = brower.find_element(by=By.LINK_TEXT, value='国内')
print(a)

# A fixed pause is the manual alternative; also left disabled by the author.
# time.sleep(10)

brower.quit()
"""-----------获取浏览器信息------------"""
from selenium import webdriver
from selenium.webdriver.common.by import By  # locator strategies (newer Selenium API)

brower = webdriver.Chrome()
brower.implicitly_wait(30)
brower.get('https://news.cctv.com/')

# Print, in order: the page title, the browser name, and the source of the
# current URL (Ctrl+U shows the page source in a desktop browser).
for _attribute in ('title', 'name', 'page_source'):
    print(getattr(brower, _attribute))

# The page source can then be processed with regular expressions or XPath
# and cleaned up, much like the body returned by requests.get().

brower.quit()
"""-----------浏览器操作------------"""
from selenium import webdriver
import time

browser = webdriver.Chrome()
browser.implicitly_wait(30)
browser.get('https://www.baidu.com')

# Maximise the browser window.
browser.maximize_window()

# Resize the window to an explicit width and height (pixels).
browser.set_window_size(600, 800)

# Open a new tab by running JavaScript's window.open() in the page.
new_window = 'window.open("{}")'.format('https://news.cctv.com/society/?spm=C94212.P4YnMod9m2uD.EWZW7h07k3Vs.5')  # JS call
browser.execute_script(new_window)

# Switch to the first tab.
browser.switch_to.window(browser.window_handles[0])

# Switch to the last tab (the one opened above).
browser.switch_to.window(browser.window_handles[-1])

# Navigate back and forward through the tab's history.
second_url = 'http://news.baidu.com'
browser.get(second_url)
# time.sleep(3)
browser.back()     # back to the page loaded before second_url
# time.sleep(3)
browser.forward()  # forward again to second_url

# Fixed: the original call was missing its closing parenthesis (a syntax
# error that also swallowed the lines after it).
browser.quit()
"""-----------操作测试对象------------"""
from selenium import webdriver
import time
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.implicitly_wait(30)
browser.get('https://www.baidu.com')

# Locate the element with id "kw" and clear its current content.
browser.find_element(By.ID, 'kw').clear()

# Locate the same element again and type the query into it.
browser.find_element(By.ID, 'kw').send_keys('北京')

# Locate the element with id "su" and click it.
browser.find_element(By.ID, 'su').click()

# Locate the first link whose visible text contains "百度百科".
a = browser.find_element(By.PARTIAL_LINK_TEXT, '百度百科')

# Show the link's href attribute and its visible text. The original script
# evaluated both expressions but discarded the results, so nothing was
# reported when run as a script.
print(a.get_attribute('href'))
print(a.text)

browser.quit()
"""-----------键盘事件------------"""
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

browser = webdriver.Chrome()
browser.implicitly_wait(30)
browser.get('https://fanyi.baidu.com/?aldtype=16047#auto/zh')

# Key sequences sent to the input box, in order:
#   plain text, the TAB control key, the ENTER control key, then the
#   key combinations CONTROL+'a' and CONTROL+'V'.
_key_sequences = (
    ('北京',),
    (Keys.TAB,),
    (Keys.ENTER,),
    (Keys.CONTROL, 'a'),
    (Keys.CONTROL, 'V'),
)

# The element is looked up afresh before every sequence, exactly as in the
# one-call-per-line form.
for _keys in _key_sequences:
    browser.find_element(By.ID, 'baidu_translate_input').send_keys(*_keys)

time.sleep(3)
browser.quit()
"""-----------iframe内联框架------------"""
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

# Create the browser instance.
browser = webdriver.Chrome()

# Load the demo page.
browser.get('https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')

# Content inside an <iframe> is an inline page of its own, so looking the
# element up from the top-level document fails. The original script let that
# exception abort the run before the iframe lookup was ever reached; it is
# caught here so the demonstration can continue.
try:
    div = browser.find_element(By.ID, 'draggable')
except NoSuchElementException:
    print('"draggable" was not found from the top-level document; switching into the iframe')

# Enter the iframe's scope (switch_to.frame accepts the frame's id), then
# repeat the lookup.
browser.switch_to.frame('iframeResult')
div = browser.find_element(By.ID, 'draggable')

browser.quit()
"""-----------鼠标事件------------"""
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
import time

# ---------------Example 1---------------
browser = webdriver.Chrome()
browser.get('https://passport.qidian.com/mobile/login')

# Create the action-chain instance. Queued actions are only sent to the
# browser when .perform() is called.
chains = ActionChains(browser)

# Locate the elements the mouse actions will target.
username = browser.find_element(By.ID, 'username')
password = browser.find_element(By.ID, 'password')
login_btn = browser.find_element(By.CLASS_NAME, 'login-button')
time.sleep(5)

# Left click.
chains.click(username).perform()
time.sleep(5)

# Right click (context menu).
chains.context_click(username).perform()
time.sleep(5)

# Double click.
chains.double_click(password).perform()

# Hover.
chains.move_to_element(login_btn).perform()
time.sleep(5)

# Move to an offset relative to the element.
chains.move_to_element_with_offset(login_btn, 20, 20).perform()
time.sleep(5)

# Long press: hold the button down for the duration of the sleep, then
# release it. Without the release the button would still be held when the
# drags below start.
chains.click_and_hold(login_btn).perform()
time.sleep(5)
chains.release(login_btn).perform()

# Drag one element onto another. Fixed: the original never called
# .perform(), so the drag was queued but never executed.
chains.drag_and_drop(source=username, target=password).perform()
time.sleep(5)

# Drag an element by a pixel offset. Fixed: .perform() was missing here too.
chains.drag_and_drop_by_offset(source=username, xoffset=20, yoffset=20).perform()

browser.quit()
"""-----------滚动条控制------------"""
from selenium import webdriver
from selenium.webdriver.common.by import By
import time

browser = webdriver.Chrome()
browser.implicitly_wait(30)
browser.get('https://www.baidu.com')

# Fixed: find_element_by_id is the legacy API (removed in current Selenium 4
# releases); use find_element(By.ID, ...) like the other sections of this file.
browser.find_element(By.ID, "kw").send_keys("北京")
browser.find_element(By.ID, "su").click()
time.sleep(3)

# Drag the page's scrollbar to the bottom.
js = "var q=document.documentElement.scrollTop=10000"
browser.execute_script(js)
time.sleep(3)

# Move the scrollbar back to the top of the page.
js = "var q=document.documentElement.scrollTop=0"
browser.execute_script(js)
time.sleep(3)

# Example: scroll a second page down ten times, one second apart.
# Fixed: the original called browser.quit() before this example, so every
# call below ran against a closed session; the loop body had also lost its
# indentation.
url = 'https://www.jd.com/'
browser.get(url)
for x in range(10):
    time.sleep(1)
    js = 'var q=document.documentElement.scrollTop=10000'
    browser.execute_script(js)

browser.quit()
"""-----------防检测------------"""
from selenium.webdriver import ChromeOptions
from selenium import webdriver
import time

options = ChromeOptions()

# excludeSwitches = enable-automation
# Fixed: the method name was misspelled "add_experimental_optiin", which
# raises AttributeError.
options.add_experimental_option('excludeSwitches', ['enable-automation'])

# Fixed: pass the options through the "options" keyword; the older
# "chrome_options" keyword is no longer accepted by current Selenium 4.
browser = webdriver.Chrome(options=options)
browser.get('https://www.toutiao.com/?wid=1651589400638')
time.sleep(5)
browser.quit()
# Project: JD.com (项目:京东)
"""
project name :京东数据采集脚本
"""
from selenium.webdriver import ChromeOptions
from selenium import webdriver
import time
from selenium.webdriver.common.by import By
# 项目结构
# 数据清洗 + 下载部分
def down_data():
    """Append the products shown on the current page to the keyword's CSV file.

    Reads two module-level names: ``browser`` (the WebDriver whose current
    page is scraped) and ``kw`` (used to build the output file name
    ``京东_{kw}.csv``). For every element with class ``gl-i-wrap`` it reads
    the title, price, comment text and shop name and appends them as one row.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if a product card
            lacks one of the four sub-elements.
    """
    # Local import so the function brings its own dependency into scope.
    import csv

    div_list = browser.find_elements(By.CLASS_NAME, 'gl-i-wrap')

    # "with" closes the file even if a lookup raises part-way through.
    # newline='' is what the csv module expects of the file object.
    with open(f'京东_{kw}.csv', mode='a', encoding='utf-8', newline='') as f:
        # csv.writer quotes fields containing commas or quotes; joining with
        # ',' by hand would split such a title across several columns.
        writer = csv.writer(f)
        for div in div_list:
            title = div.find_element(By.XPATH, './/div[@class="p-name p-name-type-2"]/a/em').text
            # Flatten multi-line titles onto one line.
            title = title.replace('\n', ' ')
            price = div.find_element(By.XPATH, './/div[@class="p-price"]/strong/i').text
            comment = div.find_element(By.XPATH, './/div[@class="p-commit"]/strong/a').text
            shop = div.find_element(By.XPATH, './/div[@class="p-shop"]/span/a').text
            writer.writerow([title, price, comment, shop])
# 主函数
if __name__ == '__main__':
    # Local import: used only to URL-encode the search keyword below.
    from urllib.parse import quote

    # kw and browser stay module-level names because down_data() reads them.
    kw = input('请问您要查询的商品名称是:')
    p = int(input('请问您要下载多少页?:'))

    options = ChromeOptions()
    # excludeSwitches = enable-automation
    options.add_experimental_option('excludeSwitches', ['enable-automation'])
    # Fixed: "options=" replaces the "chrome_options=" keyword, which current
    # Selenium 4 no longer accepts.
    browser = webdriver.Chrome(options=options)

    # try/finally closes the browser even if a page fails part-way through.
    try:
        for i in range(1, p + 1):
            print(f'正在下载第{i}页数据......')
            # The page parameter takes the odd numbers 1, 3, 5, ...
            # NOTE(review): presumably the site counts half-pages - confirm.
            # quote() keeps characters such as '&' or '#' in the keyword from
            # corrupting the query string.
            url = f'https://search.jd.com/Search?keyword={quote(kw)}&page={i*2-1}'
            browser.get(url)

            # Scroll to the bottom twice, pausing one second before each.
            js = "var q=document.documentElement.scrollTop=10000"
            for _ in range(2):
                time.sleep(1)
                browser.execute_script(js)

            # Scrape the loaded page and append it to the CSV file.
            down_data()
            print(f'第{i}页下载完成!')
    finally:
        browser.quit()
# Project: Lianjia (项目:链家网)
from selenium import webdriver
from selenium.webdriver.common.by import By

# NOTE(review): the head of this script was lost; only the tail of the URL
# assignment survived. The imports, the browser creation and the start of the
# "url = ..." line are reconstructed to match the other scripts in this file.
# Confirm against the original.
browser = webdriver.Chrome()
url = 'https://bj.lianjia.com/zufang/'

# try/finally closes the browser even if scraping fails.
try:
    browser.get(url)
    """
    https://bj.lianjia.com/zufang/pg2/#contentList
    https://bj.lianjia.com/zufang/pg3/#contentList
    """
    # The two URLs above are the addresses of pages 2 and 3.

    # Collect the listing entries.
    div_list = browser.find_elements(By.CLASS_NAME, 'content__list--item--main')

    # "with" closes the file even on error. The encoding is set explicitly,
    # matching the JD script, so Chinese text does not depend on the platform
    # default.
    with open('链家网.csv', mode='a', encoding='utf-8') as f:
        for div in div_list:
            # Turn each entry's multi-line text into one comma-separated row.
            data = div.text
            data = data.replace('\n', ',')
            f.write(data)
            f.write('\n')
finally:
    browser.quit()
# Project: BOSS Zhipin (项目:boss直聘)
"""
project name : boss直聘-数据分析
"""
from selenium.webdriver import ChromeOptions
from selenium import webdriver
from selenium.webdriver.common.by import By
import time

options = ChromeOptions()
# excludeSwitches = enable-automation
options.add_experimental_option('excludeSwitches', ['enable-automation'])
# Fixed: "options=" replaces the "chrome_options=" keyword, which current
# Selenium 4 no longer accepts.
browser = webdriver.Chrome(options=options)
url = 'https://www.zhipin.com/web/geek/job?query=数据分析&city=101110100'

# try/finally closes the browser even if scraping fails.
try:
    browser.get(url)

    # Collect the job cards.
    div_list = browser.find_elements(By.CLASS_NAME, 'job-card-wrapper')

    # "with" closes the file even on error. The encoding is set explicitly,
    # matching the JD script, so Chinese text does not depend on the platform
    # default.
    with open('boss项目.csv', mode='a', encoding='utf-8') as f:
        for div in div_list:
            # Turn each card's multi-line text into one comma-separated row.
            data = div.text
            data = data.replace('\n', ',')
            f.write(data)
            f.write('\n')
finally:
    # Fixed: the original ended with browser.get(), which raises TypeError
    # (missing url argument) and left the browser open; quit() was intended,
    # as in every other script in this file.
    browser.quit()