python selenium+phantomjs的详细用法及简单案例

最新推荐文章于 2022-10-27 16:53:53 发布

大蛇王

最新推荐文章于 2022-10-27 16:53:53 发布

阅读量474

点赞数

分类专栏： python 文章标签： selenium phantomjs python

本文链接：https://blog.csdn.net/t8116189520/article/details/78829806

版权

python 专栏收录该内容

69 篇文章 20 订阅

订阅专栏

运行环境python2.7

相关模块（如 selenium）需要自行下载安装

将 PhantomJS 可执行文件放入环境变量 PATH 所包含的目录中

（提示：1.因为phantomjs是无界面浏览器，所以可以通过截图来直观展示 2.selenium类似于按键精灵，代替手动点击网页）

模拟访问百度并截图

# coding: utf-8
# Import the Selenium WebDriver package.
from selenium import webdriver

# Start a headless PhantomJS browser.
driver = webdriver.PhantomJS()
try:
    # Open the Baidu home page.
    driver.get("http://www.baidu.com/")

    # Save a screenshot of the rendered page to baidu.png.
    driver.save_screenshot("baidu.png")
finally:
    # Always shut the browser down so no phantomjs process is left running.
    driver.quit()








模拟浏览器的前进和后退




# coding: utf-8
from selenium import webdriver

obj = webdriver.PhantomJS()
try:
    obj.get('http://www.baidu.com')     # visit Baidu
    obj.save_screenshot('13.png')
    obj.get('http://www.sina.com.cn')   # visit Sina
    obj.save_screenshot('14.png')
    obj.back()                          # step back in history (to Baidu)
    obj.save_screenshot('15.png')
    obj.forward()                       # step forward in history (to Sina)
    obj.save_screenshot('16.png')
except Exception as e:
    # Tutorial-level handling: report the failure instead of crashing.
    print(e)
finally:
    # Always shut the browser down so no phantomjs process is left running.
    obj.quit()


设置浏览器的宽高
# coding: utf-8
from selenium import webdriver

obj = webdriver.PhantomJS()
try:
    # Give up on a page load after 5 seconds.
    obj.set_page_load_timeout(5)
    # Set the browser window to 360x360 pixels (width, height).
    obj.set_window_size(360, 360)
    obj.get("http://www.baidu.com")
    obj.save_screenshot('12.png')  # take a screenshot
except Exception as e:
    # Tutorial-level handling: report the failure instead of crashing.
    print(e)
finally:
    # Always shut the browser down so no phantomjs process is left running.
    obj.quit()


对页面输入框、按钮等元素做一些操作
# coding: utf-8
from selenium import webdriver

obj = webdriver.PhantomJS()
try:
    # Give up on a page load after 50 seconds.
    obj.set_page_load_timeout(50)
    obj.get('http://www.baidu.com')
    print(obj.find_element_by_id("cp").text)         # read an element's text
    obj.find_element_by_id('kw').clear()             # clear the input box
    obj.find_element_by_id('kw').send_keys('hello')  # type "hello" into the input box
    obj.find_element_by_id('su').click()             # click the button
    # obj.find_element_by_id('su').submit()          # alternative: submit the form
    obj.save_screenshot('17.png')
except Exception as e:
    # Tutorial-level handling: report the failure instead of crashing.
    print(e)
finally:
    # Always shut the browser down so no phantomjs process is left running.
    obj.quit()


截取全屏
# coding: utf-8
from selenium import webdriver

obj = webdriver.PhantomJS()
try:
    # Give up on a page load after 5 seconds.
    obj.set_page_load_timeout(5)

    # Maximize the browser window before loading the page.
    obj.maximize_window()

    obj.get("http://www.baidu.com")
    obj.save_screenshot('11.png')
except Exception as e:
    # Tutorial-level handling: report the failure instead of crashing.
    print(e)
finally:
    # Always shut the browser down so no phantomjs process is left running.
    obj.quit()


定位节点和标签
# coding: utf-8
from selenium import webdriver

obj = webdriver.PhantomJS()
try:
    # Give up on a page load after 5 seconds.
    obj.set_page_load_timeout(5)
    obj.get('http://www.baidu.com')
    obj.find_element_by_id('kw')                    # locate by id attribute
    obj.find_element_by_class_name('s_ipt')         # locate by class attribute
    obj.find_element_by_name('wd')                  # locate by name attribute
    obj.find_element_by_tag_name('input')           # locate by tag name
    obj.find_element_by_css_selector('#kw')         # locate by CSS selector
    obj.find_element_by_xpath("//input[@id='kw']")  # locate by XPath expression
    obj.find_element_by_link_text("贴吧")  # locate a link by its visible text

    print(obj.find_element_by_id('kw').tag_name)    # print the element's tag name
except Exception as e:
    # Tutorial-level handling: report the failure instead of crashing.
    print(e)
finally:
    # Always shut the browser down so no phantomjs process is left running.
    obj.quit()


添加代理和报头
#coding:utf8
from selenium import webdriver
from random import choice


# Pool of browser User-Agent strings to pick from.
# Each entry must be followed by a comma: adjacent string literals without one
# are silently concatenated into a single (invalid) User-Agent.
user_agent = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
]

# Import the DesiredCapabilities configuration object.
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# Work on a copy so the shared PHANTOMJS defaults are not modified.
dcap = dict(DesiredCapabilities.PHANTOMJS)
# Pick a random entry from the user_agent list to disguise the browser.
dcap["phantomjs.page.settings.userAgent"] = choice(user_agent)
# Do not load images; crawling pages is much faster without them.
dcap["phantomjs.page.settings.loadImages"] = False
# Configure the proxy.
# NOTE(review): the proxy type is declared as socks5 -- confirm the proxy at
# this address actually speaks SOCKS5.
service_args = ['--proxy=61.135.217.7:80', '--proxy-type=socks5']
# Launch PhantomJS with the capabilities and service arguments above.
driver = webdriver.PhantomJS(desired_capabilities=dcap, service_args=service_args)
try:
    # Implicit wait of 5 seconds; adjust as needed.
    driver.implicitly_wait(5)
    # Page-load timeout of 10 seconds, similar to the timeout option of
    # requests.get(); driver.get() itself has no timeout parameter.
    # driver.get(url) has been seen to hang forever without raising, which
    # stalls the program; setting this timeout avoids that.
    driver.set_page_load_timeout(10)
    # Script execution timeout of 10 seconds.
    driver.set_script_timeout(10)

    driver.get('http://ip38.com/')
    driver.save_screenshot("5.png")
finally:
    # Always shut the browser down so no phantomjs process is left running.
    driver.quit()







键盘事件
# coding: utf-8
from selenium.webdriver.common.keys import Keys
from selenium import webdriver

obj = webdriver.PhantomJS()
try:
    # Give up on a page load after 10 seconds.
    obj.set_page_load_timeout(10)
    obj.get('http://www.baidu.com')
    obj.find_element_by_id('kw').send_keys(Keys.TAB)
    obj.find_element_by_id('kw').send_keys('hello')
    obj.find_element_by_id('kw').send_keys(Keys.CONTROL, 'a')  # Ctrl+A: select all text in the input box
    obj.find_element_by_id('kw').send_keys(Keys.CONTROL, 'x')  # Ctrl+X: cut the selected text
    obj.find_element_by_id('kw').send_keys(u'美女')
    obj.find_element_by_id('su').send_keys(Keys.ENTER)         # press Enter on the button
    obj.save_screenshot('18.png')
except Exception as e:
    # Tutorial-level handling: report the failure instead of crashing.
    print(e)
finally:
    # Always shut the browser down so no phantomjs process is left running.
    obj.quit()


添加代理ip

# coding: utf-8
from selenium import webdriver

browser = webdriver.PhantomJS()
try:
    # browser.get('http://ip38.com/')
    # browser.save_screenshot("3.png")  # save a screenshot

    # Open a new session with proxy-enabled capabilities. The original author
    # describes this as roughly equivalent to clearing the browser cache and
    # revisiting the URL through the proxy.
    proxy = webdriver.Proxy()
    proxy.http_proxy = '112.74.32.237:6666'
    # Add the proxy settings to a copy of the PhantomJS capabilities, so the
    # shared webdriver.DesiredCapabilities.PHANTOMJS dict is left untouched.
    capabilities = dict(webdriver.DesiredCapabilities.PHANTOMJS)
    proxy.add_to_capabilities(capabilities)
    browser.start_session(capabilities)
    browser.get('http://ip38.com/')
    browser.save_screenshot("2.png")

    # Restore the system proxy:
    # proxy=webdriver.Proxy()
    # # proxy.proxy_type=ProxyType.DIRECT
    # proxy.add_to_capabilities(webdriver.DesiredCapabilities.PHANTOMJS)
    # browser.start_session(webdriver.DesiredCapabilities.PHANTOMJS)
    # browser.get('http://ip38.com/')
finally:
    # Always shut the browser down so no phantomjs process is left running.
    browser.quit()

大蛇王

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python selenium+phantomjs的详细用法及简单案例

运行环境python2.7相关模块需要自行下载安装将phantomjs插件放入环境变量所在的路径（提示：1.因为phantomjs是无界面浏览器，所以可以通过截图来直观展示 2.selenium类似于按键精灵，代替手动点击网页）模拟访问百度并截图#coding:utf8# 导入包from selenium import webdriver# 使用插件p
复制链接

扫一扫

专栏目录