PhantomJS基础
- PhantomJS效率不高,但是能解决很多反爬问题
- 本质是一个无界面(headless)浏览器,效率不高,但是对异步加载(由 JavaScript 动态渲染)的数据非常有效
- 只要拿到有效数据就行了
下载 PhantomJS http://phantomjs.org
完成之后需要将 phantomjs.exe 所在的目录(/phantomjs/bin)添加到 PATH 环境变量中
# Demo script: drive a headless PhantomJS browser through Selenium to run a
# Baidu search, extract the page title, scroll the page and save the rendered
# HTML to disk.
#
# NOTE: webdriver.PhantomJS and the find_element_by_* helpers only exist in
# older Selenium releases (3.x); this script targets those versions.
import re
import time

from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# Start from the default PhantomJS capabilities and override the User-Agent
# header sent by the headless browser.
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["phantomjs.page.settings.userAgent"] = ("Mozilla/4.0 .....")

# Option 1 (default capabilities) would be: brower = webdriver.PhantomJS()
# Option 2 (custom capabilities) is used here. Only ONE driver is created:
# starting both and rebinding the name leaks the first PhantomJS process.
brower = webdriver.PhantomJS(desired_capabilities=dcap)
try:
    # Load the page in the headless (no UI) browser.
    brower.get('http://www.baidu.com')

    # Take a screenshot. Selenium writes PNG data regardless of the file
    # extension and returns False (without raising) if the file cannot be
    # written, e.g. when the target directory does not exist.
    brower.get_screenshot_as_file('file_path/test.jpg')

    # Type the query into the search box (id="kw") and click the search
    # button (id="su").
    search_box = brower.find_element_by_xpath('//*[@id="kw"]')
    search_box.clear()
    search_box.send_keys("scrapy")
    brower.find_element_by_xpath('//*[@id="su"]').click()

    # Crude fixed wait so the asynchronously loaded results can render.
    time.sleep(5)
    data = brower.page_source
    print(len(data))

    # Extract the contents of every <title> element from the rendered HTML.
    title = re.compile("<title>(.*?)</title>").findall(data)
    print(title)

    # Scroll the page down one 1280px step per second so lazily loaded
    # content gets a chance to render. window.scrollTo takes (x, y) and
    # JavaScript names are case-sensitive ("scrollto" does not exist).
    for i in range(20):
        js = 'window.scrollTo(0,' + str((i + 1) * 1280) + ')'
        brower.execute_script(js)
        time.sleep(1)

    print(brower.current_url)

    # Save the fully rendered page; explicit UTF-8 avoids depending on the
    # platform's default encoding.
    data = brower.page_source
    with open("D:/test.html", "w", encoding='utf-8') as f:
        f.write(data)
finally:
    # Always shut the PhantomJS process down, even if a step above failed.
    brower.quit()