目录
一、环境介绍
(1)版本介绍
python版本:3.7
pip版本:20.0.1
pyqt5版本:5.15.3
PIL版本:8.1.1
selenium版本:2.48.0 (推荐使用,3.*版本之后会报错)
**开发工具:pycharm**
需要用到的工具:phantomjs.exe、chromedriver.exe
(2)安装
# 不指定版本
pip install pyqt5
pip install pillow
pip install selenium
# 指定安装版本
pip install pyqt5==版本号
pip install pillow==版本号
pip install selenium==2.48.0  # 推荐使用,3.*版本之后会报错
二、实现过程以及功能介绍
本人将通过度娘收集到的三种截图方式做了一个简单的集合封装,只需要传入相应的参数即可。
(1)pyqt5模块实现
文件路径:venv1/Screenshot/Driver/MainWindow.py
import sys
from PyQt5.QtCore import *
from PyQt5.QtWidgets import *
from PyQt5.QtWebEngineWidgets import *
from PIL import Image,ImageGrab
from Screenshot.Driver import BaseDriver,ImageMerge
import traceback
class MainWindow(QMainWindow, BaseDriver):
    """Screenshot driver that renders a page in a Qt window and grabs it.

    The URL is loaded into a QWebEngineView hosted by this frameless
    window. When loading finishes, a timer grabs the window area from the
    desktop once per tick, scrolling the page by one window height between
    grabs. The pieces are then stitched together by ImageMerge and the
    result is reported through ``checkShotCallback``.
    """

    def __init__(self, parent=None):
        """Create the QApplication and configure the capture window.

        ``parent`` is accepted but not used.
        """
        # The application object is created before the widget is initialised.
        self.app = QApplication(sys.argv)
        super(MainWindow, self).__init__()
        self.setWindowTitle('截图')
        # Hide the minimise / maximise buttons.
        self.setWindowFlag(Qt.WindowMinMaxButtonsHint, False)
        # Frameless window, so the grab contains only page content.
        self.setWindowFlag(Qt.FramelessWindowHint, True)

    def shotScreen(self):
        """Load ``self.win_url`` and run the event loop until the capture ends.

        Any exception raised while setting up or running is reported through
        ``checkShotCallback`` as a formatted traceback instead of propagating.

        Returns:
            self, for call chaining.
        """
        try:
            print('MainWindow->shotScreen', 'url=' + self.win_url)
            self.getTempPath(isClean=True)
            # Create the browser widget.
            self.browser = QWebEngineView()
            # NOTE(review): this instance attribute shadows QWidget.winId();
            # kept because it is part of the object's visible state.
            self.winId = self.browser.winId()
            # Start loading the page.
            self.browser.load(QUrl(self.win_url))
            self.setCentralWidget(self.browser)
            # Position and size the window on a suitable screen.
            geometry = self.chose_screen()
            self.setGeometry(geometry)
            # check_page runs once the page has finished loading.
            self.browser.loadFinished.connect(self.check_page)
            self.show()
            self.app.exit(self.app.exec_())
        except Exception:
            self.checkShotCallback(file=None, error=traceback.format_exc())
        return self

    def get_page_size(self):
        """Return ``(width, height)`` of the loaded page's contents.

        Also stores the values in ``self.set_width`` / ``self.set_height``.
        """
        print('MainWindow->get_page_size')
        size = self.browser.page().contentsSize()
        self.set_height = size.height()
        self.set_width = size.width()
        return size.width(), size.height()

    def chose_screen(self):
        """Pick the window geometry and the matching screen-grab box.

        With a requested size, the first screen strictly larger than it is
        selected, and its top-left corner is used when no position was set.
        Without a requested size, the screen's available size is used.

        Returns:
            QRect with the window geometry (x, y, width, height).
        """
        print('MainWindow->chose_screen')
        desktop = QApplication.desktop()
        screen_count = desktop.screenCount()
        for i in range(0, screen_count):
            rect = desktop.availableGeometry(i)
            s_width, s_height = rect.width(), rect.height()
            if self.win_width and self.win_height:
                if s_width > self.win_width and s_height > self.win_height:
                    if not self.win_x and not self.win_y:
                        self.win_x, self.win_y = rect.left(), rect.top()
                    break
            else:
                if not self.win_width or s_width > self.win_width:
                    self.win_width = s_width
                if not self.win_height or s_height > self.win_height:
                    self.win_height = s_height
        # ImageGrab.grab expects (left, top, right, bottom), so the far edges
        # are position + size rather than the bare width and height.
        self.bbox = (
            self.win_x,
            self.win_y,
            self.win_x + self.win_width,
            self.win_y + self.win_height,
        )
        return QRect(self.win_x, self.win_y, self.win_width, self.win_height)

    def check_page(self):
        """Slot for ``loadFinished``: plan the captures and start the timer.

        Computes how many full window-heights fit into the page
        (``self.page``) and the remaining partial height
        (``self.over_flow_size``), then starts a 2000 ms timer that drives
        ``exe_command``.

        Returns:
            self, for call chaining.
        """
        print('MainWindow->check_page')
        _, p_height = self.get_page_size()
        # Number of full pages and the leftover height below them.
        self.page, self.over_flow_size = divmod(p_height, self.height())
        print('page='+str(self.page))
        self.shotPage = 0
        if self.page == 0:
            # The page is shorter than the window, so one grab covers it.
            # Clear the remainder so the same view is not grabbed twice.
            self.page = 1
            self.over_flow_size = 0
        # Collects the partial screenshots and merges them at the end.
        self.ssm = ImageMerge.ImageMerge(save_path=self.getSavePath())
        self.timer = QTimer(self)
        self.timer.timeout.connect(self.exe_command)
        # Interval in milliseconds.
        self.timer.setInterval(2000)
        self.timer.start()
        return self

    def exe_command(self):
        """Timer slot: grab one page per tick, then merge and finish.

        While full pages remain, the window is grabbed and the page is
        scrolled down. On the tick after the last full page the timer is
        stopped, the leftover part is grabbed if there is one, the pieces
        are merged, the window is closed and the callback is invoked.

        Returns:
            self, for call chaining.
        """
        print('MainWindow->exe_command')
        if self.page > 0:
            # Grab, then scroll to the next page.
            self.screen_shot().run_js()
        else:
            self.timer.stop()
            if self.over_flow_size > 0:
                self.screen_shot()
            # Merge all partial screenshots into the final image.
            file_path, _ = self.ssm.image_merge(filename=self.image_name)
            self.close()
            self.checkShotCallback(file=file_path, error=None)
        self.page -= 1
        self.shotPage += 1
        return self

    def run_js(self):
        """Scroll the page down by one window height via JavaScript.

        Returns:
            self, for call chaining.
        """
        print('MainWindow->run_js')
        script = """
var scroll = function(dHeight){
var t = document.documentElement.scrollTop;
var h = document.documentElement.scrollHeight;
var ch = document.documentElement.clientHeight;
dHeight = dHeight || 0;
var current = t + dHeight;
if(current > h){
window.scrollTo(0, ch)
}else{
window.scrollTo(0, current)
}
}
"""
        height = self.height()
        command = script + '\n scroll({})'.format(height)
        self.browser.page().runJavaScript(command)
        return self

    def screen_shot(self):
        """Grab the window area from the desktop into the temp directory.

        The file is named ``<shotPage>_<image_name>`` and registered with
        the ImageMerge instance.

        Returns:
            self, for call chaining.
        """
        print('MainWindow->screen_shot')
        path = self.temp_path
        file_path = str(path.joinpath("{}_{}".format(self.shotPage, self.image_name)))
        # Grab the screen region covered by the window.
        im = ImageGrab.grab(bbox=self.bbox)
        im.save(file_path)
        self.ssm.add_im(file_path)
        return self
(2)selenium模块实现
文件路径:venv/Screenshot/Driver/selenium.py
from selenium import webdriver
import Screenshot
import traceback
class Selenium(Screenshot.Driver.BaseDriver):
    """Screenshot driver backed by Selenium (PhantomJS or Chrome)."""

    # Locations of the driver executables. These are machine-specific:
    # override them on the class or on an instance before calling shotScreen().
    phantomjs_path = 'E:/wens/CompanyProject/python_reptile/venv1/Screenshot/Tools/phantomjs.exe'
    chromedriver_path = r"E:\wens\CompanyProject\python_reptile\venv1\Screenshot\Tools\chromedriver.exe"

    def __init__(self):
        print('Selenium->__init__')

    def _capture(self, browser, pic_name):
        """Size the window, load ``self.win_url`` and save a screenshot.

        The browser is always shut down with ``quit()``, even when loading
        or saving fails, so no driver process is left running.
        """
        try:
            if self.win_width and self.win_height:
                browser.set_window_size(width=self.win_width, height=self.win_height)
            else:
                browser.maximize_window()
            browser.get(self.win_url)
            browser.get_screenshot_as_file(pic_name)
        finally:
            browser.quit()

    def shotScreenByPhantomjs(self):
        """Take the screenshot with PhantomJS (no visible window).

        Returns:
            self, for call chaining.
        """
        print('Selenium->shotScreenByPhantomjs')
        picName = '{}/{}'.format(self.getSavePath(), self.image_name)
        brower = webdriver.PhantomJS(executable_path=self.phantomjs_path)
        self._capture(brower, picName)
        self.checkShotCallback(file=picName, error=None)
        return self

    def shotScreenByChrome(self):
        """Take the screenshot with Chrome via chromedriver.

        Returns:
            self, for call chaining.
        """
        print('Selenium->shotScreenByChrome')
        picName = '{}/{}'.format(self.getSavePath(), self.image_name)
        driver = webdriver.Chrome(self.chromedriver_path)
        self._capture(driver, picName)
        self.checkShotCallback(file=picName, error=None)
        return self

    def shotScreen(self):
        """Dispatch to the backend selected by ``self.shot_driver``.

        PhantomJS is used when the value matches no known driver. Any
        exception is reported through ``checkShotCallback`` as a formatted
        traceback instead of propagating.

        Returns:
            self, for call chaining.
        """
        try:
            print('Selenium->shotScreen')
            Screenshot.switch(self.shot_driver, {
                Screenshot.Screenshot.sDriver_Chrome: self.shotScreenByChrome,
                Screenshot.Screenshot.sDriver_Phantomjs: self.shotScreenByPhantomjs,
                'default': self.shotScreenByPhantomjs,
            })
        except Exception:
            self.checkShotCallback(file=None, error=traceback.format_exc())
        return self
(3)图片合并处理
文件路径:venv/Screenshot/Driver/ImageMerge.py
from pathlib import Path
from PIL import Image
class ImageMerge():
    """Collect images and stack them vertically into a single picture."""

    root_path = None
    save_path = None
    # Rebound to a fresh list per instance in __init__.
    im_list = []
    # Pixels cut from the right edge of a multi-image merge.
    # NOTE(review): presumably the width of the page scrollbar - confirm.
    right_trim = 15

    def __init__(self, save_path=None):
        """Remember the output directory and make sure it exists.

        Args:
            save_path: Output directory as a ``Path`` or a string. Defaults
                to ``image/merge`` under the parent of this file's directory.
        """
        print('ImageMerge->__init__')
        self.save_path = save_path
        self.im_list = []
        self.get_path()

    def get_path(self):
        """Resolve ``root_path`` / ``save_path`` and create the directory.

        Returns:
            self, for call chaining.
        """
        print('ImageMerge->get_path')
        self.root_path = Path(__file__).parent.parent
        if not self.save_path:
            self.save_path = self.root_path.joinpath('image/merge')
        else:
            # Accept plain strings as well as Path objects.
            self.save_path = Path(self.save_path)
        self.save_path.mkdir(parents=True, exist_ok=True)
        return self

    def add_im(self, path):
        """Open the image at ``path`` and queue it for merging.

        Returns:
            self, for call chaining.
        """
        print('ImageMerge.add_im', path)
        im = Image.open(path)
        self.im_list.append(im)
        return self

    def get_new_size(self):
        """Return ``(max_width, total_height)`` over the queued images."""
        print('ImageMerge->get_new_size')
        max_width = 0
        total_height = 0
        for img in self.im_list:
            width, height = img.size
            max_width = max(max_width, width)
            total_height += height
        return max_width, total_height

    def image_merge(self, filename):
        """Merge the queued images top to bottom and save the result.

        With more than one image, the canvas is ``right_trim`` pixels
        narrower than the widest image. A single image is copied onto an
        RGB canvas of its own size.

        Args:
            filename: File name of the output image inside ``save_path``.

        Returns:
            Tuple ``(file_path, new_img)`` with the saved path as a string
            and the merged image object.

        Raises:
            IndexError: If no image has been added.
        """
        print('ImageMerge->image_merge')
        file_path = '{}/{}'.format(self.save_path, filename)
        if len(self.im_list) > 1:
            max_width, total_height = self.get_new_size()
            # Canvas that receives every piece.
            new_img = Image.new('RGB', (max_width - self.right_trim, total_height), 255)
            y = 0
            for img in self.im_list:
                new_img.paste(img, (0, y))
                y += img.size[1]
            new_img.save(file_path)
        else:
            obj = self.im_list[0]
            width, height = obj.size
            box = (0, 0, width, height)
            region = obj.crop(box)
            new_img = Image.new('RGB', (width, height), 255)
            new_img.paste(region, box)
            new_img.save(file_path)
        return file_path, new_img
(4)基类BaseDriver
文件路径:venv/Screenshot/Driver/__init__.py
import shutil
from pathlib import Path
class BaseDriver():
    """Shared configuration and helpers for the screenshot drivers.

    Every setter stores its argument on the instance and returns ``self``
    so that calls can be chained.
    """

    win_width = None
    win_height = None
    win_url = None
    image_path = None
    temp_path = None
    image_name = None
    shot_driver = None
    shot_callback = None
    win_x = 0
    win_y = 0

    def setWindowPosition(self, x, y):
        """Store the window's top-left position."""
        self.win_x, self.win_y = x, y
        return self

    def setWindowSize(self, width, height):
        """Store the requested window size."""
        print('BaseDriver->setWindowSize')
        self.win_width, self.win_height = width, height
        return self

    def setSavePath(self, save_path):
        """Store the directory that receives the final image."""
        print('BaseDriver->setSavePath')
        self.image_path = save_path
        return self

    def setTempPath(self, temp_path):
        """Store the directory used for intermediate files."""
        self.temp_path = temp_path
        return self

    def url(self, url: str):
        """Store the address of the page to capture."""
        print('BaseDriver->url')
        self.win_url = url
        return self

    def filename(self, filename):
        """Store the file name of the output image."""
        print('BaseDriver->filename')
        self.image_name = filename
        return self

    def driver(self, driver):
        """Store the identifier of the browser backend."""
        print('BaseDriver->driver')
        self.shot_driver = driver
        return self

    def shotCallback(self, callback):
        """Store the callable that receives the capture result."""
        print('BaseDriver->shotCallback')
        self.shot_callback = callback
        return self

    def checkShotCallback(self, file=None, error=None):
        """Invoke the stored callback, if any, with a file/error dict."""
        notify = self.shot_callback
        if notify:
            payload = {'file': file, 'error': error}
            notify(payload)
        return self

    def getSavePath(self):
        """Return the output directory, creating it when missing.

        Falls back to ``image/merge`` under the parent of this file's
        directory when no path has been set.
        """
        print('MainWindow->getSavePath')
        target = self.image_path or Path(__file__).parent.parent.joinpath('image/merge')
        self.image_path = target
        if not target.exists():
            target.mkdir(parents=True)
        return target

    def getTempPath(self, isClean=False):
        """Return the temp directory, creating it when missing.

        Falls back to ``image/temp`` under the parent of this file's
        directory when no path has been set. With ``isClean`` an existing
        directory is removed and recreated empty.
        """
        print('MainWindow->getTempPath')
        target = self.temp_path or Path(__file__).parent.parent.joinpath('image/temp')
        self.temp_path = target
        if target.exists():
            if isClean:
                shutil.rmtree(target)
                target.mkdir(parents=True)
        else:
            target.mkdir(parents=True)
        return target

    def shotScreen(self):
        """Placeholder capture step; subclasses provide the real one."""
        print('BaseDriver->shotScreen')
        return self
(5)集合控制器
文件路径:venv/Screenshot/__init__.py
from Screenshot.Driver import selenium,ImageMerge,BaseDriver,MainWindow
from pathlib import Path
import traceback
class Screenshot():
    """Fluent front end that configures and runs one screenshot driver.

    Settings are collected through the chainable setters; ``save`` then
    builds the selected driver, passes the settings on and starts the
    capture.
    """

    # Capture modes.
    pModel_QT5 = 1
    pModel_Selenium = 2
    # Browser backends for the Selenium mode.
    sDriver_Phantomjs = 1
    sDriver_Chrome = 2

    win_url = ''
    win_width = None
    win_height = None
    save_path = None
    temp_path = None
    shot_pModel = pModel_Selenium
    shot_driver = sDriver_Phantomjs
    shot_callback = None
    win_x = 0
    win_y = 0

    def __init__(self):
        print('ScreenShot->__init__')

    def setWindowPosition(self, x, y):
        """Store the window's top-left position."""
        self.win_x, self.win_y = x, y
        return self

    def url(self, url):
        """Store the address of the page to capture."""
        print('Screenshot->url')
        self.win_url = url
        return self

    def pModel(self, model):
        """Select the capture mode (``pModel_QT5`` or ``pModel_Selenium``)."""
        print('Screenshot->pmodel')
        self.shot_pModel = model
        return self

    def driver(self, driver):
        """Select the browser backend for the Selenium mode."""
        print('Screenshot->driver')
        self.shot_driver = driver
        return self

    def setWindowSize(self, width , height):
        """Store the requested window size."""
        print('Screenshot->setWindowSize')
        self.win_width, self.win_height = width, height
        return self

    def savePath(self, path):
        """Store the output directory as a ``Path``, creating it if missing."""
        print('Screenshot->savePath')
        target = Path(path)
        self.save_path = target
        if not target.exists():
            target.mkdir(parents=True)
        return self

    def selenium(self):
        """Build a Selenium driver."""
        print('Screenshot->selenium')
        return selenium.Selenium()

    def pyqt5(self):
        """Build a PyQt5 driver."""
        print('Screenshot->pyqt5')
        return MainWindow.MainWindow()

    def shotCallback(self, callback):
        """Store the callable that receives the capture result."""
        print('Screenshot->shotCallback')
        self.shot_callback = callback
        return self

    def getDriver(self):
        """Build the driver for the selected mode; Selenium is the fallback."""
        print('Screenshot->getDriver')
        factories = {
            self.pModel_QT5: self.pyqt5,
            self.pModel_Selenium: self.selenium,
            'default': self.selenium,
        }
        return switch(self.shot_pModel, factories)

    def save(self, filename):
        """Configure the selected driver and run the capture.

        Returns whatever the driver's ``shotScreen`` returns.
        """
        print('Screenshot->save')
        return (
            self.getDriver()
            .setWindowPosition(x=self.win_x, y=self.win_y)
            .url(self.win_url)
            .driver(self.shot_driver)
            .setWindowSize(width=self.win_width, height=self.win_height)
            .filename(filename)
            .setSavePath(self.getSavePath())
            .setTempPath(temp_path=self.getTempPath())
            .shotCallback(self.shot_callback)
            .shotScreen()
        )

    def getSavePath(self):
        """Return the output directory, creating it when missing.

        Falls back to ``image/merge`` next to this file when no path has
        been set.
        """
        print('Screenshot->getSavePath')
        target = self.save_path or Path(__file__).parent.joinpath('image/merge')
        self.save_path = target
        if not target.exists():
            target.mkdir(parents=True)
        return target

    def getTempPath(self):
        """Return ``image/temp`` next to this file, creating it when missing."""
        print('Screenshot->getTempPath')
        target = Path(__file__).parent.joinpath('image/temp')
        if not target.exists():
            target.mkdir(parents=True)
        return target
def switch(key, options: dict):
    """Look up ``key`` in ``options`` and return the entry's result.

    The ``'default'`` entry is used when ``key`` is missing. An entry that
    has a ``__call__`` attribute is called with no arguments and its return
    value is returned; any other entry is returned unchanged.
    """
    print('switch')
    fallback = options.get('default')
    chosen = options.get(key, fallback)
    return chosen() if hasattr(chosen, '__call__') else chosen
(6)实例·例子
文件路径:main.py
from Screenshot import *
from pathlib import Path
url = 'http://blog.sina.com.cn/lm/rank/focusbang//'
save_path = Path(__file__).parent / 'shotScreen'


def shotCallback(res):
    """Print the result dict reported by the screenshot driver."""
    print('file_path', res)


# Configure one capture through the fluent API and run it.
(
    Screenshot()
    .setWindowPosition(x=30, y=0)
    .url(url)
    .setWindowSize(1000, 800)
    .savePath(save_path)
    .shotCallback(shotCallback)
    .pModel(model=Screenshot.pModel_Selenium)
    .driver(driver=Screenshot.sDriver_Phantomjs)
    .save(filename='screen.png')
)
(7)截图效果
三、总结与资源
(1)截图方式总结
以上三种截图方案中只有phantomjs截图是隐式截图,其他两种方式(pyqt5和chrome)截图都是显式截图。
隐式截图:无法看到截图界面,用户在桌面的操作不会影响截图效果;
显式截图:执行过程中,程序会调用浏览器在桌面打开一个浏览器窗口,然后通过截屏实现截图功能;
显式截图中,pyqt5做了智能滚动截图然后合并处理,chrome只是单纯截图网页显示部分。如果网页内容过长,有滚动条的推荐使用隐式截图(phantomjs)和pyqt5方式截图。
(2)资源下载
如需要下载资源参考:python实现网页截图(v1.0.0).rar