python实现网页截图功能——学习篇(01)

一、环境介绍

(1)版本介绍

python版本:3.7
pip版本:20.0.1
pyqt5版本:5.15.3
PIL版本:8.1.1
selenium版本:2.48.0 (推荐使用,3.*版本之后会报错)

**开发工具:pycharm**
需要用到的工具:phantomjs.exe、chromedriver.exe

(2)安装

# 不指定版本
pip install pyqt5
pip install pillow
pip install selenium
# 指定安装版本
pip install pyqt5==版本号
pip install pillow==版本号
pip install selenium==2.48.0  # 推荐使用,3.*版本之后会报错

二、实现过程以及功能介绍

本人将通过度娘收集到的三种截图方式做了一个简单的集合封装,只需要传入相应的参数即可。

(1)pyqt5模块实现

文件路径:venv1/Screenshot/Driver/MainWindow.py

import sys
from PyQt5.QtCore import *
from PyQt5.QtWidgets import *
from PyQt5.QtWebEngineWidgets import *
from PIL import Image,ImageGrab
from Screenshot.Driver import BaseDriver,ImageMerge
import traceback


class MainWindow(QMainWindow, BaseDriver):
    """Frameless Qt window that screenshots a web page one window-height at a time.

    The page is loaded into a QWebEngineView. Once loading finishes, a timer
    repeatedly grabs the screen region covered by the window, scrolls the page
    down by one window height, and finally hands the pieces to ImageMerge.
    The outcome is reported through BaseDriver.checkShotCallback.
    """

    def __init__(self, parent=None):
        """Create (or reuse) the QApplication and configure the window.

        :param parent: optional parent widget, forwarded to QMainWindow.
        """
        # Only one QApplication may exist per process, so reuse a live one
        # instead of constructing a second instance.
        self.app = QApplication.instance() or QApplication(sys.argv)
        super(MainWindow, self).__init__(parent)
        self.setWindowTitle('截图')
        # Disable the minimize / maximize buttons.
        self.setWindowFlag(Qt.WindowMinMaxButtonsHint, False)
        # Remove the window border.
        self.setWindowFlag(Qt.FramelessWindowHint, True)

    def shotScreen(self):
        """Load ``self.win_url``, show the window and run the Qt event loop.

        Blocks until the event loop ends. Any exception raised while setting
        things up is reported through the shot callback as a traceback string.

        :return: self, to allow call chaining.
        """
        try:
            print('MainWindow->shotScreen', 'url=' + self.win_url)
            # Start from an empty temp directory for the partial captures.
            self.getTempPath(isClean=True)
            # Create the browser widget.
            self.browser = QWebEngineView()
            # NOTE(review): this attribute shadows QWidget.winId() on the
            # instance; kept because outside code may read it.
            self.winId = self.browser.winId()
            # Start loading the page.
            self.browser.load(QUrl(self.win_url))
            self.setCentralWidget(self.browser)
            # Place and size the window on a suitable screen.
            self.setGeometry(self.chose_screen())
            # check_page runs once the page reports that loading finished.
            self.browser.loadFinished.connect(self.check_page)
            self.show()
            self.app.exit(self.app.exec_())
        except Exception:
            self.checkShotCallback(file=None, error=traceback.format_exc())
        return self

    # Page size
    def get_page_size(self):
        """Read the page's content size and cache it on the instance.

        :return: ``(width, height)`` of the page contents.
        """
        print('MainWindow->get_page_size')
        size = self.browser.page().contentsSize()
        self.set_height = size.height()
        self.set_width = size.width()
        return size.width(), size.height()

    # Screen selection
    def chose_screen(self):
        """Pick window geometry from the available screens.

        When a window size was requested, the first screen strictly larger
        than that size is chosen; if no position was set (both coordinates
        falsy), the window is moved to that screen's top-left corner.
        When no size was requested, the size grows to the largest available
        screen width and height.

        Also stores ``self.bbox``, the screen region later passed to
        ``ImageGrab.grab``.

        :return: QRect describing the window geometry.
        """
        print('MainWindow->chose_screen')
        desktop = QApplication.desktop()
        for index in range(desktop.screenCount()):
            rect = desktop.availableGeometry(index)
            s_width, s_height = rect.width(), rect.height()
            if self.win_width and self.win_height:
                if s_width > self.win_width and s_height > self.win_height:
                    if not self.win_x and not self.win_y:
                        self.win_x, self.win_y = rect.left(), rect.top()
                    break
            else:
                if not self.win_width or s_width > self.win_width:
                    self.win_width = s_width
                if not self.win_height or s_height > self.win_height:
                    self.win_height = s_height
        # ImageGrab.grab expects (left, top, right, bottom), not
        # (x, y, width, height); passing the size as the far corner cropped
        # the capture whenever the window position was not (0, 0).
        self.bbox = (
            self.win_x,
            self.win_y,
            self.win_x + self.win_width,
            self.win_y + self.win_height,
        )
        return QRect(self.win_x, self.win_y, self.win_width, self.win_height)

    def check_page(self):
        """Work out how many captures are needed and start the capture timer.

        Connected to the browser's ``loadFinished`` signal.

        :return: self, to allow call chaining.
        """
        print('MainWindow->check_page')
        p_width, p_height = self.get_page_size()
        # Full windows needed, plus the leftover height below the last one.
        self.page, self.over_flow_size = divmod(p_height, self.height())
        print('page='+str(self.page))
        self.shotPage = 0
        if self.page == 0:
            # The page is shorter than the window: a single capture shows all
            # of it. Clear the remainder as well, otherwise exe_command takes
            # a second, duplicate capture of the same content.
            self.page = 1
            self.over_flow_size = 0
        # Collector that later stitches the partial captures together.
        self.ssm = ImageMerge.ImageMerge(save_path=self.getSavePath())
        # Fire exe_command every 2000 ms.
        self.timer = QTimer(self)
        self.timer.timeout.connect(self.exe_command)
        self.timer.setInterval(2000)
        self.timer.start()
        return self

    # Timer tick
    def exe_command(self):
        """Handle one timer tick: capture and scroll, or finish up.

        While full pages remain, capture the window and scroll down. After
        that, stop the timer, capture the leftover part if there is one, merge
        all captures, close the window and report the merged file through the
        shot callback.

        :return: self, to allow call chaining.
        """
        print('MainWindow->exe_command')
        if self.page > 0:
            # Capture, then scroll to the next page.
            self.screen_shot().run_js()
        else:
            self.timer.stop()
            if self.over_flow_size > 0:
                # Capture the part below the last full page.
                self.screen_shot()
            # Stitch all captures into one image.
            file_path, new_img = self.ssm.image_merge(filename=self.image_name)
            self.close()
            self.checkShotCallback(file=file_path, error=None)
        self.page -= 1
        self.shotPage += 1
        return self

    # Scrolling
    def run_js(self):
        """Scroll the page down by one window height via JavaScript.

        :return: self, to allow call chaining.
        """
        print('MainWindow->run_js')
        script = """
            var scroll = function(dHeight){
                var t = document.documentElement.scrollTop;
                var h = document.documentElement.scrollHeight;
                var ch = document.documentElement.clientHeight;
                dHeight = dHeight || 0;
                var current = t + dHeight;
                if(current > h){
                    window.scrollTo(0, ch)
                }else{
                    window.scrollTo(0, current)
                }
            }
        """
        height = self.height()
        command = script + '\n scroll({})'.format(height)
        self.browser.page().runJavaScript(command)
        return self

    # Screen capture
    def screen_shot(self):
        """Grab the screen region ``self.bbox`` and queue it for merging.

        The capture is saved in the temp directory as
        ``<shotPage>_<image_name>`` and registered with the ImageMerge
        collector.

        :return: self, to allow call chaining.
        """
        print('MainWindow->screen_shot')
        path = self.temp_path
        file_path = str(path.joinpath("{}_{}".format(self.shotPage, self.image_name)))
        # Grab the desktop pixels under the window.
        im = ImageGrab.grab(bbox=self.bbox)
        im.save(file_path)
        self.ssm.add_im(file_path)
        return self

(2)selenium模块实现

文件路径:venv/Screenshot/Driver/selenium.py

from selenium import webdriver
import Screenshot
import traceback

class Selenium(Screenshot.Driver.BaseDriver):
    """Screenshot driver that renders the page with Selenium (PhantomJS or Chrome)."""

    # Locations of the browser driver executables. The defaults are the
    # original hard-coded paths; override them on the class or an instance
    # to run on another machine.
    phantomjs_path = 'E:/wens/CompanyProject/python_reptile/venv1/Screenshot/Tools/phantomjs.exe'
    chromedriver_path = r"E:\wens\CompanyProject\python_reptile\venv1\Screenshot\Tools\chromedriver.exe"

    def __init__(self):
        print('Selenium->__init__')

    def _shoot(self, make_browser):
        """Open a browser, save a screenshot of ``self.win_url`` and report it.

        :param make_browser: zero-argument callable returning a WebDriver.
        :return: self, to allow call chaining.
        :raises IOError: when the driver could not write the screenshot file.
        """
        picName = '{}/{}'.format(self.getSavePath(), self.image_name)
        browser = make_browser()
        try:
            if self.win_width and self.win_height:
                browser.set_window_size(width=self.win_width, height=self.win_height)
            else:
                browser.maximize_window()
            browser.get(self.win_url)
            # get_screenshot_as_file signals a write failure by returning
            # False; turn that into an error instead of reporting success.
            if not browser.get_screenshot_as_file(picName):
                raise IOError('could not write screenshot to {}'.format(picName))
        finally:
            # quit() ends the driver process as well; close() only closes the
            # window, and nothing was released at all when a call above raised.
            browser.quit()
        self.checkShotCallback(file=picName, error=None)
        return self

    # Headless capture through PhantomJS
    def shotScreenByPhantomjs(self):
        """Take the screenshot with PhantomJS.

        :return: self, to allow call chaining.
        """
        print('Selenium->shotScreenByPhantomjs')
        return self._shoot(
            lambda: webdriver.PhantomJS(executable_path=self.phantomjs_path))

    # Capture through the Chrome browser
    def shotScreenByChrome(self):
        """Take the screenshot with Chrome.

        :return: self, to allow call chaining.
        """
        print('Selenium->shotScreenByChrome')
        return self._shoot(lambda: webdriver.Chrome(self.chromedriver_path))

    def shotScreen(self):
        """Dispatch on ``self.shot_driver``; PhantomJS is the fallback.

        Exceptions are reported through the shot callback as a traceback
        string rather than propagated.

        :return: self, to allow call chaining.
        """
        try:
            print('Selenium->shotScreen')
            Screenshot.switch(self.shot_driver, {
                Screenshot.Screenshot.sDriver_Chrome: self.shotScreenByChrome,
                Screenshot.Screenshot.sDriver_Phantomjs: self.shotScreenByPhantomjs,
                'default': self.shotScreenByPhantomjs
            })
        except Exception:
            self.checkShotCallback(file=None, error=traceback.format_exc())
        return self

(3)图片合并处理

文件路径:venv/Screenshot/Driver/ImageMerge.py

from pathlib import Path
from PIL import Image

class ImageMerge():
    """Collect images and stack them vertically into a single output image."""

    # Class-level defaults; __init__ / get_path replace each of them with
    # per-instance values (im_list gets a fresh list per instance).
    root_path = None

    save_path = None

    im_list = []

    def __init__(self, save_path=None):
        """Prepare the output directory.

        :param save_path: directory for merged images (``str`` or ``Path``);
            defaults to ``image/merge`` two levels above this file.
        """
        print('ImageMerge->__init__')
        self.save_path = save_path
        self.im_list = []
        self.get_path()

    def get_path(self):
        """Resolve ``save_path`` to a ``Path`` and make sure it exists.

        :return: self, to allow call chaining.
        """
        print('ImageMerge->get_path')
        self.root_path = Path(__file__).parent.parent
        if not self.save_path:
            self.save_path = self.root_path.joinpath('image/merge')
        else:
            # Accept plain strings too; they have no exists()/mkdir().
            self.save_path = Path(self.save_path)
        self.save_path.mkdir(parents=True, exist_ok=True)
        return self

    def add_im(self, path):
        """Load the image at ``path`` and append it to the merge list.

        :return: self, to allow call chaining.
        """
        print('ImageMerge.add_im', path)
        # Copy the pixels into memory and close the file right away, so the
        # source file is not kept open for the lifetime of this object.
        with Image.open(path) as source:
            im = source.copy()
        self.im_list.append(im)
        return self

    def get_new_size(self):
        """Return ``(max_width, total_height)`` over all queued images.

        Both values are 0 when no image has been added.
        """
        print('ImageMerge->get_new_size')
        sizes = [img.size for img in self.im_list]
        max_width = max((width for width, _ in sizes), default=0)
        total_height = sum(height for _, height in sizes)
        return max_width, total_height

    def image_merge(self, filename, trim_right=15):
        """Stack the queued images top to bottom and save the result.

        :param filename: name of the output file inside ``save_path``.
        :param trim_right: pixels removed from the right edge when several
            images are merged (presumably to drop the page's vertical
            scrollbar — TODO confirm). A single image is never trimmed.
        :return: ``(file_path, new_img)`` — output path as ``str`` and the
            merged image.
        :raises IndexError: when no image has been added.
        """
        print('ImageMerge->image_merge')
        if not self.im_list:
            # Same exception type as before, but with an explanation.
            raise IndexError('no images to merge; call add_im() first')
        file_path = '{}/{}'.format(self.save_path, filename)
        if len(self.im_list) > 1:
            max_width, total_height = self.get_new_size()
            # Blank canvas that receives every image in turn.
            new_img = Image.new('RGB', (max_width - trim_right, total_height), 255)
            y = 0
            for img in self.im_list:
                new_img.paste(img, (0, y))
                y += img.size[1]
        else:
            # Single image: copy it unchanged onto an RGB canvas of its size.
            only = self.im_list[0]
            new_img = Image.new('RGB', only.size, 255)
            new_img.paste(only, (0, 0))
        new_img.save(file_path)
        return file_path, new_img

(4)基类BaseDriver

文件路径:venv/Screenshot/Driver/__init__.py

import shutil
from pathlib import Path

class BaseDriver():
    """Shared configuration and callback plumbing for screenshot drivers.

    All setters return ``self`` so calls can be chained. Subclasses override
    ``shotScreen`` to perform the actual capture.
    """

    # Requested window size; None means "not set".
    win_width = None

    win_height = None

    # Address of the page to capture.
    win_url = None

    # Directory for finished images and for temporary partial captures.
    image_path = None

    temp_path = None

    # File name of the finished image.
    image_name = None

    # Browser selector, interpreted by the concrete driver.
    shot_driver = None

    # Callable invoked with {'file': ..., 'error': ...} after a capture.
    shot_callback = None

    # Window position on the desktop.
    win_x = 0

    win_y = 0

    def setWindowPosition(self, x, y):
        """Store the window's position."""
        self.win_x = x
        self.win_y = y
        return self

    def setWindowSize(self, width, height):
        """Store the requested window size."""
        print('BaseDriver->setWindowSize')
        self.win_width = width
        self.win_height = height
        return self

    def setSavePath(self, save_path):
        """Store the output directory (``str`` or ``Path``)."""
        print('BaseDriver->setSavePath')
        self.image_path = save_path
        return self

    def setTempPath(self, temp_path):
        """Store the temporary directory (``str`` or ``Path``)."""
        self.temp_path = temp_path
        return self

    def url(self, url:str):
        """Store the address of the page to capture."""
        print('BaseDriver->url')
        self.win_url = url
        return self

    def filename(self, filename):
        """Store the file name of the finished image."""
        print('BaseDriver->filename')
        self.image_name = filename
        return self

    def driver(self, driver):
        """Store the browser selector."""
        print('BaseDriver->driver')
        self.shot_driver = driver
        return self

    def shotCallback(self, callback):
        """Store the callable to notify when a capture ends."""
        print('BaseDriver->shotCallback')
        self.shot_callback = callback
        return self

    def checkShotCallback(self, file=None, error=None):
        """Invoke the stored callback, if any, with the capture result.

        :param file: path of the saved image, or None on failure.
        :param error: error description, or None on success.
        """
        if self.shot_callback:
            self.shot_callback({
                'file': file,
                'error': error
            })
        return self

    def getSavePath(self):
        """Return the output directory as a ``Path``, creating it if needed.

        Falls back to ``image/merge`` two levels above this file when no
        path was set.
        """
        print('MainWindow->getSavePath')
        if not self.image_path:
            self.image_path = Path(__file__).parent.parent.joinpath('image/merge')
        else:
            # A path given as a plain string has no exists()/mkdir().
            self.image_path = Path(self.image_path)
        self.image_path.mkdir(parents=True, exist_ok=True)
        return self.image_path

    def getTempPath(self, isClean=False):
        """Return the temp directory as a ``Path``, creating it if needed.

        Falls back to ``image/temp`` two levels above this file when no
        path was set.

        :param isClean: when true and the directory already exists, delete it
            with everything inside and create it again empty.
        """
        print('MainWindow->getTempPath')
        if not self.temp_path:
            self.temp_path = Path(__file__).parent.parent.joinpath('image/temp')
        else:
            # A path given as a plain string has no exists()/mkdir().
            self.temp_path = Path(self.temp_path)
        if isClean and self.temp_path.exists():
            shutil.rmtree(self.temp_path)
        self.temp_path.mkdir(parents=True, exist_ok=True)
        return self.temp_path

    def shotScreen(self):
        """Take the screenshot; the base implementation does nothing."""
        print('BaseDriver->shotScreen')
        return self

(5)集合控制器

文件路径:venv/Screenshot/__init__.py

from Screenshot.Driver import selenium,ImageMerge,BaseDriver,MainWindow
from pathlib import Path
import traceback


class Screenshot():
    """Fluent front end: collect the options, then ``save()`` runs a driver."""

    # Rendering back ends accepted by pModel().
    pModel_QT5 = 1
    pModel_Selenium = 2

    # Browsers accepted by driver().
    sDriver_Phantomjs = 1
    sDriver_Chrome = 2

    # Option defaults.
    win_url = ''
    win_width = None
    win_height = None
    save_path = None
    temp_path = None
    shot_pModel = pModel_Selenium
    shot_driver = sDriver_Phantomjs
    shot_callback = None
    win_x = 0
    win_y = 0

    def __init__(self):
        print('ScreenShot->__init__')

    @staticmethod
    def _ensure_dir(directory):
        """Create ``directory`` (with parents) unless it already exists."""
        if not directory.exists():
            directory.mkdir(parents=True)
        return directory

    def setWindowPosition(self, x, y):
        """Remember the window position."""
        self.win_x, self.win_y = x, y
        return self

    def url(self, url):
        """Remember the address of the page to capture."""
        print('Screenshot->url')
        self.win_url = url
        return self

    def pModel(self, model):
        """Choose the rendering back end (one of the ``pModel_*`` values)."""
        print('Screenshot->pmodel')
        self.shot_pModel = model
        return self

    def driver(self, driver):
        """Choose the browser (one of the ``sDriver_*`` values)."""
        print('Screenshot->driver')
        self.shot_driver = driver
        return self

    def setWindowSize(self, width , height):
        """Remember the requested window size."""
        print('Screenshot->setWindowSize')
        self.win_width, self.win_height = width, height
        return self

    def savePath(self, path):
        """Set the output directory, creating it when it is missing."""
        print('Screenshot->savePath')
        self.save_path = Path(path)
        self._ensure_dir(self.save_path)
        return self

    def selenium(self):
        """Build the Selenium-based driver."""
        print('Screenshot->selenium')
        return selenium.Selenium()

    def pyqt5(self):
        """Build the PyQt5-based driver."""
        print('Screenshot->pyqt5')
        return MainWindow.MainWindow()

    def shotCallback(self, callback):
        """Remember the callable that receives the capture result."""
        print('Screenshot->shotCallback')
        self.shot_callback = callback
        return self

    def getDriver(self):
        """Instantiate the driver selected by ``shot_pModel``.

        Unknown values fall back to the Selenium driver.
        """
        print('Screenshot->getDriver')
        factories = {
            self.pModel_QT5: self.pyqt5,
            self.pModel_Selenium: self.selenium,
            'default': self.selenium,
        }
        return switch(self.shot_pModel, factories)

    def save(self, filename):
        """Configure the selected driver with the stored options and shoot.

        :param filename: file name of the finished image.
        :return: whatever the driver's ``shotScreen()`` returns.
        """
        print('Screenshot->save')
        return (
            self.getDriver()
            .setWindowPosition(x=self.win_x, y=self.win_y)
            .url(self.win_url)
            .driver(self.shot_driver)
            .setWindowSize(width=self.win_width, height=self.win_height)
            .filename(filename)
            .setSavePath(self.getSavePath())
            .setTempPath(temp_path=self.getTempPath())
            .shotCallback(self.shot_callback)
            .shotScreen()
        )

    def getSavePath(self):
        """Return the output directory, defaulting to ``image/merge`` beside this file."""
        print('Screenshot->getSavePath')
        if not self.save_path:
            self.save_path = Path(__file__).parent.joinpath('image/merge')
        return self._ensure_dir(self.save_path)

    def getTempPath(self):
        """Return the ``image/temp`` directory beside this file, creating it if needed."""
        print('Screenshot->getTempPath')
        return self._ensure_dir(Path(__file__).parent.joinpath('image/temp'))


def switch(key, options:dict):
    """Dictionary-based switch/case.

    Looks up ``key`` in ``options``, falling back to the ``'default'`` entry
    when the key is missing. A callable entry is called without arguments and
    its result returned; any other entry is returned as is.

    :param key: value to dispatch on.
    :param options: mapping of case values to results or zero-argument callables.
    :return: the selected result, or None when neither ``key`` nor
        ``'default'`` is present.
    """
    print('switch')
    item = options.get(key, options.get('default'))
    return item() if callable(item) else item

(6)实例·例子

文件路径:main.py

from Screenshot import *
from pathlib import Path
# Page to capture and the directory (next to this script) for the result.
url = 'http://blog.sina.com.cn/lm/rank/focusbang//'
save_path = Path(__file__).parent.joinpath('shotScreen')


def shotCallback(res):
    """Print the result dict handed over by the driver after the capture."""
    print('file_path',res)


# Configure the capture step by step, then take it with PhantomJS via Selenium.
(
    Screenshot()
    .setWindowPosition(x=30, y=0)
    .url(url)
    .setWindowSize(1000, 800)
    .savePath(save_path)
    .shotCallback(shotCallback)
    .pModel(model=Screenshot.pModel_Selenium)
    .driver(driver=Screenshot.sDriver_Phantomjs)
    .save(filename='screen.png')
)

(7)截图效果

(原文此处为截图效果图片)

三、总结与资源

(1)截图方式总结

以上三种截图方案中只有phantomjs截图是隐式截图,其他两种方式(pyqt5和chrome)截图都是显式截图。

隐式截图:无法看到截图界面,用户在桌面的操作不会影响截图效果;
显式截图:执行过程中,程序会调用浏览器在桌面打开一个浏览器窗口,然后通过截屏实现截图功能;

显式截图中,pyqt5做了智能滚动截图然后合并处理,chrome只是单纯截图网页显示部分。如果网页内容过长,有滚动条的推荐使用隐式截图(phantomjs)和pyqt5方式截图。

(2)资源下载

如需要下载资源参考:python实现网页截图(v1.0.0).rar

  • 8
    点赞
  • 41
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值