Python实现Windows GUI自动化

好好学习超人强

已于 2024-04-14 14:53:52 修改

阅读量291

点赞数 5

文章标签： python 自动化 opencv

于 2024-04-14 14:12:13 首次发布

本文链接：https://blog.csdn.net/python_2001/article/details/137743226

版权

本文介绍了一种利用图像识别技术（如OpenCV）和鼠标键盘操作（通过pynput库）实现的桌面GUI自动化方法，通过截取屏幕、识别操作图片和定位焦点来模拟用户操作，适用于Windows系统，展示了具体代码实例.

摘要由CSDN通过智能技术生成

基于图像识别和焦点定位技术实现的桌面GUI自动化

效果：

gui自动化测试视频

本人使用的是windows10专业版，测试windows 2012版执行有误

实现思路：每次执行动作时会截取当前屏幕内容，然后使用opencv根据动作图片进行图像识别，获取识别到的图像焦点位置，再控制鼠标移动到该位置执行相关鼠标键盘操作。鼠标键盘操作是基于python pynput实现的，每个动作即表示鼠标移动到某一位置后进行一系列键盘鼠标操作，可参考pynput的使用。动作图片一般就是你要操作的那个软件的图标的截图，也有识别不出来动作图片的情况。

代码实例:未标识的包版本直接下最新版即可

import os
import time

import cv2
import mss # 9.0.1
# numpy 1.26.4
import numpy as np
import pynput
from PIL import Image
# pynput 1.7.6
from pynput.keyboard import Controller as Key_Controller, Key
from pynput.mouse import Controller



class DesktopAutomation:
    """Drive the desktop by finding a template image on screen and acting on it.

    Each step takes a full-screen screenshot, locates a template image in it
    with OpenCV template matching, moves the mouse to the matched position and
    then performs a list of mouse / keyboard actions through pynput.
    """

    # Special keys accepted as the first element of a "fingerboard" command.
    _HOTKEY_NAMES = ("space", "ctrl", "enter", "end", "tab", "shift",
                     "caps_lock", "alt", "delete")

    def __init__(self, main_image='images/zm.png', threshold=0.8, timeout=10):
        """Create the controllers and remember the matching settings.

        main_image: path the screenshot is written to before matching.
        threshold: minimum TM_CCOEFF_NORMED score accepted as a match.
        timeout: number of one-second waits allowed before giving up on a
            template (the loop stops once the wait counter exceeds it).
        """
        self.mouse = Controller()
        self.keyboard = Key_Controller()
        self.x = None
        self.y = None
        self.main_image = main_image
        self.template_gray = None
        self.input_method = "Chinese"
        self.threshold = threshold
        self.timeout = timeout
        # Top-left corner of the captured area in screen coordinates.
        self._origin = (0, 0)

    @classmethod
    def _is_valid_hotkey(cls, keys):
        """Return True if *keys* is a list of 1 or 2 items led by a known special key."""
        return (
            isinstance(keys, list)
            and 1 <= len(keys) <= 2
            and keys[0] in cls._HOTKEY_NAMES
        )

    def _desktop_screenshot(self):
        """Capture the screen and save it to ``self.main_image``."""
        folder = os.path.dirname(self.main_image)
        if folder:
            os.makedirs(folder, exist_ok=True)
        with mss.mss() as sct:
            # monitors[0] is the bounding box of ALL monitors combined, not the
            # first monitor; its left/top may be non-zero on multi-monitor
            # setups, so remember it to translate matches to screen coordinates.
            monitor = sct.monitors[0]
            self._origin = (monitor["left"], monitor["top"])
            output = sct.grab(monitor)
            # Convert the raw BGRA buffer into a PIL image.
            img = Image.frombytes('RGB', output.size, output.bgra, 'raw', 'BGRX')
            # Image.save overwrites any previous screenshot.
            img.save(self.main_image)

    def _obtain_location(self):
        """Locate the template in the saved screenshot.

        Sets ``self.x`` / ``self.y`` to the top-left corner of the best match
        whose score reaches ``self.threshold``; leaves both as ``None`` when
        nothing matches.

        Raises FileNotFoundError if the screenshot or the template image
        cannot be read.
        """
        # Never let a position from a previous search leak into this one.
        self.x = None
        self.y = None

        screen = cv2.imread(self.main_image)
        if screen is None:
            raise FileNotFoundError(f"cannot read screenshot: {self.main_image}")
        template = cv2.imread(self.template_gray)
        if template is None:
            raise FileNotFoundError(f"cannot read template image: {self.template_gray}")

        # Template matching is done on grayscale images.
        screen_gray = cv2.cvtColor(screen, cv2.COLOR_BGR2GRAY)
        template_gray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

        # matchTemplate fails if the template is larger than the searched image.
        if (template_gray.shape[0] > screen_gray.shape[0]
                or template_gray.shape[1] > screen_gray.shape[1]):
            return

        res = cv2.matchTemplate(screen_gray, template_gray, cv2.TM_CCOEFF_NORMED)
        # Use the single best score rather than whichever location above the
        # threshold happens to come last in scan order.
        _, best_score, _, best_loc = cv2.minMaxLoc(res)
        if best_score >= self.threshold:
            self.x = int(best_loc[0]) + self._origin[0]
            self.y = int(best_loc[1]) + self._origin[1]

    def carry_out_location(self, template_gray, location, tune_x, tune_y):
        """Find the template on screen and run the given actions at that position.

        template_gray: path of the template (step) image to look for.
        location: list of actions, each ``(kind, argument)``:
            ``("button_left" | "button_right", click_count)``,
            ``("fingerboard", [special_key] or [special_key, key])``,
            ``("input", text)``.
        tune_x: x offset added to the matched position.
        tune_y: y offset added to the matched position.

        Prints a message and returns ``None`` without acting when a
        "fingerboard" command is malformed or the template is not found.
        """
        # Validate up front so a malformed command does not leave the action
        # list half-executed.
        for action in location:
            if action[0] == 'fingerboard' and not self._is_valid_hotkey(action[1]):
                print("命令有误")
                return

        self.template_gray = template_gray
        self.x = None
        self.y = None
        waited = 0
        while True:
            self._desktop_screenshot()
            self._obtain_location()
            if (self.x is not None and self.y is not None) or waited > self.timeout:
                break
            time.sleep(1)
            waited += 1

        print(self.x, self.y, '鼠标位置')
        if self.x is None or self.y is None:
            print("未找到图标")
            return

        buttons = {
            "button_left": pynput.mouse.Button.left,
            "button_right": pynput.mouse.Button.right,
        }
        try:
            self.mouse.position = (self.x + tune_x, self.y + tune_y)
            for action in location:
                kind = action[0]
                if kind in buttons:  # mouse clicks
                    for _ in range(action[1]):
                        self.mouse.click(buttons[kind])
                elif kind == 'fingerboard':  # special key / key combination
                    keys = action[1]
                    print(keys, 'i')
                    special = getattr(Key, keys[0])
                    self.keyboard.press(special)
                    try:
                        if len(keys) > 1:
                            self.keyboard.press(keys[1])
                            self.keyboard.release(keys[1])
                    finally:
                        # Always release the held key, even if the second
                        # key press fails.
                        self.keyboard.release(special)
                elif kind == 'input':  # text input
                    if self.input_method == "Chinese":
                        # Tap shift once before the first text input; the
                        # state flag suggests this toggles the IME to English.
                        self.keyboard.press(Key.shift)
                        self.keyboard.release(Key.shift)
                        self.input_method = "English"
                    self.keyboard.type(action[1])

                time.sleep(1)
        finally:
            # Clear the position so the next step starts a fresh search.
            self.x = None
            self.y = None


if __name__ == '__main__':
    # Scripted GUI walkthrough. Each row is one step:
    # (template image, actions to perform, x offset, y offset).
    steps = (
        ("images/yj.png", [("button_left", 2)], 5, 5),
        ("images/create_1.png", [("button_right", 1)], 20, 50),
        ("images/create_2.png", [("button_left", 1)], 10, 10),
        ("images/url.png", [("button_left", 1), ("input", "192.168.51.12/shell.php")], 50, 10),
        ("images/pwd.png", [("button_left", 1), ("input", "shell")], 50, 10),
        ("images/test_links.png", [("button_left", 1)], 1, 1),
        ("images/test_links_ret.png", [("button_left", 1)], 1, 1),
        ("images/create_links.png", [("button_left", 1)], 1, 1),
        ("images/create_links_ret.png", [("button_left", 1)], 1, 1),
        ("images/create_1.png", [("button_right", 1)], 20, 50),
        ("images/terminal.png", [("button_left", 1)], 1, 1),
        ("images/shell.png", [("button_left", 1), ("input", "id"), ("fingerboard", ["enter"])], 1, 1),
    )

    browser = DesktopAutomation()
    # Run the steps strictly in order; each one waits for its own template.
    for image_path, actions, offset_x, offset_y in steps:
        browser.carry_out_location(image_path, actions, offset_x, offset_y)

好好学习超人强

关注

5
点赞
踩
0

收藏

觉得还不错? 一键收藏
1
评论
Python实现Windows GUI自动化

实现思路：每次执行动作时会截取当前屏幕内容，然后使用opencv根据动作图片进行图像识别，获取识别到的图像焦点位置，再控制鼠标移动到该位置执行相关鼠标键盘操作。鼠标键盘操作是基于python pynput实现的，每个动作即表示鼠标移动到某一位置后进行一系列键盘鼠标操作，可参考pynput的使用。动作图片一般就是你要操作的那个软件的图标的截图，也有识别不出来动作图片的情况。本人使用的是windows10专业版，测试windows 2012版执行有误。效果：视频还没通过审核先欠着。
复制链接

扫一扫