基于图像识别和焦点定位技术实现的桌面GUI自动化
效果:
GUI自动化测试视频
本人使用的是 Windows 10 专业版;在 Windows 2012 上测试时执行有误
实现思路:每次执行动作时先截取当前屏幕内容,再使用 OpenCV 根据动作图片进行图像识别,得到识别到的图像焦点位置,然后控制鼠标移动到该位置并执行相应的鼠标、键盘操作。鼠标和键盘操作基于 Python 的 pynput 实现;每个动作即表示鼠标移动到某一位置后进行的一系列键盘、鼠标操作,可参考 pynput 的使用说明。动作图片一般就是你要操作的那个软件的图标的截图;需要注意,也存在动作图片识别不出来的情况。
代码示例:未标注版本的包直接安装最新版即可
import os
import time
import cv2
import mss # 9.0.1
# numpy 1.26.4
import numpy as np
import pynput
from PIL import Image
# pynput 1.7.6
from pynput.keyboard import Controller as Key_Controller, Key
from pynput.mouse import Controller
class DesktopAutomation:
    """Drive a desktop GUI by locating template images on the screen.

    For every action the whole screen is captured, the template image is
    searched for with OpenCV template matching, the mouse is moved to the
    match (plus a caller-supplied offset) and the requested mouse / keyboard
    operations are performed through pynput.
    """

    # Action name -> attribute name on ``pynput.mouse.Button``.  Resolved at
    # call time so that defining the class does not touch pynput.
    _MOUSE_BUTTONS = {"button_left": "left", "button_right": "right"}
    # Names of ``pynput.keyboard.Key`` members accepted as the first element
    # of a "fingerboard" action.
    _SPECIAL_KEYS = (
        "space", "ctrl", "enter", "end", "tab",
        "shift", "caps_lock", "alt", "delete",
    )

    def __init__(self, main_image='images/zm.png', threshold=0.8, timeout=10):
        """Create the controllers and remember the matching settings.

        main_image: path the screenshot is written to before matching.
        threshold: minimum TM_CCOEFF_NORMED score accepted as a match.
        timeout: retry budget (one-second pauses) while waiting for the
            template to appear on screen.
        """
        self.mouse = Controller()
        self.keyboard = Key_Controller()
        self.x = None
        self.y = None
        self.main_image = main_image
        self.template_gray = None
        self.input_method = "Chinese"
        self.threshold = threshold
        self.timeout = timeout
        # Top-left corner of the captured area in desktop coordinates.
        self._origin = (0, 0)

    @classmethod
    def _is_valid_hotkey(cls, keys):
        """Return True if *keys* is a list ``[special_key]`` or ``[special_key, key]``."""
        return (
            isinstance(keys, list)
            and 1 <= len(keys) <= 2
            and keys[0] in cls._SPECIAL_KEYS
        )

    def _desktop_screenshot(self):
        """Capture the screen and save it to ``self.main_image``."""
        with mss.mss() as sct:
            # monitors[0] is the bounding box of ALL monitors together; its
            # left/top may be non-zero, so keep them for the mouse mapping.
            monitor = sct.monitors[0]
            shot = sct.grab(monitor)
        self._origin = (monitor["left"], monitor["top"])
        # Convert the raw BGRA buffer into a PIL image.
        img = Image.frombytes('RGB', shot.size, shot.bgra, 'raw', 'BGRX')
        directory = os.path.dirname(self.main_image)
        if directory:
            os.makedirs(directory, exist_ok=True)
        # save() overwrites any previous screenshot.
        img.save(self.main_image)

    def _obtain_location(self):
        """Locate the template in the saved screenshot.

        On success ``self.x`` / ``self.y`` are set to the top-left corner of
        the best match (screenshot coordinates); otherwise they are left
        untouched.

        Raises FileNotFoundError if either image cannot be read.
        """
        screen = cv2.imread(self.main_image)
        if screen is None:
            raise FileNotFoundError(f"cannot read screenshot: {self.main_image}")
        template = cv2.imread(self.template_gray)
        if template is None:
            raise FileNotFoundError(f"cannot read template image: {self.template_gray}")

        screen_gray = cv2.cvtColor(screen, cv2.COLOR_BGR2GRAY)
        template_gray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
        # matchTemplate requires the template to fit inside the image.
        if (template_gray.shape[0] > screen_gray.shape[0]
                or template_gray.shape[1] > screen_gray.shape[1]):
            return

        scores = cv2.matchTemplate(screen_gray, template_gray, cv2.TM_CCOEFF_NORMED)
        # Use the single best-scoring position rather than whichever hit
        # above the threshold happens to come last.
        _, best_score, _, best_loc = cv2.minMaxLoc(scores)
        if best_score >= self.threshold:
            self.x = int(best_loc[0])
            self.y = int(best_loc[1])

    def _wait_for_template(self):
        """Re-capture and match until the template is found or the budget is spent."""
        waited = 0
        while True:
            self._desktop_screenshot()
            self._obtain_location()
            if self.x is not None and self.y is not None:
                return True
            if waited > self.timeout:
                return False
            time.sleep(1)
            waited += 1

    def carry_out_location(self, template_gray, location, tune_x, tune_y):
        """Find the template on screen and run the given actions there.

        template_gray: path of the template (step) image.
        location: sequence of ``(action, argument)`` pairs:
            ``("button_left" | "button_right", click_count)``,
            ``("fingerboard", [special_key] | [special_key, key])``,
            ``("input", text)``.  Unknown action names are skipped.
        tune_x: x offset added to the matched position.
        tune_y: y offset added to the matched position.

        Returns True when every action was carried out, False when the
        template was not found or a "fingerboard" command was malformed.
        """
        self.template_gray = template_gray
        # Never start from coordinates left over by an earlier call.
        self.x = None
        self.y = None
        try:
            found = self._wait_for_template()
            print(self.x, self.y, '鼠标位置')
            if not found:
                print("未找到图标")
                return False

            self.mouse.position = (
                self.x + self._origin[0] + tune_x,
                self.y + self._origin[1] + tune_y,
            )
            for step in location:
                action, argument = step[0], step[1]
                if action in self._MOUSE_BUTTONS:  # mouse click(s)
                    button = getattr(pynput.mouse.Button, self._MOUSE_BUTTONS[action])
                    self.mouse.click(button, argument)
                elif action == 'fingerboard':  # special key / shortcut
                    if not self._is_valid_hotkey(argument):
                        print("命令有误")
                        return False
                    # pressed() releases the key even if the inner press fails.
                    with self.keyboard.pressed(getattr(Key, argument[0])):
                        if len(argument) > 1:
                            self.keyboard.press(argument[1])
                            self.keyboard.release(argument[1])
                elif action == 'input':  # typed text
                    if self.input_method == "Chinese":
                        # A Shift tap is sent once to switch the IME to
                        # English before the first text is typed.
                        self.keyboard.press(Key.shift)
                        self.keyboard.release(Key.shift)
                        self.input_method = "English"
                    self.keyboard.type(argument)
                # Give the GUI time to react before the next action.
                time.sleep(1)
            return True
        finally:
            # Reset on every exit path so a later call cannot reuse this match.
            self.x = None
            self.y = None
if __name__ == '__main__':
    # Each entry is (template image, actions, x offset, y offset); the
    # entries are executed strictly in this order.
    steps = (
        ("images/yj.png", [("button_left", 2)], 5, 5),
        ("images/create_1.png", [("button_right", 1)], 20, 50),
        ("images/create_2.png", [("button_left", 1)], 10, 10),
        ("images/url.png", [("button_left", 1), ("input", "192.168.51.12/shell.php")], 50, 10),
        ("images/pwd.png", [("button_left", 1), ("input", "shell")], 50, 10),
        ("images/test_links.png", [("button_left", 1)], 1, 1),
        ("images/test_links_ret.png", [("button_left", 1)], 1, 1),
        ("images/create_links.png", [("button_left", 1)], 1, 1),
        ("images/create_links_ret.png", [("button_left", 1)], 1, 1),
        ("images/create_1.png", [("button_right", 1)], 20, 50),
        ("images/terminal.png", [("button_left", 1)], 1, 1),
        ("images/shell.png", [("button_left", 1), ("input", "id"), ("fingerboard", ["enter"])], 1, 1),
    )

    automation = DesktopAutomation()
    for template_path, actions, offset_x, offset_y in steps:
        automation.carry_out_location(template_path, actions, offset_x, offset_y)