公司的开发人员根据UI设计稿制作了2000多个H5网页模板。本文利用 Python 自动化地从这2000多个模板中挑选出有错误的模板。
经过粗略的查看,发现有些模板显示为空白,或者与原UI设计不一致,根据这个来设计方案。
首先考虑分成四步来做:
1.获取H5网页的地址
2.打开这些网页
3.打开网页后与UI设计图进行比对
4.挑选出错误的网页
思路有了。接下来就是实现技术了:
1.我们这边的所有网页资源都是存储在服务器上面,网页的url,name,id等信息使用mysql存储。可以使用 pymysql 来获取网页的信息。
import pymysql
# MySQL helper class
class MysqlHelper:
    """Thin wrapper around a single pymysql connection and its dict cursor.

    The connection arguments in ``__init__`` are placeholders carried over
    from the original write-up (including the undefined name used for
    ``port``); replace them with real values before running.
    """

    def __init__(self):
        # DictCursor makes every fetched row a dict keyed by column name,
        # which is what the callers rely on (``row.get('id')`` etc.).
        self.con = pymysql.connect(
            host='ip地址',
            user='用户名',
            password='密码',
            db='数据库名',
            port=端口号,
            charset='utf8',
            cursorclass=pymysql.cursors.DictCursor,
        )
        self.cursor = self.con.cursor()

    def find(self, sql, args=None):
        """Execute ``sql`` with ``args`` and return the first row (or None)."""
        self.cursor.execute(sql, args=args)
        self.con.commit()
        return self.cursor.fetchone()

    def findall(self, sql, args=None):
        """Execute ``sql`` with ``args`` and return all rows."""
        self.cursor.execute(sql, args=args)
        self.con.commit()
        return self.cursor.fetchall()

    def close(self):
        """Close the cursor first, then the connection."""
        self.cursor.close()
        self.con.close()

    def get_data(self):
        """Return every template row, then close the connection.

        ``thumb_path`` is selected as well because the run script reads it
        from each row to build the reference-image URL.
        NOTE(review): assumes ``h5_templates`` has a ``thumb_path`` column -
        confirm against the real schema.

        The helper cannot be reused after this call: the connection is
        closed even when the query raises.
        """
        sql = 'select id, html_path, thumb_path, name from h5_templates'
        try:
            return self.findall(sql)
        finally:
            self.close()


if __name__ == '__main__':
    mydb = MysqlHelper()
    result = mydb.get_data()
2.编写截图类。H5网页的内容大都超出了浏览器的最大窗口,因此使用无头(headless)模式运行浏览器(不打开浏览器窗口),并获取H5页面实际的宽度和高度来设置浏览器窗口的大小。原图的截取方式是:先对整个浏览器窗口截图,再通过img标签获取图片的位置和尺寸,最后用PIL的Image类把原图裁剪出来。
from PIL import Image
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
# Capture a screenshot of the rendered H5 page.
def take_screenshot(browser, url, save_fn="capture.png"):
    """Load ``url`` in ``browser`` and save a screenshot to ``save_fn``.

    Before the shot is taken the window is resized to the document's
    scroll width and height. The caller creates ``browser`` and is
    responsible for closing it; nothing is closed here.
    """
    browser.get(url)
    page_size = (
        browser.execute_script("return document.documentElement.scrollWidth"),
        browser.execute_script("return document.documentElement.scrollHeight"),
    )
    print(*page_size)
    browser.set_window_size(*page_size)
    # Fixed pause before capturing - presumably to let the page settle
    # after the resize.
    time.sleep(1)
    browser.save_screenshot(save_fn)
# Capture the original (reference) image.
def take_screen_pic(driver, url, path1, path2):
    """Screenshot the page at ``url`` and crop it to its ``/html/body/img`` element.

    Args:
        driver: Selenium WebDriver created by the caller; it is not closed here.
        url: Address of the page that displays the reference image.
        path1: File the full-window screenshot is written to.
        path2: File the cropped image is written to.

    Raises:
        selenium.common.exceptions.TimeoutException: if the ``<img>`` element
            is not present within 20 seconds.
    """
    driver.maximize_window()
    driver.get(url)
    img_element = WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.XPATH, './html/body/img'))
    )
    driver.save_screenshot(path1)  # screenshot of the whole browser window

    # Read position and size once each; every access is a WebDriver round-trip.
    location = img_element.location
    size = img_element.size
    left, top = location['x'], location['y']
    # NOTE(review): element coordinates are used directly as pixel
    # coordinates, which assumes a device pixel ratio of 1 - confirm.
    box = (left, top, left + size['width'], top + size['height'])

    # Close the screenshot file deterministically once the crop is saved.
    with Image.open(path1) as shot:
        shot.crop(box).save(path2)
3.编写图片对比类
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import cv2
from PIL import Image, ImageChops
from settings import settings
# Average-hash (aHash) algorithm.
def aHash(img):
    """Return the 64-character average hash of a BGR image.

    The image is resized to 8x8 and converted to grayscale; each pixel
    contributes '1' when it is brighter than the mean gray level and '0'
    otherwise, in row-major order.

    Args:
        img: Image array as returned by ``cv2.imread`` (BGR channel order).

    Returns:
        A string of 64 characters, each '0' or '1'.
    """
    # Shrink to 8x8.
    img = cv2.resize(img, (8, 8), interpolation=cv2.INTER_CUBIC)
    # Convert to grayscale.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Convert to Python ints before summing: accumulating the uint8 array
    # scalars directly stays in uint8 under NumPy >= 2 promotion rules and
    # wraps around, which corrupts the mean and therefore the hash.
    pixels = [int(value) for value in gray.flatten()]
    avg = sum(pixels) / 64
    return ''.join('1' if value > avg else '0' for value in pixels)
# Difference-hash (dHash) algorithm.
def dHash(img):
    """Return the 64-character difference hash of a BGR image.

    The image is resized to 9 columns by 8 rows and converted to
    grayscale. Each of the 8 adjacent pixel pairs in every row yields '1'
    when the left pixel is brighter than its right neighbour, else '0'.
    """
    # 9 wide x 8 high, so every row has 8 left/right neighbour pairs.
    resized = cv2.resize(img, (9, 8), interpolation=cv2.INTER_CUBIC)
    gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
    bits = []
    for row in range(8):
        for col in range(8):
            bits.append('1' if gray[row, col] > gray[row, col + 1] else '0')
    return ''.join(bits)
# Compare two images by their average hashes.
def cmpHash(path1, path2, similarity=settings.similarity):
    """Compare the images at ``path1`` and ``path2`` using ``aHash``.

    Args:
        path1: Path of the first image file.
        path2: Path of the second image file.
        similarity: Largest number of differing hash bits that still
            counts as a match.

    Returns:
        ``(matched, n)`` where ``n`` is the number of hash positions that
        differ and ``matched`` is ``n <= similarity``. If the two hashes
        have different lengths the result is ``(False, -1)``; this keeps
        the 2-tuple shape that callers unpack.
    """
    img1 = cv2.imread(path1)
    img2 = cv2.imread(path2)
    hash1 = aHash(img1)
    hash2 = aHash(img2)
    print(hash1, hash2)
    # A length mismatch means the inputs were not comparable; -1 marks it.
    if len(hash1) != len(hash2):
        return False, -1
    # Count differing positions; a smaller n means more similar images.
    n = sum(bit1 != bit2 for bit1, bit2 in zip(hash1, hash2))
    return n <= similarity, n
# Check whether an image is a single solid colour.
def judge_color(path):
    """Report whether the image at ``path`` has more than one colour.

    Returns:
        ``(False, '1')`` when the image has exactly one colour;
        ``(True, 'beyond')`` when ``getcolors`` returns ``None`` for the
        99999-colour limit; otherwise ``(True, str(count))`` with the
        number of distinct colours found.
    """
    colours = Image.open(path).getcolors(maxcolors=99999)
    if colours is None:
        return True, 'beyond'
    count = len(colours)
    return count != 1, str(count)
4.编写一个日志记录类
from openpyxl import workbook,load_workbook
def init_excel(path, sheetname=None):
    """Create a new workbook at ``path`` containing only the header row.

    Args:
        path: File the workbook is saved to.
        sheetname: Optional title for the sheet that receives the header.
            When omitted the default active sheet keeps its default title.

    The header labels name the fields that ``write`` stores in columns
    A-D: id, name, h5_url, differentia.
    """
    wb = workbook.Workbook()
    ws = wb.active
    # A fresh workbook has only its default sheet, so looking up another
    # name would raise KeyError; rename the active sheet instead.
    if sheetname and ws.title != sheetname:
        ws.title = sheetname
    for column, label in enumerate(('id', 'name', 'h5_url', 'differentia'), start=1):
        ws.cell(1, column).value = label
    wb.save(path)
    wb.close()
def write(path, data, row_index, sheetname=None):
    """Store one record in row ``row_index`` of the workbook at ``path``.

    Columns 1-4 receive ``data``'s 'id', 'name', 'h5_url' and
    'differentia' values (``None`` for a missing key). The workbook is
    loaded, modified and saved again on every call.

    Returns:
        ``row_index + 1``, i.e. the row the next record should use.
    """
    book = load_workbook(path)
    sheet = book[sheetname] if sheetname else book.active
    for column, key in enumerate(('id', 'name', 'h5_url', 'differentia'), start=1):
        # Assign through .value so that None also overwrites an old value.
        sheet.cell(row_index, column).value = data.get(key)
    book.save(path)
    book.close()
    return row_index + 1
# Manual smoke test: create a workbook, write one record, print the next row.
if __name__ == '__main__':
    init_excel('test.xlsx')
    # Keys must be the ones write() reads ('id', 'name', 'h5_url',
    # 'differentia'); any other key is ignored and its column stays empty.
    val = {
        "id": 1,
        "name": 2,
        "h5_url": 3,
        "differentia": 4
    }
    row_index = 2
    row_index = write('test.xlsx', val, row_index)
    print(row_index)
5.编写run
from selenium import webdriver
from mysqlHelper import MysqlHelper
from selenium.webdriver.chrome.options import Options
import time
import os
from screenshot import take_screen_pic,take_screenshot
from image_compare import cmpHash,judge_color
from excelHelper import init_excel,write
from settings import settings
def run():
    """Screenshot every template, compare it, and log the failing ones.

    For each row returned by ``MysqlHelper.get_data`` this captures the
    rendered H5 page and the reference image, then checks them according
    to ``settings.flag``:

    * ``1`` - compare the H5 screenshot with the reference via ``cmpHash``;
    * ``2`` - check the H5 screenshot with ``judge_color``.

    Screenshots of passing templates are deleted. Failing templates keep
    their screenshots and get a row in ``log.xlsx`` inside a new
    ``../imagesave_<timestamp>`` directory.

    Raises:
        ValueError: if ``settings.flag`` is neither 1 nor 2.
    """
    # Fail before any work is done; otherwise an unsupported flag would
    # leave ``result``/``n`` unset or stale from the previous template.
    if settings.flag not in (1, 2):
        raise ValueError('settings.flag must be 1 or 2, got %r' % (settings.flag,))

    mydb = MysqlHelper()
    data_lst = mydb.get_data()
    path = '../imagesave_' + time.strftime('%y%m%d%H%M%S')
    if not os.path.exists(path):
        os.mkdir(path)
    log_path = path + '/log.xlsx'
    init_excel(log_path)
    row_index = 2  # row 1 holds the header written by init_excel

    for data in data_lst:
        h5id = data.get('id')
        h5name = data.get('name')
        h5_url = settings.url + str(data.get('html_path'))
        img_url = settings.url + str(data.get('thumb_path'))
        print(h5_url, img_url)

        base_path = path + '/' + h5name
        h5_img_path = base_path + '_h5.png'
        img_img_path = base_path + '_img.png'
        shot_img_path = base_path + '_shot.png'

        chrome_options = Options()
        chrome_options.add_argument('headless')
        # NOTE(review): newer Selenium releases expect ``options=`` instead
        # of ``chrome_options=`` - confirm against the installed version.
        driver = webdriver.Chrome(chrome_options=chrome_options)
        try:
            take_screenshot(driver, h5_url, h5_img_path)
            take_screen_pic(driver, img_url, img_img_path, shot_img_path)
        finally:
            # quit() ends the whole session, including the driver process,
            # and must run even when a capture fails.
            driver.quit()

        # The full-window shot was only needed to produce the cropped one.
        os.remove(img_img_path)

        if settings.flag == 1:
            result, n = cmpHash(h5_img_path, shot_img_path)
        else:
            result, n = judge_color(h5_img_path)
        print(result, n)

        if result:
            os.remove(h5_img_path)
            os.remove(shot_img_path)
        else:
            val = {
                "id": h5id,
                "name": h5name,
                "h5_url": h5_url,
                "differentia": n
            }
            row_index = write(log_path, val, row_index)


if __name__ == '__main__':
    run()
运行结果如下:
第一张是H5展示的图片,第二张是原图。