1.项目介绍
基于Selenium的GUI发票查询小工具
该工具可以批量地提取PDF、PNG和JPG格式发票中的信息,
执行发票查验,并将查询结果图保存到本地。
2.环境依赖
Python3
Chrome
ChromeDriver
3.目录结构
├── Chrome 谷歌浏览器
├── db 存放百度文字识别的token
├── pic 存放验证码图片和分离的色彩图片
├── utils 工具目录
│ ├── Automation.py 爬虫执行步骤
│ ├── BrowerDrive.py 初始化浏览器
│ ├── Dialog.py 对话框
│ ├── TaskEvent.py 任务事件
│ ├── TaskThread.py 任务线程
│ ├── VerifCodeOCR.py 验证码处理
├── v3.py 界面代码
4.项目实现
编写一个基于Selenium初始化浏览器对象的类
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
class ChromeDrive:
    """Create a headless Chrome WebDriver with a stealth script pre-installed.

    After construction, ``browser`` holds the selenium ``Chrome`` instance, or
    ``None`` when the browser could not be created (the error is printed).
    """

    # Class-level default so the attribute exists even before __init__ assigns it.
    browser = None

    def __init__(self, chrome_path, driver_path, stealth_path):
        """
        :param chrome_path: path of the Chrome browser executable
        :param driver_path: path of the chromedriver executable
        :param stealth_path: path of the stealth.min.js file
        """
        options = self.initOptions(chrome=chrome_path)
        self.browser = self.initBrowser(driver=driver_path, options=options, stealth=stealth_path)

    def initOptions(self, chrome):
        """
        Build the Chrome options used by the crawler.

        :param chrome: browser path ...\\chrome.exe
        :return: the configured ``Options`` object, or None if building it failed
        """
        try:
            options = Options()
            options.add_argument("--headless")
            options.page_load_strategy = 'eager'
            options.add_argument('--ignore-certificate-errors')
            options.add_experimental_option('excludeSwitches', ['enable-automation'])
            # The Chrome build number must be well-formed (96.0.4664.45); a
            # malformed version string makes the user agent easy to fingerprint.
            options.add_argument(
                'user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) '
                'Chrome/96.0.4664.45 Safari/537.36'
            )
            options.add_argument('--start-maximized')
            # Chrome parses this switch as "width,height"; the "1920x1080"
            # form is not recognised and the requested size is ignored.
            options.add_argument('--window-size=1920,1080')
            options.binary_location = chrome
            return options
        except Exception as e:
            print(e)
            return None

    def initBrowser(self, driver, options, stealth):
        """
        Start Chrome and register the stealth script for every new document.

        :param driver: driver path ...\\chromedriver.exe
        :param options: browser options
        :param stealth: path of the stealth.min.js file
        :return: the browser object, or None if start-up failed
        """
        browser = None
        try:
            # Read the script first so that a missing or unreadable file fails
            # before a Chrome process has been spawned.
            with open(stealth, mode="r", encoding="utf-8") as f:
                js = f.read()
            # NOTE(review): `executable_path` was removed in Selenium 4.10 in
            # favour of a Service object; kept as-is because the Selenium
            # version this project pins is not visible here.
            browser = Chrome(executable_path=driver, options=options)
            browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": js})
            return browser
        except Exception as e:
            print(e)
            # Do not leave an orphaned headless Chrome/chromedriver behind
            # when set-up fails after the browser was launched.
            if browser is not None:
                try:
                    browser.quit()
                except Exception as quit_error:
                    print(quit_error)
            return None
爬虫的步骤不编写了,照着来就行
下面介绍验证码的色彩分离,
图像识别基于ddddocr。
import cv2 as cv
import ddddocr
import os
# Directory (under the current working directory) that holds the captcha
# image and the colour-separated images derived from it.
PIC_DIR = os.path.join(os.getcwd(), 'pic')

# Full paths of the captcha image and of each per-colour output image.
CODE_PATH, RED_PATH, YELLOW_PATH, BLUE_PATH, BLACK_PATH = (
    os.path.join(PIC_DIR, file_name)
    for file_name in ('code.png', 'red.png', 'yellow.png', 'blue.png', 'black.png')
)
def splitColor(filename):
    """
    Separate the colours of a captcha image: black, red, blue, yellow.

    The per-pixel classification logic is elided (``...``) in this excerpt.

    :param filename: path of the captcha image
    :return: None; the four images are written to RED_PATH, YELLOW_PATH,
        BLUE_PATH and BLACK_PATH
    """
    try:
        img = cv.imread(filename)
        height, width = img.shape[:2]
        # Elided in this excerpt; presumably creates redBlank, yellowBlank,
        # blueBlank and blackBlank, which are written out below -- TODO confirm.
        ...
        # Visit every pixel of the image.
        for i in range(height):
            for j in range(width):
                # The bare string below is the original inline note
                # ("set the value ranges").
                '''取值范围设定'''
                bgr = img[i, j]
                # Elided in this excerpt; presumably assigns the pixel to one
                # of the colour images based on `bgr` -- TODO confirm.
                ...
        # Save one image per colour.
        cv.imwrite(RED_PATH, redBlank)
        cv.imwrite(YELLOW_PATH, yellowBlank)
        cv.imwrite(BLUE_PATH, blueBlank)
        cv.imwrite(BLACK_PATH, blackBlank)
    except Exception as e:
        # Any failure is printed and otherwise ignored.
        print(e)
def getPicStr(color):
    """
    Run ddddocr on one of the colour-separated captcha images.

    :param color: captcha colour; the image read is ``<color>.png`` inside PIC_DIR
    :return: the recognised captcha text, or None if an error occurred
        (the error is printed)
    """
    try:
        recognizer = ddddocr.DdddOcr()
        image_path = os.path.join(PIC_DIR, f'{color}.png')
        with open(image_path, 'rb') as image_file:
            raw_image = image_file.read()
        return recognizer.classification(raw_image)
    except Exception as err:
        print(err)
        return None
编写界面部分
import sys
from PyQt5.QtWidgets import QApplication, QWidget, QDesktopWidget, QHBoxLayout, QVBoxLayout, QMessageBox
from PyQt5.QtWidgets import QPushButton, QTableWidget, QTableWidgetItem, QLabel
from PyQt5.QtGui import QCloseEvent
from utils.TaskEvent import addTaskBtnClickEvent, initTaskBtnClickEvent, execTaskBtnClickEvent, stopTaskBtnClickEvent
from utils.TaskEvent import reInitTaskBtnClickEvent, reExecTaskBtnClickEvent, logonTaskBtnClickEvent, delTaskBtnClickEvent
class MainWindow(QWidget):
    """Main window of the invoice query tool.

    The window is laid out top to bottom as: a row of task buttons, the task
    table, and a footer with the version label and maintenance buttons.
    Button clicks are forwarded to the handlers imported from
    ``utils.TaskEvent``, each of which receives this window as ``window``.
    """

    def __init__(self):
        super().__init__()
        # Set by initTableLayout(); declared here so the attribute always exists.
        self.tableWidget = None
        self.initUI()

    def closeEvent(self, event: QCloseEvent) -> None:
        """Ask for confirmation before closing; "No" is the default answer."""
        reply = QMessageBox.question(
            self,
            "提示",
            "是否要退出?",
            QMessageBox.Yes | QMessageBox.No,
            QMessageBox.No
        )
        if reply == QMessageBox.Yes:
            event.accept()
        else:
            event.ignore()

    def initUI(self):
        """Set the title and size, centre the window, and assemble the layouts."""
        self.setWindowTitle('增值税发票查询工具')
        self.resize(1200, 720)
        # Centre the window on the available desktop area. The rectangle
        # returned by frameGeometry() is a copy, so moving it has no effect
        # until the window itself is moved to the rectangle's top-left corner.
        qr = self.frameGeometry()
        cp = QDesktopWidget().availableGeometry().center()
        qr.moveCenter(cp)
        self.move(qr.topLeft())
        mainLayout = QVBoxLayout()
        mainLayout.addLayout(self.initHeaderLayout())
        mainLayout.addLayout(self.initTableLayout())
        mainLayout.addLayout(self.initFooterLayout())
        self.setLayout(mainLayout)

    def initHeaderLayout(self):
        """Build the top button row (execute / initialise / add / stop).

        :return: the populated QHBoxLayout
        """
        headerLayout = QHBoxLayout()
        execBtn = QPushButton("执行")
        initBtn = QPushButton("初始化")
        addBtn = QPushButton("添加")
        stopBtn = QPushButton("终止")
        addBtn.clicked.connect(lambda: addTaskBtnClickEvent(window=self))
        initBtn.clicked.connect(lambda: initTaskBtnClickEvent(window=self))
        execBtn.clicked.connect(lambda: execTaskBtnClickEvent(window=self))
        stopBtn.clicked.connect(lambda: stopTaskBtnClickEvent(window=self))
        headerLayout.addWidget(execBtn)
        headerLayout.addWidget(initBtn)
        headerLayout.addWidget(addBtn)
        headerLayout.addWidget(stopBtn)
        # Trailing stretch keeps the buttons packed to the left.
        headerLayout.addStretch()
        return headerLayout

    def initTableLayout(self):
        """Build the task table and store it in ``self.tableWidget``.

        The table starts with zero rows and one column per ``tableHeader``
        entry; each entry supplies the column's header text and width.

        :return: the QHBoxLayout containing the table
        """
        tableLayout = QHBoxLayout()
        tableHeader = [
            {"field": "fileName", "text": "文件名称", "width": 200},
            {"field": "filePath", "text": "文件路径", "width": 300},
            {"field": "status", "text": "状态", "width": 100},
            {"field": "invoiceCode", "text": "发票代码", "width": 100},
            {"field": "invoiceNum", "text": "发票号码", "width": 100},
            {"field": "invoiceDate", "text": "发票日期", "width": 100},
            {"field": "totalAmount", "text": "不含税金额|校验码", "width": 200},
            {"field": "errorMsg", "text": "异常消息", "width": 200},
        ]
        tableWidget = QTableWidget(0, len(tableHeader))
        self.tableWidget = tableWidget
        for idx, info in enumerate(tableHeader):
            item = QTableWidgetItem()
            item.setText(info['text'])
            self.tableWidget.setHorizontalHeaderItem(idx, item)
            self.tableWidget.setColumnWidth(idx, info['width'])
        tableLayout.addWidget(tableWidget)
        return tableLayout

    def initFooterLayout(self):
        """Build the footer: version label on the left, action buttons on the right.

        :return: the populated QHBoxLayout
        """
        footerLayout = QHBoxLayout()
        versionView = QLabel("version: v3", self)
        reInitBtn = QPushButton("重新初始化")
        reExecBtn = QPushButton("重新执行")
        delBtn = QPushButton("删除校验项")
        logonBtn = QPushButton("API配置")
        reInitBtn.clicked.connect(lambda: reInitTaskBtnClickEvent(window=self))
        reExecBtn.clicked.connect(lambda: reExecTaskBtnClickEvent(window=self))
        delBtn.clicked.connect(lambda: delTaskBtnClickEvent(window=self))
        logonBtn.clicked.connect(lambda: logonTaskBtnClickEvent(window=self))
        footerLayout.addWidget(versionView)
        # The stretch separates the label from the buttons.
        footerLayout.addStretch()
        footerLayout.addWidget(reInitBtn)
        footerLayout.addWidget(reExecBtn)
        footerLayout.addWidget(delBtn)
        footerLayout.addWidget(logonBtn)
        return footerLayout
if __name__ == '__main__':
    # Create the Qt application, show the main window, then run the event
    # loop and use its return value as the process exit code.
    app = QApplication(sys.argv)
    window = MainWindow()
    window.show()
    exit_code = app.exec_()
    sys.exit(exit_code)
5.参考文章
防止 Selenium 被检测
参考文章: https://blog.csdn.net/cqcre/article/details/110944075
色彩分离
参考文章: https://blog.csdn.net/m0_50616665/article/details/124810344
ddddocr食用方法
参考文章: https://blog.csdn.net/fun_sn/article/details/125421983
使用PyQt5编写GUI
参考文章: https://www.bilibili.com/video/BV1EY4y187yD
在打包过程中,遇到了以下2个问题
1. PyQt5打包后运行selenium有黑框
参考文章: https://www.jianshu.com/p/3fadf8f7c203
2. ddddocr打包
参考文章: https://blog.csdn.net/weixin_46010646/article/details/124926207
评论区qq8533的评论