一、设计UI页面
一些组件需要认识一下,文本框、按钮、进度条等。
二、UI文件生成PY文件
1.快捷生成py文件
py文件会自动生成在当前目录下,与ui文件同名
2.运行一下这个py页面
导入的文件名为你生成的py文件名称
注意包名
import elk.txtToExcel as txtToExcel
#你们的应该是这样就可以了
import txtToExcel
import sys
import elk.txtToExcel as txtToExcel
from PyQt5.QtWidgets import QApplication,QMainWindow
if __name__ == '__main__':
app=QApplication(sys.argv)
mainWindow=QMainWindow()
ui=txtToExcel.Ui_MainWindow()
# 向主窗口添加控件
ui.setupUi(mainWindow)
mainWindow.show()
sys.exit(app.exec_())
如果你的import导入有问题,请修改你项目下代码包的设置
3.编码
在这之前还要说一下objectName属性,它相当于控件对象的名字。你要操作这个控件,可以使用这个名字来调用对应的方法。
开整,首先需要把Object改成QWidget。
import re
import tkinter as tk
from tkinter import filedialog
from PyQt5 import QtCore, QtWidgets
from PyQt5.QtWidgets import *
import os
import xlrd
from openpyxl import Workbook as owb
import json
import requests
from datetime import datetime
import time
import datetime
# 这里需要改一下 ↓↓↓
class Ui_MainWindow(QWidget):
先拿一个最简单的例子来说,给退出按钮实现关闭的功能。
在刚刚生成好的py文件中,有一个全是初始化你那些控件的方法找到它。
在里边增加这么一行代码。
clicked点击
connect执行对应的函数
MainWindow.close关闭窗口函数
self.exitBtn.clicked.connect(MainWindow.close)
按照上边的例子,需要你自己实现包括开始和其他按钮的实现逻辑。
我就放在了第二个方法里了。
def retranslateUi(self, MainWindow):
_translate = QtCore.QCoreApplication.translate
MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow"))
self.actionBtn.setText(_translate("MainWindow", "开始"))
self.exitBtn.setText(_translate("MainWindow", "退出"))
self.label.setText(_translate("MainWindow", "excel所在路径:"))
self.excelBtn.setText(_translate("MainWindow", "浏览"))
self.label_2.setText(_translate("MainWindow", "设置保存位置:"))
self.saveBtn.setText(_translate("MainWindow", "浏览"))
self.label_3.setText(_translate("MainWindow", "当前进度:"))
#设置进度条初始为0%
self.numBar.setValue(0)
#对应按钮对应的函数
self.excelBtn.clicked.connect(self.setTargetPath)
self.saveBtn.clicked.connect(self.setSavePath)
self.actionBtn.clicked.connect(self.start)
self.thread = None # 初始化线程
# 主函数
def start(self):
pass
# 设置目标文件夹路径
def setTargetPath(self):
# 隐藏控制台
root = tk.Tk()
root.withdraw()
self.targetPath = filedialog.askdirectory()
self.excelText.setText(self.targetPath)
# 查找文件夹下所有的文件
def findAllFile(self, base):
for root, ds, fs in os.walk(base):
for f in fs:
yield f
# 设置保存文件夹路径
def setSavePath(self):
# 隐藏控制台
root = tk.Tk()
root.withdraw()
self.savePath = filedialog.askdirectory()
self.saveText.setText(self.savePath)
在主函数,也就是start这个函数中,我们需要开一个子线程去进行爬取和写入excel操作。并且实时对进度条进行更。
主线程需要传递给子线程两个参数方法如下:
# 主函数
def start(self):
# 创建线程
self.actionBtn.setEnabled(False)
# 执行工作的子线程
self.thread = WorkThread(fileList=filenamelist,savePath = self.savePath)
# 连接信号
self.thread._signal.connect(self.call_backlog) # 进程连接回传到GUI的事件
# 开始线程
self.thread.start()
其中的WorkThread是一个具体执行爬虫和写入excel的类,继承QThread。
对应子线程中构造方法中的两个参数,用来主线程和子线程进行传参
WorkThread(fileList=filenamelist,savePath = self.savePath)
以下均为同一个类
from PyQt5.QtCore import *
from PyQt5 import QtCore
import xlrd
from openpyxl import Workbook as owb
import json
import requests
from datetime import datetime
import time
import datetime
class WorkThread(QtCore.QThread):
# 通过类成员对象定义信号对象
_signal = pyqtSignal(int)
#接收主线程传参fileList,savePath
def __init__(self,fileList,savePath):
super(WorkThread, self).__init__()
self.fileList = fileList
self.savePath = savePath
def __del__(self):
self.wait()
# 具体执行任务的方法
def run(self):
for i in self.fileList:
# 使用open
excel_file = owb()
# 在Excel文件里创建一个工作表sheet_name
excel_table = excel_file.create_sheet('收集结果', 0)
fileName = str(i).replace('.xls', '').replace('.xlsx', '').split('/')[-1]
xlrds = xlrd.open_workbook(i)
# 打开指定的表
table = xlrds.sheet_by_index(0)
#开始执行收集的方法
self.startCollect(table, excel_file, fileName, excel_table, self.savePath)
# 设置进度条为100%
self._signal.emit(100)
收集方法,涉及到动态改变进度条self._signal.emit(((item/(rows+1)))*100)。
def startCollect(self, table, excel_file, fileName, excel_table, folderPath):
savePath = folderPath + '/提取的trace信息文件夹'
# 获取总行数
rows = table.nrows
excel_table.cell(1, 1, 'message:')
excel_table.cell(1, 2, 'trice:')
excel_table.cell(1, 3, 'uri:')
excel_table.cell(1, 4, 'mode:')
excel_table.cell(1, 5, 'clientVersion:')
excel_table.cell(1, 6, 'loginType:')
excel_table.cell(1, 7, 'serverVersion:')
index_line = 2
for item in range(0, rows):
#动态改变进度条
self._signal.emit(((item/(rows+1)))*100)
tid = "TID:" + str(table.cell(item, 0).value)
#爬虫及写入excel方法
self.startChild(tid, index_line, excel_table, table)
index_line += 1
# 保存文档
excel_file.save(savePath + '/' + fileName + '完成' + '.xlsx')
具体实现逻辑,你使用Discover - Elastic是会有请求参数,但你按一条traceid进行请求时。每次只会traceid不一样。所以这里就只动态改变了tid和筛选时间范围。若果增加条件只需要自己重新发一次请求加到body里即可。
这里的条件在网页是涨这个样子,怎加了一个错误级别。并且时间倒序。然后后边解析返回的json对象去第一条报错信息。
trace: "TID:4c692741e59a452cbc8b4d5de06b8837.75.16419144765607809" and level : "ERROR"
注意!!!我这个body是不全的,把一些动态的我加上了,涉及到公司一下东西所以没有放上去,需要你们自己先把条件设置好请求一遍在粘贴到body里,然后把需要动态的地方替换掉即可
def startChild(self, tid, row, excel_table, table):
body = {
"params": {
"ignoreThrottled": True,
"index": "",
"body": {
"version": True,
"size": 500,
"sort": [
{
"@timestamp": {
"order": "asc",
"unmapped_type": "boolean"
}
}
],
"query": {
"bool": {
"must": [],
"filter": [
{
"bool": {
"should": [
{
"match_phrase": {
"trace": tid
}
}
],
"minimum_should_match": 1
}
},
{
"bool": {
"should": [
{
"match_phrase": {
"level": "ERROR"
}
}
],
"minimum_should_match": 1
}
},
{
"range": {
"@timestamp": {
"gte": datetime.datetime.utcfromtimestamp(time.mktime((
datetime.datetime.now() - datetime.timedelta(
days=20)).timetuple())).strftime(
"%Y-%m-%dT%H:%M:%S.%fZ"),
"lte": datetime.datetime.utcfromtimestamp(time.mktime((
datetime.datetime.now() + datetime.timedelta(
days=1)).timetuple())).strftime(
"%Y-%m-%dT%H:%M:%S.%fZ"),
"format": "strict_date_optional_time"
}
}
}
],
"should": [],
"must_not": []
}
}
}
...
}
}
jsonData = json.dumps(body)
headers = {
#需要你自己填写请求头
}
response = requests.post(url='你们的请求url地址', data=jsonData,
headers=headers, timeout=10)
response.encoding = 'utf-8'
result = response.json()
try:
if str(result['rawResponse']['hits']['hits'][0]['_source']['ex_detail']) == '' or str(
result['rawResponse']['hits']['hits'][0]['_source']['ex_detail']) is None:
excel_table.cell(row, 1, str(result['rawResponse']['hits']['hits'][0]['_source']['message']))
excel_table.cell(row, 2, str(result['rawResponse']['hits']['hits'][0]['_source']['trace']))
# 处理uri
uri = str(result['rawResponse']['hits']['hits'][0]['_source']['uri'])
uri.replace("POST", "").replace("GET", "")
# uri = re.sub(r'\s+', "", uri).replace("http://", "")
# uri = re.sub(r'\d', "", uri).replace("GET", "")
# uri = re.sub(r'\.(.*):', "", uri).replace("POST", "")
if uri == '' or uri is None or uri == 'null':
uri = '无uri地址'
excel_table.cell(row, 3, uri)
excel_table.cell(row, 4, str(result['rawResponse']['hits']['hits'][0]['_source']['service']))
clientVersion = str(result['rawResponse']['hits']['hits'][0]['_source']['clientVersion'])
if clientVersion == '' or clientVersion is None or clientVersion == 'null':
clientVersion = '无clientVersion'
excel_table.cell(row, 5, clientVersion)
loginType = str(result['rawResponse']['hits']['hits'][0]['_source']['loginType'])
if loginType == '' or loginType is None or loginType == 'null':
loginType = '无loginType'
excel_table.cell(row, 6, loginType)
serverVersion = str(result['rawResponse']['hits']['hits'][0]['_source']['serverVersion'])
if serverVersion == '' or serverVersion is None or serverVersion == 'null':
serverVersion = '无serverVersion'
excel_table.cell(row, 7, serverVersion)
else:
excel_table.cell(row, 1, str(result['rawResponse']['hits']['hits'][0]['_source']['ex_detail']))
excel_table.cell(row, 2, str(result['rawResponse']['hits']['hits'][0]['_source']['trace']))
# 处理uri
uri = str(result['rawResponse']['hits']['hits'][0]['_source']['uri'])
uri.replace("POST", "").replace("GET", "")
# uri = re.sub(r'\s+', "", uri).replace("http://", "")
# uri = re.sub(r'\d', "", uri).replace("GET", "")
# uri = re.sub(r'\.(.*):', "", uri).replace("POST", "")
if uri == '' or uri is None or uri == 'null':
uri = '无uri地址'
excel_table.cell(row, 3, uri)
excel_table.cell(row, 4, str(result['rawResponse']['hits']['hits'][0]['_source']['service']))
clientVersion = str(result['rawResponse']['hits']['hits'][0]['_source']['clientVersion'])
if clientVersion == '' or clientVersion is None or clientVersion == 'null':
clientVersion = '无clientVersion'
excel_table.cell(row, 5, clientVersion)
loginType = str(result['rawResponse']['hits']['hits'][0]['_source']['loginType'])
if loginType == '' or loginType is None or loginType == 'null':
loginType = '无loginType'
excel_table.cell(row, 6, loginType)
serverVersion = str(result['rawResponse']['hits']['hits'][0]['_source']['serverVersion'])
if serverVersion == '' or serverVersion is None or serverVersion == 'null':
serverVersion = '无serverVersion'
excel_table.cell(row, 7, serverVersion)
except Exception as e:
flage = True
for con in range(0, 4):
try:
self.errorDel(result['id'], row, excel_table)
flage = True
break
except Exception as e:
flage = False
# print(e)
continue
if not flage:
print('4次请求都失败了')
excel_table.cell(row, 2, str(table.cell(row, 0).value))
excel_table.cell(row, 1, 'tid获取信息异常')
#如果获取失败进行二次获取的方法
def errorDel(self, id, row, excel_table):
body = {
"id": str(id)
}
jsonData = json.dumps(body)
headers = {
#请求头信息
}
response = requests.post(url='请求地址', data=jsonData,
headers=headers, timeout=10)
result = response.json()
if str(result['rawResponse']['hits']['hits'][0]['_source']['ex_detail']) == '' or str(
result['rawResponse']['hits']['hits'][0]['_source']['ex_detail']) is None:
excel_table.cell(row, 1, str(result['rawResponse']['hits']['hits'][0]['_source']['message']))
excel_table.cell(row, 2, str(result['rawResponse']['hits']['hits'][0]['_source']['trace']))
# 处理uri
uri = str(result['rawResponse']['hits']['hits'][0]['_source']['uri'])
# uri = re.sub(r'\s+', "", uri).replace("http://", "")
# uri = re.sub(r'\d', "", uri).replace("GET", "")
# uri = re.sub(r'\.(.*):', "", uri).replace("POST", "")
uri.replace("POST", "").replace("GET", "")
if uri == '' or uri is None or uri == 'null':
uri = '无uri地址'
excel_table.cell(row, 3, uri)
excel_table.cell(row, 4, str(result['rawResponse']['hits']['hits'][0]['_source']['service']))
clientVersion = str(result['rawResponse']['hits']['hits'][0]['_source']['clientVersion'])
if clientVersion == '' or clientVersion is None or clientVersion == 'null':
clientVersion = '无clientVersion'
excel_table.cell(row, 5, clientVersion)
loginType = str(result['rawResponse']['hits']['hits'][0]['_source']['loginType'])
if loginType == '' or loginType is None or loginType == 'null':
loginType = '无loginType'
excel_table.cell(row, 6, loginType)
serverVersion = str(result['rawResponse']['hits']['hits'][0]['_source']['serverVersion'])
if serverVersion == '' or serverVersion is None or serverVersion == 'null':
serverVersion = '无serverVersion'
excel_table.cell(row, 7, serverVersion)
else:
excel_table.cell(row, 1, str(result['rawResponse']['hits']['hits'][0]['_source']['ex_detail']))
excel_table.cell(row, 2, str(result['rawResponse']['hits']['hits'][0]['_source']['trace']))
# 处理uri
uri = str(result['rawResponse']['hits']['hits'][0]['_source']['uri'])
# uri = re.sub(r'\s+', "", uri).replace("http://", "")
# uri = re.sub(r'\d', "", uri).replace("GET", "")
# uri = re.sub(r'\.(.*):', "", uri).replace("POST", "")
uri.replace("POST", "").replace("GET", "")
if uri == '' or uri is None or uri == 'null':
uri = '无uri地址'
excel_table.cell(row, 3, uri)
excel_table.cell(row, 4, str(result['rawResponse']['hits']['hits'][0]['_source']['service']))
clientVersion = str(result['rawResponse']['hits']['hits'][0]['_source']['clientVersion'])
if clientVersion == '' or clientVersion is None or clientVersion == 'null':
clientVersion = '无clientVersion'
excel_table.cell(row, 5, clientVersion)
loginType = str(result['rawResponse']['hits']['hits'][0]['_source']['loginType'])
if loginType == '' or loginType is None or loginType == 'null':
loginType = '无loginType'
excel_table.cell(row, 6, loginType)
serverVersion = str(result['rawResponse']['hits']['hits'][0]['_source']['serverVersion'])
if serverVersion == '' or serverVersion is None or serverVersion == 'null':
serverVersion = '无serverVersion'
excel_table.cell(row, 7, serverVersion)