Discover - Elastic通过trace抓取信息到EXCEL


一、设计UI页面

一些组件需要认识一下,文本框、按钮、进度条等。
在这里插入图片描述

在这里插入图片描述

对于UI这些不明白的参考上篇教程,第一个pyqt5程序

二、UI文件生成PY文件

1.快捷生成py文件

在这里插入图片描述
py文件会自动生成在当前目录下,与ui文件同名
在这里插入图片描述

2.运行一下这个py页面

导入的文件名为你生成的py文件名称
在这里插入图片描述

注意包名

import elk.txtToExcel as txtToExcel
#你们的应该是这样就可以了
import txtToExcel
import sys
import elk.txtToExcel as txtToExcel
from PyQt5.QtWidgets import QApplication,QMainWindow

if __name__ == '__main__':
    app=QApplication(sys.argv)
    mainWindow=QMainWindow()
    ui=txtToExcel.Ui_MainWindow()
    # 向主窗口添加控件
    ui.setupUi(mainWindow)
    mainWindow.show()
    sys.exit(app.exec_())

在这里插入图片描述
如果你的import导入有问题,请修改你项目下代码包的设置
在这里插入图片描述

3.编码

在这之前还要说一下objectName属性,它相当于控件对象的名字。你要操作这个控件,可以使用这个名字来调用对应的方法。
在这里插入图片描述

开整,首先需要把Object改成QWidget。

import re
import tkinter as tk
from tkinter import filedialog
from PyQt5 import QtCore, QtWidgets
from PyQt5.QtWidgets import *
import os
import xlrd
from openpyxl import Workbook as owb
import json
import requests
from datetime import datetime
import time
import datetime
#	这里需要改一下	↓↓↓
class Ui_MainWindow(QWidget):

先拿一个最简单的例子来说,给退出按钮实现关闭的功能。
在刚刚生成好的py文件中,有一个全是初始化你那些控件的方法找到它。
在这里插入图片描述
在里边增加这么一行代码。
clicked点击
connect执行对应的函数
MainWindow.close关闭窗口函数

self.exitBtn.clicked.connect(MainWindow.close)

按照上边的例子,需要你自己实现包括开始和其他按钮的实现逻辑。
我就放在了第二个方法里了。

	def retranslateUi(self, MainWindow):
	   _translate = QtCore.QCoreApplication.translate
	   MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow"))
	   self.actionBtn.setText(_translate("MainWindow", "开始"))
	   self.exitBtn.setText(_translate("MainWindow", "退出"))
	   self.label.setText(_translate("MainWindow", "excel所在路径:"))
	   self.excelBtn.setText(_translate("MainWindow", "浏览"))
	   self.label_2.setText(_translate("MainWindow", "设置保存位置:"))
	   self.saveBtn.setText(_translate("MainWindow", "浏览"))
	   self.label_3.setText(_translate("MainWindow", "当前进度:"))
	   #设置进度条初始为0%
	   self.numBar.setValue(0)
	   #对应按钮对应的函数
	   self.excelBtn.clicked.connect(self.setTargetPath)
	   self.saveBtn.clicked.connect(self.setSavePath)
	   self.actionBtn.clicked.connect(self.start)
	   self.thread = None  # 初始化线程
	# 主函数
	def start(self):
		pass
	# 设置目标文件夹路径
	def setTargetPath(self):
	  # 隐藏控制台
	  root = tk.Tk()
	  root.withdraw()
	  self.targetPath = filedialog.askdirectory()
	  self.excelText.setText(self.targetPath)
	# 查找文件夹下所有的文件
	def findAllFile(self, base):
	   for root, ds, fs in os.walk(base):
	       for f in fs:
	           yield f
	# 设置保存文件夹路径
	def setSavePath(self):
	   # 隐藏控制台
	   root = tk.Tk()
	   root.withdraw()
	   self.savePath = filedialog.askdirectory()
	   self.saveText.setText(self.savePath)

在主函数,也就是start这个函数中,我们需要开一个子线程去进行爬取和写入excel操作。并且实时对进度条进行更。
主线程需要传递给子线程两个参数方法如下:

	# 主函数
	def start(self):
	   # 创建线程
	   self.actionBtn.setEnabled(False)
	   # 执行工作的子线程
	   self.thread = WorkThread(fileList=filenamelist,savePath = self.savePath)
	   # 连接信号
	   self.thread._signal.connect(self.call_backlog)  # 进程连接回传到GUI的事件
	   # 开始线程
	   self.thread.start()

其中的WorkThread是一个具体执行爬虫和写入excel的类,继承QThread。

对应子线程中构造方法中的两个参数,用来主线程和子线程进行传参
WorkThread(fileList=filenamelist,savePath = self.savePath)

以下均为同一个类

from PyQt5.QtCore import *
from PyQt5 import QtCore
import xlrd
from openpyxl import Workbook as owb
import json
import requests
from datetime import datetime
import time
import datetime
class WorkThread(QtCore.QThread):
    #  通过类成员对象定义信号对象
    _signal = pyqtSignal(int)
	#接收主线程传参fileList,savePath
    def __init__(self,fileList,savePath):
        super(WorkThread, self).__init__()
        self.fileList = fileList
        self.savePath = savePath
    def __del__(self):
        self.wait()
    # 具体执行任务的方法
    def run(self):
        for i in self.fileList:
            # 使用open
            excel_file = owb()
            # 在Excel文件里创建一个工作表sheet_name
            excel_table = excel_file.create_sheet('收集结果', 0)
            fileName = str(i).replace('.xls', '').replace('.xlsx', '').split('/')[-1]
            xlrds = xlrd.open_workbook(i)
            # 打开指定的表
            table = xlrds.sheet_by_index(0)
            #开始执行收集的方法
            self.startCollect(table, excel_file, fileName, excel_table, self.savePath)
        # 设置进度条为100%
        self._signal.emit(100)

收集方法,涉及到动态改变进度条self._signal.emit(((item/(rows+1)))*100)。

    def startCollect(self, table, excel_file, fileName, excel_table, folderPath):
        savePath = folderPath + '/提取的trace信息文件夹'
        # 获取总行数
        rows = table.nrows
        excel_table.cell(1, 1, 'message:')
        excel_table.cell(1, 2, 'trice:')
        excel_table.cell(1, 3, 'uri:')
        excel_table.cell(1, 4, 'mode:')
        excel_table.cell(1, 5, 'clientVersion:')
        excel_table.cell(1, 6, 'loginType:')
        excel_table.cell(1, 7, 'serverVersion:')
        index_line = 2
        for item in range(0, rows):
            #动态改变进度条
            self._signal.emit(((item/(rows+1)))*100)
            tid = "TID:" + str(table.cell(item, 0).value)
            #爬虫及写入excel方法
            self.startChild(tid, index_line, excel_table, table)
            index_line += 1
        # 保存文档
        excel_file.save(savePath + '/' + fileName + '完成' + '.xlsx')

具体实现逻辑,你使用Discover - Elastic是会有请求参数,但你按一条traceid进行请求时。每次只会traceid不一样。所以这里就只动态改变了tid和筛选时间范围。若果增加条件只需要自己重新发一次请求加到body里即可。
这里的条件在网页是涨这个样子,怎加了一个错误级别。并且时间倒序。然后后边解析返回的json对象去第一条报错信息。

trace: "TID:4c692741e59a452cbc8b4d5de06b8837.75.16419144765607809" and level : "ERROR"

注意!!!我这个body是不全的,把一些动态的我加上了,涉及到公司一下东西所以没有放上去,需要你们自己先把条件设置好请求一遍在粘贴到body里,然后把需要动态的地方替换掉即可

    def startChild(self, tid, row, excel_table, table):
        body = {
            "params": {
                "ignoreThrottled": True,
                "index": "",
                "body": {
                    "version": True,
                    "size": 500,
                    "sort": [
                        {
                            "@timestamp": {
                                "order": "asc",
                                "unmapped_type": "boolean"
                            }
                        }
                    ],

                    "query": {
                        "bool": {
                            "must": [],
                            "filter": [
                                {
                                    "bool": {
                                        "should": [
                                            {
                                                "match_phrase": {
                                                    "trace": tid
                                                }
                                            }
                                        ],
                                        "minimum_should_match": 1
                                    }
                                },
                                {
                                    "bool": {
                                        "should": [
                                            {
                                                "match_phrase": {
                                                    "level": "ERROR"
                                                }
                                            }
                                        ],
                                        "minimum_should_match": 1
                                    }
                                },
                                {
                                    "range": {
                                        "@timestamp": {
                                            "gte": datetime.datetime.utcfromtimestamp(time.mktime((
                                                                                                          datetime.datetime.now() - datetime.timedelta(
                                                                                                      days=20)).timetuple())).strftime(
                                                "%Y-%m-%dT%H:%M:%S.%fZ"),
                                            "lte": datetime.datetime.utcfromtimestamp(time.mktime((
                                                                                                          datetime.datetime.now() + datetime.timedelta(
                                                                                                      days=1)).timetuple())).strftime(
                                                "%Y-%m-%dT%H:%M:%S.%fZ"),
                                            "format": "strict_date_optional_time"
                                        }
                                    }
                                }
                            ],
                            "should": [],
                            "must_not": []
                        }
                    }
                }
                ...
            }

        }
        jsonData = json.dumps(body)
        headers = {
           #需要你自己填写请求头
        }
        response = requests.post(url='你们的请求url地址', data=jsonData,
                                 headers=headers, timeout=10)
        response.encoding = 'utf-8'
        result = response.json()
        try:
            if str(result['rawResponse']['hits']['hits'][0]['_source']['ex_detail']) == '' or str(
                    result['rawResponse']['hits']['hits'][0]['_source']['ex_detail']) is None:
                excel_table.cell(row, 1, str(result['rawResponse']['hits']['hits'][0]['_source']['message']))
                excel_table.cell(row, 2, str(result['rawResponse']['hits']['hits'][0]['_source']['trace']))
                # 处理uri
                uri = str(result['rawResponse']['hits']['hits'][0]['_source']['uri'])
                uri.replace("POST", "").replace("GET", "")
                # uri = re.sub(r'\s+', "", uri).replace("http://", "")
                # uri = re.sub(r'\d', "", uri).replace("GET", "")
                # uri = re.sub(r'\.(.*):', "", uri).replace("POST", "")
                if uri == '' or uri is None or uri == 'null':
                    uri = '无uri地址'
                excel_table.cell(row, 3, uri)

                excel_table.cell(row, 4, str(result['rawResponse']['hits']['hits'][0]['_source']['service']))
                clientVersion = str(result['rawResponse']['hits']['hits'][0]['_source']['clientVersion'])
                if clientVersion == '' or clientVersion is None or clientVersion == 'null':
                    clientVersion = '无clientVersion'
                excel_table.cell(row, 5, clientVersion)
                loginType = str(result['rawResponse']['hits']['hits'][0]['_source']['loginType'])
                if loginType == '' or loginType is None or loginType == 'null':
                    loginType = '无loginType'
                excel_table.cell(row, 6, loginType)
                serverVersion = str(result['rawResponse']['hits']['hits'][0]['_source']['serverVersion'])
                if serverVersion == '' or serverVersion is None or serverVersion == 'null':
                    serverVersion = '无serverVersion'
                excel_table.cell(row, 7, serverVersion)
            else:
                excel_table.cell(row, 1, str(result['rawResponse']['hits']['hits'][0]['_source']['ex_detail']))
                excel_table.cell(row, 2, str(result['rawResponse']['hits']['hits'][0]['_source']['trace']))
                # 处理uri
                uri = str(result['rawResponse']['hits']['hits'][0]['_source']['uri'])
                uri.replace("POST", "").replace("GET", "")
                # uri = re.sub(r'\s+', "", uri).replace("http://", "")
                # uri = re.sub(r'\d', "", uri).replace("GET", "")
                # uri = re.sub(r'\.(.*):', "", uri).replace("POST", "")
                if uri == '' or uri is None or uri == 'null':
                    uri = '无uri地址'
                excel_table.cell(row, 3, uri)

                excel_table.cell(row, 4, str(result['rawResponse']['hits']['hits'][0]['_source']['service']))
                clientVersion = str(result['rawResponse']['hits']['hits'][0]['_source']['clientVersion'])
                if clientVersion == '' or clientVersion is None or clientVersion == 'null':
                    clientVersion = '无clientVersion'
                excel_table.cell(row, 5, clientVersion)
                loginType = str(result['rawResponse']['hits']['hits'][0]['_source']['loginType'])
                if loginType == '' or loginType is None or loginType == 'null':
                    loginType = '无loginType'
                excel_table.cell(row, 6, loginType)
                serverVersion = str(result['rawResponse']['hits']['hits'][0]['_source']['serverVersion'])
                if serverVersion == '' or serverVersion is None or serverVersion == 'null':
                    serverVersion = '无serverVersion'
                excel_table.cell(row, 7, serverVersion)
        except Exception as e:
            flage = True
            for con in range(0, 4):
                try:
                    self.errorDel(result['id'], row, excel_table)
                    flage = True
                    break
                except Exception as e:
                    flage = False
                    # print(e)
                    continue
            if not flage:
                print('4次请求都失败了')
                excel_table.cell(row, 2, str(table.cell(row, 0).value))
                excel_table.cell(row, 1, 'tid获取信息异常')
	#如果获取失败进行二次获取的方法
    def errorDel(self, id, row, excel_table):
        body = {
            "id": str(id)
        }
        jsonData = json.dumps(body)
        headers = {
            #请求头信息
        }
        response = requests.post(url='请求地址', data=jsonData,
                                 headers=headers, timeout=10)
        result = response.json()
        if str(result['rawResponse']['hits']['hits'][0]['_source']['ex_detail']) == '' or str(
                result['rawResponse']['hits']['hits'][0]['_source']['ex_detail']) is None:
            excel_table.cell(row, 1, str(result['rawResponse']['hits']['hits'][0]['_source']['message']))
            excel_table.cell(row, 2, str(result['rawResponse']['hits']['hits'][0]['_source']['trace']))
            # 处理uri
            uri = str(result['rawResponse']['hits']['hits'][0]['_source']['uri'])
            # uri = re.sub(r'\s+', "", uri).replace("http://", "")
            # uri = re.sub(r'\d', "", uri).replace("GET", "")
            # uri = re.sub(r'\.(.*):', "", uri).replace("POST", "")
            uri.replace("POST", "").replace("GET", "")
            if uri == '' or uri is None or uri == 'null':
                uri = '无uri地址'
            excel_table.cell(row, 3, uri)

            excel_table.cell(row, 4, str(result['rawResponse']['hits']['hits'][0]['_source']['service']))
            clientVersion = str(result['rawResponse']['hits']['hits'][0]['_source']['clientVersion'])
            if clientVersion == '' or clientVersion is None or clientVersion == 'null':
                clientVersion = '无clientVersion'
            excel_table.cell(row, 5, clientVersion)
            loginType = str(result['rawResponse']['hits']['hits'][0]['_source']['loginType'])
            if loginType == '' or loginType is None or loginType == 'null':
                loginType = '无loginType'
            excel_table.cell(row, 6, loginType)
            serverVersion = str(result['rawResponse']['hits']['hits'][0]['_source']['serverVersion'])
            if serverVersion == '' or serverVersion is None or serverVersion == 'null':
                serverVersion = '无serverVersion'
            excel_table.cell(row, 7, serverVersion)
        else:
            excel_table.cell(row, 1, str(result['rawResponse']['hits']['hits'][0]['_source']['ex_detail']))
            excel_table.cell(row, 2, str(result['rawResponse']['hits']['hits'][0]['_source']['trace']))
            # 处理uri
            uri = str(result['rawResponse']['hits']['hits'][0]['_source']['uri'])
            # uri = re.sub(r'\s+', "", uri).replace("http://", "")
            # uri = re.sub(r'\d', "", uri).replace("GET", "")
            # uri = re.sub(r'\.(.*):', "", uri).replace("POST", "")
            uri.replace("POST", "").replace("GET", "")
            if uri == '' or uri is None or uri == 'null':
                uri = '无uri地址'
            excel_table.cell(row, 3, uri)

            excel_table.cell(row, 4, str(result['rawResponse']['hits']['hits'][0]['_source']['service']))
            clientVersion = str(result['rawResponse']['hits']['hits'][0]['_source']['clientVersion'])
            if clientVersion == '' or clientVersion is None or clientVersion == 'null':
                clientVersion = '无clientVersion'
            excel_table.cell(row, 5, clientVersion)
            loginType = str(result['rawResponse']['hits']['hits'][0]['_source']['loginType'])
            if loginType == '' or loginType is None or loginType == 'null':
                loginType = '无loginType'
            excel_table.cell(row, 6, loginType)
            serverVersion = str(result['rawResponse']['hits']['hits'][0]['_source']['serverVersion'])
            if serverVersion == '' or serverVersion is None or serverVersion == 'null':
                serverVersion = '无serverVersion'
            excel_table.cell(row, 7, serverVersion)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

叫我柒月

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值