Python脚本之获取Splunk数据保存到本地文件

最新推荐文章于 2024-07-17 18:34:44 发布

大Null

最新推荐文章于 2024-07-17 18:34:44 发布

阅读量384

点赞数 1

分类专栏： Python 文章标签： python splunk

本文链接：https://blog.csdn.net/weixin_44758876/article/details/131085198

版权

Python 专栏收录该内容

22 篇文章 3 订阅

订阅专栏

该Python脚本用于从Splunk服务中按月、日、小时分批导出数据，防止因数据量大导致超时。它使用SplunkSDK连接到服务器，设置查询，然后将结果处理并保存到本地文件系统。脚本支持自定义起止时间、时间粒度和文件保存路径。

摘要由CSDN通过智能技术生成

原文地址：https://program-park.top/2023/06/07/python_19/

需求如题，获取 Splunk 数据，防止数据量过大导致导出数据时超时，按照月、日、时分批次导出保存到本地文件：

# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import print_function
from datetime import datetime, timedelta
import sys,io
import splunklib.client as client
import splunklib.results as results
import os

# _create_unverified_https_context = ssl._create_unverified_context
# ssl._create_default_https_context = _create_unverified_https_context
reload(sys)
sys.setdefaultencoding('utf8')

# splunk客户端
# option:数据分类标识
class ConnectPhoenix:
    def __init__(self,option):
        self.HOST = "192.6.66.6"
        self.PORT = 8089
        self.USERNAME = "admin"
        self.PASSWORD = "666666"
        self.option = option

    def phoenixService(self):
        phoenix_service = client.connect(
            host=self.HOST,
            port=self.PORT,
            username=self.USERNAME,
            password=self.PASSWORD,
            verify=False,
            app="search")
        return phoenix_service

    # 获取查询SPL
    def get_query(self):
        if self.option == 'original':
            return 'search index=* | table _time,_raw'

    # 获取查询结果
    # period:起始时间
    # delay:终止时间
    def get_results(self, period, delay):
        period = period.strftime('%Y-%m-%dT%H:%M:%S')
        delay = delay.strftime('%Y-%m-%dT%H:%M:%S')
        query = self.get_query()
        kwargs = {'earliest_time': period, 'latest_time': delay}
        phoenix_service = self.phoenixService()
        phoenix_jobs = phoenix_service.jobs
        job = phoenix_jobs.export(query, **kwargs)
        query_results = results.ResultsReader(io.BufferedReader(job))
        return query_results

# 针对查询结果做ETL
# log:查询结果
# option:数据分类标识
class FormatLog:
    def __init__(self,log,option):
        self.log = log
        self.option = option

    def format_log(self):
        if self.option == 'original':
            logdir = self.log['_raw']

        return str(self.log)


# 保存数据到本地文件，按 月、日、时 分批
# option：任务类型
# period：查询起始时间
# delay：查询终止时间
# date_type：文件保存方式，月、日、时
# file_path：文件保存路径
class Savefile:
    def __init__(self, option, period, delay, date_type, file_path):
        self.option = option
        self.period = period
        self.delay = delay
        self.date_type = date_type
        self.file_path = file_path

    # 保存数据到本地
    # time_mark：时间标记，记录本批次查询的起始时间，同时也是文件名
    def save_file(self, time_mark):
        phoenix_server = ConnectPhoenix(self.option)
        query_results = phoenix_server.get_results(time_mark, self.delay)
        if self.date_type == 'day':
            path = self.file_path + '/' + time_mark.strftime('%Y-%m')
            if not os.path.exists(path):
                os.makedirs(path)
            f = io.open(path + '/' + time_mark.strftime('%Y-%m-%d') + '.log', 'a+', encoding='utf-8')
            for result in query_results:
                if isinstance(result, results.Message):
                    pass
                else:
                    formatLog = FormatLog(result, self.option)
                    logdic = formatLog.format_log()
                    f.write(logdic.decode('utf-8') + '\n')
            f.close()
        elif self.date_type == 'month':
            path = self.file_path + '/' + time_mark.strftime('%Y')
            if not os.path.exists(path):
                os.makedirs(path)
            f = io.open(path + '/' + time_mark.strftime('%Y-%m') + '.log', 'a+', encoding='utf-8')
            for result in query_results:
                if isinstance(result, results.Message):
                    pass
                else:
                    formatLog = FormatLog(result, self.option)
                    logdic = formatLog.format_log()
                    f.write(logdic.decode('utf-8') + '\n')
            f.close()
        elif self.date_type == 'hour':
            path = self.file_path + '/' + time_mark.strftime('%Y-%m-%d')
            if not os.path.exists(path):
                os.makedirs(path)
            f = io.open(path + '/' + time_mark.strftime('%Y-%m-%d %H%M%S') + '.log', 'a+', encoding='utf-8')
            for result in query_results:
                if isinstance(result, results.Message):
                    pass
                else:
                    formatLog = FormatLog(result, self.option)
                    logdic = formatLog.format_log()
                    f.write(logdic.decode('utf-8') + '\n')
            f.close()

    # 对日期进行分割，按时间分批次查询数据保存到本地
    def segment_date(self):
        self.period = datetime.strptime(self.period, '%Y-%m-%dT%H:%M:%S')
        self.delay = datetime.strptime(self.delay, '%Y-%m-%dT%H:%M:%S')
        if self.date_type == 'day':
            time_mark = self.delay.replace(hour=0, minute=0, second=0)
            while True:
                if time_mark - self.period < timedelta(days=0):
                    self.save_file(self.period)
                    print(str(self.period) + '|' + str(self.delay))
                    break
                else:
                    self.save_file(time_mark)
                    print(str(time_mark) + '|' + str(self.delay))
                    self.delay = time_mark
                    time_mark = time_mark - timedelta(days=1)
        elif self.date_type == 'month':
            time_mark = self.delay.replace(day=1, hour=0, minute=0, second=0)
            while True:
                if time_mark - self.period < timedelta(days=0):
                    self.save_file(self.period)
                    print(str(self.period) + '|' + str(self.delay))
                    break
                else:
                    self.save_file(time_mark)
                    print(str(time_mark) + '|' + str(self.delay))
                    self.delay = time_mark
                    time_mark = (time_mark - timedelta(days=1)).replace(day=1, hour=0, minute=0, second=0)
        elif self.date_type == 'hour':
            time_mark = self.delay.replace(minute=0, second=0)
            while True:
                if time_mark - self.period < timedelta(hours=0):
                    self.save_file(self.period)
                    print(str(self.period) + '|' + str(self.delay))
                    break
                else:
                    self.save_file(time_mark)
                    print(str(time_mark) + '|' + str(self.delay))
                    self.delay = time_mark
                    time_mark = time_mark - timedelta(hours=1)


if __name__=='__main__':
    # get args from cron cmd
    option = sys.argv[1] if len(sys.argv) > 1 else 'original' # 任务名，默认original
    period = sys.argv[2] if len(sys.argv) > 2 else '2023-06-01T12:00:00' # 查询起始时间，格式: 2023-06-01T12:00:00
    delay = sys.argv[3] if len(sys.argv) > 3 else '2023-06-01T13:00:00' # 查询终止时间，格式: 2023-06-01T13:00:00
    date_type = sys.argv[4] if len(sys.argv) > 4 else 'day' # 目的地址，默认day,可选：month、day、hour
    file_path = int(sys.argv[5]) if len(sys.argv) > 5 else '/data' # 日志存放目录，默认/data

    savefile = Savefile(option, period, delay, date_type, file_path)
    savefile.segment_date()