Python脚本之获取Splunk数据发送到Kafka

该文章介绍了一个Python脚本,用于从Splunk检索前一天的数据,并将这些数据转发到Kafka中间件。脚本包括日志处理类、连接Splunk的类以及数据格式化和发送到Kafka的逻辑。主要涉及日志记录、SplunkSDK、KafkaProducer的使用。
摘要由CSDN通过智能技术生成

原文地址:https://www.program-park.top/2023/05/09/python_17/

  需求如题,要依靠 Python 脚本转发 Splunk 数据到第三方目的地(可以是 Socket 或者 Kafka 中间件等等,这里就只放 Kafka 的部分),以获取前一天的数据为例:

# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import print_function
import sys,io,os,logging,datetime
import splunklib.client as client
import splunklib.results as results
from logging import handlers
from kafka import KafkaProducer

# Python 2 only: reset the interpreter's default string encoding to UTF-8 so
# implicit str<->unicode conversions of Splunk payloads don't raise
# UnicodeDecodeError. `reload(sys)` / `sys.setdefaultencoding` do not exist on
# Python 3 (str is already Unicode there), so guard the calls to keep the
# script importable on both versions.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 - `reload` is a builtin on Python 2 only
    sys.setdefaultencoding('utf8')

# log日志,存放路径:/logs
# Rotating file logger; log files are written under /logs/<option>.log and
# rotate weekly (Monday, 'W0') by default, keeping 8 old files.
class Logger:
    # String level names accepted by __init__, mapped to stdlib constants.
    level_relations = {
            'debug':logging.DEBUG,
            'info':logging.INFO,
            'warning':logging.WARNING,
            'error':logging.ERROR,
            'crit':logging.CRITICAL
        }

    def __init__(self,option,level='info',when='W0',backCount=8):
        """Create a logger that writes to /logs/<option>.log.

        option: task name, used as the log file basename.
        level: one of the keys of level_relations; unknown names now fall
            back to INFO (previously .get() returned None and setLevel(None)
            raised).
        when/backCount: rotation interval and number of backups kept,
            passed straight to TimedRotatingFileHandler.
        """
        logfile = "/logs/{}.log".format(option)
        if not os.path.exists('/logs'):
            os.makedirs("/logs")
        self.logger = logging.getLogger(logfile)
        fmt='%(asctime)s - %(levelname)s: %(message)s'
        format_str = logging.Formatter(fmt)
        # Fall back to INFO for unrecognized level names instead of crashing.
        self.logger.setLevel(self.level_relations.get(level, logging.INFO))
        # Drop handlers left over from an earlier Logger(...) with the same
        # option, otherwise every construction would duplicate output lines.
        self.logger.handlers=[]
        file_handler = handlers.TimedRotatingFileHandler(filename=logfile,when=when,backupCount=backCount,encoding='utf-8')
        file_handler.setFormatter(format_str)
        self.logger.addHandler(file_handler)


# splunk客户端
# option:数据分类标识
# Splunk client wrapper.
# option: data-category identifier used to select the search SPL.
class ConnectPhoenix:
    def __init__(self, option):
        """Store connection defaults and the task identifier."""
        self.HOST = "127.0.0.1"
        self.PORT = 8089
        self.USERNAME = "admin"
        self.PASSWORD = "passwd"
        self.option = option

    def phoenixService(self):
        """Open and return an authenticated Splunk service handle."""
        return client.connect(
            host=self.HOST,
            port=self.PORT,
            username=self.USERNAME,
            password=self.PASSWORD,
            verify=False,
            app="search")

    # SPL lookup per task; unknown options yield None, as before.
    def get_query(self):
        """Return the SPL search string for this task (None when unknown)."""
        spl_by_option = {'test1': 'search index=*'}
        return spl_by_option.get(self.option)

    def get_results(self, period, delay):
        """Export events in the window [-(period+delay) days, -delay days), snapped to @d.

        period: window length in days.
        delay: days back from now at which the window ends.
        """
        spl = self.get_query()
        earliest = '-%dd@d' % (int(period) + int(delay))
        latest = '-%dd@d' % int(delay)
        service = self.phoenixService()
        export_job = service.jobs.export(spl, earliest_time=earliest, latest_time=latest)
        return results.ResultsReader(io.BufferedReader(export_job))


# 针对查询结果做ETL
# log:查询结果
# option:数据分类标识
# ETL hook applied to a single Splunk query result.
# log: one result record from the export.
# option: data-category identifier selecting the transformation.
class FormatLog:
    def __init__(self, log, option):
        self.log = log
        self.option = option

    def format_log(self):
        """Return the (possibly transformed) record; currently a pass-through."""
        if self.option != 'test1':
            return self.log
        # Business-specific massaging for 'test1' records would go here.
        return self.log


if __name__=='__main__':
    # Args supplied by the cron command line, with defaults.
    option = sys.argv[1] if len(sys.argv) > 1 else 'test1' # task name, default test1
    period = sys.argv[2] if len(sys.argv) > 2 else 1 # window length in days; default: from yesterday 00:00
    delay = sys.argv[3] if len(sys.argv) > 3 else 0 # window end offset in days; default: through yesterday 23:59:59
    # OUTPUTIP = sys.argv[4] if len(sys.argv) > 4 else '192.168.66.6' # destination host, default 192.168.66.6
    # OUTPUTPORT = int(sys.argv[5]) if len(sys.argv) > 5 else 9092 # destination port, default 9092

    log=Logger(option,level='info')
    phoenix_server = ConnectPhoenix(option)
    query_results = phoenix_server.get_results(period, delay)

    # kafka sink
    def send_to_kafka():
        """Forward every non-Message result to Kafka and log the total count."""
        # Kafka bootstrap address
        kafka_addr = '192.168.66.6:9092'
        producer = KafkaProducer(bootstrap_servers=kafka_addr)
        # forwarded-message counter
        count_num = 0
        try:
            for result in query_results:
                # ResultsReader also yields results.Message diagnostics from
                # Splunk; only real event rows are forwarded.
                if isinstance(result, results.Message):
                    continue
                formatLog = FormatLog(result, option)
                logdic = formatLog.format_log()
                if logdic != "":
                    try:
                        # args: topic, payload, partition
                        # NOTE(review): str(logdic) is only a valid payload on
                        # Python 2 (bytes); Python 3 KafkaProducer needs bytes
                        # or a value_serializer - confirm target interpreter.
                        producer.send('test1', str(logdic), partition=0)
                        count_num += 1
                    except Exception as e:
                        # Was logged at INFO; send failures are errors.
                        log.logger.error("Error: {}".format(e))
        finally:
            # Always flush/close so already-queued messages are not lost when
            # the result iteration itself raises mid-loop.
            producer.flush()
            producer.close()
        # Summary log line
        log.logger.info("forwarder process send {} msg to {}".format(count_num,kafka_addr))

    try:
        send_to_kafka()
    except Exception as e:
        # Was logged at INFO; a failed forwarding run is an error.
        log.logger.error("Error: {}".format(e))

参考文献

  【1】https://dev.splunk.com/enterprise/docs/devtools/python/sdk-python/howtousesplunkpython/howtorunsearchespython/
  【2】https://blog.csdn.net/d1240673769/article/details/80424161
  【3】https://blog.csdn.net/ffjl1985/article/details/79773719?spm=1001.2101.3001.6650.6&utm_medium=distribute.pc_relevant.none-task-blog-2%7Edefault%7EBlogCommendFromBaidu%7ERate-6-79773719-blog-80424161.235%5Ev33%5Epc_relevant_increate_t0_download_v2_base&depth_1-utm_source=distribute.pc_relevant.none-task-blog-2%7Edefault%7EBlogCommendFromBaidu%7ERate-6-79773719-blog-80424161.235%5Ev33%5Epc_relevant_increate_t0_download_v2_base&utm_relevant_index=12

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

大Null

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值