splunk python数据接口

最新推荐文章于 2024-08-22 09:32:44 发布

Jepson2017

最新推荐文章于 2024-08-22 09:32:44 发布

阅读量2.6k

点赞数

分类专栏： splunk 文章标签： splunk数据接口

本文链接：https://blog.csdn.net/d1240673769/article/details/80424161

版权

splunk 专栏收录该内容

7 篇文章

订阅专栏

本文实现利用python splunklib包获取splunk数据

参考官方教程： http://dev.splunk.com/view/python-sdk/SP-CAAAEE5

1.通过接口，执行查询语句，并将获取到的查询结果保存在csv中

import splunklib.client as client
import splunklib.results as results

# Connection settings
HOST = "127.0.0.1"  # Splunk server address
PORT = 8089  # port
USERNAME = "admin"  # login name
PASSWORD = "passwd"  # password


# Search statement to run
search_query = '|tstats count from datamodel=xxxx by domain |sort 10 -count'

# Start and end dates of the query window (YYYY-MM-DD)
start_time = "2018-05-01"
end_time = "2018-05-31"  # fixed: was "20018-05-31" (five-digit year typo)

def main():
    """Run ``search_query`` as a oneshot search and save the rows to domain.csv.

    Connects to Splunk with the module-level HOST/PORT/USERNAME/PASSWORD,
    limits the search to the window between ``start_time`` and ``end_time``
    (each taken at 00:00 in the +08:00 offset), and writes a ``domain,count``
    header followed by one line per result row.
    """
    import csv  # local import: keeps this snippet self-contained

    # Connect to Splunk
    service = client.connect(host=HOST,
                             port=PORT,
                             username=USERNAME,
                             password=PASSWORD,
                             app='search')
    assert isinstance(service, client.Service)

    # Search parameters
    search_kwargs = {
        'earliest_time': start_time + 'T00:00:00.000+08:00',
        'latest_time': end_time + 'T00:00:00.000+08:00',
    }

    # Run the search
    print("正在查询...")
    job = service.jobs.oneshot(search_query, **search_kwargs)

    # Write the results to domain.csv. csv.writer quotes values that contain
    # commas or quotes; lineterminator keeps the rows "\n"-terminated.
    with open('domain.csv', 'w', encoding='utf-8', newline='') as fh:
        writer = csv.writer(fh, lineterminator="\n")
        writer.writerow(["domain", "count"])
        for result in results.ResultsReader(job):
            # The reader may also yield results.Message diagnostics, which
            # carry no fields; only dict rows are written.
            if not isinstance(result, dict):
                continue
            writer.writerow([result['domain'], result['count']])
    print("数据已保存到本地...")


if __name__ == '__main__':
    main()

2.保存到dict中

import splunklib.client as client
import splunklib.results as results


# Connection details
HOST = "localhost"      # Splunk server address
PORT = 8089             # port
USERNAME = "admin"      # login name
PASSWORD = "admin@1!"   # password

# SPL statement to run
search_spl = (
    "index=log earliest=0 "
    "|stats count by visitorid,referer_type,campain "
    "|sort 20 -count"
)


def get_dict_data(search_spl):
    """Stream the results of ``search_spl`` from Splunk and print each row.

    Server messages are printed as ``type: message``; every result row is
    copied into a plain dict and printed.

    Args:
        search_spl: SPL text without the leading ``search`` keyword; it is
            prefixed with ``"search "`` before being sent to the export
            endpoint.
    """
    # Set up the connection
    service = client.connect(
        host=HOST,
        port=PORT,
        username=USERNAME,
        password=PASSWORD,
    )
    # Run the search
    rr = results.ResultsReader(service.jobs.export("search " + search_spl))
    for result in rr:
        if isinstance(result, results.Message):
            # print() call instead of the Python 2 print statement, which is
            # a SyntaxError on Python 3.
            print('%s: %s' % (result.type, result.message))
        elif isinstance(result, dict):
            # Copy the row into a plain dict
            res_data = dict(result)
            print(res_data)
    assert rr.is_preview == False


if __name__ == '__main__':
    get_dict_data(search_spl)

3.在splunk中配置任务计划，通过接口获取任务计划执行后的结果

import time,re
import splunklib.client as client
import splunklib.results as results

# Splunk connection settings
HOST = "localhost"
PORT = 8089
USERNAME = "admin"
PASSWORD = "passwd"

# Interval, in minutes, at which the scheduled search runs in Splunk
cron_gap_min = 5

# Connect to Splunk through the SDK
service = client.connect(host=HOST, port=PORT, username=USERNAME, password=PASSWORD, app="search")

# Name fragment identifying the jobs to look for
match_string = "test"

# All jobs currently listed by Splunk
jobs = service.jobs
# Time at which this program runs, rounded to whole seconds
now_time = int(round(time.time()))

#将数据导出为csv文本
def get_csv_data():
    """Write the rows of the most recent matching scheduled-search job to result.csv.

    Scans the module-level ``jobs`` for sids matching ``match_string``, reads
    the first 10-digit number in the sid as the job's run time (compared with
    ``now_time``, so it is treated as epoch seconds), and exports every job
    that ran within the last ``cron_gap_min`` minutes. Each row is written as
    its field values in order, without a header line.
    """
    import csv  # local import: keeps this snippet self-contained

    for job in jobs:
        # Only jobs produced by the scheduled search of interest
        if not re.search(match_string, job.sid):
            continue
        # Raw-string pattern; skip sids without a timestamp instead of
        # failing with IndexError.
        stamp = re.search(r"\d{10}", job.sid)
        if stamp is None:
            continue
        run_time = int(stamp.group())
        # Keep only the most recent run
        if now_time - run_time >= cron_gap_min * 60:
            continue
        # Wait for the job to finish
        while not job.is_done():
            time.sleep(1)
        rr = results.ResultsReader(job.results())
        # NOTE(review): the file is reopened in write mode for every matching
        # job, so a later match overwrites an earlier one (as in the original).
        with open('result.csv', 'w', encoding='utf-8', newline='') as fh:
            writer = csv.writer(fh, lineterminator="\n")
            for result in rr:
                # The reader may yield results.Message diagnostics as well;
                # only dict rows carry field values.
                if not isinstance(result, dict):
                    continue
                # dict.keys() is not indexable on Python 3, so the values are
                # taken directly; csv.writer handles commas/quotes in values.
                writer.writerow(list(result.values()))
    print("任务完成...")

#将数据转化为字典形式
def get_dict_data():
    """Return the first result row of the most recent matching job as a dict.

    Scans the module-level ``jobs`` for sids matching ``match_string`` whose
    embedded 10-digit run time lies within ``cron_gap_min`` minutes of
    ``now_time``, waits for the job to finish, and returns its first row.

    Returns:
        A plain dict of field name to value for the first row found, or
        None (after printing the completion message) when no job yields a row.
    """
    for job in jobs:
        # Only jobs produced by the scheduled search of interest
        if not re.search(match_string, job.sid):
            continue
        # Raw-string pattern; skip sids without a timestamp instead of
        # failing with IndexError.
        stamp = re.search(r"\d{10}", job.sid)
        if stamp is None:
            continue
        run_time = int(stamp.group())
        # Keep only the most recent run
        if now_time - run_time >= cron_gap_min * 60:
            continue
        # Wait for the job to finish
        while not job.is_done():
            time.sleep(1)
        for result in results.ResultsReader(job.results()):
            # The reader may yield results.Message diagnostics, which have no
            # keys(); only dict rows are returned.
            if not isinstance(result, dict):
                continue
            # NOTE(review): returns on the first row only, as the original
            # did; confirm whether callers expect every row before changing
            # the return shape.
            return dict(result)
    print("任务完成...")


if __name__ == '__main__':
    data = get_dict_data()

curl api

1.通过调用保存的报表获取数据

# The URL is quoted so the shell does not treat "?" as a glob character.
curl -k -u admin:passwd "https://localhost:8089/servicesNS/admin/search/search/jobs/export?output_mode=json" -d "search=|savedsearch searchname"
# Note: searchname is the name of the report saved in Splunk

2.下载数据

# Download the data of a saved report
curl -k -u admin:passwd https://localhost:8089/servicesNS/admin/search/search/jobs/export -d "search=|savedsearch searchname" -d output_mode=csv -o /tmp/log.csv
# Note: output_mode can also be json

# Download the results of an index search
download_log="curl -k -u admin:passwd https://localhost:8089/servicesNS/admin/search/search/jobs/export -d search=search+index%3dri_history+%7chead+10%7ctable+_time%2csrc_ip%2cdst_ip -d output_mode=csv -o /tmp/log.csv"

# Download the results of an accelerated (tstats) search.
# tstats is a generating command, so the search string must start with "|" (%7c).
download_log="curl -k -u admin:passwd https://localhost:8089/servicesNS/admin/search/search/jobs/export -d search=%7ctstats+count+from+datamodel%3dRI_HIS.DS+by+_time%2cDS.src_ip%2cDS.dst_ip+%7chead+10 -d output_mode=csv -o /tmp/log.csv"