# Kibana 6 should support this natively; Kibana 5.3 can only export the datatable
# as PDF, which did not meet my needs, so I wrote this simple script.
import csv, yaml, os
import pandas as pd
import numpy as np
from elasticsearch_dsl.connections import connections
# Open a shared Elasticsearch connection; the huge timeout keeps long scroll-free
# bulk reads from being cut off.
es = connections.create_connection(hosts=['http://es ip:es port/'], timeout=99999)

# Modify the cluster configuration; adjust the limit to your own needs.
# Raise the transient script-compilation throttle so the export does not
# trip "too many script compilations" errors.
dsl = {
    "transient": {"script.max_compilations_per_minute": 5000}
}
es.cluster.put_settings(dsl)
# BUG FIX: the bool/range clause and the exists clause previously lived under
# two separate top-level "query" keys in this dict literal. Python keeps only
# the LAST duplicate key, so the time-range filter was silently discarded and
# only the exists filter ever reached Elasticsearch. Both clauses belong in a
# single bool query's "must" list, which is valid ES query DSL.
# (This script reads sub-fields of each hit's _source — i.e. second-level
# fields — but when building match rules they can be addressed as if they
# were top-level fields.)
query = {
    "query": {
        "bool": {
            "must": [
                {
                    "range": {
                        "@timestamp": {
                            "gte": "2018-12-25T15:40:00",
                            "lte": "2018-12-25T19:00:00"
                        }
                    }
                },
                # This field must exist in every fetched document, otherwise
                # the DataFrame's columns cannot be built consistently.
                {"exists": {"field": "aaa"}}
            ]
        }
    },
    "size": 99999  # number of documents to pull
}
# `cols` is only an example here; adjust it to your actual field names.
cols = ['a1', 'a2', 'a3']
# One accumulator list per column. The comprehension guarantees distinct list
# objects and drops the unused aliases (lyst_a1/lyst_a2/lyst_a3) that the
# original chained assignment bound but never referenced.
zip_lyst_variable = [[] for _ in cols]
# Placeholder frame; it is rebuilt from csv_dict once the data is collected.
df = pd.DataFrame(columns=cols)
csv_dict = {}
if __name__ == '__main__':
    # Pull the raw hits; "size" in the query caps how many are returned.
    ini_datatable = es.search(index="topic*-2018.12.25", body=query)["hits"]["hits"]
    for data in ini_datatable:
        source = data['_source']
        for lyst_variable, lyst_name in zip(zip_lyst_variable, cols):
            if lyst_name not in source:
                # BUG FIX: the original warned about the missing key and then
                # indexed it anyway, raising KeyError. Append None instead so
                # every column list stays the same length (shows up as NaN).
                print("Current data does not have the {} key!\nData is shown as follows:\n{}".format(lyst_name,
                                                                                                    source))
                lyst_variable.append(None)
            else:
                # BUG FIX: the original guarded this append with
                # `if lyst_variable.append(...)`; list.append returns None,
                # so that debug branch was dead code.
                lyst_variable.append(source[lyst_name])
    for i, j in zip(cols, zip_lyst_variable):
        csv_dict[i] = j
    df = pd.DataFrame(csv_dict)
    print("df.shape:{}".format(df.shape))
    file_name = "topic*-2018.12.25.csv"
    if os.path.exists(file_name):
        # BUG FIX: the message promised removal but nothing was removed.
        print("{} exists.Will remove it first...".format(file_name))
        os.remove(file_name)
    # BUG FIX: to_csv returns None, so `if not df.to_csv(...)` always printed
    # and looked like a success check it never was. Write, then report.
    df.to_csv(file_name, index=0)
    print("Output the csv: {}".format(file_name))
    # Count lines with a context manager so the file handle is closed.
    with open(file_name, "r") as fp:
        num_of_lines = sum(1 for _ in fp)
    print("The line number of {} is {}.".format(file_name, num_of_lines))