通过python实现对es数据的聚合统计最后将数值写入excel文件
#!/usr/bin/python3
#-*- coding: utf-8 -*-
from elasticsearch import Elasticsearch
from openpyxl import load_workbook
es = Elasticsearch(hosts='http://localhost', port=9200, http_auth=('admin', 'admin'), timeout=60)
#es聚合查询数据
para = {
"aggs": {
"date": {
"date_histogram": {
"field" : "timestamp",
"interval" : "month",
"format" : "yyyy-MM-dd"
},
"aggs": {
"ip":{
"terms": {
"field": "ip",
"size":0
},
"aggs": {
"aborad":{
"terms": {
"field": "geoip.country_code",
"size":0
}
}
}
}
}
}
}
}
###分页读取数据
queryData = es.search(index="risk_threaten*", doc_type="doc",scroll ='2m', body=para,size =100)
data = queryData['aggregations']['date']['buckets']
print(data)
sid = queryData['_scroll_id']
scroll_size = queryData['hits']['total']
###时间列表
date_list = []
ip_list = []
queryData = es.scroll(scroll_id=sid, scroll='2m')
#Update the scroll ID
sid = queryData['_scroll_id']
scroll_size = queryData['hits']['total']
wt = load_workbook("D:\\python\\es_risk\\threaten_month.xlsx")
sheet = wt.active
#sheet.cell(row=1, column=1, value="datetime")
#sheet.cell(row=1, column=2, value="")##读取数据
sheet.cell(row=1, column=5, value="ip_abroad_total")
row = 1
for ip_data in data:
date = ip_data['key_as_string']
print(date)
total = ip_data['doc_count']
row += 1
ip_list=[]
# sheet.cell(row=row, column=1, value=date); ###ip后面跟着城市
for list_ip in ip_data['ip']['buckets']:
dst_ip =list_ip['key']
abroad = list_ip['aborad']
for region in abroad['buckets']:
if region['key'] != 'CN':
ip_list.append(dst_ip)
s = 0
for ip in ip_list:
s+=1
print(s)
sheet.cell(row=row, column=5, value=s);
print(ip_list)
##保存文件
wt.save("D:\\python\\es_risk\\threaten_month.xlsx")