# 从 ES 统计 nginx 日志:找出每十五分钟内访问次数超过 2000 的客户端 IP
#!/usr/bin/python
# coding=utf-8
import json
from elasticsearch import Elasticsearch
import time
import datetime
#import requests
import warnings
# Client for the Elasticsearch node listening on localhost:9200.
es = Elasticsearch(hosts=["localhost:9200"])
# From this point on, suppress every Python warning.
warnings.filterwarnings(action="ignore")
# Search window: the last fifteen minutes.
#
# The range bounds sent to Elasticsearch carry a "Z" (UTC) suffix, so they are
# derived from the real UTC clock instead of subtracting a hard-coded 8 hours
# from local time, which is only correct on hosts running in UTC+8.
_ES_TIME_FORMAT = '%Y-%m-%dT%H:%M:%S.000Z'
_now_utc = datetime.datetime.now(datetime.timezone.utc)

# Naive local wall-clock time of the same instant (what datetime.now() gives).
t_new = _now_utc.astimezone().replace(tzinfo=None)
# Upper bound of the window: now, as a UTC timestamp string.
t_new2 = _now_utc.strftime(_ES_TIME_FORMAT)
# Lower bound of the window: fifteen minutes ago, as a UTC timestamp string.
t3 = (_now_utc - datetime.timedelta(minutes=15)).strftime(_ES_TIME_FORMAT)
'''
查询ip归属地
'''
#def checkip(ip):
# r = requests.get('https://ip.taobao.com//outGetIpInfo?ip=%s' %ip)
# if r.json()['code'] == 0:
# i = r.json()['data']
# city = i['city']
# print(city)
# else:
# print('未查到归属地')
# WeCom (企业微信) group-robot webhook used to send the alerts
from pip._vendor import requests
# Webhook endpoint that receives the alert messages. The `key` query
# parameter is empty in this file -- presumably filled in at deployment.
boturl = "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key="
headers = {'Content-Type': 'application/json;charset=utf-8'}


def msg(text, timeout=10):
    """Post *text* to the webhook as a plain-text message and print the reply.

    Args:
        text: Message content to send.
        timeout: Seconds to wait for the webhook before giving up.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            webhook does not answer within *timeout* seconds.
    """
    json_text = {
        "msgtype": "text",
        "text": {
            "content": text
        },
    }
    # Without an explicit timeout requests waits forever on a dead endpoint,
    # which would leave a scheduled run of this script hanging.
    response = requests.post(
        boturl, json.dumps(json_text), headers=headers, timeout=timeout
    )
    print(response.content)
# Elasticsearch query bodies, one per monitored domain.
#
# Every domain uses the same search body; only the clause that selects the
# domain's log lines differs, so the shared part is built by one helper.


def _build_top_ip_query(source_filter):
    """Return the search body that counts requests per client IP.

    The body asks for no hits ("size": 0), only a terms aggregation holding
    the 10 most frequent values of ``http_x_forwarded_for.keyword`` in
    descending count order. Documents are limited to the time window
    [t3, t_new2] and to those matching *source_filter*. HEAD requests and
    entries whose forwarded-for value is the literal ``"-"`` are excluded.

    Args:
        source_filter: Query clause selecting the log lines of one domain.

    Returns:
        dict: The request body to pass to ``es.search``.
    """
    return {
        "aggs": {
            "86cde0c2-9312-4698-bc61-1097cb8e2af8": {
                "terms": {
                    "field": "http_x_forwarded_for.keyword",
                    "order": {
                        "_count": "desc"
                    },
                    "size": 10
                }
            }
        },
        "size": 0,
        "fields": [
            {
                "field": "@timestamp",
                "format": "date_time"
            }
        ],
        "script_fields": {},
        "stored_fields": [
            "*"
        ],
        "runtime_mappings": {},
        "_source": {
            "excludes": []
        },
        "query": {
            "bool": {
                "must": [],
                "filter": [
                    {
                        "match_all": {}
                    },
                    {
                        "match_all": {}
                    },
                    source_filter,
                    {
                        "range": {
                            "@timestamp": {
                                "gte": t3,
                                "lte": t_new2,
                                "format": "strict_date_optional_time"
                            }
                        }
                    }
                ],
                "should": [],
                "must_not": [
                    {
                        "match_phrase": {
                            "method.keyword": "HEAD"
                        }
                    },
                    {
                        # The excluded value is the three characters "-",
                        # quotes included.
                        "match_phrase": {
                            "http_x_forwarded_for.keyword": "\"-\""
                        }
                    }
                ]
            }
        }
    }


# js.aaa.com: selected by the path of the access-log file.
es_js = _build_top_ip_query({
    "bool": {
        "minimum_should_match": 1,
        "should": [
            {
                "match_phrase": {
                    "log.file.path.keyword": "/data/nginx/logs/js.access.log"
                }
            },
        ]
    }
})

# qcban.aaa.com: selected by the service field.
es_qcban = _build_top_ip_query({
    "match_phrase": {
        "fields.service.keyword": "qcban-nginx"
    }
})

# Domain name -> query body; the domain name is what appears in the alert.
url_list_str = {
    'qcban.aaa.com': es_qcban,
    'js.aaa.com': es_js,
}
def client_ip_number(body, threshold=2000, domain=None):
    """Run *body* against the nginx log indices and alert on busy client IPs.

    One message is sent through ``msg`` for every aggregation bucket whose
    document count is greater than *threshold*.

    Args:
        body: Elasticsearch search body containing the per-IP terms
            aggregation.
        threshold: Request count a client IP has to exceed to be reported.
        domain: Domain name shown in the alert. When omitted it is looked up
            in ``url_list_str`` by comparing *body* with the registered
            query bodies.
    """
    # Indices are named per day. A fifteen-minute window can straddle a day
    # boundary, and the day may roll over on the local or on the UTC date, so
    # every candidate day is searched; the range filter inside *body* still
    # restricts the documents to the window itself.
    window_start_local = t_new - datetime.timedelta(minutes=15)
    day_suffixes = {
        t_new.strftime('%Y.%m.%d'),
        window_start_local.strftime('%Y.%m.%d'),
        # t3 / t_new2 start with "YYYY-MM-DD" (UTC date of each bound).
        t3[:10].replace('-', '.'),
        t_new2[:10].replace('-', '.'),
    }
    index = ",".join(
        "logstash-nginx_log-" + suffix for suffix in sorted(day_suffixes)
    )
    # Candidate indices that do not exist (yet) are skipped, not fatal.
    query = es.search(index=index, body=body, ignore_unavailable=True)

    try:
        buckets = query['aggregations'][
            '86cde0c2-9312-4698-bc61-1097cb8e2af8']['buckets']
    except KeyError:
        # No aggregation in the reply; make that visible instead of silently
        # reporting "no offenders".
        print('no aggregation result returned for indices: ' + index)
        return

    if domain is None:
        # Reverse lookup done once per call instead of once per bucket; an
        # unregistered body no longer raises while an alert is being built.
        domain = next(
            (name for name, known in url_list_str.items() if known == body),
            'unknown',
        )
    timestamp = t_new.strftime('%Y-%m-%d %H:%M:%S')

    for bucket in buckets:
        client_ip = bucket['key']
        client_number = bucket['doc_count']
        if client_number <= threshold:
            continue
        msg('时间:{}\n域名:{}\n客户端IP:{}\n十五分钟内访问次数:{}\n'.format(
            timestamp, domain, client_ip, client_number))
# Check each registered query body in turn.
for query_body in url_list_str.values():
    client_ip_number(query_body)
# 结果: