数据从ELK拉取,因为ELK满足不了需求,写个脚本处理
from elasticsearch import Elasticsearch
import sys
from collections import Counter
import time
import xlsxwriter
import openpyxl
import smtplib
from email.mime.text import MIMEText
from email.mime.image import MIMEImage
from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication
import datetime
yesterday = (datetime.datetime.now() + datetime.timedelta(days=-1)).strftime('%Y.%m.%d')
index='xx.logs-' + yesterday
def get_uri(host,index):
start =time.clock()
#初始化链接
es = Elasticsearch([{'host':'172.20.xx.xx','port':9200},{'host':'172.20.xx.xx','port':9200},{'host':'172.20.xx.xx','port':9200}],timeout=180)
indexes = list(es.indices.get_alias().keys())
if index not in indexes:
print ("ERROR:未找到index:" + index)
exit(22)
#定义DSL请求体
query_json={
"query": {
"bool": {
"must":
[
{
"match_all": {}
},
{
"bool":
{
"should":
[
{
"match_phrase":
{
"http_host": {"query": host}
}
}
]
}
}
]
}
},
"aggs": {
"colors": {
"terms": {
"field": "crossingid",
"size": 100000
}
}
}
}
res=es.search(index=index, body=query_json,scroll='5m',size=100000)
results = res['hits']['hits'] # es查询出的结果第一页
total = res['hits']['total'] # es查询出的结果总量
scroll_id = res['_scroll_id'] # 游标用于输出es查询出的所有结果
#从列表results里面取出http_host和uri放到新的列表里面
logs = []
for r in results:
uri = r['_source']['uri']
logs.append(uri)
logs = []
for i in range(0, int(total/100000+1)):
#从列表results里面取出http_host和uri放到新的列表里面
for r in results:
uri = r['_source']['uri']
logs.append(uri)
# scroll参数必须指定否则会报错
query_scroll = es.scroll(scroll_id=scroll_id,scroll='5m')['hits']['hits']
results = query_scroll
end =time.clock()
print("已从ES获取域名%s的uri,耗时:%s秒"%(host,end-start))
return logs
#上面从ES获取nginx日志的数据,下面是对每个项目的所有uri进行比对,选出所有uri访问的次数
def srv_uri(file):
with open(file) as f:
uri = []
for line in f.readlines():
line = line.strip('\n')
uri.append(line)
return uri
以下
hosts = ["xxxx.com","xxxx.com","xxxx.com","xxxx.com"]
uri1 = get_uri(hosts[0],index)
uri2 = get_uri(hosts[1],index)
uri3 = get_uri(hosts[2],index)
uri4 = get_uri(hosts[3],index)
#uri_all = [hosts[0] + i for i in uri1] + [hosts[1] + i for i in uri1] + [hosts[2] + i for i in uri2] + [hosts[3] + i for i in uri3]
uri_all = uri1 + uri2 + uri3 + uri4
#从以下文件中读接口名称
files = ['xxxx','xxxx','xxxx','xxxx','xxxx']
srv1 = srv_uri(files[0])
srv2 = srv_uri(files[1])
srv3 = srv_uri(files[2])
srv4 = srv_uri(files[3])
srv5 = srv_uri(files[4])
#定义比对函数,比对当天被访问uri和全量uri,筛选出当天未被访问的uri
def compare(uri,srv):
no_uri = []
for i in srv:
if (i not in uri):
no_uri.append(i)
return no_uri
"""
xxxxx的域名:xxxx.com
xxxxx的域名:xxxxx.com
xxxxx的域名:xxxx.com
xxxxx的域名:xxxxx.com
"""
##根据域名对应关系,使用函数compare作筛选
no_uri1 = compare(uri1,srv1)
no_uri2 = compare(uri2,srv2)
no_uri3 = compare(uri3,srv3)
no_uri4 = compare(uri4,srv4)
no_uri5 = compare(uri4,srv5)
no_uri_all = no_uri1 + no_uri2 + no_uri3 + no_uri4 + no_uri5
#将5个服务uri访问的次数和未访问的uri写入excel5个sheet并发送邮件
def get_excel(uri,no_uri,sheet_name):
#打开uri.xlsx,如不存在则创建uri.xlsx然后写入数据
try:
wb = openpyxl.load_workbook('uri.xlsx')
except FileNotFoundError:
##使用counter函数将uri列表的元素个数作统计
datas = Counter(uri).most_common()
##将no_uri中的元素处理后加入datas
for i in no_uri:
datas.append((i,0))
print(datas)
f = xlsxwriter.Workbook('uri.xlsx')
sheet = f.add_worksheet(sheet_name) #创建sheet,sheet_name为sheet名称
#将数据写入第 i 行,第 j 列
i = 0
for data in datas:
for j in range(len(data)):
sheet.write(i,j,data[j])
i = i + 1
f.close() #保存文件
else:
ws = wb.create_sheet(title=sheet_name,index=0)
datas = Counter(uri).most_common()
for i in no_uri:
datas.append((i,0))
i = 0
for data in datas:
for j in range(len(data)):
ws.cell(row=i+1, column=j+1).value=data[j]
i = i + 1
wb.save('uri.xlsx')
#get_excel(uri1,no_uri1,files[0])
#get_excel(uri2,no_uri2,files[1])
#get_excel(uri3,no_uri3,files[2])
#get_excel(uri4,no_uri4,files[3])
#get_excel(uri4,no_uri5,files[4])
###将5个服务uri访问的次数和未访问的uri写入excel一个sheet并发送邮件
def get_excel():
##使用counter函数将uri列表的元素个数作统计
datas = Counter(uri_all).most_common()
##将no_uri中的元素处理后加入datas
for i in no_uri_all:
datas.append((i,0))
f = xlsxwriter.Workbook('uri.xlsx')
#创建sheet,sheet名称为uri
sheet = f.add_worksheet('uri')
#将数据写入第 i 行,第 j 列
i = 0
for data in datas:
for j in range(len(data)):
sheet.write(i,j,data[j])
i = i + 1
f.close()
get_excel()
#发送邮件,附件是上面得到的excel表格
def send_mail(excel):
fromaddr = 'xxxx@xxx.com'
password = 'xxxx'
toaddrs = ['xxx@xxxx.com','xxx@xxxx.com']
content = '附件是昨日接口访问的情况统计,请查收。\n'
textApart = MIMEText(content,_subtype='html', _charset='utf-8')
excelFile1 = excel
excelApart1 = MIMEApplication(open(excelFile1, 'rb').read())
excelApart1.add_header('Content-Disposition', 'attachment', filename=excelFile1)
m = MIMEMultipart()
m.attach(textApart)
m.attach(excelApart1)
#邮件内容设置
#邮件主题
m['Subject'] = '接口访问统计表'
#发送方信息
m['From'] = 'xxx@xxxx.com'
#接受方信息
m['To'] = ",".join(toaddrs)
try:
server = smtplib.SMTP('smtp.xxxx.com')
server.login(fromaddr,password)
server.sendmail(fromaddr, toaddrs, m.as_string())
print('success')
server.quit()
except smtplib.SMTPException as e:
print('error:',e) #打印错误
if __name__ == '__main__':
send_mail("uri.xlsx")