python3从ES统计域名uri访问次数

12 篇文章 0 订阅
10 篇文章 0 订阅
本文介绍如何使用Python脚本从ELK(Elasticsearch, Logstash, Kibana)获取特定域名的日志uri,并通过对比筛选出未访问的uri,最后将结果整理成Excel并发送邮件通知。
摘要由CSDN通过智能技术生成

数据从ELK拉取,因为ELK满足不了需求,写个脚本处理

from elasticsearch import Elasticsearch
import sys
from collections import Counter
import time
import xlsxwriter
import openpyxl
import smtplib
from email.mime.text import MIMEText
from email.mime.image import MIMEImage
from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication 
import datetime

yesterday = (datetime.datetime.now() + datetime.timedelta(days=-1)).strftime('%Y.%m.%d')
index='xx.logs-' + yesterday

def get_uri(host,index):
        start =time.clock()
        #初始化链接
        es = Elasticsearch([{'host':'172.20.xx.xx','port':9200},{'host':'172.20.xx.xx','port':9200},{'host':'172.20.xx.xx','port':9200}],timeout=180)

        indexes = list(es.indices.get_alias().keys())
        if index not in indexes:
            print ("ERROR:未找到index:" + index)
            exit(22)
        #定义DSL请求体
        query_json={
          "query": {
            "bool": {
              "must": 
              [
                {
                  "match_all": {}
                },
                {
                "bool": 
                {
                  "should": 
                  [
                   {
                      "match_phrase":
                      {
                      "http_host": {"query": host}
                      }
                    }
                 ]
                }
               }
              ]
            }
          },
          "aggs": {
            "colors": {
              "terms": {
                "field": "crossingid",
                "size": 100000
              }
            }
          }
        }
        res=es.search(index=index, body=query_json,scroll='5m',size=100000)
        results = res['hits']['hits'] # es查询出的结果第一页
        total = res['hits']['total']  # es查询出的结果总量
        scroll_id = res['_scroll_id'] # 游标用于输出es查询出的所有结果


        #从列表results里面取出http_host和uri放到新的列表里面
        logs = []
        for r in results:
            uri = r['_source']['uri']
            logs.append(uri)



        logs = []

        for i in range(0, int(total/100000+1)):
            #从列表results里面取出http_host和uri放到新的列表里面
            for r in results:
                uri = r['_source']['uri']
                logs.append(uri)
            
            # scroll参数必须指定否则会报错
            query_scroll = es.scroll(scroll_id=scroll_id,scroll='5m')['hits']['hits']
            results = query_scroll

        end =time.clock()
        print("已从ES获取域名%s的uri,耗时:%s秒"%(host,end-start))
        return logs


#上面从ES获取nginx日志的数据,下面是对每个项目的所有uri进行比对,选出所有uri访问的次数

def srv_uri(file):
    with open(file) as f:
        uri = []
        for line in f.readlines():
            line = line.strip('\n')
            uri.append(line)
    return uri

以下
hosts = ["xxxx.com","xxxx.com","xxxx.com","xxxx.com"]

uri1 = get_uri(hosts[0],index)
uri2 = get_uri(hosts[1],index)
uri3 = get_uri(hosts[2],index)
uri4 = get_uri(hosts[3],index)
#uri_all = [hosts[0] + i for i in uri1] + [hosts[1] + i for i in uri1] + [hosts[2] + i for i in uri2] + [hosts[3] + i for i in uri3]
uri_all = uri1 + uri2 + uri3 + uri4
#从以下文件中读接口名称
files = ['xxxx','xxxx','xxxx','xxxx','xxxx']

srv1 = srv_uri(files[0])
srv2 = srv_uri(files[1])
srv3 = srv_uri(files[2])
srv4 = srv_uri(files[3])
srv5 = srv_uri(files[4])

#定义比对函数,比对当天被访问uri和全量uri,筛选出当天未被访问的uri
def compare(uri,srv):
    no_uri = []
    for i in srv:
        if (i not in uri):
            no_uri.append(i)
    return no_uri


"""
xxxxx的域名:xxxx.com
xxxxx的域名:xxxxx.com
xxxxx的域名:xxxx.com
xxxxx的域名:xxxxx.com
"""

##根据域名对应关系,使用函数compare作筛选
no_uri1 = compare(uri1,srv1)
no_uri2 = compare(uri2,srv2)
no_uri3 = compare(uri3,srv3)
no_uri4 = compare(uri4,srv4)
no_uri5 = compare(uri4,srv5)
no_uri_all = no_uri1 + no_uri2 + no_uri3 + no_uri4 + no_uri5

#将5个服务uri访问的次数和未访问的uri写入excel5个sheet并发送邮件
def get_excel(uri,no_uri,sheet_name):
    #打开uri.xlsx,如不存在则创建uri.xlsx然后写入数据
    try:
        wb = openpyxl.load_workbook('uri.xlsx')
    except FileNotFoundError:
        ##使用counter函数将uri列表的元素个数作统计
        datas = Counter(uri).most_common()

        ##将no_uri中的元素处理后加入datas
        for i in no_uri:
            datas.append((i,0))

        print(datas)
        f = xlsxwriter.Workbook('uri.xlsx')
        sheet = f.add_worksheet(sheet_name) #创建sheet,sheet_name为sheet名称

        #将数据写入第 i 行,第 j 列
        i = 0
        for data in datas:
            for j in range(len(data)):
                sheet.write(i,j,data[j])
            i = i + 1

        f.close() #保存文件
    else:
        ws = wb.create_sheet(title=sheet_name,index=0)
        datas = Counter(uri).most_common()
        for i in no_uri:
            datas.append((i,0))

        i = 0
        for data in datas:
            for j in range(len(data)):
                ws.cell(row=i+1, column=j+1).value=data[j]
            i = i + 1

        wb.save('uri.xlsx')


#get_excel(uri1,no_uri1,files[0])
#get_excel(uri2,no_uri2,files[1])
#get_excel(uri3,no_uri3,files[2])
#get_excel(uri4,no_uri4,files[3])
#get_excel(uri4,no_uri5,files[4])


###将5个服务uri访问的次数和未访问的uri写入excel一个sheet并发送邮件
def get_excel():
        ##使用counter函数将uri列表的元素个数作统计
        datas = Counter(uri_all).most_common()

        ##将no_uri中的元素处理后加入datas
        for i in no_uri_all:
            datas.append((i,0))

        f = xlsxwriter.Workbook('uri.xlsx')
        #创建sheet,sheet名称为uri
        sheet = f.add_worksheet('uri')

        #将数据写入第 i 行,第 j 列
        i = 0
        for data in datas:
            for j in range(len(data)):
                sheet.write(i,j,data[j])
            i = i + 1

        f.close()


get_excel()


#发送邮件,附件是上面得到的excel表格
def send_mail(excel):
        fromaddr = 'xxxx@xxx.com'
        password = 'xxxx'
        toaddrs = ['xxx@xxxx.com','xxx@xxxx.com']
        content = '附件是昨日接口访问的情况统计,请查收。\n'
        textApart = MIMEText(content,_subtype='html', _charset='utf-8')
    
        excelFile1 = excel
        excelApart1 = MIMEApplication(open(excelFile1, 'rb').read())
        excelApart1.add_header('Content-Disposition', 'attachment', filename=excelFile1)
    
 
        m = MIMEMultipart()
        m.attach(textApart)
        m.attach(excelApart1)
        #邮件内容设置
        #邮件主题
        m['Subject'] = '接口访问统计表'
        #发送方信息
        m['From'] = 'xxx@xxxx.com'
        #接受方信息
        m['To'] = ",".join(toaddrs)
 
        try:
            server = smtplib.SMTP('smtp.xxxx.com')
            server.login(fromaddr,password)
            server.sendmail(fromaddr, toaddrs, m.as_string())
            print('success')
            server.quit()
        except smtplib.SMTPException as e:
            print('error:',e) #打印错误

if __name__ == '__main__':
    send_mail("uri.xlsx")

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值