单进程日志分析
(一)
离线日志分析
# Python write by yhy
# [10/Aug/2016:03:20:40 +0800]
# %d/%b/%Y:%H:%M:%S %z
import datetime
import re
import pprint
import pygal
# 日志读取
def read_log(path):
    """Lazily yield every line of the log file at *path*."""
    with open(path) as log_file:
        for record in log_file:
            yield record
# 日志解析为字典,转换时间格式
def count_data(key, value):
    """Increment the counter for *key* in the counts dict *value*.

    Mutates *value* in place and returns it (callers rebind the result).
    """
    # dict.get avoids the membership-test-then-index double lookup.
    value[key] = value.get(key, 0) + 1
    return value
# Combined-log-format pattern with named groups: ip, time, method, url,
# HTTP version, status, response length, referer, user agent.
# NOTE(review): ` .*.* ` loosely covers the ident/authuser fields, and the
# class `[\w|/\.\d]*` treats `|` as a literal character — works for typical
# nginx/apache access lines but is not strict.
p = r'(?P<ip>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) .*.* \[(?P<time>.*)\] "(?P<method>\w+) (?P<url>.*) (?P<version>[\w|/\.\d]*)" (?P<status>\d{3}) (?P<length>\d+) "(?P<referer>.*)" "(?P<UA>.*)"'
o = re.compile(p)
def parse_log(path):
    """Yield one field dict per parsable log line, with 'time' rewritten
    from '%d/%b/%Y:%H:%M:%S %z' to '%Y-%m-%d %H:%M:%S'."""
    for raw in read_log(path):
        match = o.search(raw)
        if match is None:
            continue
        record = match.groupdict()
        stamp = datetime.datetime.strptime(record['time'], '%d/%b/%Y:%H:%M:%S %z')
        record['time'] = stamp.strftime('%Y-%m-%d %H:%M:%S')
        yield record
# 以时间为key重构大字典
def analyse_log(path):
    """Aggregate parsed log records into per-timestamp statistics.

    Returns a dict keyed by the record's timestamp string; each value holds
    hit counters for ip/url/UA/status plus total response bytes
    ('throughput') for that timestamp.
    """
    ret = {}

    def init_data():
        # Fresh statistics bucket for one timestamp.
        return {
            'ip': {},
            'url': {},
            'UA': {},
            'status': {},
            'throughput': 0,
        }

    for item in parse_log(path):
        if item['time'] not in ret:
            ret[item['time']] = init_data()
        data = ret[item['time']]
        for key, value in data.items():
            if key != 'throughput':
                data[key] = count_data(item[key], value)
        # BUG FIX: accumulate bytes for this bucket; the original assignment
        # overwrote the total with only the last record's length.
        data['throughput'] += int(item['length'])
    return ret
# 对大字典进行解析,1.保存大字典,2.渲染出图
def render_log(name, x, y):
    """Render a pygal line chart of series *y* over x-axis labels *x*.

    name: chart/series title.  Writes the SVG to a fixed desktop path.
    """
    line = pygal.Line()
    # BUG FIX: pygal's public config attribute is `title`; assigning the
    # private `_title` left the rendered chart untitled.
    line.title = name
    line.x_labels = x
    line.add(name, y)
    line.render_to_file('/Users/yinhuanyi/Desktop/yhy1.svg')
def save_log(ret, path='/Users/yinhuanyi/Desktop/yhy1.log'):
    """Pretty-print the statistics dict *ret* to *path*.

    *path* is now a parameter; the default preserves the original
    hard-coded destination for existing callers.
    """
    with open(path, 'w') as f:
        pprint.pprint(ret, stream=f, indent=4)
if __name__ == '__main__':
    # Analyse the offline access log, persist the aggregated stats, and
    # chart throughput over time.
    path = '/Users/yinhuanyi/PycharmProjects/Python学习笔记/第一天/access.log-20160811'
    ret = analyse_log(path)
    save_log(ret)
    ordered = sorted(ret.items(), key=lambda kv: kv[0])
    table_x = [stamp for stamp, _ in ordered]
    table_y = [stats['throughput'] for _, stats in ordered]
    render_log('throughput', table_x, table_y)
(二)
实时日志分析
统计数据保存于InfluxDB,统计数据展示于Grafana
- 1:通过读取离线日志,实现实时日志滚动效果
# Python write by yhy
import re
import datetime
import threading
# 日志信息读取
def read_log(path):
    """Generator over the raw lines of the file at *path*."""
    handle = open(path)
    try:
        yield from handle
    finally:
        handle.close()
# Convert a log line into a dict: combined-log-format pattern with named
# groups for ip, time, method, url, HTTP version, status, length, referer
# and user agent.
p = r'(?P<ip>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) .*.* \[(?P<time>.*)\] "(?P<method>\w+) (?P<url>.*) (?P<version>[\w|/\.\d]*)" (?P<status>\d{3}) (?P<length>\d+) "(?P<referer>.*)" "(?P<UA>.*)"'
o = re.compile(p)
def parse_log(path):
    """Yield parsed log records, stamping each with the current wall-clock
    time so the replayed log looks live."""
    for raw in read_log(path):
        match = o.search(raw.rstrip('\n'))
        if not match:
            continue
        record = match.groupdict()
        record['time'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        yield record
# 日志写入文件
def data_source(path_src, path_dst, e):
    """Replay the offline log at *path_src* into *path_dst* with fresh
    timestamps until the Event *e* is set.

    Each pass re-reads the whole source log and appends it, so the
    destination keeps growing — simulating a live, rolling access log.
    NOTE: *e* is only checked between full passes, not per line.
    """
    while not e.is_set():
        # PERF FIX: open the destination once per pass instead of
        # reopening it for every single line written.
        with open(path_dst, 'a') as f:
            for item in parse_log(path_src):
                line = '{ip} - - [{time}] "{method} {url} {version}" {status} {length} "{referer}" "{UA}"\n'.format(**item)
                f.write(line)
# 主函数入口
if __name__ == '__main__':
    # Replay the offline log into the data-source file until Ctrl-C.
    path_src = '/Users/yinhuanyi/PycharmProjects/Python学习笔记/第一天/access.log-20160811'
    path_dst = '/Users/yinhuanyi/PycharmProjects/Python学习笔记/第一天/dataSource1.log'
    stop_event = threading.Event()
    try:
        data_source(path_src, path_dst, stop_event)
    except KeyboardInterrupt:
        stop_event.set()
- 2:读取实时日志,统计QPS, throughput, error_rate等指标,并将QPS, throughput, error_rate写入influxDB, 通过Grafana展示(需要先安装influxdb和grafana-server,并且influxdb需要打开8083端口,可以通过web界面访问influxDB)
# Python write by yhy
import datetime
import os
import re
import threading
import time

import requests
# 读取流数据日志
def read_log(path):
    """Follow the file at *path* like `tail -f`: yield the existing lines,
    then keep polling for newly appended ones, forever.

    Resets the read offset when the file shrinks below the last position
    (truncation/rotation).
    """
    offset = 0
    # The original created a threading.Event that nothing ever set, so the
    # loop could never terminate anyway; a plain infinite loop is honest.
    while True:
        with open(path) as f:
            if offset > os.stat(path).st_size:
                # File was truncated or rotated; start over from the top.
                offset = 0
            f.seek(offset)
            yield from f
            offset = f.tell()
        # BUG FIX: pause between polls so an idle log file does not
        # busy-spin a CPU core.
        time.sleep(0.1)
# Convert each log line into a dict: combined-log-format pattern with
# named groups for ip, time, method, url, HTTP version, status, length,
# referer and user agent.
p = r'(?P<ip>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) .*.* \[(?P<time>.*)\] "(?P<method>\w+) (?P<url>.*) (?P<version>[\w|/\.\d]*)" (?P<status>\d{3}) (?P<length>\d+) "(?P<referer>.*)" "(?P<UA>.*)"'
o = re.compile(p)
def parse_log(path):
    """Yield a field dict for every streamed log line the pattern matches."""
    for raw in read_log(path):
        match = o.search(raw.rstrip('\n'))
        if match:
            yield match.groupdict()
# 将汇聚的QPS , throughput ,error_rate数据存储在influxdb上
def send(qps, throughput, error_rate):
    """Write one aggregated sample to InfluxDB (db 'monitor') using the
    line protocol: measurement 'yhyAnalyse', fields qps/throughput/error_rate.
    """
    line = 'yhyAnalyse qps={},throughput={},error_rate={}'.format(qps, throughput, error_rate)
    # BUG FIX: a timeout so a hung/unreachable InfluxDB endpoint cannot
    # block the aggregation loop forever.
    requests.post('http://192.168.23.41:8086/write',
                  data=line,
                  params={'db': 'monitor'},
                  timeout=5)
# 汇聚 QPS , throughput ,error_rate
def aggregate(path, interval=10):
    """Tail the log at *path*, aggregating request count (qps), total
    bytes (throughput) and error ratio over windows of *interval* seconds,
    shipping each completed window to InfluxDB via send().
    """
    qps = 0
    throughput = 0
    error = 0
    start = datetime.datetime.now()
    for item in parse_log(path):
        qps += 1
        # BUG FIX: sum bytes over the window; the original assignment kept
        # only the last record's length.
        throughput += int(item['length'])
        # NOTE(review): `> 300` also counts 3xx redirects as errors —
        # confirm whether `>= 400` was intended.
        if int(item['status']) > 300:
            error += 1
        current = datetime.datetime.now()
        if (current - start).total_seconds() > interval:
            # qps >= 1 here (incremented above), so the division is safe.
            error_rate = error / qps
            send(qps, throughput, error_rate)
            qps = 0
            throughput = 0
            error = 0
            start = current
# 主函数入口
if __name__ == '__main__':
    # Follow the simulated live log and stream windowed stats to InfluxDB.
    path = '/Users/yinhuanyi/PycharmProjects/Python学习笔记/第一天/dataSource1.log'
    aggregate(path)
- 3:效果展示