Single-Process Log Analysis

(1) Offline Log Analysis


# Python, written by yhy

# [10/Aug/2016:03:20:40 +0800]
# %d/%b/%Y:%H:%M:%S %z
import datetime
import re
import pprint
import pygal

# Read the log file line by line (lazy generator)
def read_log(path):
    with open(path) as f:
        yield from f
# Count occurrences: increment value[key], initializing it to 0 the first time the key is seen
def count_data(key, value):
    if key not in value:
        value[key] = 0
    value[key] += 1
    return value

# Parse each log line into a dict and convert the time format
p = r'(?P<ip>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) .*.* \[(?P<time>.*)\] "(?P<method>\w+) (?P<url>.*) (?P<version>[\w|/\.\d]*)" (?P<status>\d{3}) (?P<length>\d+) "(?P<referer>.*)" "(?P<UA>.*)"'
o = re.compile(p)
def parse_log(path):
    for line in read_log(path):
        m = o.search(line)
        if m:
            data = m.groupdict()
            data['time'] = datetime.datetime.strptime(data['time'], '%d/%b/%Y:%H:%M:%S %z').strftime('%Y-%m-%d %H:%M:%S')
            yield data

# Rebuild one big dict keyed by timestamp
def analyse_log(path):
    ret = {}
    def init_data():
        return {
            'ip': {},
            'url': {},
            'UA': {},
            'status': {},
            'throughput': 0
        }
    for item in parse_log(path):
        if item['time'] not in ret:
            ret[item['time']] = init_data()
        data = ret[item['time']]
        for key, value in data.items():
            if key != 'throughput':
                data[key] = count_data(item[key], value)
        # Accumulate the bytes sent within this second instead of overwriting the total
        data['throughput'] += int(item['length'])
    return ret

# Consume the big dict: 1. save it to a file, 2. render a chart with pygal
def render_log(name, x, y):
    line = pygal.Line()
    line.title = name
    line.x_labels = x
    line.add(name, y)
    line.render_to_file('/Users/yinhuanyi/Desktop/yhy1.svg')

def save_log(ret):
    path = '/Users/yinhuanyi/Desktop/yhy1.log'
    with open(path,'w') as f:
        pprint.pprint(ret,stream=f,indent=4)

if __name__ == '__main__':
    path ='/Users/yinhuanyi/PycharmProjects/Python学习笔记/第一天/access.log-20160811'
    ret = analyse_log(path)
    save_log(ret)
    lst_ret = list(ret.items())
    lst_ret.sort(key=lambda x:x[0])
    table_x = [x[0] for x in lst_ret]
    table_y = [x[1]['throughput'] for x in lst_ret]
    render_log('throughput',table_x,table_y)
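
For reference, each value in the dict returned by analyse_log has the shape produced by init_data(). The following is only a minimal, self-contained sketch; the sample entry below is made up, not taken from the real log, and top_urls is a hypothetical helper showing how the per-second URL counts could be inspected.

# Hypothetical sample of one time bucket in the dict returned by analyse_log
sample_ret = {
    '2016-08-10 03:20:40': {
        'ip': {'1.2.3.4': 2},
        'url': {'/index.html': 1, '/favicon.ico': 1},
        'UA': {'Mozilla/5.0': 2},
        'status': {'200': 2},
        'throughput': 3562,
    },
}

def top_urls(ret, n=3):
    # Sum URL hit counts across all time buckets and return the n most requested URLs
    totals = {}
    for bucket in ret.values():
        for url, count in bucket['url'].items():
            totals[url] = totals.get(url, 0) + count
    return sorted(totals.items(), key=lambda kv: kv[1], reverse=True)[:n]

print(top_urls(sample_ret))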

(2) Real-Time Log Analysis: aggregated metrics are stored in InfluxDB and displayed with Grafana


  • 1: Replay the offline log to simulate a rolling real-time log file (a throttled variant is sketched after this script)
# Python, written by yhy
import re
import datetime
import threading

# Read the log file line by line
def read_log(path):
    with open(path) as f:
        yield from f

# Parse each log line into a dict
p = r'(?P<ip>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) .*.* \[(?P<time>.*)\] "(?P<method>\w+) (?P<url>.*) (?P<version>[\w|/\.\d]*)" (?P<status>\d{3}) (?P<length>\d+) "(?P<referer>.*)" "(?P<UA>.*)"'
o = re.compile(p)
def parse_log(path):
    for line in read_log(path):
        m = o.search(line.rstrip('\n'))
        if m:
            data = m.groupdict()
            data['time'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            yield data

# Rewrite each entry with the current timestamp and append it to the destination file
def data_source(path_src,path_dst,e):
    while not e.is_set():
        for item in parse_log(path_src):
            line = '{ip} - - [{time}] "{method} {url} {version}" {status} {length} "{referer}" "{UA}"\n'.format(**item)
            with open(path_dst,'a') as f:
                f.write(line)
# Entry point
if __name__ == '__main__':
    path_src = '/Users/yinhuanyi/PycharmProjects/Python学习笔记/第一天/access.log-20160811'
    path_dst = '/Users/yinhuanyi/PycharmProjects/Python学习笔记/第一天/dataSource1.log'
    e = threading.Event()
    try:
        data_source(path_src,path_dst,e)
    except KeyboardInterrupt:
        e.set()
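
The replay loop above writes lines back as fast as parse_log can yield them. If a steadier, more log-like rolling effect is wanted, a short pause can be added between writes. The variant below is only a sketch: it reuses parse_log from the script above, and the 0.01 s delay is an arbitrary choice, not part of the original.

import time

# Throttled variant of data_source: same rewriting logic, but sleeps briefly after
# each line and checks the stop event per line instead of per full pass.
def data_source_throttled(path_src, path_dst, e, delay=0.01):
    while not e.is_set():
        for item in parse_log(path_src):
            if e.is_set():
                break
            line = ('{ip} - - [{time}] "{method} {url} {version}" '
                    '{status} {length} "{referer}" "{UA}"\n').format(**item)
            with open(path_dst, 'a') as f:
                f.write(line)
            time.sleep(delay)
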
  • 2: Read the rolling log, aggregate QPS, throughput, and error_rate over an interval, and push them to InfluxDB for display in Grafana (InfluxDB and grafana-server must be installed first, and InfluxDB's port 8083 needs to be open so its web UI can be reached from a browser; a sketch for creating the target database follows this script)
# Python, written by yhy
import threading
import os
import re
import datetime
import requests

# Tail the streaming log: keep yielding new lines from the last read offset
def read_log(path):
    e = threading.Event()
    offset = 0
    while not e.is_set():
        with open(path) as f:
            # If the file shrank (truncated or rotated), start over from the beginning
            if offset > os.stat(path).st_size:
                offset = 0
            f.seek(offset)
            yield from f
            offset = f.tell()

# Parse each log line into a dict
p = r'(?P<ip>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) .*.* \[(?P<time>.*)\] "(?P<method>\w+) (?P<url>.*) (?P<version>[\w|/\.\d]*)" (?P<status>\d{3}) (?P<length>\d+) "(?P<referer>.*)" "(?P<UA>.*)"'
o = re.compile(p)
def parse_log(path):
    for line in read_log(path):
        m = o.search(line.rstrip('\n'))
        if m:
            data = m.groupdict()
            yield data
# Write the aggregated qps, throughput, and error_rate to InfluxDB using the line protocol
def send(qps,throughput,error_rate):
    line = 'yhyAnalyse qps={},throughput={},error_rate={}'.format(qps,throughput,error_rate)
    requests.post('http://192.168.23.41:8086/write', data=line, params={'db':'monitor'})

# Aggregate qps, throughput, and error_rate over a fixed interval (in seconds)
def aggregate(path, interval=10):
    qps = 0
    throughput = 0
    error = 0
    start = datetime.datetime.now()
    for item in parse_log(path):
        qps += 1
        # Accumulate bytes sent instead of overwriting the running total
        throughput += int(item['length'])
        # Count responses with a status code above 300 as errors
        if int(item['status']) > 300:
            error += 1
        current = datetime.datetime.now()
        if (current - start).total_seconds() > interval:
            error_rate = error / qps
            send(qps, throughput, error_rate)
            qps = 0
            throughput = 0
            error = 0
            start = current
# Entry point
if __name__ == '__main__':
    path = '/Users/yinhuanyi/PycharmProjects/Python学习笔记/第一天/dataSource1.log'
    aggregate(path)
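
The /write calls above assume that a database named monitor already exists. As a one-time setup sketch (assuming InfluxDB 1.x running on the same host and port used in send()), it can be created through the HTTP query endpoint:

import requests

# Create the 'monitor' database that send() writes into (InfluxDB 1.x query API)
resp = requests.post('http://192.168.23.41:8086/query',
                     params={'q': 'CREATE DATABASE monitor'})
print(resp.status_code, resp.text)
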
  • 3: Results: once both scripts are running, the qps, throughput, and error_rate series can be charted in a Grafana dashboard backed by the monitor database. (screenshot omitted)
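
For a quick check without Grafana, the stored series can also be read back from the monitor database. The InfluxQL query below is only an illustration (again assuming InfluxDB 1.x); a Grafana panel on the same data source would use an equivalent query.

import requests

# Read back the last hour of metrics written by send() (InfluxDB 1.x query API)
query = 'SELECT "qps", "throughput", "error_rate" FROM "yhyAnalyse" WHERE time > now() - 1h'
resp = requests.get('http://192.168.23.41:8086/query',
                    params={'db': 'monitor', 'q': query})
print(resp.json())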