单进程日志分析
(一)
离线日志分析
# Python write by yhy
# [10/Aug/2016:03:20:40 +0800]
# %d/%b/%Y:%H:%M:%S %z
import datetime
import re
import pprint
import pygal
# 日志读取
def read_log(path):
    """Lazily yield every line of the log file at *path*."""
    with open(path) as log_file:
        for record in log_file:
            yield record
# 日志解析为字典,转换时间格式
def count_data(key, value):
    """Increment the counter for *key* in the counts dict *value*.

    Mutates *value* in place and returns it (callers rebind the result).
    """
    # dict.get avoids the membership-test-then-index double lookup.
    value[key] = value.get(key, 0) + 1
    return value
# Combined-log-format pattern with named groups: ip, time, method, url,
# HTTP version, status, response length, referer, user agent.
# NOTE(review): ` .*.* ` loosely covers the ident/authuser fields, and the
# class `[\w|/\.\d]*` treats `|` as a literal character — works for typical
# nginx/apache access lines but is not strict.
p = r'(?P<ip>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) .*.* \[(?P<time>.*)\] "(?P<method>\w+) (?P<url>.*) (?P<version>[\w|/\.\d]*)" (?P<status>\d{3}) (?P<length>\d+) "(?P<referer>.*)" "(?P<UA>.*)"'
o = re.compile(p)
def parse_log(path):
    """Yield one field dict per parsable log line, with 'time' rewritten
    from '%d/%b/%Y:%H:%M:%S %z' to '%Y-%m-%d %H:%M:%S'."""
    for raw in read_log(path):
        match = o.search(raw)
        if match is None:
            continue
        record = match.groupdict()
        stamp = datetime.datetime.strptime(record['time'], '%d/%b/%Y:%H:%M:%S %z')
        record['time'] = stamp.strftime('%Y-%m-%d %H:%M:%S')
        yield record
# 以时间为key重构大字典
def analyse_log(path):
    """Aggregate parsed log records into per-timestamp statistics.

    Returns a dict keyed by the record's timestamp string; each value holds
    hit counters for ip/url/UA/status plus total response bytes
    ('throughput') for that timestamp.
    """
    ret = {}

    def init_data():
        # Fresh statistics bucket for one timestamp.
        return {
            'ip': {},
            'url': {},
            'UA': {},
            'status': {},
            'throughput': 0,
        }

    for item in parse_log(path):
        if item['time'] not in ret:
            ret[item['time']] = init_data()
        data = ret[item['time']]
        for key, value in data.items():
            if key != 'throughput':
                data[key] = count_data(item[key], value)
        # BUG FIX: accumulate bytes for this bucket; the original assignment
        # overwrote the total with only the last record's length.
        data['throughput'] += int(item['length'])
    return ret
# 对大字典进行解析,1.保存大字典,2.渲染出图
def render_log(name, x, y):
    """Render a pygal line chart of series *y* over x-axis labels *x*.

    name: chart/series title.  Writes the SVG to a fixed desktop path.
    """
    line = pygal.Line()
    # BUG FIX: pygal's public config attribute is `title`; assigning the
    # private `_title` left the rendered chart untitled.
    line.title = name
    line.x_labels = x
    line.add(name, y)
    line.render_to_file('/Users/yinhuanyi/Desktop/yhy1.svg')
def save_log(ret, path='/Users/yinhuanyi/Desktop/yhy1.log'):
    """Pretty-print the statistics dict *ret* to *path*.

    *path* is now a parameter; the default preserves the original
    hard-coded destination for existing callers.
    """
    with open(path, 'w') as f:
        pprint.pprint(ret, stream=f, indent=4)
if __name__ == '__main__':
    # Analyse the offline access log, persist the aggregated stats, and
    # chart throughput over time.
    path = '/Users/yinhuanyi/PycharmProjects/Python学习笔记/第一天/access.log-20160811'
    ret = analyse_log(path)
    save_log(ret)
    ordered = sorted(ret.items(), key=lambda kv: kv[0])
    table_x = [stamp for stamp, _ in ordered]
    table_y = [stats['throughput'] for _, stats in ordered]
    render_log('throughput', table_x, table_y)
(二)
实时日志分析
统计数据保存于InfluxDB,统计数据展示于Grafana
- 1:通过读取离线日志,实现实时日志滚动效果
# Python write by yhy
import re
import datetime
import threading
# 日志信息读取
def read_log(path):
    """Generator over the raw lines of the file at *path*."""
    handle = open(path)
    try:
        yield from handle
    finally:
        handle.close()
# Convert a log line into a dict: combined-log-format pattern with named
# groups for ip, time, method, url, HTTP version, status, length, referer
# and user agent.
p = r'(?P<ip>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) .*.* \[(?P<time>.*)\] "(?P<method>\w+) (?P<url>.*) (?P<version>[\w|/\.\d]*)" (?P<status>\d{3}) (?P<length>\d+) "(?P<referer>.*)" "(?P<UA>.*)"'
o = re.compile(p)
def parse_log(path):
    """Yield parsed log records, stamping each with the current wall-clock
    time so the replayed log looks live."""
    for raw in read_log(path):
        match = o.search(raw.rstrip('\n'))
        if not match:
            continue
        record = match.groupdict()
        record['time'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        yield record
# 日志写入文件
def data_source(path_src, path_dst, e):
    """Replay the offline log at *path_src* into *path_dst* with fresh
    timestamps until the Event *e* is set.

    Each pass re-reads the whole source log and appends it, so the
    destination keeps growing — simulating a live, rolling access log.
    NOTE: *e* is only checked between full passes, not per line.
    """
    while not e.is_set():
        # PERF FIX: open the destination once per pass instead of
        # reopening it for every single line written.
        with open(path_dst, 'a') as f:
            for item in parse_log(path_src):
                line = '{ip} - - [{time}] "{method} {url} {version}" {status} {length} "{referer}" "{UA}"\n'.format(**item)
                f.write(line)
# 主函数入口
if __name__ == '__main__':
    # Replay the offline log into the data-source file until Ctrl-C.
    path_src = '/Users/yinhuanyi/PycharmProjects/Python学习笔记/第一天/access.log-20160811'
    path_dst = '/Users/yinhuanyi/PycharmProjects/Python学习笔记/第一天/dataSource1.log'
    stop_event = threading.Event()
    try:
        data_source(path_src, path_dst, stop_event)
    except KeyboardInterrupt:
        stop_event.set()
- 2:读取实时日志,统计QPS, throughput, error_rate等指标,并将QPS, throughput, error_rate写入influxDB, 通过Grafana展示(需要先安装influxdb和grafana-server,并且influxdb需要打开8083端口,可以通过web界面访问influxDB)
# Python write by yhy
import datetime
import os
import re
import threading
import time

import requests
# 读取流数据日志
def read_log(path):
    """Follow the file at *path* like `tail -f`: yield the existing lines,
    then keep polling for newly appended ones, forever.

    Resets the read offset when the file shrinks below the last position
    (truncation/rotation).
    """
    offset = 0
    # The original created a threading.Event that nothing ever set, so the
    # loop could never terminate anyway; a plain infinite loop is honest.
    while True:
        with open(path) as f:
            if offset > os.stat(path).st_size:
                # File was truncated or rotated; start over from the top.
                offset = 0
            f.seek(offset)
            yield from f
            offset = f.tell()
        # BUG FIX: pause between polls so an idle log file does not
        # busy-spin a CPU core.
        time.sleep(0.1)
# Convert each log line into a dict: combined-log-format pattern with
# named groups for ip, time, method, url, HTTP version, status, length,
# referer and user agent.
p = r'(?P<ip>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) .*.* \[(?P<time>.*)\] "(?P<method>\w+) (?P<url>.*) (?P<version>[\w|/\.\d]*)" (?P<status>\d{3}) (?P<length>\d+) "(?P<referer>.*)" "(?P<UA>.*)"'
o = re.compile(p)
def parse_log(path):
    """Yield a field dict for every streamed log line the pattern matches."""
    for raw in read_log(path):
        match = o.search(raw.rstrip('\n'))
        if match:
            yield match.groupdict()
# 将汇聚的QPS , throughput ,error_rate数据存储在influxdb上
def send(qps, throughput, error_rate):
    """Write one aggregated sample to InfluxDB (db 'monitor') using the
    line protocol: measurement 'yhyAnalyse', fields qps/throughput/error_rate.
    """
    line = 'yhyAnalyse qps={},throughput={},error_rate={}'.format(qps, throughput, error_rate)
    # BUG FIX: a timeout so a hung/unreachable InfluxDB endpoint cannot
    # block the aggregation loop forever.
    requests.post('http://192.168.23.41:8086/write',
                  data=line,
                  params={'db': 'monitor'},
                  timeout=5)
# 汇聚 QPS , throughput ,error_rate
def aggregate(path, interval=10):
    """Tail the log at *path*, aggregating request count (qps), total
    bytes (throughput) and error ratio over windows of *interval* seconds,
    shipping each completed window to InfluxDB via send().
    """
    qps = 0
    throughput = 0
    error = 0
    start = datetime.datetime.now()
    for item in parse_log(path):
        qps += 1
        # BUG FIX: sum bytes over the window; the original assignment kept
        # only the last record's length.
        throughput += int(item['length'])
        # NOTE(review): `> 300` also counts 3xx redirects as errors —
        # confirm whether `>= 400` was intended.
        if int(item['status']) > 300:
            error += 1
        current = datetime.datetime.now()
        if (current - start).total_seconds() > interval:
            # qps >= 1 here (incremented above), so the division is safe.
            error_rate = error / qps
            send(qps, throughput, error_rate)
            qps = 0
            throughput = 0
            error = 0
            start = current
# 主函数入口
if __name__ == '__main__':
    # Follow the simulated live log and stream windowed stats to InfluxDB.
    path = '/Users/yinhuanyi/PycharmProjects/Python学习笔记/第一天/dataSource1.log'
    aggregate(path)
- 3:效果展示