目前需要简单实现一个依靠 nginx 日志分析网站 PV 并进行监测的工具。考虑到直接用 awk 统计输出,会对服务器造成较高的负载、耗时也较长,这里用 Python 简单实现这个需求:
1,利用 stdin 特性,直接采用“<”重定向操作导入日志文件流
2,做成一个可迭代类。可以在该类中进行数据处理并持续输出,比直接用 for/while 更方便扩展
#!/usr/bin/env python
#coding:utf-8
#filename:data.py
'''
解析http日志文件
example: python data.py $1 $2 < log
params : $1 偏移量(切分的第几条数据)
$2 输出TOP-X
'''
#!/usr/bin/env python
#coding:utf-8
# filename: data
# Author: wangwei
# Description: 对php下的access_log日志进行分析
import sys
import re
class Counter(object):
    """Tally how many times each value has been recorded.

    Counts are kept in the ``store`` dict and can also be read as
    attributes: ``counter.GET`` is the count recorded for ``'GET'``, or 0
    when that value was never recorded.
    """

    def __init__(self):
        # Maps each recorded value to the number of times it was put().
        self.store = dict()

    def put(self, value):
        """Increment the count for ``value`` by one.

        :param value: hashable value to count
        """
        # dict.has_key() was removed in Python 3; dict.get() behaves the
        # same on both major versions.
        self.store[value] = self.store.get(value, 0) + 1

    def __getattr__(self, item):
        """Return the count recorded for ``item``, or 0 if never recorded.

        Python only calls this when normal attribute lookup fails.

        :param item: attribute name, used as the key into ``store``
        :raises AttributeError: for dunder names and for ``store`` itself
        """
        # Dunder probes (made by copy/pickle, for example) must fail like
        # any missing attribute instead of yielding the integer 0.  A missing
        # ``store`` must fail too, otherwise ``self.store`` below would
        # re-enter this method and recurse without end.
        if item == 'store' or (item.startswith('__') and item.endswith('__')):
            raise AttributeError(item)
        return self.store.get(item, 0)
class RequestItem(object):
    """Statistics for one key: hit count plus method and request tallies."""

    def __init__(self, method, request):
        """Create an empty statistics record.

        :param method: index of the request-method field in a split log
            line; a value <= 0 disables method counting
        :param request: index of the request-body field in a split log
            line; a value <= 0 disables request counting
        """
        self.num = 0
        self.method = method
        self.request = request
        self.RequestMethod = Counter()
        self.RequestBody = Counter()

    def __str__(self):
        """Return the hit count as a string."""
        return str(self.num)

    def put(self, item):
        """Record one log line.

        The hit count is always incremented.  A line whose method is not
        POST or GET is not counted in either tally.  Fields that the line
        does not have (truncated or malformed lines) are skipped instead of
        raising IndexError.

        :param item: the log line split into a list of fields
        """
        self.num += 1
        if 0 < self.method < len(item):
            # The method field still carries the opening quote of the
            # quoted request string, hence the [1:].
            method = item[self.method].upper()[1:]
            if method not in ('POST', 'GET'):
                return
            self.RequestMethod.put(method)
        if 0 < self.request < len(item):
            self.RequestBody.put(item[self.request])
class RequestMain(object):
    """Collect one RequestItem per key."""

    def __init__(self, method=5, request=8):
        """Create an empty collection.

        :param method: field index handed to every new RequestItem as its
            method offset (default 5, the value previously hard-coded)
        :param request: field index handed to every new RequestItem as its
            request offset (default 8, the value previously hard-coded)
        """
        self.method = method
        self.request = request
        # Maps key -> RequestItem holding that key's statistics.
        self.record = dict()

    def put(self, key, value):
        """Add one log line to the statistics stored under ``key``.

        :param key: grouping key; a RequestItem is created on first use
        :param value: the log line split into a list of fields
        """
        # dict.has_key() was removed in Python 3; ``in`` works everywhere.
        if key not in self.record:
            self.record[key] = RequestItem(self.method, self.request)
        self.record[key].put(value)
class Record(object):
    """Iterator yielding one parsed record per log line read from a stream."""

    def __init__(self, offset, stream=None):
        """Set up the log reader.

        :param offset: index of the whitespace-separated field used as the
            record key
        :param stream: object with a ``readline()`` method to read lines
            from; defaults to ``sys.stdin``
        """
        self.offsets = offset
        self.extension = dict()
        self.stream = sys.stdin if stream is None else stream

    def __iter__(self):
        return self

    def next(self):
        """Return ``(key, fields)`` for the next usable line.

        ``key`` is the field at ``offsets`` with everything from the first
        ``?`` removed; ``fields`` is the whole line split on whitespace.
        Lines that have no field at ``offsets`` (blank or truncated lines)
        are skipped rather than aborting the run with IndexError.

        :raises StopIteration: when the stream is exhausted
        """
        while True:
            data = self.stream.readline()
            if not data:
                raise StopIteration
            fields = data.split()
            try:
                target = fields[self.offsets]
            except IndexError:
                continue
            return target.split('?')[0], fields

    # Python 3 spells the iterator protocol method __next__.
    __next__ = next
if __name__ == '__main__':
    # Usage: python data.py OFFSET [TOPX] < access_log
    #   OFFSET  index of the field to group by
    #   TOPX    number of rows to print (defaults to 10)
    main = RequestMain()
    for record in Record(int(sys.argv[1])):
        main.put(record[0], record[1])
    # Busiest keys first.
    mainRecord = sorted(main.record.items(), key=lambda item: item[1].num, reverse=True)
    try:
        topx = int(sys.argv[2])
    except (IndexError, ValueError):
        # Argument missing or not a number: fall back to the default.
        topx = 10
    # Slice to exactly TOPX rows; the earlier countdown loop printed one
    # row too many.  A negative TOPX prints nothing.
    for url, stats in mainRecord[:max(topx, 0)]:
        # Report whichever of POST/GET was seen more often; ties go to GET.
        if stats.RequestMethod.POST > stats.RequestMethod.GET:
            method = 'POST'
        else:
            method = "GET"
        # %-formatting prints identically on Python 2 and Python 3.
        print('%s %s %s' % (stats, url, method))