import datetime
import re
# Sample access-log line used as the input for test_v1() and test_v2().
# Whitespace-separated layout: remote address, two "-" placeholders, a
# [bracketed] timestamp, a "quoted" request, status, size, a quoted field
# that the parsers below leave unnamed, and a quoted user-agent string.
login = '''192.168.1.1 - - [19/Feb/2020:10:23:39] "GET /o2o/media.html?menu=3 HTTP/1.1" 200 16691 "-" "Mozillia/5.0 (compatible; EasouSpider; +http://www.csdn.net/search/spider.html)"'''
def convert_time(timestr):
    """Parse an access-log timestamp into a naive ``datetime.datetime``.

    :param timestr: timestamp text without the surrounding brackets, in the
        form ``dd/Mon/YYYY:HH:MM:SS`` (for example ``19/Feb/2020:10:23:39``).
    :returns: the corresponding ``datetime.datetime`` (no timezone attached).
    :raises ValueError: if ``timestr`` does not match the expected format.
    """
    # NOTE: "%b" is matched against the current locale's abbreviated month
    # names, so English names such as "Feb" need an English/C locale.
    return datetime.datetime.strptime(timestr, "%d/%b/%Y:%H:%M:%S")
def convert_request(request):
    """Split an HTTP request line into its named parts.

    :param request: request text such as ``GET /index.html HTTP/1.1``.
    :returns: dict with the keys ``method``, ``url`` and ``protocol`` mapped
        to the whitespace-separated tokens in that order.  ``zip`` stops at
        the shorter sequence, so missing trailing tokens simply leave their
        keys out and tokens beyond the third are ignored.
    """
    return dict(zip(('method', 'url', 'protocol'), request.split()))
def extractv1(lines):
    """Split a log line into fields, keeping delimited fields together.

    The text is split on whitespace.  A token that starts with ``[`` or ``"``
    opens a delimited field, which runs until a token ending with the
    matching closer (``]`` for ``[``, ``"`` for ``"``).  The delimiters are
    stripped and the tokens in between are re-joined with single spaces.

    :param lines: the raw log text to split.
    :returns: list of field strings in the order they appear.  A delimited
        field that is still open when the input ends is dropped.
    """
    closers = {"[": "]", '"': '"'}
    fields = []
    parts = None    # tokens of the delimited field being built, else None
    closer = None   # delimiter that ends the field currently being built
    for token in lines.split():
        if parts is None:
            closer = closers.get(token[:1])
            if closer is None:
                # Plain, undelimited field.
                fields.append(token)
            elif len(token) > 1 and token.endswith(closer):
                # Opened and closed within one token, e.g. "-" or [x].
                fields.append(token[1:-1])
            else:
                parts = [token[1:]]
        elif token.endswith(closer):
            parts.append(token[:-1])
            fields.append(" ".join(parts))
            parts = None
        else:
            parts.append(token)
    return fields
def test_v1():
    """Parse the sample ``login`` line with extractv1() and print the result.

    Each positional field is looked up in ``names``/``ops``: fields with an
    empty name are skipped (they would otherwise all collide on the key
    ``''``), and fields with a converter are passed through it before being
    stored.  The resulting dict is printed to stdout.
    """
    names = ['remote', '', '', 'datetime', 'request', 'status', 'size', '', 'useragent']
    ops = [None, None, None, convert_time, convert_request, int, int, None, None]
    d = {}
    for i, field in enumerate(extractv1(login)):
        key = names[i]
        if not key:
            # Unnamed positional field: not part of the reported record.
            continue
        convert = ops[i]
        d[key] = field if convert is None else convert(field)
    # Single-argument call form prints identically on Python 2 and 3.
    print(d)
# Compiled once at import time.  Raw string so the regex escapes (\d, \., \[)
# reach the regex engine verbatim instead of relying on Python passing
# unknown string escapes through.
_LOG_PATTERN = re.compile(
    r'''(?P<remote>[\d\.]{7,}) - - \[(?P<datetime>[^\[\]]+)\] "(?P<request>[^"]+)" (?P<status>\d+) (?P<size>\d+) "([^"]+)" "(?P<useragent>[^"]+)"'''
)


def extractv2(line):
    """Extract the named fields of one access-log line with a regex.

    :param line: the log line; it must match from its first character.
    :returns: dict with the keys ``remote``, ``datetime``, ``request``,
        ``status``, ``size`` and ``useragent``, all as strings.  The quoted
        field between size and user agent is matched but unnamed, so it is
        not included.
    :raises AttributeError: if ``line`` does not match the pattern
        (``match()`` returns ``None`` in that case).
    """
    matcher = _LOG_PATTERN.match(line)
    return matcher.groupdict()
def test_v2():
    """Parse the sample ``login`` line with extractv2() and print the result.

    Fields listed in ``ops`` are run through their converter; every other
    field is kept as the matched string.  The resulting dict is printed to
    stdout.
    """
    ops = {
        'datetime': convert_time,
        'status': int,
        "size": int,
        'request': convert_request,
    }
    # Fields without a converter fall back to the identity function.
    d = {k: ops.get(k, lambda x: x)(v) for k, v in extractv2(login).items()}
    # Single-argument call form prints identically on Python 2 and 3.
    print(d)
def main():
    """Script entry point: run the regex-based demo, test_v2()."""
    test_v2()
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
# Article title: python - log analysis
# Page footer: latest recommended article published on 2024-08-05 11:21:18