python-日志分析

import datetime
import re

# Sample access-log line used by the demo functions in this file.
# Layout: remote address, two "-" placeholders, bracketed timestamp,
# quoted request line, status code, response size, quoted "-" field,
# quoted user-agent string.
login = (
    '192.168.1.1 - - [19/Feb/2020:10:23:39] '
    '"GET /o2o/media.html?menu=3 HTTP/1.1" 200 16691 "-" '
    '"Mozillia/5.0 (compatible; EasouSpider; '
    '+http://www.csdn.net/search/spider.html)"'
)


def convert_time(timestr):
    """Parse a log timestamp such as ``19/Feb/2020:10:23:39``.

    Returns a naive ``datetime.datetime``; ``strptime`` raises
    ``ValueError`` when *timestr* does not follow the expected layout.
    """
    layout = "%d/%b/%Y:%H:%M:%S"
    parse = datetime.datetime.strptime
    return parse(timestr, layout)


def convert_request(request):
    """Break a request line into its ``method``, ``url`` and ``protocol``.

    The line is split on whitespace and the parts are paired with the
    three keys in order; missing trailing parts simply leave their keys
    out, and parts beyond the third are ignored.
    """
    keys = ('method', 'url', 'protocol')
    parts = request.split()
    return {key: part for key, part in zip(keys, parts)}


def extractv1(lines):
    """Split one access-log line into a list of field strings.

    The line is tokenised on whitespace.  A token starting with ``[`` or
    ``"`` opens a delimited field that runs until a token ending with the
    matching closer (``]`` for ``[``, ``"`` for ``"``); the delimiters are
    stripped and the tokens of the field are joined with single spaces.
    Every other token is a field of its own.

    Parameters:
        lines: the text of a single log line.

    Returns:
        The fields in order of appearance.  An empty input yields ``[]``.
        A delimited field that is never closed is emitted with whatever
        content was collected instead of being dropped.
    """
    closers = {'[': ']', '"': '"'}
    fields = []
    parts = []      # tokens collected for the delimited field in progress
    closer = None   # closing delimiter awaited; None when outside a field
    for token in lines.split():
        if closer is None:
            opener = token[0]
            if opener not in closers:
                fields.append(token)
                continue
            # Start a delimited field and remember which closer ends it, so
            # that e.g. a ']' inside a quoted field does not terminate it.
            closer = closers[opener]
            token = token[1:]
        if token.endswith(closer):
            parts.append(token[:-1])
            fields.append(" ".join(parts))
            parts = []
            closer = None
        else:
            parts.append(token)
    if closer is not None:
        # Unterminated field: keep the collected text rather than lose it.
        fields.append(" ".join(parts))
    return fields


def test_v1():
    """Demo: parse the sample line with ``extractv1`` and print the record.

    Each positional field is paired with a name and an optional converter.
    Fields whose name is empty are placeholders and are left out of the
    record, so they no longer overwrite each other under the ``''`` key.
    """
    # (name, converter) for each positional field; '' marks a field to skip.
    schema = [
        ('remote', None),
        ('', None),
        ('', None),
        ('datetime', convert_time),
        ('request', convert_request),
        ('status', int),
        ('size', int),
        ('', None),
        ('useragent', None),
    ]
    d = {}
    for i, field in enumerate(extractv1(login)):
        key, op = schema[i]
        if not key:
            continue
        d[key] = field if op is None else op(field)
    # Single-argument print(...) gives the same output whether print is a
    # statement (Python 2) or a function (Python 3).
    print(d)


# Compiled once at import time.  Named groups: remote, datetime, request,
# status, size, useragent.  The quoted field before the user agent is
# captured by an unnamed group and therefore absent from groupdict().
_LOG_PATTERN = re.compile(
    r'''(?P<remote>[\d\.]{7,}) - - \[(?P<datetime>[^\[\]]+)\] "(?P<request>[^"]+)" (?P<status>\d+) (?P<size>\d+) "([^"]+)" "(?P<useragent>[^"]+)"'''
)


def extractv2(line):
    """Parse one access-log line with a regular expression.

    Parameters:
        line: the text of a single log line; matching is anchored at the
            start of the string.

    Returns:
        A dict mapping ``remote``, ``datetime``, ``request``, ``status``,
        ``size`` and ``useragent`` to the matched substrings (all strings;
        no type conversion is done here).

    Raises:
        ValueError: if *line* does not match the expected layout.
    """
    matcher = _LOG_PATTERN.match(line)
    if matcher is None:
        raise ValueError("unrecognized log line: %r" % (line,))
    return matcher.groupdict()


def test_v2():
    """Demo: parse the sample line with ``extractv2`` and print the record.

    Values whose key has an entry in ``ops`` are passed through that
    converter; all other values are kept as the matched strings.
    """
    ops = {
        'datetime': convert_time,
        'status': int,
        "size": int,
        'request': convert_request
    }
    d = {}
    for k, v in extractv2(login).items():
        convert = ops.get(k)
        d[k] = v if convert is None else convert(v)
    # Single-argument print(...) gives the same output whether print is a
    # statement (Python 2) or a function (Python 3).
    print(d)


def main():
    """Run the regex-based parsing demo (``test_v2``)."""
    test_v2()


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值