# Parse web site access logs using regular expressions.
import re
import datetime
# Sample access-log line used to exercise the parser.
logline = '''192.168.149.133 -- [19/Feb/2020:10:12:56 +0800] "GET /www.baidu.com/zhidao?cmpid=3 HTTP/1.1" 200 16691 "-" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36 SE 2.X MetaSr 1.0"'''

# One named group per log field. This is a raw string so that regex escapes
# such as \d, \S and \[ reach the regex engine untouched instead of being
# parsed as (invalid) Python string escapes, which newer interpreters warn
# about. The pattern text itself is unchanged.
pattern = r'''(?P<srcip>(\d+\.){3}\d+) -- \[(?P<time>[^\[\]]+)\] "(?P<action>\w+) (?P<url>\S+) (?P<protocol>[\w/\d.]+)" (?P<status>\d+) (?P<length>\d+) "(?P<referer>[^"]+)" "(?P<useragent>[^"]+)'''

# Per-field converters keyed by group name; fields without an entry are kept
# as the raw matched string by the caller.
ops = {
    # Re-format the log timestamp as "YYYY-MM-DD HH:MM:SS". The UTC offset is
    # parsed by %z but not included in the output string.
    'time': lambda timestr: datetime.datetime.strptime(
        timestr, '%d/%b/%Y:%H:%M:%S %z'
    ).strftime("%Y-%m-%d %H:%M:%S"),
    'status': int,
    'length': int,
}

# Compiled once at import time so every parsed line reuses the same object.
regex = re.compile(pattern)
def extract(line):
    """Parse one access-log line into a dict of converted fields.

    The line is matched from its start against the module-level compiled
    ``regex``. Each named group is passed through the converter registered
    for it in ``ops``; groups without a converter keep the matched string.

    Args:
        line: A single access-log line.

    Returns:
        A dict mapping each named group to its converted value, or ``None``
        when the line does not match the expected format.
    """
    matcher = regex.match(line)
    if matcher is None:
        # Unparseable line: signal it explicitly instead of failing with an
        # AttributeError from calling .groupdict() on None.
        return None
    return {
        name: ops.get(name, lambda value: value)(text)
        for name, text in matcher.groupdict().items()
    }
# Parse the sample line and display the resulting field dictionary.
result = extract(logline)
print(result)