import re
def count_something(log_file, patt):
    """Count how often each pattern match occurs in a text file.

    The file is read line by line. On each line only the first match
    found by ``patt.search`` is counted; lines without a match are
    skipped.

    Args:
        log_file: Path of the file to scan.
        patt: Compiled regular expression (any object whose ``search``
            method returns a match object or ``None``).

    Returns:
        A dict mapping each matched string to the number of lines on
        which it was the first match. Empty if nothing matched.
    """
    counts = {}
    with open(log_file) as fobj:
        for line in fobj:
            m = patt.search(line)
            if m:
                key = m.group()
                # dict.get supplies 0 the first time a key is seen.
                counts[key] = counts.get(key, 0) + 1
    return counts
if __name__ == '__main__':
    # Compile the patterns once up front. The IP pattern is a raw string
    # so that "\d" and "\." reach the regex engine unchanged instead of
    # being treated as (invalid) string escapes.
    apatt = re.compile(r'(\d+\.){3}\d+')  # dotted-quad, e.g. 192.168.1.10
    bpatt = re.compile('Firefox|MSIE')  # browser name in the log line
    log_file = '/var/log/httpd/access_log'
    print(count_something(log_file, apatt))
    print(count_something(log_file, bpatt))
# The code above is the procedural version.
import re
class CountSomething:
    """Callable that counts pattern matches in a text file.

    An instance stores a file path and a compiled pattern; calling the
    instance scans the file and returns the counts.
    """

    def __init__(self, file, patt):
        """Store the inputs; the file is not opened until the call.

        Args:
            file: Path of the file to scan.
            patt: Compiled regular expression (any object whose
                ``search`` method returns a match object or ``None``).
        """
        self.file = file
        self.patt = patt

    def __call__(self):
        """Scan the file and count the first match on each line.

        Returns:
            A dict mapping each matched string to the number of lines
            on which it was the first match. Empty if nothing matched.
        """
        counts = {}
        with open(self.file) as fobj:
            for line in fobj:
                m = self.patt.search(line)
                if m:
                    key = m.group()
                    # dict.get supplies 0 the first time a key is seen.
                    counts[key] = counts.get(key, 0) + 1
        return counts
if __name__ == '__main__':
    log_file = '/var/log/httpd/access_log'
    # Raw string so that "\d" and "\." reach the regex engine unchanged
    # instead of being treated as (invalid) string escapes.
    apatt = re.compile(r'(\d+\.){3}\d+')
    bpatt = re.compile('Firefox|MSIE')
    # One counter object per (file, pattern) pair; calling it runs the scan.
    c = CountSomething(log_file, apatt)
    print(c())
    d = CountSomething(log_file, bpatt)
    print(d())
# The code above is the object-oriented version.
import re
from collections import Counter
def count_thing(file, patt):
    """Count how often each pattern match occurs in a text file.

    The file is read line by line. On each line only the first match
    found by ``patt.search`` is counted; lines without a match are
    skipped.

    Args:
        file: Path of the file to scan.
        patt: Compiled regular expression (any object whose ``search``
            method returns a match object or ``None``).

    Returns:
        A ``collections.Counter`` mapping each matched string to the
        number of lines on which it was the first match.
    """
    c = Counter()
    with open(file) as fobj:
        for line in fobj:
            m = patt.search(line)
            if m:
                # Missing keys count as 0 in a Counter, so a direct
                # increment works without building a one-item list.
                c[m.group()] += 1
    return c
if __name__ == '__main__':
    file = '/var/log/httpd/access_log'
    # Raw string so that "\d" and "\." reach the regex engine unchanged
    # instead of being treated as (invalid) string escapes.
    patt = re.compile(r'(\d+\.){3}\d+')
    print(count_thing(file, patt))
# The code above uses collections.Counter to do the counting.