python做表格的日志分析_小项目一---Python日志分析

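# Complete log-analysis code (with a few small modules added)
# Author: Baozi
# -*- coding: utf-8 -*-
# Log analysis project

'''
1. Create a new Python file, test.py.
2. Copy one log entry from the log file for testing; `logline` holds that log string.
'''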

import datetime
import random
import re
import threading
import time
from collections import defaultdict
from pathlib import Path
from queue import Queue

from user_agents import parse

# Sample access-log line the pattern below is written against:
# logline = '''138.60.212.153 - - [19/Feb/2013:10:23:29 +0800] "GET /020/media.html?menu=3 HTTP/1.1" 200 16997 "-" "Mozilla/5.0 (compatible; EasouSpider; +http://www.easou.com/search/spider.html)"'''

# One named group per log field. The group names are chosen to line up with
# the keys of `ops` so each captured string can be converted by name.
# NOTE(review): `remote` (client address) has no converter in `ops`, so its
# name is an assumption -- confirm against any downstream consumer.
# A raw string is used so the regex escapes (\d, \[, \w) are not treated as
# (invalid) string escapes.
pattern = r'''(?P<remote>[\d.]{7,}) - - \[(?P<datetime>[\w/ +:]+)\] "(?P<request>[^"]+)" (?P<status>\d+) (?P<length>\d+) .+ "(?P<useragent>.+)"'''

# Field name -> converter applied to the captured string.
# Fields without an entry are left as plain strings by `extract`.
ops = {
    'datetime': lambda timestr: datetime.datetime.strptime(timestr, '%d/%b/%Y:%H:%M:%S %z'),
    'status': int,
    'length': int,
    # NOTE(review): 'ptorocol' looks like a typo for 'protocol'; it is kept
    # unchanged because it is a runtime dictionary key.
    'request': lambda request: dict(zip(('method', 'url', 'ptorocol'), request.split())),
    # Kept as a lambda so `parse` is only looked up when a line is converted.
    'useragent': lambda useragent: parse(useragent),
}

# Compiled once at import time; reused for every log line.
regex = re.compile(pattern)

def extract(line):
    """Parse one log line into a dict of converted fields.

    The line is matched against the module-level ``regex``; every named
    group is passed through its converter in ``ops`` (groups without a
    converter are returned unchanged).

    Returns:
        The dict of converted fields, or ``None`` when the line does not
        match, so callers can skip malformed lines with a truthiness test.
    """
    matcher = regex.match(line)
    if matcher is None:
        # Previously this fell through to matcher.groupdict() and raised
        # AttributeError on the first malformed line.
        return None

    # groupdict() maps every named group of the match to its captured text.
    fields = matcher.groupdict()
    print(fields)
    return {k: ops.get(k, lambda x: x)(v) for k, v in fields.items()}

def openfile(path: str):
    """Yield the parsed record of every matching line in the file at *path*.

    Each line is handed to ``extract``; lines for which it returns a falsy
    value are skipped.
    """
    with open(path) as f:
        for line in f:
            d = extract(line)
            if d:
                yield d
            # TODO: handle lines that fail to parse (currently dropped).

def load(*path: str):
    """Yield parsed log records from every given file or directory.

    Args:
        *path: File or directory paths. Paths that do not exist are skipped.
            For a directory, only the regular files directly inside it are
            read (no recursion).

    Yields:
        The records produced by ``openfile`` for each file, in order.
    """
    for file in path:
        p = Path(file)
        if not p.exists():
            continue

        if p.is_dir():
            for x in p.iterdir():
                # Was `x.if_file()`, which raises AttributeError on the
                # first directory entry.
                if x.is_file():
                    yield from openfile(str(x))
        elif p.is_file():
            yield from openfile(str(p))
################################### Sliding-window implementation ##############################################

def windows(src: Queue, handler, width: int, interval: int):
    """Consume records from *src* and run *handler* over a sliding time window.

    Runs forever: each iteration blocks on ``src.get()``. Whenever the
    ``'datetime'`` of the newest record is at least *interval* seconds past
    the previous evaluation point, ``handler(buffer)`` is called and its
    result printed; the buffer is then trimmed to the records that still
    overlap the next window.

    Args:
        src: Queue-like object whose ``get()`` returns record dicts carrying
            a ``'datetime'`` key.
        handler: Callable taking the list of buffered records.
        width: Window width in seconds.
        interval: Seconds between two evaluations.
    """
    # Far-past sentinels so the first real record triggers an evaluation.
    start = datetime.datetime.strptime('1971/01/01 00:00:00 +0800', '%Y/%m/%d %H:%M:%S %z')
    current = datetime.datetime.strptime('1971/01/01 00:00:01 +0800', '%Y/%m/%d %H:%M:%S %z')
    buffer = []  # records waiting to be evaluated in the current window

    # Overlap between consecutive windows: records newer than
    # `current - delta` are carried over into the next window.
    delta = datetime.timedelta(seconds=width - interval)

    while True:
        data = src.get()
        if not data:
            # Falsy items carry no timestamp; nothing about the window changed.
            continue

        buffer.append(data)
        current = data['datetime']

        if (current - start).total_seconds() >= interval:
            ret = handler(buffer)
            print(ret)
            start = current
            # Keep only the records that fall inside the overlap.
            buffer = [x for x in buffer if x['datetime'] > current - delta]

# Handler functions

# Status-code analysis
def status_handler(iterable):
    """Return the percentage share of each HTTP status in one window.

    Args:
        iterable: Records of one time window; each must have a ``'status'``
            key.

    Returns:
        Dict mapping status -> percentage (0-100) of records with that
        status. Empty input yields an empty dict.
    """
    status = {}
    for item in iterable:
        key = item['status']
        # Accumulate: the previous `status[key] = 1` reset the count on every
        # record, so every status ended up with an equal share.
        status[key] = status.get(key, 0) + 1

    total = sum(status.values())
    return {k: v / total * 100 for k, v in status.items()}

# Browser analysis

# Running tally of (browser family, version string) -> number of records seen.
# Module-level, so counts accumulate across successive handler calls.
ua_dict = defaultdict(lambda: 0)


def browser_handler(iterable: list):
    """Count the records of one window per browser family and version.

    Args:
        iterable: Records whose ``'useragent'`` value exposes
            ``browser.family`` and ``browser.version_string``.

    Returns:
        The shared ``ua_dict`` tally, updated in place.
    """
    for item in iterable:
        ua = item['useragent']
        key = (ua.browser.family, ua.browser.version_string)
        # Increment: the previous `ua_dict[key] = 1` never counted past 1.
        ua_dict[key] += 1

    return ua_dict

def handler(iterable):
    """Return the arithmetic mean of the ``'value'`` field of the records.

    Raises:
        ZeroDivisionError: If *iterable* is empty.
    """
    vals = [x['value'] for x in iterable]
    return sum(vals) / len(vals)

def donothing_handler(iterable: list):
    """Print the window's records and return them unchanged."""
    print(iterable)
    return iterable
########################### Data dispatcher implementation #####################################

def dispatcher(src):
    """Build a simple one-to-many dispatcher over *src*.

    Every item read from *src* is copied to the queue of each registered
    consumer, so one record reaches all n consumers.

    Args:
        src: Iterable of records to distribute.

    Returns:
        A ``(req, run)`` pair: ``req(handler, width, interval)`` registers a
        consumer; ``run()`` starts the consumers and feeds them.
    """
    queues = []
    threads = []

    def req(handler, width, interval):
        """Register a window consumer with its own queue and thread."""
        q = Queue()
        queues.append(q)
        t = threading.Thread(target=windows, args=(q, handler, width, interval))
        threads.append(t)

    def run():
        """Start every registered consumer thread, then fan out *src*."""
        for t in threads:
            t.start()

        for x in src:  # each record goes to every consumer's own queue
            for q in queues:
                q.put(x)

    return req, run

# Build a dispatcher over the parsed records of test.log.
req, run = dispatcher(load('test.log'))

# Register window consumers: req(handler, width, interval).
# req(donothing_handler, 1, 1)
# req(status_handler, 2, 2)
req(browser_handler, 2, 2)

# Start the consumers and feed them.
run()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值