python做表格的日志分析_python分析日志脚本

#!/usr/bin/env python
# coding: utf-8

import sys
import time

class DisplayFormat(object):
    """Render the fixed-width text report: sizes, header, rules and footer."""

    # Row template with 11 columns: IP, traffic, request count, request
    # share, then one column for each of the status codes 200, 404, 403,
    # 503, 500, 302 and 304.
    formatstring = '%-18s %-10s %-12s %8s %10s %10s %10s %10s %10s %10s %10s'

    def format_size(self, size):
        """Return an integer byte count as a short string with a unit suffix.

        The value is truncated (not rounded) to the largest unit that fits,
        using power-of-two units: ``1536`` -> ``'1K'``.

        NOTE(review): the branch for values below 1 KB was damaged in the
        extracted source; the ``'B'`` suffix is a reconstruction -- confirm
        against the original script.
        """
        KB = 1024            # bytes per KB
        MB = 1048576         # bytes per MB (1024 * 1024)
        GB = 1073741824      # bytes per GB (1024 ** 3)
        TB = 1099511627776   # bytes per TB (1024 ** 4)

        # Test from the largest unit down so each branch needs one bound only.
        if size >= TB:
            return str(size >> 40) + 'T'
        if size >= GB:
            return str(size >> 30) + 'G'
        if size >= MB:
            return str(size >> 20) + 'M'
        if size >= KB:
            return str(size >> 10) + 'K'
        return str(size) + 'B'

    def echo_line(self):
        """Print the horizontal rule drawn under the header and above totals."""
        print(self.formatstring % ('-' * 15, '-' * 10, '-' * 12, '-' * 12,
                                   '-' * 10, '-' * 10, '-' * 10, '-' * 10,
                                   '-' * 10, '-' * 10, '-' * 10))

    def echo_head(self):
        """Print the column titles."""
        print(self.formatstring % ('IP', 'Traffic', 'Time', 'Time%',
                                   200, 404, 403, 503, 500, 302, 304))

    def echo_error(self):
        """Print the command-line usage message."""
        # The extracted source concatenated the pieces without separating
        # spaces, which glued the script name to its neighbours.
        print('Usage: ' + sys.argv[0] + ' filepath [number]')

    def echo_time(self):
        """Print the processor time the script has consumed so far."""
        # time.clock() was removed in Python 3.8; prefer process_time() and
        # fall back to clock() only on interpreters that lack it (Python 2).
        cpu_clock = getattr(time, 'process_time', None) or time.clock
        print('The script is running %s second' % cpu_clock())

class HostInfo(object):
    """Per-host counters: one per tracked status code, plus bytes and requests."""

    # Keys of every host's counter dict: the tracked HTTP status codes,
    # then 'size' (bytes sent) and 'time' (number of requests).
    host_info = ['200', '404', '403', '503', '500', '302', '304', 'size', 'time']

    def __init__(self, host):
        """Create a counter dict with every key in ``host_info`` set to 0.

        ``host`` is accepted for interface compatibility only. The original
        chained assignment (``self.host = host = {...}``) discarded it, so
        ``self.host`` holds the counter dict, not the host name.
        """
        self.host = dict.fromkeys(self.host_info, 0)

    def add_1(self, status_size, is_size):
        """Update one counter.

        * ``status_size == 'time'``: increment the request counter.
        * ``is_size`` true: add ``status_size`` (a byte count) to 'size'.
        * otherwise: increment the counter keyed by ``status_size``;
          raises ``KeyError`` if that key is not in ``host_info``.
        """
        if status_size == 'time':
            self.host['time'] += 1
        elif is_size:
            self.host['size'] += status_size
        else:
            self.host[status_size] += 1

    def get_value(self, value):
        """Return the counter stored under key ``value``."""
        return self.host[value]

class AnalysisFile(object):
    """Aggregate per-host statistics from log lines and compute grand totals."""

    # Status codes that have a matching ``total_<code>`` attribute.
    STATUS_CODES = ('200', '404', '403', '503', '500', '302', '304')

    def __init__(self):
        """Start with no hosts and all grand totals at zero."""
        self.empty = {}               # host -> HostInfo, filled by read_log()
        self.total_request_time = 0   # total number of requests
        self.total_traffic = 0        # total bytes sent
        self.total_200 = 0
        self.total_404 = 0
        self.total_403 = 0
        self.total_503 = 0
        self.total_500 = 0
        self.total_302 = 0
        self.total_304 = 0

    def split_line_todict(self, line):
        """Return whitespace-separated fields 0, 8 and 9 of ``line`` as a dict.

        Keys are 'remote_host', 'status' and 'bytes_sent'. Raises
        ``IndexError`` when the line has fewer than ten fields.
        NOTE(review): the field positions presumably match an nginx/Apache
        access-log layout -- confirm against the real log format.
        """
        fields = line.split()
        return {'remote_host': fields[0],
                'status': fields[8],
                'bytes_sent': fields[9]}

    def read_log(self, logs):
        """Accumulate per-host counters from an iterable of log lines.

        Lines that cannot be split into enough fields are skipped. Returns
        ``self.empty``, the dict mapping each host to its ``HostInfo``.
        """
        for line in logs:
            try:
                record = self.split_line_todict(line)
            except (ValueError, IndexError):
                continue

            host = record['remote_host']
            status = record['status']

            host_info_obj = self.empty.get(host)
            if host_info_obj is None:
                host_info_obj = self.empty[host] = HostInfo(host)

            host_info_obj.add_1('time', False)

            # host_info also lists the 'size' and 'time' keys; exclude them so
            # a malformed status field cannot corrupt those counters.
            if status in host_info_obj.host_info and status not in ('size', 'time'):
                host_info_obj.add_1(status, False)

            # A non-numeric byte field (e.g. '-') counts as zero bytes.
            try:
                bytes_sent = int(record['bytes_sent'])
            except ValueError:
                bytes_sent = 0

            host_info_obj.add_1(bytes_sent, True)

        return self.empty

    def return_sorted_list(self, true_dict):
        """Compute grand totals and return hosts sorted by traffic, then requests.

        ``true_dict`` maps host -> object exposing ``get_value(key)``. It is
        mutated in place: every value is replaced by a plain dict of that
        host's counters. The totals on ``self`` are added to, not reset, so
        calling this twice on the same instance double-counts.

        Returns a list of ``(host, counters_dict)`` tuples in descending
        order of ``(size, time)``.
        """
        for host_key in true_dict:
            host_value = true_dict[host_key]
            requests = host_value.get_value('time')
            size = host_value.get_value('size')
            self.total_request_time += requests
            self.total_traffic += size

            summary = {'size': size, 'time': requests}
            for code in self.STATUS_CODES:
                count = host_value.get_value(code)
                summary[code] = count
                total_name = 'total_' + code
                setattr(self, total_name, getattr(self, total_name) + count)

            # Only values are replaced (no keys added or removed), which is
            # safe while iterating over the dict.
            true_dict[host_key] = summary

        return sorted(true_dict.items(),
                      key=lambda item: (item[1]['size'], item[1]['time']),
                      reverse=True)

class Main(object):
    """Command-line driver: parse arguments, analyse the log, print the report."""

    def main(self):
        """Run the analysis for ``sys.argv[1]`` and print the per-host table.

        ``sys.argv[2]``, when given, limits the table to that many rows; the
        totals row is printed only when the table is not truncated.

        Raises:
            SystemExit: with status 1 after printing the usage message when
                the argument count is wrong, the row limit is not an
                integer, or the log file cannot be opened.
        """
        displayformat = DisplayFormat()

        argc = len(sys.argv)
        if argc not in (2, 3):
            displayformat.echo_error()
            # The original fell through here and crashed on undefined names.
            sys.exit(1)

        log_file = sys.argv[1]
        try:
            lines = int(sys.argv[2]) if argc == 3 else 0
        except ValueError as e:
            print('')
            print(e)
            displayformat.echo_error()
            sys.exit(1)

        fileanalysis = AnalysisFile()
        try:
            # The context manager closes the file on every path.
            with open(log_file, 'r') as files:
                news_dict = fileanalysis.read_log(files)
        except IOError as e:
            print('')
            print(e)
            displayformat.echo_error()
            sys.exit(1)

        new_list = fileanalysis.return_sorted_list(news_dict)

        # Count hosts before truncating so the summary covers all of them.
        total_ip = len(new_list)

        if lines:
            new_list = new_list[0:lines]

        # Summary line: total hosts, total traffic, total requests.
        print('')
        total_request_time = fileanalysis.total_request_time
        total_traffic = displayformat.format_size(fileanalysis.total_traffic)
        print('总IP数量: %s 总的访问流量: %s 总的请求次数: %d' % (total_ip,
                                                               total_traffic,
                                                               total_request_time))

        # Table header and rule.
        print('')
        displayformat.echo_head()
        displayformat.echo_line()

        # One row per host; the share column is the host's fraction of all
        # requests, truncated to five characters.
        for host, stats in new_list:
            requests = stats['time']
            time_percentage = (float(requests) / float(fileanalysis.total_request_time)) * 100
            print(displayformat.formatstring % (
                host,
                displayformat.format_size(stats['size']),
                requests, str(time_percentage)[0:5],
                stats['200'], stats['404'], stats['403'],
                stats['503'], stats['500'], stats['302'], stats['304']))

        # Totals row only makes sense when every host is listed.
        if not lines or total_ip == lines:
            displayformat.echo_line()
            print(displayformat.formatstring % (
                total_ip, total_traffic, total_request_time, '100%',
                fileanalysis.total_200, fileanalysis.total_404,
                fileanalysis.total_403, fileanalysis.total_503,
                fileanalysis.total_500, fileanalysis.total_302,
                fileanalysis.total_304))

        # Elapsed processor time.
        print('')
        displayformat.echo_time()

if __name__ == '__main__':
    # Run the report only when executed as a script, not when imported.
    main = Main()
    main.main()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值