为了统计每个月切割出来的 30 个日志文件中访问量最多的 IP 地址并进行排序，整理了下面的脚本。主要思路是：先对每一个日志文件统计各 IP 的访问次数，最后将各个字典合并，计算出整月的 IP 排序。



#!/usr/bin/env python


import os

import sys

import commands

import re






# Per-IP hit counts accumulated across every log file processed so far.
ipsort = {}

# Dotted-quad pattern (four dot-separated groups of 1-3 digits).
# Compiled once at import time instead of on every IpSort() call.
_IP_RE = re.compile(r'\.'.join([r'\d{1,3}'] * 4))


def IpSort(logfile):
    """Tally the leading IP address of each line of *logfile* into ``ipsort``.

    Only lines that *begin* with a dotted-quad address are counted, because
    the pattern is applied with ``match`` (anchored at the start of the line).
    Lines without such a prefix are skipped.

    Args:
        logfile: Path of the log file to read.

    Returns:
        The module-level ``ipsort`` dict, which is updated in place.
    """
    # The context manager closes the handle even if reading fails part-way.
    with open(logfile) as log:
        for line in log:
            match = _IP_RE.match(line)
            if match:
                ip = match.group()
                ipsort[ip] = ipsort.get(ip, 0) + 1
    return ipsort

def ReadFile():
    """Prompt for a directory and feed each ``.txt`` file in it to ``IpSort``.

    The directory path is read interactively from standard input and stripped
    of surrounding whitespace. Every regular file directly inside it whose
    name ends in ``.txt`` is passed to ``IpSort``.

    Raises:
        OSError: If the entered directory cannot be listed.
    """
    filedir = raw_input("Enter the path>").strip()

    for name in os.listdir(filedir):
        if not name.endswith('.txt'):
            continue
        # os.listdir() yields bare names; join with the directory so the
        # file is found regardless of the current working directory.
        path = os.path.join(filedir, name)
        if os.path.isfile(path):
            IpSort(path)






def mergeipnum(*ipns):
    """Merge any number of ``{ip: count}`` dicts by summing counts per IP.

    The merged mapping is printed to standard output and also returned so
    callers can post-process it (for example, sort it by count).

    Args:
        *ipns: Zero or more dicts mapping an IP string to its hit count.

    Returns:
        A new dict mapping each IP seen in any input to its total count.
        The input dicts are not modified.
    """
    merged = {}
    # Single pass over every input; avoids building a concatenated key list.
    for ipn in ipns:
        for ip, count in ipn.items():
            merged[ip] = merged.get(ip, 0) + count

    print(merged)
    return merged





# Script entry point: prompt for the log directory and tally the IPs of its
# .txt files into the module-level ipsort dict.
ReadFile()

# Merge the accumulated tallies (a single dict here) and print the result.
# Must run after ReadFile() so that ipsort has been populated.
mergeipnum(ipsort)