test.py
#!/usr/bin/python
import os
import sys
import string
import json
#awk -F ',' 'substr($0,21,2)=='14'{print $0}' * > 14.log
def run(logfile='14.log', hours=None):
    """Print per-key PV/UV counts gathered from a comma-separated log file.

    A line is counted only when all of the following hold:

    * its first 13 characters (``YYYY-MM-DD HH``) are one of *hours*;
    * it splits into exactly 7 comma-separated fields;
    * its third field and its sixth field are both non-empty.

    The sixth field is the grouping key.  For each key, ``pv`` is the number
    of counted lines and ``uv`` is the number of distinct third-field values
    (the original comments call these fields "setkey" and "log id").

    One line per key is printed as ``<key> pv:<n> uv:<m>``; nothing is
    returned.

    Args:
        logfile: Path of the log file to read.  Defaults to ``'14.log'``.
        hours: Iterable of ``'YYYY-MM-DD HH'`` prefixes to accept.  Defaults
            to the nine hours ``2015-04-29 00`` through ``2015-04-29 08``.

    Raises:
        SystemExit: With status 1, after printing an error and a hint, when
            *logfile* does not exist.
    """
    # print() with a single parenthesised argument emits the same text under
    # both Python 2 and Python 3.
    if not os.path.exists(logfile):
        print('error:' + logfile + ' not existed.')
        print('hint:awk -F \',\' \'substr($0,21,2)==\'14\'{print $0}\' WB_LOG_LOAD* > 14.log')
        # sys.exit is always available (the bare exit() helper is installed
        # by the site module only); a non-zero status signals the failure.
        sys.exit(1)

    if hours is None:
        hours = ['2015-04-29 %02d' % hour for hour in range(9)]
    dts = set(hours)

    res = {}
    # The with-block closes the file even if a line fails to process.
    with open(logfile, 'r') as f:
        for line in f:
            if line[0:13] not in dts:
                continue
            arr = line.split(',')
            if len(arr) != 7:
                continue
            # Both the third field and the grouping key must be present.
            if not (arr[2] and arr[5]):
                continue
            key = arr[5]
            item = res.get(key)
            if item is None:
                item = {'pv': 0, 'uvSet': set()}
                res[key] = item
            item['pv'] += 1
            item['uvSet'].add(arr[2])

    for key in res:
        print(key + ' pv:' + str(res[key]['pv']) + ' uv:' + str(len(res[key]['uvSet'])))
main.py
#!/usr/bin/python
import os
import sys
import test
if __name__ == '__main__':
    # An optional first command-line argument is parsed as an integer
    # (default 20).  NOTE(review): the value is not used by the call below.
    num = int(sys.argv[1]) if len(sys.argv) > 1 else 20
    test.run()