数据格式(每条记录三行:时间、udid、setKey;脚本以空行作为记录之间的分隔):
2015-04-10 15:32:54
udid:user1
setKey:flag1
2015-04-10 15:33:00
udid:user2
setKey:flag2
...
#!/usr/bin/python
import os
import sys
def run(num=1):
    """Aggregate per-day PV/UV from the flowtips log files and print them.

    Files named ``icap/flowtips<i>.txt`` for ``i`` in ``0..num`` (inclusive)
    are read relative to the current working directory; files that do not
    exist are skipped.  Inside a file, a record is a run of consecutive
    non-blank lines, and blank lines separate records.  Records with fewer
    than three fields are ignored.

    For every remaining record the first 10 characters of the first field
    are used as the grouping key, ``pv`` is the number of records per key
    and ``uv`` is the number of distinct second fields per key.  One line
    ``key<TAB>pv<TAB>uv`` is printed per key.

    :param num: highest file index to look for (index 0 is always tried).
    :return: None; the result is written to standard output.
    """
    # file param
    file_prefix = 'icap/flowtips'
    file_ext = '.txt'

    # input
    records = []
    for index in range(num + 1):
        file_path = ''.join([file_prefix, str(index), file_ext])
        if not os.path.exists(file_path):
            continue
        # "with" guarantees the handle is closed even if reading fails.
        with open(file_path, 'r') as handle:
            record = []
            for line in handle:
                text = line.strip()
                if text:
                    # Fields are split on tabs so a line carrying several
                    # tab-separated values yields several fields, exactly
                    # as the former join-then-split round trip did.
                    record.extend(text.split('\t'))
                elif record:
                    records.append(record)
                    record = []
            # Flush the trailing record: a file that does not end with a
            # blank line would otherwise lose its last record.
            if record:
                records.append(record)

    # analysis
    data = {}
    for fields in records:
        if len(fields) < 3:
            continue
        # key: leading 10 characters of the first field
        key = fields[0].strip()[0:10]
        stats = data.get(key)
        if stats is None:
            # first record for this key: start a fresh counter and set
            stats = {'pv': 0, 'uvSet': set()}
            data[key] = stats
        stats['pv'] += 1
        # the set de-duplicates the second field, giving the UV count
        stats['uvSet'].add(fields[1])

    # output
    for key in data:
        stats = data[key]
        print('%s\t%d\t%d' % (key, stats['pv'], len(stats['uvSet'])))
if __name__ == '__main__':
    # Script entry point: the optional first command-line argument is the
    # highest file index to scan; without it the default of 20 is used.
    num = int(sys.argv[1]) if len(sys.argv) > 1 else 20
    run(num)
用dict()按日期(时间的前10个字符)存储统计结果:pv为该日期的记录条数;用set()对udid去重,其元素个数即为uv