#!/usr/bin/env python
def hadoopMap(fd):
    """Count how many times each item yielded by ``fd`` occurs.

    Args:
        fd: Any iterable of hashable items. The script below passes open
            text files, whose iteration yields one line at a time.

    Returns:
        A plain dict mapping each distinct item to its occurrence count.
        Items are used as keys verbatim (nothing is stripped), so for a
        file a final line lacking a line terminator is counted separately
        from an otherwise identical terminated line.
    """
    lines = {}
    for line in fd:
        # dict.get replaces the Python-2-only dict.has_key() check and
        # collapses the "first sighting" / "seen before" branches.
        lines[line] = lines.get(line, 0) + 1
    return lines
def hadoopReduce(result, mapResult):
    """Merge one table of counts into the running totals.

    Intended as the folding function for ``reduce``: the accumulator comes
    first, the next map result second.

    Args:
        result: Dict of accumulated totals. It is updated in place.
        mapResult: Dict of counts to add into ``result``.

    Returns:
        The same ``result`` object, after every key of ``mapResult`` has
        been added to it (summed when the key already exists, copied
        otherwise).

    Side effects:
        Prints the running totals to stdout after each merge.
    """
    for key, count in mapResult.items():
        # `in` replaces the Python-2-only dict.has_key().
        if key in result:
            result[key] += count
        else:
            result[key] = count
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print(result)
    return result
# Driver: count identical lines across the two input files, printing the
# per-file tables first and the merged totals last.

# `reduce` is a builtin only on Python 2; functools.reduce exists on both
# Python 2.6+ and Python 3.
from functools import reduce

# A single `with` guarantees both handles are closed even if opening the
# second file or one of the map steps raises.
with open("test.0", 'r') as first_input, open("test.1") as second_input:
    fd = (first_input, second_input)
    # Materialize the results while the files are still open: on Python 3
    # map() is lazy, and consuming it after close would fail.
    mapData = list(map(hadoopMap, fd))
print(mapData)
# Fold the per-file tables into one dict, starting from an empty total.
reduceResult = reduce(hadoopReduce, mapData, {})
print(reduceResult)
# Simple Python implementation of Hadoop-style map/reduce counting.
# (Source page note: latest recommended article published 2024-01-03 10:50:22.)