书上的源代码为:
'''
Created on Feb 28, 2011
@author: Peter
'''
from mrjob.job import MRJob
class MRmean(MRJob):
    """Compute the mean and variance of numeric input with one MapReduce step.

    Every mapper keeps running totals (count, sum, sum of squares) for the
    lines it receives and emits a single partial summary when its input is
    exhausted. All summaries are emitted under the same key (1), so they are
    combined in one reduce call that yields the overall mean and the
    population variance (the divisor is N, not N - 1).
    """

    def __init__(self, *args, **kwargs):
        super(MRmean, self).__init__(*args, **kwargs)
        # Running totals over the input lines seen by this mapper instance.
        self.inCount = 0
        self.inSum = 0
        self.inSqSum = 0

    def map(self, key, val):  # needs exactly 2 arguments
        """Fold one input value into the running totals; emits nothing.

        Args:
            key: Unused.
            val: Text of one input line; must be parseable by ``float``.

        Raises:
            ValueError: If ``val`` cannot be converted to ``float``.
        """
        if False:
            yield  # never executed; its presence makes this a generator
        inVal = float(val)
        self.inCount += 1
        self.inSum += inVal
        self.inSqSum += inVal * inVal

    def map_final(self):
        """Emit this mapper's partial summary once all its input is consumed.

        Yields:
            ``(1, [count, mean, mean_of_squares])``. Nothing is yielded when
            this mapper received no input, which avoids dividing by zero.
        """
        if self.inCount == 0:
            return
        mn = self.inSum / self.inCount
        mnSq = self.inSqSum / self.inCount
        yield (1, [self.inCount, mn, mnSq])

    def reduce(self, key, packedValues):
        """Combine the per-mapper summaries into a global mean and variance.

        Args:
            key: Shared key under which the summaries were emitted (unused).
            packedValues: Iterable of ``[count, mean, mean_of_squares]``.

        Yields:
            ``(mean, var)`` computed over all mappers' input.
        """
        cumVal = 0.0
        cumSumSq = 0.0
        cumN = 0.0
        for valArr in packedValues:  # get values from streamed inputs
            nj = float(valArr[0])
            cumN += nj
            # Multiply the per-mapper means by the count to recover the sums.
            cumVal += nj * float(valArr[1])
            cumSumSq += nj * float(valArr[2])
        mean = cumVal / cumN
        # Expanded form of sum((x - mean)**2) / N.
        var = (cumSumSq - 2 * mean * cumVal + cumN * mean * mean) / cumN
        yield (mean, var)  # emit mean and var

    def steps(self):
        """Describe the job as one step: mapper, mapper_final and reducer."""
        # MRJob.mr() is not available in current mrjob releases (calling it
        # raises AttributeError); MRStep is the documented replacement.
        # Imported here so this class does not rely on a new file-level import.
        from mrjob.step import MRStep

        return [
            MRStep(
                mapper=self.map,
                mapper_final=self.map_final,
                reducer=self.reduce,
            )
        ]
if __name__ == '__main__':
    # Script entry point: start the job only when this file is executed
    # directly, not when it is imported.
    MRmean.run()
在执行该程序的过程中,出现如下错误:AttributeError: 'MRmean' object has no attribute 'mr'。原因是新版 mrjob 已不再提供 MRJob.mr() 方法,需要改用 mrjob.step 模块中的 MRStep 来定义作业的各个步骤。
解决办法
按照最新的官方文档(http://mrjob.readthedocs.io/en/latest/job.html?highlight=mrjob.job)