python编写mongodb中的map/reduce
目的:求下面user_info表中班级2中的各个学科的分数总和和平均分。
具体的内容如下:
{"uid":"a123","type":"man","class":2,"score":{"math":80,"english":60,"chinese":90}}
{"uid":"b123","type":"female","class":2,"score":{"math":100,"english":90,"chinese":80}}
{"uid":"c123","type":"man","class":2,"score":{"math":60,"english":50,"chinese":88}}
{"uid":"d123","type":"female","class":2,"score":{"math":79,"english":87,"chinese":78}}
{"uid":"e123","type":"female","class":1,"score":{"math":79,"english":87,"chinese":78}}
{"uid":"b123","type":"female","class":2,"score":{"math":100,"english":90,"chinese":80}}
{"uid":"c123","type":"man","class":2,"score":{"math":60,"english":50,"chinese":88}}
{"uid":"d123","type":"female","class":2,"score":{"math":79,"english":87,"chinese":78}}
{"uid":"e123","type":"female","class":1,"score":{"math":79,"english":87,"chinese":78}}
1,使用遍历读取的方式
使用find()遍历class为2的值,进行统计计算,具体的python代码如下:
#!/usr/bin/env python
# -*- encoding:utf-8 -*-
"""Total and average the class-2 scores per subject by iterating a find() cursor.

The script resets the ``things.user_info`` collection, loads five sample
documents, then walks every document whose ``class`` is 2 and accumulates
the math, english and chinese scores on the client side.

Every print call takes exactly one parenthesised argument, so the output is
the same whether the interpreter treats ``print`` as a statement or a function.
"""
import time  # the original called time.time() without importing the module

MONGO_HOST = '192.168.0.203'
MONGO_PORT = 27017

# Keys of the embedded "score" sub-document that get totalled.
SUBJECTS = ("math", "english", "chinese")

SAMPLE_USERS = [
    {"uid": "a123", "type": "man", "class": 2,
     "score": {"math": 80, "english": 60, "chinese": 90}},
    {"uid": "b123", "type": "female", "class": 2,
     "score": {"math": 100, "english": 90, "chinese": 80}},
    {"uid": "c123", "type": "man", "class": 2,
     "score": {"math": 60, "english": 50, "chinese": 88}},
    {"uid": "d123", "type": "female", "class": 2,
     "score": {"math": 79, "english": 87, "chinese": 78}},
    {"uid": "e123", "type": "female", "class": 1,
     "score": {"math": 79, "english": 87, "chinese": 78}},
]


def sum_scores(docs):
    """Add up the per-subject scores of *docs*.

    Args:
        docs: iterable of documents, each holding a "score" mapping with one
            numeric entry per name in SUBJECTS.

    Returns:
        A ``(totals, count)`` tuple: ``totals`` maps each subject to the sum
        of its scores and ``count`` is the number of documents consumed.
    """
    totals = dict.fromkeys(SUBJECTS, 0)
    count = 0
    # Counting while iterating avoids a separate count query against the
    # server and keeps the count consistent with the rows actually summed.
    for doc in docs:
        count += 1
        for subject in SUBJECTS:
            totals[subject] += doc["score"][subject]
    return totals, count


def average_scores(totals, count):
    """Return ``{subject: total / count}`` as floats.

    Raises:
        ValueError: if *count* is not positive, because an average over
            zero documents is undefined.
    """
    if count <= 0:
        raise ValueError("cannot average over %r documents" % (count,))
    return {subject: float(total) / count for subject, total in totals.items()}


def main():
    """Reload the sample data, then print the class-2 totals and averages."""
    # Imported here so the helpers above can be imported without pymongo.
    from pymongo import MongoClient

    collection = MongoClient(MONGO_HOST, MONGO_PORT)['things']["user_info"]
    collection.drop()
    # insert_many() adds an "_id" key to every dict it receives; pass copies
    # so SAMPLE_USERS itself is left untouched.
    collection.insert_many([dict(user) for user in SAMPLE_USERS])

    print(time.time())
    score, matched = sum_scores(collection.find({"class": 2}))
    print(matched)
    if matched == 0:
        # The original divided by zero at this point.
        print("no documents found for class 2")
        return

    averages = average_scores(score, matched)
    print("-------------------------------------------------")
    print(score)
    print("--------------------------------------------------")
    print("math average score is : %s" % averages["math"])
    print("english average score is : %s" % averages["english"])
    print("chinese avarege score is : %s" % averages["chinese"])


if __name__ == "__main__":
    main()
2,使用mongodb自带的map/reduce来统计
a,map/reduce简介
map/reduce是一个并行的分布式计算模型,用于大规模数据的计算。具体的工作过程主要分为map和reduce两个阶段,每一个阶段都以key-value(即键值对)的形式作为输入和输出。
具体的key-value的格式多种多样,由具体的程序来定义。
map阶段读入数据,生成key-value。
reduce读入由map函数生成的key-value进行计算,返回结果
mongodb中的map/reduce过程:
读入collection
执行map函数,用emit函数生成key-value
执行reduce函数,遍历map的输出,进行统计
返回结果collection
b,python的具体实现
#!/usr/bin/env python
# -*- encoding:utf-8 -*-
"""Total and average the per-class scores with MongoDB's server-side map/reduce.

The script resets the ``things.user_info`` collection, loads five sample
documents, runs a map/reduce job keyed on ``class`` that writes its output
to the ``class_user`` collection, and prints the totals and averages stored
for class 2.

Uses ``Collection.map_reduce``, which is available in the pymongo 2.x/3.x
driver series.
"""

MONGO_HOST = '192.168.0.203'
MONGO_PORT = 27017

# Score fields that are totalled by the reduce function and averaged below.
SUBJECTS = ("math", "english", "chinese")

SAMPLE_USERS = [
    {"uid": "a123", "type": "man", "class": 2,
     "score": {"math": 80, "english": 60, "chinese": 90}},
    {"uid": "b123", "type": "female", "class": 2,
     "score": {"math": 100, "english": 90, "chinese": 80}},
    {"uid": "c123", "type": "man", "class": 2,
     "score": {"math": 60, "english": 50, "chinese": 88}},
    {"uid": "d123", "type": "female", "class": 2,
     "score": {"math": 79, "english": 87, "chinese": 78}},
    {"uid": "e123", "type": "female", "class": 1,
     "score": {"math": 79, "english": 87, "chinese": 78}},
]

# Each document emits its scores plus count = 1. Carrying the count inside
# the value lets the average be derived from the map/reduce output alone,
# without a second count query against the collection.
MAP_JS = """
function () {
    emit(this.class, {
        math: this.score.math,
        english: this.score.english,
        chinese: this.score.chinese,
        count: 1
    });
}
"""

# The original reduce never closed its for-loop before `return result;`,
# leaving the function with unbalanced braces. The loop is closed here so
# every value is summed before returning. The result has the same shape as
# the emitted values, so the server may feed reduce output back into reduce.
REDUCE_JS = """
function (key, values) {
    var result = {math: 0, english: 0, chinese: 0, count: 0};
    for (var i = 0; i < values.length; i++) {
        result.math += values[i].math;
        result.english += values[i].english;
        result.chinese += values[i].chinese;
        result.count += values[i].count;
    }
    return result;
}
"""


def average_from_value(value):
    """Turn one map/reduce output value into per-subject averages.

    Args:
        value: mapping holding a total for every name in SUBJECTS plus a
            "count" entry with the number of documents that were summed.

    Returns:
        ``{subject: total / count}`` with float results.

    Raises:
        ValueError: if "count" is not positive.
    """
    count = value["count"]
    if count <= 0:
        raise ValueError("cannot average over %r documents" % (count,))
    return {subject: float(value[subject]) / count for subject in SUBJECTS}


def main():
    """Reload the sample data, run the map/reduce job and print class 2."""
    # Imported here so the helpers above can be imported without pymongo.
    from bson.code import Code
    from pymongo import MongoClient

    collection = MongoClient(MONGO_HOST, MONGO_PORT)['things']["user_info"]
    collection.drop()
    # insert_many() adds an "_id" key to every dict it receives; pass copies
    # so SAMPLE_USERS itself is left untouched.
    collection.insert_many([dict(user) for user in SAMPLE_USERS])

    results = collection.map_reduce(Code(MAP_JS), Code(REDUCE_JS), "class_user")

    for row in results.find({"_id": 2}):
        averages = average_from_value(row["value"])
        print("*********************************************************")
        print(row)
        print("math average score is : %s" % averages["math"])
        print("english average score is : %s" % averages["english"])
        print("chinese avarege score is : %s" % averages["chinese"])


if __name__ == "__main__":
    main()
转载于:https://blog.51cto.com/liran728729/1204872