MongoDB map-reduce 的两种使用方式:mongo shell 与 Java API
第一种:shell
/*
Map-reduce
*/
// Seed the "mapreduce" collection with two orders for the same customer.
// The first order carries an explicit _id; the second leaves _id unset.
var firstOrder = {
    _id: ObjectId("50a8240b927d5d8b5891743c"),
    cust_id: "abc123",
    ord_date: new Date("Oct 04, 2012"),
    status: 'A',
    price: 25,
    items: [
        { sku: "mmm", qty: 5, price: 2.5 },
        { sku: "nnn", qty: 5, price: 2.5 }
    ]
};
db.mapreduce.insert(firstOrder);

var secondOrder = {
    cust_id: "abc123",
    ord_date: new Date("Oct 04, 2012"),
    status: 'A',
    price: 30,
    items: [
        { sku: "mmm", qty: 5, price: 2.5 },
        { sku: "nnn", qty: 5, price: 2.5 }
    ]
};
db.mapreduce.insert(secondOrder);
// Map step: for each order document, emit cust_id as the key and price as the value.
var mapFunction1 = function() {
    var customerId = this.cust_id;
    var orderPrice = this.price;
    emit(customerId, orderPrice);
};
// Reduce step.
// Receives the grouped data: keyCustId is the grouping key and valuesPrices is the
// array of values emitted for it, e.g. "custid": [25, 30, 40].
// Returns the sum of the prices (Array.sum is a mongo shell helper).
var reduceFunction1 = function(keyCustId, valuesPrices) {
    var totalPrice = Array.sum(valuesPrices);
    return totalPrice;
};
// Run the job and write the result to the map_reduce_example collection;
// if that collection already exists, its contents are overwritten.
var customerTotalsOptions = { out: "map_reduce_example" };
db.mapreduce.mapReduce(mapFunction1, reduceFunction1, customerTotalsOptions);

// Show the map-reduce output first, then the source documents.
db.map_reduce_example.find();
db.mapreduce.find();
/*
Compute the total order quantity and the average quantity per SKU.
key:   sku
value: { count, qty }
*/
var mapFunction2 = function() {
    // One emit per line item: each item counts once and contributes its qty.
    this.items.forEach(function(item) {
        emit(item.sku, { count: 1, qty: item.qty });
    });
};
// Reduce step: add up the counts and quantities emitted for one SKU.
//   keySKU       - the SKU the values were grouped under
//   countObjVals - array of { count, qty } objects for that SKU
// Returns a single { count, qty } object holding the totals.
var reduceFunction2 = function(keySKU, countObjVals) {
    // Declared with var so the accumulator stays local instead of leaking
    // into the global scope as an implicit global.
    var reducedVal = { count: 0, qty: 0 };
    for (var idx = 0; idx < countObjVals.length; idx++) {
        reducedVal.count += countObjVals[idx].count;
        reducedVal.qty += countObjVals[idx].qty;
    }
    return reducedVal;
};
// Finalize step: attach the average quantity (qty / count) to the reduced value
// and return that same object.
var finalizeFunction2 = function (key, reducedVal) {
    var averageQty = reducedVal.qty / reducedVal.count;
    reducedVal.avg = averageQty;
    return reducedVal;
};
// Use map-reduce to compute the total and average order quantity per SKU.
// Only orders dated after 01/01/2012 are selected, the finalize function adds the
// average, and the output is merged into the map_reduce_example_ collection.
var skuStatsOptions = {
    out: { merge: "map_reduce_example_" },
    query: { ord_date: { $gt: new Date('01/01/2012') } },
    finalize: finalizeFunction2
};
db.mapreduce.mapReduce(mapFunction2, reduceFunction2, skuStatsOptions);

// Show the per-SKU results.
db.map_reduce_example_.find();
执行结果
集合map_reduce_example:
/* 1 */
{
"_id" : "abc123",
"value" : 55.0
}
集合map_reduce_example_:
/* 1 */
{
"_id" : "mmm",
"value" : {
"count" : 2.0,
"qty" : 10.0,
"avg" : 5.0
}
}
/* 2 */
{
"_id" : "nnn",
"value" : {
"count" : 2.0,
"qty" : 10.0,
"avg" : 5.0
}
}
第二种:Java API
mapreduce集合的数据
/* 1 */
{
"_id" : ObjectId("50a8240b927d5d8b5891743c"),
"cust_id" : "abc123",
"ord_date" : ISODate("2012-10-03T16:00:00.000Z"),
"status" : "A",
"price" : 25.0,
"items" : [
{
"sku" : "mmm",
"qty" : 5.0,
"price" : 2.5
},
{
"sku" : "nnn",
"qty" : 5.0,
"price" : 2.5
}
]
}
/* 2 */
{
"_id" : ObjectId("58538e0153962ba9f30a64f1"),
"cust_id" : "abc123",
"ord_date" : ISODate("2012-10-03T16:00:00.000Z"),
"status" : "A",
"price" : 30.0,
"items" : [
{
"sku" : "mmm",
"qty" : 5.0,
"price" : 2.5
},
{
"sku" : "nnn",
"qty" : 5.0,
"price" : 2.5
}
]
}
对以上数据做两个mapreduce示例程序:
package db.mongo.feature;
import com.mongodb.MongoClient;
import com.mongodb.client.*;
import db.mongo.util.Constants;
import org.bson.Document;
import org.bson.conversions.Bson;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
public class MongoMapreduce {
MongoDatabase database = null;
@Before
public void connect(){
MongoClient client = new MongoClient(Constants.MONGO_HOST,Constants.MONGO_PORT);
//当前获取这个数据库时并没有这个名称的数据库,不会报错,当有数据添加的时候才会创建
database = client.getDatabase("test");
}
public MongoCollection<Document> getCollection(String collectionName) {
return this.database.getCollection(collectionName);
}
public void mongoCursor(MapReduceIterable<Document> documents){
MongoCursor<Document> cursor = documents.iterator();
while (cursor.hasNext()){
System.out.println(cursor.next().toJson());
}
cursor.close();
}
/**
*
* <p>
* mapreduce编程模型分为map与reduce两个阶段,map函数和reduce函数都是使用JavaScript的函数来写
* </p>
*/
@Test
public void simpleMapReduce(){
MongoCollection<Document> mapreduce = getCollection("mapreduce");
map程序,将每个文档的cust_id作为key,price作为value
String mapFunction = "function() {\n" +
" emit(this.cust_id, this.price);\n" +
"}";
//reduce程序
//经过分组后的信息,keyCustId是经过分组后的key,valuesPrices是一个数组。接收的参数像这种格式"custid":[25,30,40]
String reduceFunction = "function(keyCustId, valuesPrices) {\n" +
" return Array.sum(valuesPrices);\n" +
"}";
MapReduceIterable<Document> documents = mapreduce.mapReduce(mapFunction, reduceFunction);
mongoCursor(documents);
}
/**
* 计算订单总量和平均数量
* <p>
* 还可以对计算结果进行过滤
* @see MapReduceIterable#filter(Bson)
* @see MapReduceIterable#limit(int)
* @see MapReduceIterable#sort(Bson)
* 由于mapreduce编程模型本身就是设计为分布式计算模型,所以还可以添加是否从分片中计算
* @see MapReduceIterable#sharded(boolean)
*
* </p>
*/
@Test
public void totalOrderAndAvgCount(){
MongoCollection<Document> mapreduce = getCollection("mapreduce");
String mapFunction = "function() {\n" +
" for (var idx = 0; idx < this.items.length; idx++) {\n" +
" var key = this.items[idx].sku;\n" +
" var value = {\n" +
" count: 1,\n" +
" qty: this.items[idx].qty\n" +
" };\n" +
" emit(key, value);\n" +
" }\n" +
"}";
String reduceFunction = "function(keySKU, countObjVals) {\n" +
" reducedVal = { count: 0, qty: 0 };\n" +
"\n" +
" for (var idx = 0; idx < countObjVals.length; idx++) {\n" +
" reducedVal.count += countObjVals[idx].count;\n" +
" reducedVal.qty += countObjVals[idx].qty;\n" +
" }\n" +
"\n" +
" return reducedVal;\n" +
"}";
String finalizeFunction = "function (key, reducedVal) {\n" +
"\n" +
" reducedVal.avg = reducedVal.qty/reducedVal.count;\n" +
"\n" +
" return reducedVal;\n" +
"\n" +
"}";
MapReduceIterable<Document> documents = mapreduce.mapReduce(mapFunction, reduceFunction).finalizeFunction(finalizeFunction);
mongoCursor(documents);
}
}
simpleMapReduce方法执行结果:
{ "_id" : "abc123", "value" : 55.0 }
totalOrderAndAvgCount方法执行结果:
{ "_id" : "mmm", "value" : { "count" : 2.0, "qty" : 10.0, "avg" : 5.0 } }
{ "_id" : "nnn", "value" : { "count" : 2.0, "qty" : 10.0, "avg" : 5.0 } }