————————场景—————–
对mongodb中数据集合(ABC123)包含的指标进行统计分析,这些指标(在mongodb中实际以字母、数字组合的编码表示)包括:
A1:用户标识、A2:用户编码、A3:用户名称、A4:地市、A5:区县、A6:手机号码、A7:账期、A8:入网时间、A9:ARPU值
需地市、区县分组统计的指标(统计指标的结果标识编码可自定义):
X1:用户数、X2:总ARPU值、X3:平均用户入网时间
即需要统计分析:
用户数: count(用户编码)
总ARPU值: sum(ARPU值)
平均用户入网时间:sum(入网时间)/count(用户编码)
———————————————-
mongodb java api mongo-2.7.3.jar
mongodb group 分组函数使用:
BasicDBList basicDBList = (BasicDBList)db.getCollection("mongodb中集合名称或者编码")
.group(DBObject key, // 分组字段,即group by的字段
DBObject cond, // 查询中的where条件
DBObject initial, // 初始化各字段的值
String reduce, // 每个分组都需要执行的Function
String finalize // 终结Function,对结果进行最终的处理
);
————————代码——————————
将分组字段、统计的结果字段、统计公式中用到的字段分别保存在HashMap中:
分组字段:
HashMap dimMap = new HashMap();
dimMap.put("A4","A4"); //地市
dimMap.put("A5","A5"); //区县
统计的结果字段:
HashMap forIdxMap = new HashMap();
forIdxMap.put("X1","count(A2)"); //用户数
forIdxMap.put("X2","sum(A9)"); //总ARPU值
forIdxMap.put("X3","sum(A8)/count(A2)"); //平均用户入网时间
统计公式中用到的字段:
HashMap indexMap = new HashMap();
indexMap.put("A2","A2"); //用户编码
indexMap.put("A8","A8"); //入网时间
indexMap.put("A9","A9"); //ARPU值
java调用:
BasicDBList basicDBList = (BasicDBList)db.getCollection("ABC123")
.group(GroupUtil.generateFormulaKeyObject(dimMap),
new BasicDBObject(),
GroupUtil.generateFormulaInitObject(indexMap),
GroupUtil.generateFormulaReduceObject(indexMap),
GroupUtil.generateFormulaFinalizeObject(forIdxMap, indexMap));
GroupUtil.java:
-
package com.test;
-
-
import java.util.HashMap;
-
import java.util.Iterator;
-
import com.mongodb.BasicDBObject;
-
import com.mongodb.DBObject;
-
-
public
class GroupUtil
-
{
-
/**
-
* 方法描述:根据用户选择的维度编码和指标编码,生成Group中的key
-
* @param dimMap 维度编码
-
* @return key 对象
-
*/
-
public static DBObject generateFormulaKeyObject(HashMap dimMap)
-
{
-
DBObject key =
new BasicDBObject();
-
Iterator dimIt = dimMap.keySet().iterator();
-
while (dimIt.hasNext())
-
{
-
String dimId = (String)dimIt.next();
-
key.put(dimId,
true);
-
}
-
return key;
-
}
-
-
/**
-
* 方法描述:根据用户选择的维度编码和指标编码,生成Group中的属性初始化值
-
* @param dimMap 维度编码
-
* @return key 对象
-
*/
-
public static DBObject generateFormulaInitObject(HashMap indexMap)
-
{
-
DBObject initial =
new BasicDBObject();
-
//设置计算指标中使用的指标对应的统计值:sum、count、avg、max、min
-
Iterator indexIt = indexMap.keySet().iterator();
-
while (indexIt.hasNext())
-
{
-
DBObject index =
new BasicDBObject();
-
index.put(
"count",
0);
-
index.put(
"sum",
0);
-
index.put(
"max",
0);
-
index.put(
"min",
0);
-
index.put(
"avg",
0);
-
index.put(
"self",
0);
-
String indexId = (String)indexIt.next();
-
initial.put(indexId, index);
-
}
-
-
return initial;
-
}
-
-
/**
-
* 方法描述:根据用户选择的指标编码,生成Group中的reduce函数
-
* @param indexMap 指标编码
-
* @return reduce函数
-
*/
-
public static String generateFormulaReduceObject(HashMap indexMap)
-
{
-
StringBuffer reduceBuf =
new StringBuffer();
-
-
reduceBuf.append(
"function(doc, prev) {");
-
reduceBuf.append(
"var tempVal;");
-
Iterator indexIt = indexMap.keySet().iterator();
-
while (indexIt.hasNext())
-
{
-
String indexId = (String)indexIt.next();
-
//计算指标数量
-
reduceBuf.append(
"prev.").append(indexId).append(
".count ++;");
-
//计算指标总计
-
reduceBuf.append(
"if(isNaN(").append(
"prev.").append(indexId).append(
".sum").append(
")){");
-
reduceBuf.append(
"prev.").append(indexId).append(
".sum = 0;");
-
reduceBuf.append(
"}");
-
reduceBuf.append(
"prev.").append(indexId).append(
".sum += parseFloat(doc.").append(indexId).append(
");");
-
reduceBuf.append(
"if(isNaN(").append(
"prev.").append(indexId).append(
".self").append(
")){");
-
reduceBuf.append(
"prev.").append(indexId).append(
".self = 0;");
-
reduceBuf.append(
"}");
-
reduceBuf.append(
"prev.").append(indexId).append(
".self = parseFloat(doc.").append(indexId).append(
");");
-
-
reduceBuf.append(
"print(prev.").append(indexId).append(
".self);");
-
//计算指标最大值
-
reduceBuf.append(
"tempVal = parseFloat(doc.").append(indexId).append(
");");
-
reduceBuf.append(
"if(").append(
"prev.").append(indexId).append(
".max == 0").append(
"){");
-
reduceBuf.append(
"prev.").append(indexId).append(
".max = tempVal;");
-
reduceBuf.append(
"}else{");
-
reduceBuf.append(
"prev.").append(indexId).append(
".max = ");
-
reduceBuf.append(
"prev.").append(indexId).append(
".max > tempVal ? ");
-
reduceBuf.append(
"prev.").append(indexId).append(
".max : tempVal;");
-
reduceBuf.append(
"}");
-
//计算指标最小值
-
reduceBuf.append(
"if(").append(
"prev.").append(indexId).append(
".min == 0").append(
"){");
-
reduceBuf.append(
"prev.").append(indexId).append(
".min = tempVal;");
-
reduceBuf.append(
"}else{");
-
reduceBuf.append(
"prev.").append(indexId).append(
".min = ");
-
reduceBuf.append(
"prev.").append(indexId).append(
".min < tempVal ? ");
-
reduceBuf.append(
"prev.").append(indexId).append(
".min : tempVal;");
-
reduceBuf.append(
"}");
-
//计算指标的平均值
-
reduceBuf.append(
"prev.").append(indexId).append(
".avg = ");
-
reduceBuf.append(
"prev.").append(indexId).append(
".sum");
-
reduceBuf.append(
" / ");
-
reduceBuf.append(
"prev.").append(indexId).append(
".count;");
-
}
-
reduceBuf.append(
"}");
-
-
return reduceBuf.toString();
-
}
-
-
/**
-
* 方法描述:根据用户选择的指标编码,生成MapReduce中的finalize函数
-
* @param indexMap 指标编码
-
* @return reduce函数
-
*/
-
public static String generateFormulaFinalizeObject(HashMap forIdxMap, HashMap indexMap)
-
{
-
StringBuffer reduceBuf =
new StringBuffer();
-
reduceBuf.append(
"function(doc){");
-
//得到计算指标的公式运行值
-
Iterator formulaIt = forIdxMap.keySet().iterator();
-
while (formulaIt.hasNext())
-
{
-
String indexId = (String)formulaIt.next();
-
String idxFormula = (String)forIdxMap.get(indexId);
-
reduceBuf.append(
"var tempIdx, tempFormula;");
-
Iterator indexItB = indexMap.keySet().iterator();
-
int i =
0;
-
while (indexItB.hasNext())
-
{
-
String indexIdS = (String)indexItB.next();
-
if(i ==
0)
-
{
-
reduceBuf.append(
"tempFormula = \"").append(idxFormula).append(
"\";");
-
}
-
reduceBuf.append(
"tempIdx = ").append(
"doc.").append(indexIdS).append(
".sum;");
-
reduceBuf.append(
"tempFormula = ").append(
"tempFormula").append(
".replace(/sum\\(").append(indexIdS).append(
"\\)/g,tempIdx);");
-
reduceBuf.append(
"tempIdx = ").append(
"doc.").append(indexIdS).append(
".count;");
-
reduceBuf.append(
"tempFormula = ").append(
"tempFormula").append(
".replace(/count\\(").append(indexIdS).append(
"\\)/g,tempIdx);");
-
reduceBuf.append(
"tempIdx = ").append(
"doc.").append(indexIdS).append(
".min;");
-
reduceBuf.append(
"tempFormula = ").append(
"tempFormula").append(
".replace(/min\\(").append(indexIdS).append(
"\\)/g,tempIdx);");
-
reduceBuf.append(
"tempIdx = ").append(
"doc.").append(indexIdS).append(
".max;");
-
reduceBuf.append(
"tempFormula = ").append(
"tempFormula").append(
".replace(/max\\(").append(indexIdS).append(
"\\)/g,tempIdx);");
-
reduceBuf.append(
"tempIdx = ").append(
"doc.").append(indexIdS).append(
".avg;");
-
reduceBuf.append(
"tempFormula = ").append(
"tempFormula").append(
".replace(/avg\\(").append(indexIdS).append(
"\\)/g,tempIdx);");
-
i++;
-
}
-
reduceBuf.append(
"var resTemp = ").append(
"eval(tempFormula);");
-
reduceBuf.append(
"doc.").append(indexId).append(
" = resTemp.toFixed(2);");
-
}
-
-
Iterator indexItC = indexMap.keySet().iterator();
-
while (indexItC.hasNext())
-
{
-
String indexId = (String)indexItC.next();
-
reduceBuf.append(
"doc.").append(indexId).append(
" = doc.").append(indexId).append(
".self;");
-
}
-
reduceBuf.append(
"}");
-
-
return reduceBuf.toString();
-
}
-
}