1. KeywordsStaticsModel 数据模型
// Fields of the KeywordsStaticsModel data model: counters recorded for one keyword.
private String id;
private String keyword; // the keyword
private String keywordUuid;// MD5 of keyword + datestr
// Search-related counters
private int sessionCount = 0;
private int adsCount = 0;
// Click-related counters
private int clickCount = 0;
private int yahooClick = 0;
private float score;// ranking score
// Creation time; the queries in this file filter on this field with a gte/lt range.
private long createTime; // creation time
private String createTimeStr; // records the current access time
2. 使用 MapReduce 统计每个关键字的 sessionCount 等计数字段的总数
// Sums the per-keyword counters with MongoDB map-reduce over the documents
// whose createTime lies in [timeStart, timeEnd).
// The Query must be declared here; with the declaration commented out the
// snippet referenced an undefined variable.
Query query = new Query();
query.addCriteria(Criteria.where("createTime").gte(timeStart).lt(timeEnd));

// map: emit one value per document, keyed by keyword. Only counters that exist
// on KeywordsStaticsModel are emitted, and the emitted object has exactly the
// same fields as the object returned by reduce. MongoDB skips reduce for keys
// with a single document and may feed reduce output back into reduce, so both
// shapes have to match.
String mapFunction = "function(){" +
        "emit(this.keyword,{" +
        "sessionCount:this.sessionCount," +
        "adsCount:this.adsCount," +
        "clickCount:this.clickCount," +
        "yahooClick:this.yahooClick})" +
        "}";

// reduce: field-wise sum of every emitted counter (yahooClick was previously
// initialised but never accumulated).
String reduceFunction = "function(key, values){" +
        "var x = {sessionCount:0,adsCount:0,clickCount:0,yahooClick:0};" +
        "values.forEach(function(val){ " +
        "x.sessionCount += val.sessionCount; " +
        "x.adsCount += val.adsCount; " +
        "x.clickCount += val.clickCount; " +
        "x.yahooClick += val.yahooClick; " +
        "});" +
        "return x;}";

long startTime = System.currentTimeMillis();
// NOTE(review): map-reduce output documents have the form {_id: <keyword>, value: {...}};
// confirm that KeywordsStaticsModel is the intended mapping target for that shape.
MapReduceResults<KeywordsStaticsModel> asd = getMongoTemplate().mapReduce(query, "keywordsStaticsModel", mapFunction, reduceFunction, KeywordsStaticsModel.class);
long endTime = System.currentTimeMillis();
// Elapsed wall-clock time of the map-reduce call, in milliseconds.
System.out.println("spend time ==>"+(endTime-startTime));
3. 使用 group 统计每个关键字的 sessionCount 总数。注意:group 命令最多只能返回 20000 个不同的分组(unique keys),超过该数量时应改用 mapReduce。
// Groups the documents whose createTime lies in [timeStart, timeEnd) by keyword
// and accumulates sessionCount / adsCount per group, then prints the "count"
// and "keys" entries of the raw group result.

// Accumulator every group starts from.
DBObject initialDoc = new BasicDBObject("sessionCountSum", 0)
        .append("adsCountSum", 0);

// JavaScript reducer: folds each document (obj) into its group's accumulator (prev).
String reduceJs = "function(obj,prev){"
        + "prev.sessionCountSum += obj.sessionCount;"
        + "prev.adsCountSum += obj.adsCount;"
        + "}";

GroupBy byKeyword = new GroupBy("keyword")
        .initialDocument(initialDoc)
        .reduceFunction(reduceJs);

Criteria timeWindow = Criteria.where("createTime").gte(timeStart).lt(timeEnd);
GroupByResults<KeywordsStaticsModel> ttt = getMongoTemplate().group(
        timeWindow, "keywordsStaticsModel", byKeyword, KeywordsStaticsModel.class);

DBObject resultSet = ttt.getRawResults();
System.out.println("count==>" + resultSet.get("count"));
System.out.println("keys==>" + resultSet.get("keys"));