文档结构:
Spcode
Spname
Consignid
Consname
Region
Regionname
Serviceid
Servicename
Srctermid
Logtime
>每天要做两次分组: 一次天分组, 一次月分组
#SQL
#天分组
-- Daily grouping: number of log rows per distinct combination of the nine
-- key columns for a single day.
-- The date filter is a half-open range [20120823, 20120824), so it selects
-- exactly one day whether Logtime holds a bare date or a longer timestamp
-- string that starts with the date (assumes Logtime is a string that sorts
-- chronologically -- TODO confirm against the real schema).
-- Every non-aggregated column in the SELECT list also appears in GROUP BY;
-- Srctermid was previously selected but not grouped.
SELECT Spcode, Spname, Consignid, Consname, Region,
       Regionname, Serviceid, Servicename, Srctermid,
       COUNT(*) AS cnt
FROM mo_log_201208
WHERE Logtime >= '20120823' AND Logtime < '20120824'
GROUP BY Spcode, Spname, Consignid, Consname, Region,
         Regionname, Serviceid, Servicename, Srctermid
#MongoDB
#天分组
// Daily grouping: counts the documents of mo_log_201208 for one day, grouped
// by the nine key fields, and writes the result to
// tmp_mo_spcode_consignid_region_serviceid_201208_1 as {_id: <key>, value: {count: N}}.
res = db.runCommand({
  mapreduce: 'mo_log_201208',
  // Half-open range [20120823, 20120824): with $lte the upper bound would also
  // match documents whose Logtime is exactly '20120824'.
  query: {Logtime: {$gte: '20120823', $lt: '20120824'}},
  map: function () {
    // One emit per log document; the key is the full grouping tuple.
    emit({
      Spcode: this.Spcode, Spname: this.Spname,
      Consignid: this.Consignid, Consname: this.Consname,
      Region: this.Region, Regionname: this.Regionname,
      Serviceid: this.Serviceid,
      Servicename: this.Servicename,
      Srctermid: this.Srctermid
    }, {count: 1});
  },
  reduce: function (key, values) {
    // `values` is an array of {count: n} objects for this key. MongoDB may call
    // reduce several times for one key and feed earlier results back in, so the
    // partial counts must be summed (not incremented by one) and the return
    // value must have the same shape as the emitted value.
    var total = 0;
    for (var i = 0; i < values.length; i++) {
      total += values[i].count;
    }
    return {count: total};
  },
  out: 'tmp_mo_spcode_consignid_region_serviceid_201208_1',
  verbose: true
});
#SQL
#月分组
-- Monthly grouping: number of log rows per distinct combination of the nine
-- key columns over the whole mo_log_201208 table (no date filter).
-- Every non-aggregated column in the SELECT list also appears in GROUP BY;
-- Srctermid was previously selected but not grouped.
SELECT Spcode, Spname, Consignid, Consname, Region,
       Regionname, Serviceid, Servicename, Srctermid,
       COUNT(*) AS cnt
FROM mo_log_201208
GROUP BY Spcode, Spname, Consignid, Consname, Region,
         Regionname, Serviceid, Servicename, Srctermid
#MongoDB
#月分组
// Monthly grouping: counts every document of mo_log_201208, grouped by the
// nine key fields, and writes the result to
// tmp_mo_spcode_consignid_region_serviceid_201208 as {_id: <key>, value: {count: N}}.
res = db.runCommand({
  mapreduce: 'mo_log_201208',
  map: function () {
    // One emit per log document; the key is the full grouping tuple.
    emit({
      Spcode: this.Spcode, Spname: this.Spname,
      Consignid: this.Consignid, Consname: this.Consname,
      Region: this.Region, Regionname: this.Regionname,
      Serviceid: this.Serviceid, Servicename: this.Servicename,
      Srctermid: this.Srctermid
    }, {count: 1});
  },
  reduce: function (key, values) {
    // `values` is an array of {count: n} objects for this key. MongoDB may call
    // reduce several times for one key and feed earlier results back in, so the
    // partial counts must be summed (not incremented by one) and the return
    // value must have the same shape as the emitted value.
    var total = 0;
    for (var i = 0; i < values.length; i++) {
      total += values[i].count;
    }
    return {count: total};
  },
  out: 'tmp_mo_spcode_consignid_region_serviceid_201208',
  verbose: true
});
>随着每天数据量的不断增长, 月分组的执行时间会不断地增加
>为了减少重复的分组操作, 降低分组时间, 用每天的天分组结果来增量更新月分组的结果
#MongoDB
// Incremental monthly grouping, done in two steps.
//
// Step 1: group one day of mo_log_201208 by the nine key fields and write the
// per-day counts to tmp_mo_spcode_consignid_region_serviceid_201208_1.
res = db.runCommand({
  mapreduce: 'mo_log_201208',
  // Half-open range [20120823, 20120824): with $lte the upper bound would also
  // match documents whose Logtime is exactly '20120824'.
  query: {Logtime: {$gte: '20120823', $lt: '20120824'}},
  map: function () {
    // One emit per log document; the key is the full grouping tuple.
    emit({
      Spcode: this.Spcode, Spname: this.Spname,
      Consignid: this.Consignid, Consname: this.Consname,
      Region: this.Region, Regionname: this.Regionname,
      Serviceid: this.Serviceid, Servicename: this.Servicename,
      Srctermid: this.Srctermid
    }, {count: 1});
  },
  reduce: function (key, values) {
    // Sum the partial counts; reduce may be re-invoked on its own output, so
    // the return value keeps the {count: n} shape of the emitted value.
    var total = 0;
    for (var i = 0; i < values.length; i++) {
      total += values[i].count;
    }
    return {count: total};
  },
  out: 'tmp_mo_spcode_consignid_region_serviceid_201208_1',
  verbose: true
});

// Step 2: fold the per-day counts into the monthly collection.
// The map/reduce/finalize functions must not touch the database, and a plain
// insert on an existing _id fails instead of adding the counts. The
// `out: {reduce: ...}` action merges instead: a key already present in the
// target collection is passed through `reduce` together with the new value,
// and new keys are inserted as-is.
// NOTE: run this step exactly once per day; re-running it for the same day
// adds that day's counts to the monthly totals again.
mergeRes = db.runCommand({
  mapreduce: 'tmp_mo_spcode_consignid_region_serviceid_201208_1',
  map: function () {
    // Re-emit each daily result under its original grouping key.
    emit(this._id, this.value);
  },
  reduce: function (key, values) {
    var total = 0;
    for (var i = 0; i < values.length; i++) {
      total += values[i].count;
    }
    return {count: total};
  },
  out: {reduce: 'tmp_mo_spcode_consignid_region_serviceid_201208'},
  verbose: true
});
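>MongoDB 缺省的主键是 _id, 且 _id 唯一, 因此使用 insert 或者 save 的时候, 不会产生重复数据
>insert 和 save 的区别是
>>insert: 当主键重复的时候, 报重复键错误 (duplicate key), 放弃操作
>>save: 当主键重复的时候, 执行更新操作 (用新文档整体替换旧文档)
>注意: insert 和 save 都不会把已有的 count 与新的 count 相加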