MongoDB 之 MapReduce 统计关键字频率测试

测试环境:Windows XP

Operating System: Windows XP Professional (5.1, Build 2600) Service Pack 3 (2600.xpsp_sp3_gdr.101209-1647)
Language: Chinese (Regional Setting: Chinese)
Processor: Pentium(R) Dual-Core CPU E5500 @ 2.80GHz (2 CPUs)
Memory: 3292MB RAM
测试结果:
共 1079844 条数据,统计出 10957 个关键字,按出现次数降序排序后取前 100 条记录,总耗时:308578 毫秒(约 309 秒)

测试程序:

import java.io.BufferedReader;
import java.io.FileReader;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;

import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.MapReduceCommand;
import com.mongodb.MapReduceOutput;
import com.mongodb.Mongo;
import com.mongodb.MongoException;

/**
 * Small benchmark that counts keyword frequencies in a MongoDB collection
 * with MapReduce and prints the 100 most frequent keywords.
 *
 * <p>{@link #insertTestKeywordLog()} fills the {@code t_log} collection with
 * generated rows; {@link #calculateSearchKeyword()} runs the MapReduce job over
 * that collection, stores the per-keyword totals in a fresh
 * {@code t_log_result_<timestamp>} collection and prints the top entries.
 */
public class Test4MongoDb {

    /** Host of the MongoDB server used by both steps. */
    private static final String MONGO_HOST = "localhost";

    /** Port of the MongoDB server used by both steps. */
    private static final int MONGO_PORT = 9999;

    /** Database that holds the log collection and the result collections. */
    private static final String DB_NAME = "test";

    /** Collection the generated keyword log rows are written to and read from. */
    private static final String LOG_COLLECTION = "t_log";

    /**
     * Entry point: runs the keyword-frequency calculation.
     *
     * <p>Call {@code insertTestKeywordLog()} once beforehand to populate
     * {@code t_log}; it is not invoked here.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        calculateSearchKeyword();
    }

    /**
     * Generates test data: for every line of {@code d:\pinyin.txt} inserts
     * between 1 and 99 rows into {@code t_log}, each carrying the line as
     * {@code keyword} and a random date in February 2010 as {@code dd}.
     *
     * <p>Failures are reported with a stack trace; the file reader and the
     * MongoDB connection are always released.
     */
    private static void insertTestKeywordLog() {
        BufferedReader reader = null;
        Mongo m = null;
        try {
            reader = new BufferedReader(new FileReader("d:\\pinyin.txt"));
            Random ran = new Random(System.currentTimeMillis());
            m = new Mongo(MONGO_HOST, MONGO_PORT);
            DB db = m.getDB(DB_NAME);
            DBCollection collection = db.getCollection(LOG_COLLECTION);

            String month = "02";
            String year = "2010";
            int totalRows = 0;
            long start = System.currentTimeMillis();

            String line;
            while ((line = reader.readLine()) != null) {
                // nextInt(100) yields 0..99; a draw of 0 is bumped to 1 so
                // every keyword gets at least one row.
                int insertCount = Math.max(1, ran.nextInt(100));
                totalRows += insertCount;

                for (int i = 0; i < insertCount; i++) {
                    DBObject record = new BasicDBObject();
                    // Millisecond timestamp; not unique across rows written
                    // within the same millisecond.
                    record.put("id", System.currentTimeMillis());
                    record.put("keyword", line);

                    // Days 1..28 inclusive, matching the 2010-02-01..2010-02-28
                    // range queried by calculateSearchKeyword().
                    int day = ran.nextInt(28) + 1;
                    record.put("dd", year + "-" + month
                            + (day < 10 ? "-0" + day : "-" + day));
                    collection.save(record);
                }
            }

            long end = System.currentTimeMillis();
            System.out.println("insert time =" + (end - start) + " row count="
                    + totalRows);

        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (java.io.IOException e) {
                    e.printStackTrace();
                }
            }
            if (m != null) {
                m.close();
            }
        }
    }

    /**
     * Counts how often each keyword occurs in {@code t_log} for rows whose
     * {@code dd} lies between 2010-02-01 and 2010-02-28 (inclusive), stores one
     * {@code {hitCount, keyword}} document per keyword in a new result
     * collection, and prints the 100 documents with the highest
     * {@code hitCount} followed by the elapsed time and keyword count.
     *
     * <p>Connection and driver failures are reported with a stack trace; the
     * cursor and the MongoDB connection are always released.
     */
    private static void calculateSearchKeyword() {
        Mongo m = null;
        DBCursor cursor = null;
        try {
            m = new Mongo(MONGO_HOST, MONGO_PORT);
            DB db = m.getDB(DB_NAME);
            DBCollection collection = db.getCollection(LOG_COLLECTION);

            // NOTE(review): "max" is passed without "capped"; confirm the
            // target server version accepts/honors this option combination.
            DBObject newDB = new BasicDBObject();
            newDB.put("max", 100000);

            String collectionName = "t_log_result_"
                    + System.currentTimeMillis();
            DBCollection resultCollection = db.createCollection(collectionName,
                    newDB);

            // Descending index on the count so the top-100 sort can use it.
            DBObject indexObject = new BasicDBObject();
            indexObject.put("hitCount", -1);
            resultCollection.createIndex(indexObject);

            // Timing starts after the result collection has been set up.
            long start = System.currentTimeMillis();

            // Only rows dated within February 2010 take part in the job.
            DBObject condition = new BasicDBObject();
            condition.put("dd", new BasicDBObject("$gte", "2010-02-01").append(
                    "$lte", "2010-02-28"));

            // map: one {'count':1} per row, keyed by keyword.
            String map = "function() { key=this.keyword; "
                    + " emit(key,{'count':1}); " + "}";
            // reduce: must return the same shape that map emits, because the
            // server may pass earlier reduce results back in as values.
            String reduce = " function r( key, values ) { " + " var count=0;"
                    + " for ( var i = 0; i < values.length; i++ ){"
                    + " count += values[i].count;" + " }"
                    + " return {'count':count};} ";

            MapReduceCommand mr = new MapReduceCommand(collection, map, reduce,
                    null, MapReduceCommand.OutputType.INLINE, condition);

            int resultCount = 0;
            MapReduceOutput out = collection.mapReduce(mr);
            for (DBObject result : out.results()) {
                DBObject keywordObject = new BasicDBObject();
                keywordObject.put("hitCount", extractCount(result.get("value")));
                keywordObject.put("keyword", (String) result.get("_id"));
                // Persist the per-keyword total in the result collection.
                resultCollection.save(keywordObject);
                resultCount++;
            }

            // Top 100 keywords by descending hit count.
            DBObject orderBy = new BasicDBObject();
            orderBy.put("hitCount", -1);
            cursor = resultCollection.find().sort(orderBy).limit(100);
            while (cursor.hasNext()) {
                System.out.println(cursor.next());
            }

            long end = System.currentTimeMillis();
            System.out.println("total time =" + (end - start)
                    + " total row count=" + resultCount);

        } catch (UnknownHostException e) {
            e.printStackTrace();
        } catch (MongoException e) {
            e.printStackTrace();
        } finally {
            if (cursor != null) {
                cursor.close();
            }
            if (m != null) {
                m.close();
            }
        }
    }

    /**
     * Extracts the numeric count from a MapReduce result value.
     *
     * <p>Accepts either a document carrying a {@code count} field or a bare
     * number, and tolerates any numeric type the driver may return.
     *
     * @param value the {@code value} field of a MapReduce result; may be null
     * @return the count as a {@code Double}, or {@code null} if no numeric
     *         count is present
     */
    private static Double extractCount(Object value) {
        Object raw = value;
        if (raw instanceof DBObject) {
            raw = ((DBObject) raw).get("count");
        }
        if (raw instanceof Number) {
            return Double.valueOf(((Number) raw).doubleValue());
        }
        return null;
    }

}


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值