总结一下聊天室中的后续讨论:问题实际上出在 find() 查询上,该查询为了找到 15 条匹配的文档,扫描了全部约 50 万个文档:
db.tweet_data.find({
$or:
[
{ in_reply_to_screen_name: /^kunalnayyar$/i, handle: /^kaleycuoco$/i, id: { $gt: 0 } },
{ in_reply_to_screen_name: /^kaleycuoco$/i, handle: /^kunalnayyar$/i, id: { $gt: 0 } }
],
in_reply_to_status_id_str: { $ne: null }
} ).explain()
{
"cursor" : "BtreeCursor id_1",
"nscanned" : 523248,
"nscannedObjects" : 523248,
"n" : 15,
"millis" : 23682,
"nYields" : 0,
"nChunkSkips" : 0,
"isMultiKey" : false,
"indexOnly" : false,
"indexBounds" : {
"id" : [
[
0,
1.7976931348623157e+308
]
]
}
}
建议的方法:
>创建小写形式的 handle_lc 和 inreply_lc 字段,专门用于搜索
>在这两个字段上添加复合索引(compound index):
db.tweet.ensureIndex({handle_lc:1,inreply_lc:1})
>复合索引中字段的顺序使得既可以仅按 handle、也可以按 (handle, in_reply_to) 高效地查找所有推文
>按完全匹配而不是正则表达式搜索:
db.tweet_data.find({ $or: [ {in_reply_to_screen_name:'kunalnayyar',handle:'kaleycuoco',id:{$gt:0}}, {in_reply_to_screen_name:'kaleycuoco',handle:'kunalnayyar',id:{$gt:0}} ] })