在之前的2篇文章中分别分析了mongod和mongo的启动流程,下面开始将分析mongodb的查询,由于查询部分流程比较长,将分成mongo端的请求,mongod端的数据库的加载,mongod query的选取,mongod文档的匹配与数据的响应几部分来分析。
首先进入mongo的查询请求部分.mongo的查询请求部分归纳起来很简单:就是将请求封装成一个Message结构,然后将其发送到服务端,等待服务端的响应数据,取得数据后显示结果.下面来看具体的流程分析.
当我们点击db.coll.find({x:1})时按照上一篇文章的讲解,我们首先来到了mongo/shell/dbshell.cpp
- if ( ! wascmd ) {
- try {
- if ( scope->exec( code.c_str() , "(shell)" , false , true , false ) )//执行相应的javascript代码
- scope->exec( "shellPrintHelper( __lastres__ );" , "(shell2)" , true , true , false );
- }
- catch ( std::exception& e ) {
- cout << "error:" << e.what() << endl;
- }
- }
- //Only the query argument is set here, so the remaining options are empty. this.getQueryOptions() currently carries just the SlaveOK option: under a replset, secondaries cannot be queried until rs.slaveOk() is called, after which every query carries that QueryOption.
- DBCollection.prototype.find = function (query, fields, limit, skip, batchSize, options) {
- return new DBQuery( this._mongo , this._db , this ,
- this._fullName , this._massageObject( query ) , fields , limit , skip , batchSize , options || this.getQueryOptions() );
- }
- JSBool dbquery_constructor( JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval ) {
- try {
- smuassert( cx , "DDQuery needs at least 4 args" , argc >= 4 );
- //The whole constructor merely stores the arguments as properties of a new DBQuery object; no query request is issued here.
- Convertor c(cx);
- c.setProperty( obj , "_mongo" , argv[0] );
- c.setProperty( obj , "_db" , argv[1] );
- c.setProperty( obj , "_collection" , argv[2] );
- c.setProperty( obj , "_ns" , argv[3] );
- if ( argc > 4 && JSVAL_IS_OBJECT( argv[4] ) )
- c.setProperty( obj , "_query" , argv[4] );
- else {
- JSObject * temp = JS_NewObject( cx , 0 , 0 , 0 );
- CHECKNEWOBJECT( temp, cx, "dbquery_constructor" );
- c.setProperty( obj , "_query" , OBJECT_TO_JSVAL( temp ) );
- }
- if ( argc > 5 && JSVAL_IS_OBJECT( argv[5] ) )
- c.setProperty( obj , "_fields" , argv[5] );
- else
- c.setProperty( obj , "_fields" , JSVAL_NULL );
- if ( argc > 6 && JSVAL_IS_NUMBER( argv[6] ) )
- c.setProperty( obj , "_limit" , argv[6] );
- else
- c.setProperty( obj , "_limit" , JSVAL_ZERO );
- if ( argc > 7 && JSVAL_IS_NUMBER( argv[7] ) )
- c.setProperty( obj , "_skip" , argv[7] );
- else
- c.setProperty( obj , "_skip" , JSVAL_ZERO );
- if ( argc > 8 && JSVAL_IS_NUMBER( argv[8] ) )
- c.setProperty( obj , "_batchSize" , argv[8] );
- else
- c.setProperty( obj , "_batchSize" , JSVAL_ZERO );
- if ( argc > 9 && JSVAL_IS_NUMBER( argv[9] ) )
- c.setProperty( obj , "_options" , argv[9] );
- else
- c.setProperty( obj , "_options" , JSVAL_ZERO );
- c.setProperty( obj , "_cursor" , JSVAL_NULL );
- c.setProperty( obj , "_numReturned" , JSVAL_ZERO );
- c.setProperty( obj , "_special" , JSVAL_FALSE );
- }
- catch ( const AssertionException& e ) {
- if ( ! JS_IsExceptionPending( cx ) ) {
- JS_ReportError( cx, e.what() );
- }
- return JS_FALSE;
- }
- catch ( const std::exception& e ) {
- log() << "unhandled exception: " << e.what() << ", throwing Fatal Assertion" << endl;
- fassertFailed( 16323 );
- }
- return JS_TRUE;
- }
- try {
- if ( scope->exec( code.c_str() , "(shell)" , false , true , false ) )//执行相应的javascript代码
- scope->exec( "shellPrintHelper( __lastres__ );" , "(shell2)" , true , true , false );
- }
- //Evaluates JavaScript source in the shell's global scope and stores the result in __lastres__ (excerpt; 'ret' is declared in the full source).
- bool exec( const StringData& code,const string& name = "(anon)",bool printResult = false,bool reportError = true, bool assertOnError = true,int timeoutMs = 0 ) {
- JSBool worked = JS_EvaluateScript( _context,
- _global,
- code.data(),
- code.size(),
- name.c_str(),
- 1,
- &ret );
- if ( worked )
- _convertor->setProperty( _global , "__lastres__" , ret );
- }
- //Prints the value of the last shell expression (__lastres__), dispatching to the value's own shellPrint/tojson when available.
- shellPrintHelper = function (x) {
- if (typeof (x) == "undefined") {
- // Make sure that we have a db var before we use it
- // TODO: This implicit calling of GLE can cause subtle, hard to track issues - remove?
- if (__callLastError && typeof( db ) != "undefined" && db.getMongo ) {
- __callLastError = false;
- // explicit w:1 so that replset getLastErrorDefaults aren't used here which would be bad.
- var err = db.getLastError(1);
- if (err != null) {
- print(err);
- }
- }
- return;
- }
- if (x == __magicNoPrint)
- return;
- if (x == null) {
- print("null");
- return;
- }
- if (typeof x != "object")
- return print(x);
- var p = x.shellPrint;//our value is a DBQuery object, so this branch is taken and we land in DBQuery.shellPrint
- if (typeof p == "function")
- return x.shellPrint();
- var p = x.tojson;
- if (typeof p == "function")
- print(x.tojson());
- else
- print(tojson(x));
- }
- DBQuery.prototype.shellPrint = function(){//(mongo/util/query.js)
- try {
- var start = new Date().getTime();
- var n = 0;//loop printing results while more remain and fewer than shellBatchSize have been shown
- while ( this.hasNext() && n < DBQuery.shellBatchSize ){//shellBatchSize is defined as 20
- var s = this._prettyShell ? tojson( this.next() ) : tojson( this.next() , "" , true );
- print( s );//prints the result via the native function native_print
- n++;
- }
- if (typeof _verboseShell !== 'undefined' && _verboseShell) {
- var time = new Date().getTime() - start;
- print("Fetched " + n + " record(s) in " + time + "ms");
- }
- if ( this.hasNext() ){
- print( "Type \"it\" for more" );
- ___it___ = this;
- }
- else {
- ___it___ = null;
- }
- }
- catch ( e ){
- print( e );
- }
- }
- //Ensures the query has been executed, then reports whether more results are available.
- DBQuery.prototype.hasNext = function(){
- this._exec();
- if ( this._limit > 0 && this._cursorSeen >= this._limit )//past the limit: return false so nothing more is printed
- return false;
- var o = this._cursor.hasNext();
- return o;
- }
- //Returns the next result document, throwing when none remain or when the first returned document is an error ($err).
- DBQuery.prototype.next = function(){
- this._exec();
- var o = this._cursor.hasNext();
- if ( o )
- this._cursorSeen++;
- else
- throw "error hasNext: " + o;
- var ret = this._cursor.next();
- if ( ret.$err && this._numReturned == 0 && ! this.hasNext() )
- throw "error: " + tojson( ret );
- this._numReturned++;
- return ret;
- }
- DBQuery.prototype._exec = function(){//this is where this._mongo.find finally gets called
- if ( ! this._cursor ){
- assert.eq( 0 , this._numReturned );
- this._cursor = this._mongo.find( this._ns , this._query , this._fields , this._limit , this._skip , this._batchSize , this._options );
- this._cursorSeen = 0;
- }
- return this._cursor;
- }
- //Native implementation backing the shell's Mongo.find: converts the JS arguments, runs the query on the connection, and wraps the resulting DBClientCursor in a JS cursor object.
- JSBool mongo_find(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval) {
- shared_ptr< DBClientWithCommands > * connHolder = (shared_ptr< DBClientWithCommands >*)JS_GetPrivate( cx , obj );
- smuassert( cx , "no connection!" , connHolder && connHolder->get() );
- DBClientWithCommands *conn = connHolder->get();
- Convertor c( cx );
- string ns = c.toString( argv[0] );
- BSONObj q = c.toObject( argv[1] );
- BSONObj f = c.toObject( argv[2] );
- int nToReturn = (int) c.toNumber( argv[3] );
- int nToSkip = (int) c.toNumber( argv[4] );
- int batchSize = (int) c.toNumber( argv[5] );
- int options = (int)c.toNumber( argv[6] );//as the previous article showed, conn was produced by ConnectionString::connect and may be a DBClientConnection (master mode, a single address), a DBClientReplicaSet (pair/set mode) or a SyncClusterConnection (sync mode); we follow the simplest master mode with a single server address
- auto_ptr<DBClientCursor> cursor = conn->query( ns , q , nToReturn , nToSkip , f.nFields() ? &f : 0 , options , batchSize );
- if ( ! cursor.get() ) {
- log() << "query failed : " << ns << " " << q << " to: " << conn->toString() << endl;
- JS_ReportError( cx , "error doing query: failed" );
- return JS_FALSE;
- }
- JSObject * mycursor = JS_NewObject( cx , &internal_cursor_class , 0 , 0 );
- CHECKNEWOBJECT( mycursor, cx, "internal_cursor_class" );
- verify( JS_SetPrivate( cx , mycursor , new CursorHolder( cursor, *connHolder ) ) );
- *rval = OBJECT_TO_JSVAL( mycursor );
- return JS_TRUE;
- }
- auto_ptr<DBClientCursor> DBClientBase::query(const string &ns, Query query, int nToReturn,
- int nToSkip, const BSONObj *fieldsToReturn, int queryOptions , int batchSize ) {
- auto_ptr<DBClientCursor> c( new DBClientCursor( this,//build a DBClientCursor from the supplied parameters
- ns, query.obj, nToReturn, nToSkip,
- fieldsToReturn, queryOptions , batchSize ) );
- if ( c->init() )//builds the Message and sends the query request to the server
- return c;
- return auto_ptr< DBClientCursor >( 0 );
- }
- bool DBClientCursor::init() {
- Message toSend;
- _assembleInit( toSend );//assemble the query request to send: a Message; the actual payload lives in MsgData
- verify( _client );
- if ( !_client->call( toSend, *batch.m, false, &_originalHost ) ) {//actually sends the data, then calls recv to read the reply
- // log msg temp? //the reply is likewise MsgData, managed by a Message
- log() << "DBClientCursor::init call() failed" << endl;
- return false;
- }
- if ( batch.m->empty() ) {
- // log msg temp?
- log() << "DBClientCursor::init message from call() was empty" << endl;
- return false;
- }
- dataReceived();//inspects the data received in batch.m: on success it records the cursorId so the next request becomes a dbGetmore
- return true; //a query error raises an exception
- }
- auto_ptr<DBClientCursor> cursor = conn->query( ns , q , nToReturn , nToSkip , f.nFields() ? &f : 0 , options , batchSize );
- if ( ! cursor.get() ) {//这里得到了cursor
- log() << "query failed : " << ns << " " << q << " to: " << conn->toString() << endl;
- JS_ReportError( cx , "error doing query: failed" );
- return JS_FALSE;
- }
- JSObject * mycursor = JS_NewObject( cx , &internal_cursor_class , 0 , 0 );//将cursor封装成一个javascript对象,javascript就能
- CHECKNEWOBJECT( mycursor, cx, "internal_cursor_class" );//使用游标了
- verify( JS_SetPrivate( cx , mycursor , new CursorHolder( cursor, *connHolder ) ) );
- DBQuery.prototype.hasNext = function(){
- this._exec();
- if ( this._limit > 0 && this._cursorSeen >= this._limit )
- return false;
- var o = this._cursor.hasNext();//this._cursor wraps the C++ cursor above; its hasNext maps to the native function internal_cursor_hasNext,
- return o; //bound when the JavaScript environment was initialised at mongo startup
- }
- JSBool internal_cursor_hasNext(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval) {
- try {
- DBClientCursor *cursor = getCursor( cx, obj );
- *rval = cursor->more() ? JSVAL_TRUE : JSVAL_FALSE;//reports whether more data is available; when the local batch is exhausted it issues a
- } //dbGetmore request to the server, and only returns false when that yields nothing
- catch ( const AssertionException& e ) {
- if ( ! JS_IsExceptionPending( cx ) ) {
- JS_ReportError( cx, e.what() );
- }
- return JS_FALSE;
- }
- catch ( const std::exception& e ) {
- log() << "unhandled exception: " << e.what() << ", throwing Fatal Assertion" << endl;
- fassertFailed( 16290 );
- }
- return JS_TRUE;
- }
- DBQuery.prototype.shellPrint = function(){
- try {
- var start = new Date().getTime();
- var n = 0;
- while ( this.hasNext() && n < DBQuery.shellBatchSize ){//前面分析这里hasNext对应C++函数internal_cursor_hasNext,next对应
- var s = this._prettyShell ? tojson( this.next() ) : tojson( this.next() , "" , true );//internal_cursor_next,怎么得到的数据不再分析
- print( s );//调用native_print打印结果
- n++;
- }
- DBQuery.prototype.sort = function( sortBy ){//these modifiers merely attach extra fields to the query request
- return this._addSpecial( "orderby" , sortBy );
- }
- //The following modifiers likewise only attach special fields to the query request.
- DBQuery.prototype.hint = function( hint ){
- return this._addSpecial( "$hint" , hint );
- }
- DBQuery.prototype.min = function( min ) {
- return this._addSpecial( "$min" , min );
- }
- DBQuery.prototype.max = function( max ) {
- return this._addSpecial( "$max" , max );
- }
- DBQuery.prototype.showDiskLoc = function() {
- return this._addSpecial( "$showDiskLoc" , true);
- }
- DBQuery.prototype.count = function( applySkipLimit ){//count actually changes the operation: it sends a count command to the server
- var cmd = { count: this._collection.getName() };//instead of a query request
- if ( this._query ){
- if ( this._special )
- cmd.query = this._query.query;
- else
- cmd.query = this._query;
- }
- cmd.fields = this._fields || {};
- if ( applySkipLimit ){
- if ( this._limit )
- cmd.limit = this._limit;
- if ( this._skip )
- cmd.skip = this._skip;
- }
- var res = this._db.runCommand( cmd );
- if( res && res.n != null ) return res.n;
- throw "count failed: " + tojson( res );
- }
- //count() with skip/limit applied.
- DBQuery.prototype.size = function(){
- return this.count( true );
- }
总结,本文分析了mongodb自带的mongo客户端发起的查询请求以及结果的打印过程,
mongodb源码分析(五)查询2之mongod的数据库加载
上一篇文章分析到了客户端查询请求的发送,接着分析服务端的处理动作,分析从服务端响应开始到数据库
正确加载止,主要流程为数据库的读入过程与用户的认证.
mongod服务对于客户端请求的处理在mongo/db/db.cpp MyMessageHandler::process中,其中调用了
函数assembleResponse完成请求响应,我们就从这个函数开始入手分析,代码很长,删除一些支流或者不相关的代码.
- //Dispatches an incoming client message to the handler for its opcode (query, getmore, insert, update, delete, killcursors). Abridged excerpt.
- void assembleResponse( Message &m, DbResponse &dbresponse, const HostAndPort& remote ) {
- if ( op == dbQuery ) {
- if( strstr(ns, ".$cmd") ) {
- isCommand = true;
- opwrite(m);//diagnostic log write; off by default (enable at startup with --diaglog x: 0 = off; 1 = writes, 2 = reads, 3 = both,
- if( strstr(ns, ".$cmd.sys.") ) {//7 = log a few reads, and all writes)
- if( strstr(ns, "$cmd.sys.inprog") ) {
- inProgCmd(m, dbresponse);//command reporting operations currently in progress
- return;
- }
- if( strstr(ns, "$cmd.sys.killop") ) {
- killOp(m, dbresponse);//terminate the current operation
- return;
- }
- if( strstr(ns, "$cmd.sys.unlock") ) {
- unlockFsync(ns, m, dbresponse);
- return;
- }
- }
- }
- else {
- opread(m);
- }
- }
- else if( op == dbGetMore ) {
- opread(m);
- }
- else {
- opwrite(m);
- }
- long long logThreshold = cmdLine.slowMS;//default 100ms: ops slower than this are recorded when --profile is 1 or 2
- bool shouldLog = logLevel >= 1;//profile 1 records only slow ops (exceeding slowMS), 2 records every op
- if ( op == dbQuery ) { //slowMS can be changed with --slowms
- if ( handlePossibleShardedMessage( m , &dbresponse ) )//sharding-related; covered in a later article
- return;
- receivedQuery(c , dbresponse, m );//the real query entry point
- }
- else if ( op == dbGetMore ) {//data already queried; this is the entry point for fetching more of it
- if ( ! receivedGetMore(dbresponse, m, currentOp) )
- shouldLog = true;
- }
- if ( op == dbKillCursors ) {
- currentOp.ensureStarted();
- logThreshold = 10;
- receivedKillCursors(m);
- }
- else if ( op == dbInsert ) {//insert entry point
- receivedInsert(m, currentOp);
- }
- else if ( op == dbUpdate ) {//update entry point
- receivedUpdate(m, currentOp);
- }
- else if ( op == dbDelete ) {//delete entry point
- receivedDelete(m, currentOp);
- }
- if ( currentOp.shouldDBProfile( debug.executionTime ) ) {//this op will be profiled for one of two reasons: started with
- // performance profiling is on //--profile 2 (every op recorded), or --profile 1 and the op exceeded slowMs. (NOTE(review): the original article garbled an 'else {' into this comment; the elided 'if' branch skips recording when the lock cannot be acquired.)
- Lock::DBWrite lk( currentOp.getNS() );//recording means inserting a document describing the op into xxx.system.profile
- if ( dbHolder()._isLoaded( nsToDatabase( currentOp.getNS() ) , dbpath ) ) {
- Client::Context cx( currentOp.getNS(), dbpath, false );
- profile(c , currentOp );
- }
- }
- }
前进到receivedQuery,其解析了接收到的数据,然后调用runQuery负责处理查询,并处理runQuery抛出的异常.下面直接进入runQuery.
- //Entry point for a dbQuery op: dispatches commands, tries the _id fast path, otherwise locks the database (loading it on first use) and runs the optimized query. Abridged excerpt.
- string runQuery(Message& m, QueryMessage& q, CurOp& curop, Message &result) {
- //NOTE(review): 'red_ptr' below is a transcription artifact — the MongoDB source reads shared_ptr<ParsedQuery>.
- red_ptr<ParsedQuery> pq_shared( new ParsedQuery(q) );
- if ( pq.couldBeCommand() ) {//the request is actually a command; see the linked article for how commands are handled
- BSONObjBuilder cmdResBuf;//http://www.cnblogs.com/daizhj/archive/2011/04/29/mongos_command_source_code.html
- if ( runCommands(ns, jsobj, curop, bb, cmdResBuf, false, queryOptions) ){}
- bool explain = pq.isExplain();//true iff the shell used db.coll.find().explain(), false otherwise
- BSONObj order = pq.getOrder();
- BSONObj query = pq.getFilter();
- // Run a simple id query.
- if ( ! (explain || pq.showDiskLoc()) && isSimpleIdQuery( query ) && !pq.hasOption( QueryOption_CursorTailable ) ) {
- if ( queryIdHack( ns, query, pq, curop, result ) ) {//fast path for _id lookups
- return "";
- }
- }
- bool hasRetried = false;
- while ( 1 ) {//the ReadContext below is this article's protagonist: its first lock of the database triggers the database load
- Client::ReadContext ctx( ns , dbpath ); // read locks
- replVerifyReadsOk(&pq);//this is where secondary reads are rejected in a replset (until slaveOk is set)
- BSONObj oldPlan;
- if ( ! hasRetried && explain && ! pq.hasIndexSpecifier() ) {
- scoped_ptr<MultiPlanScanner> mps( MultiPlanScanner::make( ns, query, order ) );
- oldPlan = mps->cachedPlanExplainSummary();
- }//the real query happens below; it is intricate and is covered in the next article
- return queryWithQueryOptimizer( queryOptions, ns, jsobj, curop, query, order,
- pq_shared, oldPlan, shardingVersionAtStart,
- pgfs, npfe, result );
- }
- }
- }
- //Acquires a read lock on the namespace; if the database is not yet loaded, briefly upgrades to the global write lock to load it, then re-takes the read lock.
- Client::ReadContext::ReadContext(const string& ns, string path, bool doauth ) {
- {
- lk.reset( new Lock::DBRead(ns) );//database read lock; mongodb's locking machinery is not covered in this article
- Database *db = dbHolder().get(ns, path);
- if( db ) {//NULL on the first load of this database
- c.reset( new Context(path, ns, db, doauth) );
- return;
- }
- }
- if( Lock::isW() ) { //already holding the global write lock
- // write locked already
- DEV RARELY log() << "write locked on ReadContext construction " << ns << endl;
- c.reset( new Context(ns, path, doauth) );
- }
- else if( !Lock::nested() ) {
- lk.reset(0);
- {
- Lock::GlobalWrite w;//take the global write lock: this is where the database actually gets loaded
- Context c(ns, path, doauth);
- }
- // db could be closed at this interim point -- that is ok, we will throw, and don't mind throwing.
- lk.reset( new Lock::DBRead(ns) );
- c.reset( new Context(ns, path, doauth) );
- }
- }
- //Binds the current client to a namespace/path; _finishInit does the database load and the auth check.
- Client::Context::Context(const string& ns, string path , bool doauth, bool doVersion ) :
- _client( currentClient.get() ),
- _oldContext( _client->_context ),
- _path( path ),
- _justCreated(false), // set for real in finishInit
- _doVersion(doVersion),
- _ns( ns ),
- _db(0)
- {
- _finishInit( doauth );
- }
- void Client::Context::_finishInit( bool doauth ) {
- _db = dbHolderUnchecked().getOrCreate( _ns , _path , _justCreated );//load or create the database
- checkNsAccess( doauth, writeLocked ? 1 : 0 );//authentication check
- }
- //Returns the already-loaded Database for ns under path, or loads it from disk and caches it. Abridged excerpt.
- Database* DatabaseHolder::getOrCreate( const string& ns , const string& path , bool& justCreated ) {
- string dbname = _todb( ns );//converts a string like "test.coll" into "test"
- {
- SimpleMutex::scoped_lock lk(_m);
- Lock::assertAtLeastReadLocked(ns);
- DBs& m = _paths[path];//if already loaded under this configured path, return it directly
- {
- DBs::iterator i = m.find(dbname);
- if( i != m.end() ) {
- justCreated = false;
- return i->second;
- }
- }
- Database *db = new Database( dbname.c_str() , justCreated , path );//the actual read of the database files
- {
- SimpleMutex::scoped_lock lk(_m);//once loaded, record the database under its path
- DBs& m = _paths[path];
- verify( m[dbname] == 0 );
- m[dbname] = db;
- _size++;
- }
- return db;
- }
- //Opens a database: checks for its .ns file, loads it and maps the data files when present. Abridged excerpt (catch clause elided).
- Database::Database(const char *nm, bool& newDb, const string& _path )
- : name(nm), path(_path), namespaceIndex( path, name ),
- profileName(name + ".system.profile")
- {
- try {
- newDb = namespaceIndex.exists();//an existing xxx.ns file means the database was already created
- // If already exists, open. Otherwise behave as if empty until
- // there's a write, then open.
- if (!newDb) {
- namespaceIndex.init();//load the xxx.ns file itself
- if( _openAllFiles )
- openAllFiles();//map all data files of the form xxx.0, xxx.1, xxx.2 ...
- }
- magic = 781231;
- }
- //Memory-maps the database's .ns file (creating it first for a new database) and builds the namespace hash table over the mapping.
- NOINLINE_DECL void NamespaceIndex::_init() {
- unsigned long long len = 0;
- boost::filesystem::path nsPath = path();//xxx.ns
- string pathString = nsPath.string();
- void *p = 0;
- if( boost::filesystem::exists(nsPath) ) {//if the file exists, memory-map it
- if( f.open(pathString, true) ) {//f is a MongoMMF object
- len = f.length();
- if ( len % (1024*1024) != 0 ) {
- log() << "bad .ns file: " << pathString << endl;
- uassert( 10079 , "bad .ns file length, cannot open database", len % (1024*1024) == 0 );
- }
- p = f.getView();//pointer to the mapped view of the file
- }
- }
- else {
- // use lenForNewNsFiles, we are making a new database
- massert( 10343, "bad lenForNewNsFiles", lenForNewNsFiles >= 1024*1024 );
- maybeMkdir();
- unsigned long long l = lenForNewNsFiles;//create the ns file; default size 16MB, settable with --nssize (in MB,
- if( f.create(pathString, l, true) ) { //which only affects newly created databases)
- getDur().createdFile(pathString, l); // always a new file
- len = l;
- verify( len == lenForNewNsFiles );
- p = f.getView();
- }
- }
- verify( len <= 0x7fffffff );
- ht = new HashTable<Namespace,NamespaceDetails>(p, (int) len, "namespace index");
- if( checkNsFilesOnLoad )
- ht->iterAll(namespaceOnLoadCallback);
- }
- bool MongoMMF::open(string fname, bool sequentialHint) {
- LOG(3) << "mmf open " << fname << endl;
- setPath(fname);
- _view_write = mapWithOptions(fname.c_str(), sequentialHint ? SEQUENTIAL : 0);//the actual memory mapping happens here
- return finishOpening();
- }
- bool MongoMMF::finishOpening() {
- if( _view_write ) {
- if( cmdLine.dur ) {//with journaling enabled, additionally create a private map; journaling will get its own article later
- _view_private = createPrivateMap();
- if( _view_private == 0 ) {
- msgasserted(13636, str::stream() << "file " << filename() << " open/create failed in createPrivateMap (look in log for more information)");
- }
- privateViews.add(_view_private, this); // note that testIntent builds use this, even though it points to view_write then...
- }
- else {
- _view_private = _view_write;
- }
- return true;
- }
- return false;
- }
- ht = new HashTable<Namespace,NamespaceDetails>(p, (int) len, "namespace index");
NamespaceDetails : this is the "header" for a collection that has all its details.
It's in the .ns file and this is a memory mapped region (thus the pack pragma above).
- //On-disk "header" of a collection, living inside the memory-mapped .ns file. Abridged excerpt (class close elided in the article).
- class NamespaceDetails {
- public:
- enum { NIndexesMax = 64, NIndexesExtra = 30, NIndexesBase = 10 };
- /*-------- data fields, as present on disk : */
- DiskLoc firstExtent;//first extent of the collection; mongodb's storage layout is discussed with data insertion later
- DiskLoc lastExtent;//last extent of the collection
- /* NOTE: capped collections v1 override the meaning of deletedList.
- deletedList[0] points to a list of free records (DeletedRecord's) for all extents in
- the capped namespace.
- deletedList[1] points to the last record in the prev extent. When the "current extent"
- changes, this value is updated. !deletedList[1].isValid() when this value is not
- yet computed.
- */
- DiskLoc deletedList[Buckets];
- // ofs 168 (8 byte aligned)
- struct Stats {
- // datasize and nrecords MUST Be adjacent code assumes!
- long long datasize; // this includes padding, but not record headers
- long long nrecords;
- } stats;
- int lastExtentSize;
- int nIndexes;
- private:
- // ofs 192
- IndexDetails _indexes[NIndexesBase];//the first 10 indexes live here; past 10 they are stored in an $extra block whose
- // ofs 352 (16 byte aligned) //address is recorded in extraOffset below
- int _isCapped; // there is wasted space here if I'm right (ERH)
- int _maxDocsInCapped; // max # of objects for a capped table. TODO: should this be 64 bit?
- double _paddingFactor; // 1.0 = no padding.
- // ofs 386 (16)
- int _systemFlags; // things that the system sets/cares about
- public:
- DiskLoc capExtent;
- DiskLoc capFirstNewRecord;
- unsigned short dataFileVersion; // NamespaceDetails version. So we can do backward compatibility in the future. See filever.h
- unsigned short indexFileVersion;
- unsigned long long multiKeyIndexBits;
- private:
- // ofs 400 (16)
- unsigned long long reservedA;
- long long extraOffset; // where the $extra info is located (bytes relative to this)
- public:
- int indexBuildInProgress; // 1 if in prog
- private:
- int _userFlags;
- char reserved[72];
- /*-------- end data 496 bytes */
_init函数执行完毕,往上回到Database::Database()函数:
- if( _openAllFiles )
- openAllFiles();//这里映射所有的xx.0,xx.1这种文件,记录映射的文件,映射的方式如同映射xx.ns,在开启了journal时同时保存两份地址.这里不再分析,感兴趣的自己研究吧
下面是认证检查:未通过时将返回false,这将导致mongod向客户端发送未认证信息,客户端的操作请求失败.
- //Returns whether this connection is authorized at the given level for dbname (or via admin/local, or localhost fallback).
- bool AuthenticationInfo::_isAuthorized(const string& dbname, Auth::Level level) const {
- if ( noauth ) {//--noauth sets this true, --auth sets it false; the default is false (auth checks disabled)
- return true;
- }
- {
- scoped_spinlock lk(_lock);
- //check whether this connection already authenticated against dbname; the auth data was recorded when the mongo shell authenticated with the server at connect time
- if ( _isAuthorizedSingle_inlock( dbname , level ) )
- return true;
- if ( _isAuthorizedSingle_inlock( "admin" , level ) )
- return true;
- if ( _isAuthorizedSingle_inlock( "local" , level ) )
- return true;
- }
- return _isAuthorizedSpecialChecks( dbname );//if none of the above passed, check whether _isLocalHostAndLocalHostIsAuthorizedForAll applies, i.e. whether this is a local connection
- }
本文到这里结束,主要是搞清楚了mongod接收到来自客户端请求后的执行流程到数据库的加载,重要的
是明白ns文件的作用,普通数据文件xx.0,xx.1的映射,下一篇文章我们将继续分析查询请求的处理.
本文链接:http://blog.csdn.net/yhjj0108/article/details/8255968
作者: yhjj0108,杨浩
mongodb源码分析(六)查询3之mongod的cursor的产生
上一篇文章分析了mongod的数据库加载部分,下面这一篇文章将继续分析mongod cursor的产生,这里cursor
的生成应该是mongodb系统中最复杂的部分.下面先介绍几个关于mongodb的游标概念.
basicCursor: 直接扫描整个collection的游标,可设置初始的扫描位置,扫描为顺序扫描.
ReverseCursor: 反向扫描游标,相对于顺序扫描,这里是反向扫描.
ReverseCappedCursor: cap集合的反向扫描游标.
ForwardCappedCursor: cap集合的顺序扫描游标.
GeoCursorBase: 空间地理索引游标的基类,我并未阅读相关代码,感兴趣的自己研究吧.
BtreeCursor: mongodb的一般数据索引扫描游标,这个游标完成对于索引的扫描.
MultiCursor: 有待研究.
QueryOptimizerCursor: 经过优化的扫描游标,多plan扫描时或者对于查询中有$or的语句且$or语句其作用时由于
优化查询的游标. 这里将简单描述其流程.
1. 如果是类似这种db.coll.find()的查询则将直接返回一个BasicCursor的游标扫描全表.
2. 如果是简单的id查询如db.coll.find(_id:xxx),且允许_id查询plan的情况下直接查询_id索引,返回一个_id索引
的BtreeCursor.
3.根据查询整理出查询值的范围,作为优化查询范围的依据,如:db.coll.find({x:{$lt:100,$gt:20}}),那么这里其范围就是[20,100],这个范围只有在对应的变量是索引的情况下起作用,如x为其中的一个索引,那么这里的范围将帮助其
游标BtreeCursor首先直接将查询范围定位到[20,100]的位置,这个工作对于由Btree组织的索引来说很简单.简单
来说就是优化查询范围.但是若x不是索引那么这里得到的查询范围将是无用的,这里将返回一个BasicCursor的
游标执行全表扫描.
4.根据得到的所有的查询域的范围比如说x:[10,20],y:[4,6]这种选取查询计划(QueryPlan).查询计划的选取这里举个
例子,有x,y两个查询域.index有{x:1},{y:1},{x:1,y:1}这几个索引,那么选取计划时发现只有索引{x:1,y:1}完全满足查
询计划,其是最优的,那么确定选取这个索引为查询索引.返回唯一的QueryPlan,最后生成一个确切的
BtreeCursor.但是如果没有{x:1,y:1}这个索引怎么办呢?那么剩下两个索引{x:1},{y:1}都部分包含了查询域,他们
都是有作用的,于是乎生成了两个QueryPlan,一个对应于索引{x:1},一个对应于索引{y:1},于是乎使用
QueryOptimizerCursor这个cursor管理两个BtreeCursor,每次交替执行两个BtreeCursor的查询,直到一个
BtreeCursor查询完毕,那么这个plan是所有plan中最优的,将其缓存起来,下一次同样查询时直接选择这个plan作
为查询的plan.因为两个plan中首先完成扫描的plan查询的次数最少.那么两个plan都查询到的同一条满足查询要
求的数据怎么办,查询结尾会有一个对于满足要求的document地址的记录,如果一条满足要求的document的地址
已经在记录中了,就不再记录这个document.
5.$or查询的优化,对于一个$or举例来说明:{$or:[{x:1},{y:2},{z:3,a:4}]}这样的查询请求,这样要当$or中的每一个查
询域,中至少一个域是可用的索引比如说有索引x,y,a那么这个$or才是有意义的.如果这个$or有意义,那么这里将
使用QueryOptimizerCursor来处理每一个$or中的查询域,比如说{x:1},然后这里退化到4,plan的选取,$or中的查
询一个一个的执行.回过头来分析,如果没有索引y,那么对于这个$or的查询因为满足y:2的文档将会被返回,那么
只能扫描全表,这时即使有索引x,z或者a这种也不能避免全表的扫描,那么这里的$or就变得没有优化的意义了.
另外如果查询指定了sort(xxx:1)按照某些域排序或者设定了最大值最小值$or也是无意义的.
6. 查询结束后的排序,当指定了如db.coll.find({x:1,y:2}).sort(z:1),这种需要按照z升序排列的查询时,这种情况就要
考虑当没有索引z时,那么排序是肯定避免不了的,查询的结果会放到一个map中,map按照z的升序来排序,当排序
的文档总大小超过了默认的32MB最大值时会返回错误,提醒你应该为z域建立索引了.下面来看有索引时的状况.
(1),索引为{x:1},{z:1},如果这里两个索引查询的文档数目一样多,那么优先选择{x:1},因为建立索引时其比较靠前,然
后还是得排序.
(2)索引{x:1,z:1},{z:1,x:1},由于第一个索引查出来的顺序是按照x的顺序来排列的,那么还是得排序,第二个索引不需
要排序,但是考虑最优QueryPlan的选取是找到最先执行完plan的索引,这里仍然不会选取{z:1,x:1}这个plan,而
是会选取{x:1,z:1}这个plan.考虑到两个索引还不直观,这里加入一个{x:1},{x:1,z:1},{z:1,x:1},那么这里将会选择第
一个索引{x:1}.要让mongod选择{z:1,x:1}这plan只能使用db.coll.find({x:{$lt:5,$gt:0}).sort({z:1}).hint({z:1,x:1}),
总觉得这是一个bug,mongod应该能够修正这种情况才对,应该能自己选择最优的索引{z:1,x:1}才对.这里有一篇
10gen的工程师谈mongodb索引优化的文章可以一看:
http://www.csdn.net/article/2012-11-09/2811690-optimizing-mongodb-compound
上面介绍了那么多的流程情况下面正式进入代码分析阶段.接上篇文章runQuery->queryWithQueryOptimizer
- //Runs a query via the query optimizer: obtains a cursor, iterates it (yielding periodically), builds the response, and optionally saves a ClientCursor for getMore. Abridged excerpt.
- string queryWithQueryOptimizer( int queryOptions, const string& ns,
- const BSONObj &jsobj, CurOp& curop,
- const BSONObj &query, const BSONObj &order,
- const shared_ptr<ParsedQuery> &pq_shared,
- const BSONObj &oldPlan,
- const ConfigVersion &shardingVersionAtStart,
- scoped_ptr<PageFaultRetryableSection>& parentPageFaultSection,
- scoped_ptr<NoPageFaultsAllowed>& noPageFault,
- Message &result ) {
- const ParsedQuery &pq( *pq_shared );
- shared_ptr<Cursor> cursor;
- QueryPlanSummary queryPlan;
- if ( pq.hasOption( QueryOption_OplogReplay ) ) {//cursor used for oplog replay
- cursor = FindingStartCursor::getCursor( ns.c_str(), query, order );
- }
- else {
- cursor =//the main subject of this article: obtaining the cursor
- NamespaceDetailsTransient::getCursor( ns.c_str(), query, order, QueryPlanSelectionPolicy::any(),
- 0, pq_shared, false, &queryPlan );
- }
- scoped_ptr<QueryResponseBuilder> queryResponseBuilder
- ( QueryResponseBuilder::make( pq, cursor, queryPlan, oldPlan ) );
- bool saveClientCursor = false;
- OpTime slaveReadTill;
- ClientCursor::Holder ccPointer( new ClientCursor( QueryOption_NoCursorTimeout, cursor,
- ns ) );
- for( ; cursor->ok(); cursor->advance() ) {
- bool yielded = false;//yield mechanism: when the query has run longer than a threshold (10ms here), or this check has been reached more than 128 times in the interval, or the document the cursor points at is not in memory, the thread sleeps a while; the sleep is 10*r+w ms for r current readers and w writers, capped at 1000000.
- if ( !ccPointer->yieldSometimes( ClientCursor::MaybeCovered, &yielded ) ||//the cursor is told to save its position before yielding
- !cursor->ok() ) {//the cursor became invalid while we were yielded
- cursor.reset();
- queryResponseBuilder->noteYield();
- // !!! TODO The queryResponseBuilder still holds cursor. Currently it will not do
- // anything unsafe with the cursor in handoff(), but this is very fragile.
- //
- // We don't fail the query since we're fine with returning partial data if the
- // collection was dropped.
- // NOTE see SERVER-2454.
- // TODO This is wrong. The cursor could be gone if the closeAllDatabases command
- // just ran.
- break;
- }
- if ( yielded ) {//a yield happened, for one of two reasons: either the data we need was not in memory, or
- queryResponseBuilder->noteYield();//the ClientCursor exceeded the yield deadline tracked by ccPointer->_yieldSometimesTracker
- }
- if ( pq.getMaxScan() && cursor->nscanned() > pq.getMaxScan() ) {//exceeded the maximum scan count the user set on the query
- break;
- }
- if ( !queryResponseBuilder->addMatch() ) {//the per-document matching process, covered in the next article
- continue;
- }
- // Note slave's position in the oplog.
- if ( pq.hasOption( QueryOption_OplogReplay ) ) {
- BSONObj current = cursor->current();
- BSONElement e = current["ts"];
- if ( e.type() == Date || e.type() == Timestamp ) {
- slaveReadTill = e._opTime();
- }
- }
- if ( !cursor->supportGetMore() || pq.isExplain() ) {
- if ( queryResponseBuilder->enoughTotalResults() ) {
- break;
- }
- }
- else if ( queryResponseBuilder->enoughForFirstBatch() ) {
- // if only 1 requested, no cursor saved for efficiency...we assume it is findOne()
- if ( pq.wantMore() && pq.getNumToReturn() != 1 ) {
- queryResponseBuilder->finishedFirstBatch();
- if ( cursor->advance() ) {
- saveClientCursor = true;
- }
- }
- break;
- }
- }
- if ( cursor ) {
- if ( pq.hasOption( QueryOption_CursorTailable ) && pq.getNumToReturn() != 1 ) {
- cursor->setTailable();
- }
- // If the tailing request succeeded.
- if ( cursor->tailable() ) {
- saveClientCursor = true;
- }
- }
- int nReturned = queryResponseBuilder->handoff( result );
- ccPointer.reset();
- long long cursorid = 0;
- if ( saveClientCursor ) {//save the cursor so that a later dbGetmore request from the client can read it back directly
- // Create a new ClientCursor, with a default timeout.
- ccPointer.reset( new ClientCursor( queryOptions, cursor, ns,
- jsobj.getOwned() ) );
- cursorid = ccPointer->cursorid();
- DEV tlog(2) << "query has more, cursorid: " << cursorid << endl;
- if ( cursor->supportYields() ) {
- ClientCursor::YieldData data;
- ccPointer->prepareToYield( data );
- }
- else {
- ccPointer->c()->noteLocation();
- }
- // Save slave's position in the oplog.
- if ( pq.hasOption( QueryOption_OplogReplay ) && !slaveReadTill.isNull() ) {
- ccPointer->slaveReadTill( slaveReadTill );
- }
- if ( !ccPointer->ok() && ccPointer->c()->tailable() ) {
- DEV tlog() << "query has no more but tailable, cursorid: " << cursorid << endl;
- }
- if( queryOptions & QueryOption_Exhaust ) {
- curop.debug().exhaust = true;
- }
- // Set attributes for getMore.
- ccPointer->setChunkManager( queryResponseBuilder->chunkManager() );
- ccPointer->setPos( nReturned );
- ccPointer->pq = pq_shared;
- ccPointer->fields = pq.getFieldPtr();
- ccPointer.release();
- }//build the reply (result set) for the client
- QueryResult *qr = (QueryResult *) result.header();
- qr->cursorId = cursorid;
- curop.debug().cursorid = ( cursorid == 0 ? -1 : qr->cursorId );
- qr->setResultFlagsToOk();
- // qr->len is updated automatically by appendData()
- curop.debug().responseLength = qr->len;
- qr->setOperation(opReply);
- qr->startingFrom = 0;
- qr->nReturned = nReturned;
- int duration = curop.elapsedMillis();
- bool dbprofile = curop.shouldDBProfile( duration );//profiling of this query command
- if ( dbprofile || duration >= cmdLine.slowMS ) {
- curop.debug().nscanned = ( cursor ? cursor->nscanned() : 0LL );
- curop.debug().ntoskip = pq.getSkip();
- }
- curop.debug().nreturned = nReturned;
- return curop.debug().exhaust ? ns : "";
- }
继续来看游标的产生:
NamespaceDetailsTransient::getCursor->CursorGenerator::generate
- //Produces the cursor for a query: shortcut cursors for trivial cases, a single-plan cursor when one plan suffices, otherwise a QueryOptimizerCursor over multiple plans.
- shared_ptr<Cursor> CursorGenerator::generate() {
- setArgumentsHint();//applies an index hint from db.coll.find({x:1}).hint({xx:1}); the snapshot option, when set, forces the _id index. Background on snapshot: http://www.cnblogs.com/silentcross/archive/2011/07/04/2095424.html
- shared_ptr<Cursor> cursor = shortcutCursor();//returns a full-collection-scan cursor when both query and sort are empty, or
- if ( cursor ) { //a BtreeCursor on the _id index for a simple _id lookup
- return cursor;
- }
- setMultiPlanScanner();//build the query field ranges and generate the plans
- cursor = singlePlanCursor()//if only a single plan was generated, build its cursor (NOTE(review): the missing ';' is a transcription artifact)
- if ( cursor ) {
- return cursor;
- }//multiple plans: fall back to the optimizer cursor
- return newQueryOptimizerCursor( _mps, _planPolicy, isOrderRequired(), explain() );
- }
- void CursorGenerator::setMultiPlanScanner() {//essentially every make() here does two things: allocate the object and run its init() to initialise it
- _mps.reset( MultiPlanScanner::make( _ns, _query, _order, _parsedQuery, hint(),
- explain() ? QueryPlanGenerator::Ignore :
- QueryPlanGenerator::Use,
- min(), max() ) );
- }
- void MultiPlanScanner::init( const BSONObj &order, const BSONObj &min, const BSONObj &max ) {
- if ( !order.isEmpty() || !min.isEmpty() || !max.isEmpty() ) {
- _or = false;//_or是根据传入的query中是否有$or标志设置的
- }
- if ( _or ) {//这里的OrRangeGenerator和下面的FieldRangeSetPair其实都是用来确定查询域的范围的,通过这个范围来直接定位Btree的位置,跳过不必要的btree数据对象扫描,当没有相应的索引时,这里建立的查询域范围将是无用的.
- // Only construct an OrRangeGenerator if we may handle $or clauses.
- _org.reset( new OrRangeGenerator( _ns.c_str(), _query ) );
- if ( !_org->getSpecial().empty() ) {
- _or = false;
- }
- else if ( haveUselessOr() ) {//对于如{$or:[{x:1},{y:1}]},只有x和y上都存在索引时_or才不会被设置为false,前面描述过程时说到过
- _or = false;
- }
- }
- // if _or == false, don't use or clauses for index selection
- if ( !_or ) {
- ++_i;//若query为{$or:[{a:1},{b:1}]}这种以or开头的语句,那么frsp是无用的,具体见FieldRangeSet::handleMatchField
- auto_ptr<FieldRangeSetPair> frsp( new FieldRangeSetPair( _ns.c_str(), _query, true ) );
- updateCurrentQps( QueryPlanSet::make( _ns.c_str(), frsp, auto_ptr<FieldRangeSetPair>(),
- _query, order, _parsedQuery, _hint,
- _recordedPlanPolicy,
- min, max, true ) );
- }
- else {
- BSONElement e = _query.getField( "$or" );
- massert( 13268, "invalid $or spec",
- e.type() == Array && e.embeddedObject().nFields() > 0 );
- handleBeginningOfClause();
- }
- }
- OrRangeGenerator::OrRangeGenerator( const char *ns, const BSONObj &query , bool optimize )
- : _baseSet( ns, query, optimize ), _orFound() {
- BSONObjIterator i( _baseSet.originalQuery() );
- //取出所有的$or开始的对象,分别创建一个FieldRangeSetPair
- while( i.more() ) {//just like{$or:[{a:1},{b:1}], $or:[{c:1},{d:1}]....}
- BSONElement e = i.next();
- if ( strcmp( e.fieldName(), "$or" ) == 0 ) {
- uassert( 13262, "$or requires nonempty array", e.type() == Array && e.embeddedObject().nFields() > 0 );
- BSONObjIterator j( e.embeddedObject() );
- while( j.more() ) {
- BSONElement f = j.next();
- uassert( 13263, "$or array must contain objects", f.type() == Object );
- _orSets.push_back( FieldRangeSetPair( ns, f.embeddedObject(), optimize ) );
- uassert( 13291, "$or may not contain 'special' query", _orSets.back().getSpecial().empty() );
- _originalOrSets.push_back( _orSets.back() );
- }
- _orFound = true;
- continue;
- }
- }
- }
- FieldRangeSetPair( const char *ns, const BSONObj &query, bool optimize=true )
- :_singleKey( ns, query, true, optimize ), _multiKey( ns, query, false, optimize ) {}
索引如:db.coll.ensureIndex({x:1})时,当不存在x:[xx,xxx,xxxx]这种数据时那么这个索引x就是单值索引,当插入一条数据中
包括了x:[xx,xxx,xxx]这种array结构的x时,x变为多值索引.多值索引简单来说就是对于array中的每一个值建立一个索引.
继续前进到_singleKey对象的构造函数:
- FieldRangeSet::FieldRangeSet( const char *ns, const BSONObj &query, bool singleKey,
- bool optimize ) :
- _ns( ns ),
- _queries( 1, query.getOwned() ),
- _singleKey( singleKey ),
- _exactMatchRepresentation( true ),
- _boundElemMatch( true ) {
- init( optimize );
- }
- void FieldRangeSet::init( bool optimize ) {
- BSONObjIterator i( _queries[ 0 ] );
- while( i.more() ) {
- handleMatchField( i.next(), optimize );
- }
- }
- void FieldRangeSet::handleMatchField( const BSONElement& matchElement, bool optimize ) {
- const char* matchFieldName = matchElement.fieldName();
- if ( matchFieldName[ 0 ] == '$' ) {
- if ( str::equals( matchFieldName, "$and" ) ) {//$and对象对于$and中的每一个查询调用handleMatchField递归处理
- uassert( 14816, "$and expression must be a nonempty array",
- matchElement.type() == Array &&
- matchElement.embeddedObject().nFields() > 0 );
- handleConjunctionClauses( matchElement.embeddedObject(), optimize );
- return;
- }
- adjustMatchField();
- if ( str::equals( matchFieldName, "$or" ) ) {//这里出现了这种形式:$or:[{xxx:1}],只有一个分支,也是调用
- // Check for a singleton $or expression. //handleMatchField递归处理
- if ( matchElement.type() == Array &&
- matchElement.embeddedObject().nFields() == 1 ) {
- // Compute field bounds for a singleton $or expression as if it is a $and
- // expression. With only one clause, the matching semantics are the same.
- // SERVER-6416
- handleConjunctionClauses( matchElement.embeddedObject(), optimize );
- }
- return;
- }
- if ( str::equals( matchFieldName, "$nor" ) ) {
- return;
- }
- if ( str::equals( matchFieldName, "$where" ) ) {
- return;
- }
- }
- //just like {x: 1} or {x : {y : 1, z : 2}}
- bool equality =
- // Check for a parsable '$' operator within a match element, indicating the object
- // should not be matched as is but parsed.
- // NOTE This only checks for a '$' prefix in the first embedded field whereas Matcher
- // checks all embedded fields.
- ( getGtLtOp( matchElement ) == BSONObj::Equality ) &&
- // Similarly check for the $not meta operator.
- !( matchElement.type() == Object &&//这里对于intersectMatchField以及其内部的内容就不再做分析了,写出来太多
- str::equals( matchElement.embeddedObject().firstElementFieldName(), "$not" ) );
- if ( equality ) {//这里将建立一个matchFieldName的FieldRange结构,然后和之前可能存在的这个域的FieldRange结构做运算
- intersectMatchField( matchFieldName, matchElement, false, optimize );//得到新的matchFieldName的范围
- return;
- }
- bool untypedRegex =
- ( matchElement.type() == Object ) &&//like {x: {$regex: /acme.*corp/i, $nin: ['acmeblahcorp']}}
- matchElement.embeddedObject().hasField( "$regex" );//like {x:{$regex: 'acme.*corp', $options:'i'}}
- if ( untypedRegex ) {
- // $regex/$options pairs must be handled together and so are passed via the
- // element encapsulating them.
- intersectMatchField( matchFieldName, matchElement, false, optimize );
- // Other elements may remain to be handled, below.
- }//这里是处理类似{x:{$elemMatch:{y:1,z:2}}},{x:{$all:[1,2,3]}}这种查询语句
- BSONObjIterator matchExpressionIterator( matchElement.embeddedObject() );
- while( matchExpressionIterator.more() ) {
- BSONElement opElement = matchExpressionIterator.next();
- if ( str::equals( opElement.fieldName(), "$not" ) ) {
- handleNotOp( matchFieldName, opElement, optimize );
- }
- else {
- handleOp( matchFieldName, opElement, false, optimize );
- }
- }
- }
- void FieldRangeSet::handleOp( const char* matchFieldName, const BSONElement& op, bool isNot,
- bool optimize ) {
- int opType = op.getGtLtOp();
- // If the first $all element's first op is an $elemMatch, generate bounds for it
- // and ignore the remaining $all elements. SERVER-664
- if ( opType == BSONObj::opALL ) {//类似这种{x:{$all:[{$elemMatch:{k:1,f:1}},{x:1},{z:1}]}},则这里只处理其中的第一个element
- uassert( 13050, "$all requires array", op.type() == Array );
- BSONElement firstAllClause = op.embeddedObject().firstElement();
- if ( firstAllClause.type() == Object ) {
- BSONElement firstAllClauseOp = firstAllClause.embeddedObject().firstElement();
- if ( firstAllClauseOp.getGtLtOp() == BSONObj::opELEM_MATCH ) {
- handleElemMatch( matchFieldName, firstAllClauseOp, isNot, optimize );
- return;
- }
- }
- }//不再深入到HandleElemMatch函数内部,简单说一下,对于{$elemMatch:{k:1,y:2}}这种语句就是再建立一个FieldRangeSet并对其内部
- if ( opType == BSONObj::opELEM_MATCH ) {//的{k:1,y:2}做处理,得到的FieldRangeSet与当前的FieldRangeSet做与运算,得到的结果
- handleElemMatch( matchFieldName, op, isNot, optimize );//保存到当前FieldRangeSet中
- }
- else {
- intersectMatchField( matchFieldName, op, isNot, optimize );
- }
- }
回到MultiPlanScanner::init继续前进看看:QueryPlanSet::make函数.
- auto_ptr<FieldRangeSetPair> frsp( new FieldRangeSetPair( _ns.c_str(), _query, true ) );
- updateCurrentQps( QueryPlanSet::make( _ns.c_str(), frsp, auto_ptr<FieldRangeSetPair>(),
- _query, order, _parsedQuery, _hint,
- _recordedPlanPolicy,
- min, max, true ) );
- void QueryPlanSet::init() {
- DEBUGQO( "QueryPlanSet::init " << ns << "\t" << _originalQuery );
- _plans.clear();//清空plans,这里将是plan的选取
- _usingCachedPlan = false;
- _generator.addInitialPlans();
- }
- void QueryPlanGenerator::addInitialPlans() {
- const char *ns = _qps.frsp().ns();
- NamespaceDetails *d = nsdetails( ns );
- if ( addShortCircuitPlan( d ) ) {//这里直接选择单个plan,下面看看这里添加的plan都是什么状况
- return;
- }
- addStandardPlans( d );//根据索引实际添加的plan
- warnOnCappedIdTableScan();
- }
- bool QueryPlanGenerator::addShortCircuitPlan( NamespaceDetails *d ) {
- return//1 集合不存在,2 不可能有match的索引,3 hint指定选择索引的plan, 4使用特殊索引的plan如:
- // The collection is missing.//空间地理索引,5无法指定范围并且排序为空的plan,6指定排序
- setUnindexedPlanIf( !d, d ) ||//不为空为$natural(这个是按照插入顺序排序的要求)的plan
- // No match is possible.//这几种情况下选择的plan都是一定的,不存在多plan的情况
- setUnindexedPlanIf( !_qps.frsp().matchPossible(), d ) ||
- // The hint, min, or max parameters are specified.
- addHintPlan( d ) ||
- // A special index operation is requested. yhjj0108 add -- maybe for special index 2d and so on
- addSpecialPlan( d ) ||
- // No indexable ranges or ordering are specified.
- setUnindexedPlanIf( _qps.frsp().noNonUniversalRanges() && _qps.order().isEmpty(), d ) ||
- // $natural sort is requested.
- setUnindexedPlanIf( !_qps.order().isEmpty() &&
- str::equals( _qps.order().firstElementFieldName(), "$natural" ), d );
- }
- void QueryPlanGenerator::addStandardPlans( NamespaceDetails *d ) {
- if ( !addCachedPlan( d ) ) {//plan已经被缓存了,表示执行过一次该查询以上,上一次已经找出了最优的
- addFallbackPlans();//plan,这一次直接取出最优的plan就行了.
- }
- }
在继续之前这里需要说明的是mongodb的plan分5种:
- enum Utility {
- Impossible, // Cannot produce any matches, so the query must have an empty result set.
- // No other plans need to be considered.
- Optimal, // Should run as the only candidate plan in the absence of an Impossible
- // plan.
- Helpful, // Should be considered.
- Unhelpful, // Should not be considered.
- Disallowed // Must not be considered unless explicitly hinted. May produce a
- // semantically incorrect result set.
- };
Impossible: 当查询条件自相矛盾(例如{x:{$gt:10,$lt:3}})时会产生一个空的range,进而产生完全无法匹配的状况.
Optimal: FieldRangeSetPair中每一个域都在索引中,这是一个最优的索引,根据这个索引产生的plan
将是最优的,不需要再考虑其它plan了.
Helpful: 选择的索引能够覆盖FieldRangeSetPair中的部分域,这个索引是有用的,虽然可能会多搜索
一些不会匹配其它域的document.在没有Optimal索引的情况下会根据Helpful索引建立plan
有多个Helpful的索引将建立多plan.
Unhelpful:无用的索引,不会考虑,似乎和Impossible差不多.
Disallowed: 如果使用这个索引查询数据可能会出错,这里有一个sparse的概念.mongodb的普通索引
是会索引无关数据的,举例来说有索引{x:1},插入一条数据{y:10},那么索引也会把这条数据
索引了,但是建立sparse索引db.xxx.ensureIndex({x:1},{sparse:true})那么这里的索引将
不再索引{y:10}这条数据了.对于sparse索引并且存在类似{z:{$exist:false}}这种情况,那么
使用该索引结果可能是不正确的不考虑该索引.
下面继续看代码:
- void QueryPlanGenerator::addFallbackPlans() {
- const char *ns = _qps.frsp().ns();
- NamespaceDetails *d = nsdetails( ns );
- vector<shared_ptr<QueryPlan> > plans;
- shared_ptr<QueryPlan> optimalPlan;
- shared_ptr<QueryPlan> specialPlan;
- for( int i = 0; i < d->nIndexes; ++i ) {//遍历所有索引,找出有用的索引,indexUseful指只要索引中
- if ( !QueryUtilIndexed::indexUseful( _qps.frsp(), d, i, _qps.order() ) ) {//有一个域覆盖了
- continue;//查询条件或者排序条件那么这个索引就是有用的
- }//根据索引建立一个plan,通过建立的plan得出其是否是有用的
- shared_ptr<QueryPlan> p = newPlan( d, i );
- switch( p->utility() ) {
- case QueryPlan::Impossible://后面将会看到对于这个plan若只存在其
- _qps.setSinglePlan( p );//那么将建立一个有0个文档的cursor
- return;
- case QueryPlan::Optimal://最优的plan,有则肯定选择它
- if ( !optimalPlan ) {
- optimalPlan = p;
- }
- break;
- case QueryPlan::Helpful://这个plan是有帮助的记录其
- if ( p->special().empty() ) {
- // Not a 'special' plan.
- plans.push_back( p );
- }
- else if ( _allowSpecial ) {//类似空间地理索引这种索引插件产生的索引plan
- specialPlan = p;
- }
- break;
- default:
- break;
- }
- }
- if ( optimalPlan ) {//最优的plan,有人可能会问如果存在impossible的plan后那么这里的
- _qps.setSinglePlan( optimalPlan );//setSinglePlan会插入不进去,其实不用担心,impossible表示完全无法匹配如:y>10 and y<3这种情况,那么任意的plan都无法匹配,自然无法产生optimalPlan了.
- // Record an optimal plan in the query cache immediately, with a small nscanned value
- // that will be ignored.
- optimalPlan->registerSelf//将其注册为最优的plan,以后可以直接使用这个plan而不用比对哪个plan最优了
- ( 0, CandidatePlanCharacter( !optimalPlan->scanAndOrderRequired(),
- optimalPlan->scanAndOrderRequired() ) );
- return;
- }
- // Only add a special plan if no standard btree plans have been added. SERVER-4531
- if ( plans.empty() && specialPlan ) {
- _qps.setSinglePlan( specialPlan );
- return;
- }
- //对于这种db.coll.find({x:1,y:1}),存在着索引{key:{x:1}},{key:{y:1}},两者都不是最优的
- //所以这里产生了两个QueryPlan,分别是{key:{x:1}}和{key:{y:1}}
- for( vector<shared_ptr<QueryPlan> >::const_iterator i = plans.begin(); i != plans.end();
- ++i ) {//将所有的plan加入到候选plan中.
- _qps.addCandidatePlan( *i );
- }//最后加入一个不使用索引的plan.
- _qps.addCandidatePlan( newPlan( d, -1 ) );
- }
继续来看看newPlan函数,这个函数包括了一个plan的构造.其同样是new一个QueryPlan然后调用其init函数:
- QueryPlan::QueryPlan( NamespaceDetails *d,
- int idxNo,
- const FieldRangeSetPair &frsp,
- const BSONObj &originalQuery,
- const BSONObj &order,
- const shared_ptr<const ParsedQuery> &parsedQuery,
- string special ) :
- _d(d),
- _idxNo(idxNo),
- _frs( frsp.frsForIndex( _d, _idxNo ) ),
- _frsMulti( frsp.frsForIndex( _d, -1 ) ),
- _originalQuery( originalQuery ),
- _order( order ),
- _parsedQuery( parsedQuery ),
- _index( 0 ),
- _scanAndOrderRequired( true ),//默认是需要排序的
- _exactKeyMatch( false ),
- _direction( 0 ),
- _endKeyInclusive(),
- _utility( Helpful ),//默认索引是有用的
- _special( special ),
- _type(0),
- _startOrEndSpec() {
- }
- void QueryPlan::init( const FieldRangeSetPair *originalFrsp,
- const BSONObj &startKey,
- const BSONObj &endKey ) {
- _endKeyInclusive = endKey.isEmpty();
- _startOrEndSpec = !startKey.isEmpty() || !endKey.isEmpty();
- BSONObj idxKey = _idxNo < 0 ? BSONObj() : _d->idx( _idxNo ).keyPattern();
- if ( !_frs.matchPossibleForIndex( idxKey ) ) {//Impossible的状况,这个plan是无用的
- _utility = Impossible;
- _scanAndOrderRequired = false;
- return;
- }
- if ( willScanTable() ) {//索引编号为-1(newplan(xxx,-1))且plan不为Impossible,那么只能扫描全表了
- if ( _order.isEmpty() || !strcmp( _order.firstElementFieldName(), "$natural" ) )
- _scanAndOrderRequired = false;//要么order为空,要么order指定为$natural(自然序列,那么都不需要排序了)
- return;
- }
- _index = &_d->idx(_idxNo);//得到索引
- // If the parsing or index indicates this is a special query, don't continue the processing
- if ( _special.size() ||//这部分的代码和索引插件有关,就是类似空间地理索引的处理流程
- ( _index->getSpec().getType() &&//跳过
- _index->getSpec().getType()->suitability( _originalQuery, _order ) != USELESS ) ) {
- _type = _index->getSpec().getType();
- if( !_special.size() ) _special = _index->getSpec().getType()->getPlugin()->getName();
- massert( 13040 , (string)"no type for special: " + _special , _type );
- // hopefully safe to use original query in these contexts;
- // don't think we can mix special with $or clause separation yet
- _scanAndOrderRequired = _type->scanAndOrderRequired( _originalQuery , _order );
- return;
- }
- const IndexSpec &idxSpec = _index->getSpec();
- BSONObjIterator o( _order );
- BSONObjIterator k( idxKey );
- if ( !o.moreWithEOO() )//索引与排序要求匹配,排序要求先结束那么扫描完了后
- _scanAndOrderRequired = false;//不需要再排序
- while( o.moreWithEOO() ) {
- BSONElement oe = o.next();
- if ( oe.eoo() ) {
- _scanAndOrderRequired = false;
- break;
- }
- if ( !k.moreWithEOO() )
- break;
- BSONElement ke;
- while( 1 ) {
- ke = k.next();
- if ( ke.eoo() )
- goto doneCheckOrder;
- if ( strcmp( oe.fieldName(), ke.fieldName() ) == 0 )
- break;
- if ( !_frs.range( ke.fieldName() ).equality() )
- goto doneCheckOrder;
- }//索引的顺序与排序要求相反,则使用反序
- int d = elementDirection( oe ) == elementDirection( ke ) ? 1 : -1;
- if ( _direction == 0 )
- _direction = d;
- else if ( _direction != d )
- break;
- }
- doneCheckOrder:
- if ( _scanAndOrderRequired )
- _direction = 0;
- BSONObjIterator i( idxKey );
- int exactIndexedQueryCount = 0;
- int optimalIndexedQueryCount = 0;
- bool awaitingLastOptimalField = true;
- set<string> orderFieldsUnindexed;
- _order.getFieldNames( orderFieldsUnindexed );
- while( i.moreWithEOO() ) {
- BSONElement e = i.next();
- if ( e.eoo() )
- break;
- const FieldRange &fr = _frs.range( e.fieldName() );
- if ( awaitingLastOptimalField ) {//这个索引有用,则optimalIndexedQueryCount++,这里回到之前讨论的问题,查询为db.coll.find({x:{$lt:10,$gt:4}}).sort({y:1}),当存在索引{x:1},{x:1,y:1},{y:1,x:1}时,这里本来{y:1,x:1}应该是最优的索引,当使用其时前面会将_scanAndOrderRequired设置为false,这里遍历索引键时第一个y进入这里,因为y不在查询内容中,其FieldRange是universal的(即fr.universal()为true;universal范围的最大值为maxkey,最小值为minkey,mongodb中maxkey大于其它一切数据,minkey小于其它一切数据),universal的范围不是equality,所以fr.equality()为false,那么awaitingLastOptimalField=false;第二次遍历到x时走else分支,因x的范围非universal,optimalIndexedQueryCount被置为-1,于是该plan不会被判定为Optimal,
- if ( !fr.universal() )
- ++optimalIndexedQueryCount;
- if ( !fr.equality() )
- awaitingLastOptimalField = false;
- }
- else {
- if ( !fr.universal() )
- optimalIndexedQueryCount = -1;
- }
- if ( fr.equality() ) {
- BSONElement e = fr.max();
- if ( !e.isNumber() && !e.mayEncapsulate() && e.type() != RegEx )
- ++exactIndexedQueryCount;
- }
- orderFieldsUnindexed.erase( e.fieldName() );
- }
- if ( !_scanAndOrderRequired &&//不需要排序并且索引有效的个数和之前得到的查询域的有效范围相等,那么这是最优的一个plan了.
- ( optimalIndexedQueryCount == _frs.numNonUniversalRanges() ) )
- _utility = Optimal;
- if ( exactIndexedQueryCount == _frs.numNonUniversalRanges() &&
- orderFieldsUnindexed.size() == 0 &&
- exactIndexedQueryCount == idxKey.nFields() &&
- exactKeyMatchSimpleQuery( _originalQuery, exactIndexedQueryCount ) ) {
- _exactKeyMatch = true;
- }
- _frv.reset( new FieldRangeVector( _frs, idxSpec, _direction ) );
- if ( originalFrsp ) {
- _originalFrv.reset( new FieldRangeVector( originalFrsp->frsForIndex( _d, _idxNo ),
- idxSpec, _direction ) );
- }
- else {
- _originalFrv = _frv;
- }
- if ( _startOrEndSpec ) {
- BSONObj newStart, newEnd;
- if ( !startKey.isEmpty() )
- _startKey = startKey;
- else
- _startKey = _frv->startKey();
- if ( !endKey.isEmpty() )
- _endKey = endKey;
- else
- _endKey = _frv->endKey();
- }
- if ( ( _scanAndOrderRequired || _order.isEmpty() ) &&
- _frs.range( idxKey.firstElementFieldName() ).universal() ) { // NOTE SERVER-2140
- _utility = Unhelpful;
- }
- if ( idxSpec.isSparse() && hasPossibleExistsFalsePredicate() ) {//
- _utility = Disallowed;
- }
- if ( _parsedQuery && _parsedQuery->getFields() && !_d->isMultikey( _idxNo ) ) { // Does not check modifiedKeys()
- _keyFieldsOnly.reset( _parsedQuery->getFields()->checkKey( _index->keyPattern() ) );
- }
- }
按前面的分析,似乎索引{y:1,x:1}既能满足排序又能覆盖查询条件,应该成为最优的plan{y:1,x:1},那么为什么不选择
这个plan呢?难道是考虑到要插入{y:40}这种数据吗?虽然索引{x:1}没有y域,但它同样会索引{y:40}这条数据,
索引条目数并不会比{y:1,x:1}多.但是后来我发现我忽略了一个问题:索引{y:1,x:1}无法直接定
位到x的范围,那么它扫描到的无关document数目可能比使用{x:1}索引时多.对于mongodb,优化考
虑的是如何得到最少的document扫描数目,所以{y:1,x:1}也只能是一个可考虑的索引而无法成为最优的
索引,所以要想让查询使用这个索引只能使用hint了.
这篇文章就暂时写到这里,后面还有很多内容,一篇文章写下来太多,还是分成两篇文章吧,关于plan的
选取请看下一篇文章.
本文链接:https://i-blog.csdnimg.cn/blog_migrate/3e2f1495d315de6caeabcff9efe8d464.png
作者:yhjj0108,杨浩