本文描述分片设置命令的流程.在分析分片设置命令流程前先来看看configserver服务器config数据库中各个collection的作用.
version: 保存当前configserver的版本信息,这个是随mongodb升级而变动的.
settings: 保存分片系统设置信息如chunksize大小,balancer配置信息.
shards: 保存shard中的配置信息包括每一个mongod的shard id如shard0以及其地址.
databases: 保存shard系统中的数据库的分片信息,是否分片,primary服务器的id.
collections: 保存分片的collection.
locks: 分布式锁状态,state=0,表示没锁,1表示准备加锁,2表示正拥有锁.
lockpings: 分布式服务器的ping信息,为分布式锁设计的,保留其ping时间,超时后其它服务器将能剥夺超时服务器的锁.
chunks: 保存分片collection的chunks信息,chunks的数据范围,collection名,所在服务器的id.
mongos: mongos进程的ping信息,确保mongos是可连接的.
tags: 似乎和replset中的tags作用类似,确保某些chunk只能在某些类型的服务器上.
在分片系统中,每一个mongod服务器是一个shard,其内部名称在不指定的情况下会自动生成,如shard0000、shard0001等.每一个数据库在初始加入分片系统时将会有一个primary shard,表示该数据库最早存于哪个服务器上.若是新添加的数据库,没有指定其存放位置,则其初始位置为分片系统中当前数据最少的服务器.
对于sharded collection,其由一个chunkManager管理,collection的分片以chunk为单位,每一个chunk默认大小为64M,可以通过命令修改,chunk大小达到上限后将发生分裂(split),chunkManager负责记录各个chunk以及每一个chunk所在的范围,以后对数据修改时将首先查询chunkManager,通过它得知应将请求发往哪一台服务器.
下面开始分析代码.首先来看看添加一台服务器到分片系统的命令db.runCommand({addshard:"127.0.0.1:27040"}).其将执行addshard命令.
bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
if ( !ClientBasic::getCurrent()->getAuthenticationInfo()->isAuthorized("admin") ) {//非admin权限不能执行该命令
errmsg = "unauthorized. Need admin authentication to add a shard ";
return false;
}
// get replica set component hosts
ConnectionString servers = ConnectionString::parse( cmdObj.firstElement().valuestrsafe() , errmsg );
// using localhost in server names implies every other process must use localhost addresses too
vector<HostAndPort> serverAddrs = servers.getServers();//得到添加的服务器地址
for ( size_t i = 0 ; i < serverAddrs.size() ; i++ ) {
if ( serverAddrs[i].isLocalHost() != grid.allowLocalHost() ) {
return false;
}
// it's fine if mongods of a set all use default port
if ( ! serverAddrs[i].hasPort() ) {//没有端口加上默认端口
serverAddrs[i].setPort( CmdLine::ShardServerPort );
}
}
// name is optional; addShard will provide one if needed
string name = "";
if ( cmdObj["name"].type() == String ) {//得到shard的名称,没有设置将自动生成一个如shard0000这样的.
name = cmdObj["name"].valuestrsafe();
}
// maxSize is the space usage cap in a shard in MBs
long long maxSize = 0;
if ( cmdObj[ ShardFields::maxSize.name() ].isNumber() ) {
maxSize = cmdObj[ ShardFields::maxSize.name() ].numberLong();
}
if ( ! grid.addShard( &name , servers , maxSize , errmsg ) ) {//实际的添加过程.
return false;
}
result << "shardAdded" << name;
return true;
}
run->addShard,删除了一大部分的检查流程.
bool Grid::addShard( string* name , const ConnectionString& servers , long long maxSize , string& errMsg ) {
// name can be NULL, so provide a dummy one here to avoid testing it elsewhere
string nameInternal;
ReplicaSetMonitorPtr rsMonitor;
// Check whether the host (or set) exists and run several sanity checks on this request.
// There are two set of sanity checks: making sure adding this particular shard is consistent
// with the replica set state (if it exists) and making sure this shards databases can be
// brought into the grid without conflict.
vector<string> dbNames;
{
scoped_ptr<ScopedDbConnection> newShardConnPtr(//与配置要加入shard的服务器建立连接
ScopedDbConnection::getInternalScopedDbConnection( servers.toString() ) );
ScopedDbConnection& newShardConn = *newShardConnPtr;
BSONObj resIsMongos;
BSONObj resIsMaster;
ok = newShardConn->runCommand( "admin" , BSON( "isMaster" << 1 ) , resIsMaster );
// if the shard has only one host, make sure it is not part of a replica set
string setName = resIsMaster["setName"].str();
string commandSetName = servers.getSetName();
// if the shard is part of a replica set, make sure all the hosts mentioned in 'servers' are part of
// the set. It is fine if not all members of the set are present in 'servers'.
bool foundAll = true;
string offendingHost;
// shard name defaults to the name of the replica set
if ( name->empty() && ! setName.empty() )
*name = setName;
// In order to be accepted as a new shard, that mongod must not have any database name that exists already
// in any other shards. If that test passes, the new shard's databases are going to be entered as
// non-sharded db's whose primary is the newly added shard.
BSONObj resListDB;//列出所有的新加入的服务器的数据库,将其加入到shard
ok = newShardConn->runCommand( "admin" , BSON( "listDatabases" << 1 ) , resListDB );
BSONObjIterator i( resListDB["databases"].Obj() );
while ( i.more() ) {
BSONObj dbEntry = i.next().Obj();
const string& dbName = dbEntry["name"].String();
if ( _isSpecialLocalDB( dbName ) ) {
// 'local', 'admin', and 'config' are system DBs and should be excluded here
continue;
}
else {
dbNames.push_back( dbName );
}
}
if ( newShardConn->type() == ConnectionString::SET )
rsMonitor = ReplicaSetMonitor::get( setName );
newShardConn.done();
}
//判断这里数据库一定为null,否则失败
// check that none of the existing shard candidate's db's exist elsewhere
for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
DBConfigPtr config = getDBConfig( *it , false );//得到数据库的配置.
}
// if a name for a shard wasn't provided, pick one.
if ( name->empty() && ! _getNewShardName( name ) ) {//对于新加入的服务器给他一个新的shardid.
errMsg = "error generating new shard name";
return false;
}
// build the ConfigDB shard document
BSONObjBuilder b;
b.append( "_id" , *name );
b.append( "host" , rsMonitor ? rsMonitor->getServerAd