This article is reposted from 运维生存时间 (ttlsa.com): http://www.ttlsa.com/html/1096.html
MongoDB's auto-sharding feature uses mongos to build a horizontally scalable database cluster automatically, partitioning a database's data across the shard nodes.
By combining sharding with replica sets, you can build a distributed, highly available cluster that scales out automatically.
Building a MongoDB sharding cluster requires three roles:
Shard server: a mongod instance that stores the actual data chunks. Run each shard as a replica set so that every data node has a copy with automatic failover and recovery; in production a shard is typically a replica set of several machines, avoiding a single point of failure.
Config server: a mongod instance that stores the cluster metadata, including chunk information. Use three config servers to guarantee metadata integrity (metadata changes use a two-phase commit).
Route server: a mongos instance, the front-end router that clients connect to. It makes the whole cluster look like a single database, so applications use it transparently; multiple mongos instances can be combined with LVS (Linux Virtual Server) for load balancing and higher connection performance.
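Clients connect only to mongos, never to the shards directly. Once the cluster below is up, an optional check (not in the original article) to confirm that a connection is hitting a router rather than a plain mongod is the isMaster command, whose reply on a mongos includes "msg" : "isdbgrid":
mongos> db.runCommand({isMaster:1})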
The environment is as follows:
192.168.198.131
shard1:10001
shard2:10002
shard3:10003
config1:20000
192.168.198.129
shard1:10001
shard2:10002
shard3:10003
config2:20000
192.168.198.132
shard1:10001
shard2:10002
shard3:10003
config3:20000
192.168.198.133
mongos:30000
Install MongoDB on each of the three data servers as follows (192.168.198.133 only needs the same tarball, for the mongos binary):
# wget http://fastdl.mongodb.org/linux/mongodb-linux-x86_64-2.0.3.tgz
# tar zxvf mongodb-linux-x86_64-2.0.3.tgz -C ../software/
# ln -s "$(pwd)/../software/mongodb-linux-x86_64-2.0.3" /usr/local/mongodb
# useradd mongodb
# mkdir -p /data/mongodb/shard1
# mkdir -p /data/mongodb/shard2
# mkdir -p /data/mongodb/shard3
# mkdir -p /data/mongodb/config1
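The same directory layout is needed on each data server; only the config directory name differs (config1, config2 and config3 on 192.168.198.131, 192.168.198.129 and 192.168.198.132 respectively, matching the port listing above). As a hypothetical shorthand, on 192.168.198.131 the directories could be created in one loop:
# for d in shard1 shard2 shard3 config1; do mkdir -p /data/mongodb/$d; done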
Configure the replica set for shard1
192.168.198.131
# cd /usr/local/mongodb/bin
# ./mongod --shardsvr --replSet shard1 --port 10001 --dbpath /data/mongodb/shard1 --oplogSize 100 --logpath /data/mongodb/shard1/shard1.log --logappend --fork
192.168.198.129
# ./mongod --shardsvr --replSet shard1 --port 10001 --dbpath /data/mongodb/shard1 --oplogSize 100 --logpath /data/mongodb/shard1/shard1.log --logappend --fork
192.168.198.132
# ./mongod --shardsvr --replSet shard1 --port 10001 --dbpath /data/mongodb/shard1 --oplogSize 100 --logpath /data/mongodb/shard1/shard1.log --logappend --fork
Connect to 192.168.198.131
# ./mongo --port 10001
> config={_id:"shard1",members:[
... {_id:0,host:"192.168.198.131:10001"},
... {_id:1,host:"192.168.198.129:10001"},
... {_id:2,host:"192.168.198.132:10001"}]
... }
> rs.initiate(config)
{
"info" : "Config now saved locally. Should come online in about a minute.",
"ok" : 1
}
PRIMARY> rs.status()
{
"set" : "shard1",
"date" : ISODate("2012-03-02T02:37:55Z"),
"myState" : 1,
"members" : [
{
"_id" : 0,
"name" : "192.168.198.131:10001",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"optime" : {
"t" : 1330655827000,
"i" : 1
},
"optimeDate" : ISODate("2012-03-02T02:37:07Z"),
"self" : true
},
{
"_id" : 1,
"name" : "192.168.198.129:10001",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 36,
"optime" : {
"t" : 1330655827000,
"i" : 1
},
"optimeDate" : ISODate("2012-03-02T02:37:07Z"),
"lastHeartbeat" : ISODate("2012-03-02T02:37:53Z"),
"pingMs" : 0
},
{
"_id" : 2,
"name" : "192.168.198.132:10001",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 36,
"optime" : {
"t" : 1330655827000,
"i" : 1
},
"optimeDate" : ISODate("2012-03-02T02:37:07Z"),
"lastHeartbeat" : ISODate("2012-03-02T02:37:53Z"),
"pingMs" : 466553
}
],
"ok" : 1
}
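As an optional sanity check not in the original steps, you can verify replication by writing on the primary and reading the document back from a secondary (secondary reads must be enabled in the shell first):
PRIMARY> db.reptest.save({ping:1})          // scratch write on the primary
# on 192.168.198.129:
# ./mongo --port 10001
SECONDARY> rs.slaveOk()                     // allow reads on this secondary
SECONDARY> db.reptest.findOne()             // should return the {ping:1} document once replicated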
Configure the replica set for shard2
192.168.198.129
# ./mongod --shardsvr --replSet shard2 --port 10002 --dbpath /data/mongodb/shard2 --oplogSize 100 --logpath /data/mongodb/shard2/shard2.log --logappend --fork
192.168.198.131
# ./mongod --shardsvr --replSet shard2 --port 10002 --dbpath /data/mongodb/shard2 --oplogSize 100 --logpath /data/mongodb/shard2/shard2.log --logappend --fork
192.168.198.132
# ./mongod --shardsvr --replSet shard2 --port 10002 --dbpath /data/mongodb/shard2 --oplogSize 100 --logpath /data/mongodb/shard2/shard2.log --logappend --fork
Connect to 192.168.198.129
# ./mongo --port 10002
> config={_id:"shard2",members:[
... {_id:0,host:"192.168.198.129:10002"},
... {_id:1,host:"192.168.198.131:10002"},
... {_id:2,host:"192.168.198.132:10002"}]
... }
{
"_id" : "shard2",
"members" : [
{
"_id" : 0,
"host" : "192.168.198.129:10002"
},
{
"_id" : 1,
"host" : "192.168.198.131:10002"
},
{
"_id" : 2,
"host" : "192.168.198.132:10002"
}
]
}
> rs.initiate(config)
{
"info" : "Config now saved locally. Should come online in about a minute.",
"ok" : 1
}
> rs.status()
{
"set" : "shard2",
"date" : ISODate("2012-03-02T02:53:17Z"),
"myState" : 1,
"members" : [
{
"_id" : 0,
"name" : "192.168.198.129:10002",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"optime" : {
"t" : 1330656717000,
"i" : 1
},
"optimeDate" : ISODate("2012-03-02T02:51:57Z"),
"self" : true
},
{
"_id" : 1,
"name" : "192.168.198.131:10002",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 73,
"optime" : {
"t" : 1330656717000,
"i" : 1
},
"optimeDate" : ISODate("2012-03-02T02:51:57Z"),
"lastHeartbeat" : ISODate("2012-03-02T02:53:17Z"),
"pingMs" : 1
},
{
"_id" : 2,
"name" : "192.168.198.132:10002",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 73,
"optime" : {
"t" : 1330656717000,
"i" : 1
},
"optimeDate" : ISODate("2012-03-02T02:51:57Z"),
"lastHeartbeat" : ISODate("2012-03-02T02:53:17Z"),
"pingMs" : 209906
}
],
"ok" : 1
}
Configure the replica set for shard3
192.168.198.132
# ./mongod --shardsvr --replSet shard3 --port 10003 --dbpath /data/mongodb/shard3 --oplogSize 100 --logpath /data/mongodb/shard3/shard3.log --logappend --fork
192.168.198.129
# ./mongod --shardsvr --replSet shard3 --port 10003 --dbpath /data/mongodb/shard3 --oplogSize 100 --logpath /data/mongodb/shard3/shard3.log --logappend --fork
192.168.198.131
# ./mongod --shardsvr --replSet shard3 --port 10003 --dbpath /data/mongodb/shard3 --oplogSize 100 --logpath /data/mongodb/shard3/shard3.log --logappend --fork
Connect to 192.168.198.132
# ./mongo --port 10003
> config={_id:"shard3",members:[
... {_id:0,host:"192.168.198.132:10003"},
... {_id:1,host:"192.168.198.131:10003"},
... {_id:2,host:"192.168.198.129:10003"}]
... }
{
"_id" : "shard3",
"members" : [
{
"_id" : 0,
"host" : "192.168.198.132:10003"
},
{
"_id" : 1,
"host" : "192.168.198.131:10003"
},
{
"_id" : 2,
"host" : "192.168.198.129:10003"
}
]
}
> rs.initiate(config)
{
"info" : "Config now saved locally. Should come online in about a minute.",
"ok" : 1
}
> rs.status()
{
"set" : "shard3",
"date" : ISODate("2012-03-02T03:04:52Z"),
"myState" : 1,
"members" : [
{
"_id" : 0,
"name" : "192.168.198.132:10003",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"optime" : {
"t" : 1330657451000,
"i" : 1
},
"optimeDate" : ISODate("2012-03-02T03:04:11Z"),
"self" : true
},
{
"_id" : 1,
"name" : "192.168.198.131:10003",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 39,
"optime" : {
"t" : 1330657451000,
"i" : 1
},
"optimeDate" : ISODate("2012-03-02T03:04:11Z"),
"lastHeartbeat" : ISODate("2012-03-02T03:04:52Z"),
"pingMs" : 0
},
{
"_id" : 2,
"name" : "192.168.198.129:10003",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 39,
"optime" : {
"t" : 1330657451000,
"i" : 1
},
"optimeDate" : ISODate("2012-03-02T03:04:11Z"),
"lastHeartbeat" : ISODate("2012-03-02T03:04:52Z"),
"pingMs" : 0
}
],
"ok" : 1
}
Configure the config servers
192.168.198.131
# ./mongod --configsvr --dbpath /data/mongodb/config1 --port 20000 --logpath /data/mongodb/config1/config1.log --logappend --fork
192.168.198.129
# ./mongod --configsvr --dbpath /data/mongodb/config2 --port 20000 --logpath /data/mongodb/config2/config2.log --logappend --fork
192.168.198.132
# ./mongod --configsvr --dbpath /data/mongodb/config3 --port 20000 --logpath /data/mongodb/config3/config3.log --logappend --fork
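mongos needs to reach all three config servers at startup, so it is worth confirming they are up first. A hypothetical quick check from any host:
# ./mongo 192.168.198.131:20000 --eval "printjson(db.runCommand({ping:1}))"   // repeat for :20000 on .129 and .132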
Configure mongos (on 192.168.198.133)
# ./mongos --configdb 192.168.198.131:20000,192.168.198.129:20000,192.168.198.132:20000 --port 30000 --chunkSize 1 --logpath /data/mongodb/mongos.log --logappend --fork
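Note that --chunkSize 1 sets a 1 MB chunk size so the small test data set below splits quickly; the default (64 MB) is more appropriate in production. Since mongos holds no data of its own, the LVS load balancing mentioned earlier amounts to simply starting additional routers on other hosts with the identical --configdb list, for example (hypothetical second router):
# ./mongos --configdb 192.168.198.131:20000,192.168.198.129:20000,192.168.198.132:20000 --port 30000 --chunkSize 1 --logpath /data/mongodb/mongos.log --logappend --fork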
Configure the sharded cluster
# ./mongo --port 30000
mongos> use admin
switched to db admin
Add the shards
mongos> db.runCommand({addshard:"shard1/192.168.198.131:10001,192.168.198.129:10001,192.168.198.132:10001"});
{ "shardAdded" : "shard1", "ok" : 1 }
mongos> db.runCommand({addshard:"shard2/192.168.198.131:10002,192.168.198.129:10002,192.168.198.132:10002"});
{ "shardAdded" : "shard2", "ok" : 1 }
mongos> db.runCommand({addshard:"shard3/192.168.198.131:10003,192.168.198.129:10003,192.168.198.132:10003"});
{ "shardAdded" : "shard3", "ok" : 1 }
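The 2.0 shell also provides sh.* helpers that wrap these admin commands; for example, the first shard could equally have been added with:
mongos> sh.addShard("shard1/192.168.198.131:10001,192.168.198.129:10001,192.168.198.132:10001")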
List the shards
mongos> db.runCommand({listshards:1})
{
"shards" : [
{
"_id" : "shard1",
"host" : "shard1/192.168.198.129:10001,192.168.198.131:10001,192.168.198.132:10001"
},
{
"_id" : "shard2",
"host" : "shard2/192.168.198.129:10002,192.168.198.131:10002,192.168.198.132:10002"
},
{
"_id" : "shard3",
"host" : "shard3/192.168.198.129:10003,192.168.198.131:10003,192.168.198.132:10003"
}
],
"ok" : 1
}
Enable sharding on a database
mongos> db.runCommand({enablesharding:"test"});
{ "ok" : 1 }
This command lets the test database span shards; without it, the database stays on a single shard. Once sharding is enabled for a database, its different collections can be placed on different shards, but each collection as a whole still lives on one shard; to partition a collection itself, an additional step is required, shown below.
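You can confirm which databases are partitioned, and which shard is the primary for each, by reading the config database through mongos (an optional check; the same information appears in printShardingStatus() below):
mongos> db.getSisterDB("config").databases.find()
{ "_id" : "admin", "partitioned" : false, "primary" : "config" }
{ "_id" : "test", "partitioned" : true, "primary" : "shard1" }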
Shard a collection
mongos> db.runCommand({shardcollection:"test.data",key:{_id:1}})
{ "collectionsharded" : "test.data", "ok" : 1 }
A sharded collection may have only one unique index, which must be on the shard key; other unique indexes are not allowed.
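As a hypothetical illustration (not part of the original run), attempting to add a second unique index that does not include the shard key is refused:
mongos> use test
mongos> db.data.ensureIndex({value:1},{unique:true})   // refused: unique indexes on a sharded collection must include the shard key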
View shard information
mongos> printShardingStatus()
--- Sharding Status ---
sharding version: { "_id" : 1, "version" : 3 }
shards:
{ "_id" : "shard1", "host" : "shard1/192.168.198.129:10001,192.168.198.131:10001,192.168.198.132:10001" }
{ "_id" : "shard2", "host" : "shard2/192.168.198.129:10002,192.168.198.131:10002,192.168.198.132:10002" }
{ "_id" : "shard3", "host" : "shard3/192.168.198.129:10003,192.168.198.131:10003,192.168.198.132:10003" }
databases:
{ "_id" : "admin", "partitioned" : false, "primary" : "config" }
{ "_id" : "test", "partitioned" : true, "primary" : "shard1" }
test.data chunks:
shard1 1
{ "_id" : { $minKey : 1 } } -->> { "_id" : { $maxKey : 1 } } on : shard1 { "t" : 1000, "i" : 0 }
mongos> use test
switched to db test
mongos> db.data.stats()
{
"sharded" : true,
"flags" : 1,
"ns" : "test.data",
"count" : 0,
"numExtents" : 1,
"size" : 0,
"storageSize" : 8192,
"totalIndexSize" : 8176,
"indexSizes" : {
"_id_" : 8176
},
"avgObjSize" : 0,
"nindexes" : 1,
"nchunks" : 1,
"shards" : {
"shard1" : {
"ns" : "test.data",
"count" : 0,
"size" : 0,
"storageSize" : 8192,
"numExtents" : 1,
"nindexes" : 1,
"lastExtentSize" : 8192,
"paddingFactor" : 1,
"flags" : 1,
"totalIndexSize" : 8176,
"indexSizes" : {
"_id_" : 8176
},
"ok" : 1
}
},
"ok" : 1
}
Test: insert a large batch of documents
mongos> for (var i=1;i<=500000;i++) db.data.save({_id:i,value:"www.ttlsa.com"})
mongos> printShardingStatus()
--- Sharding Status ---
sharding version: { "_id" : 1, "version" : 3 }
shards:
{ "_id" : "shard1", "host" : "shard1/192.168.198.129:10001,192.168.198.131:10001,192.168.198.132:10001" }
{ "_id" : "shard2", "host" : "shard2/192.168.198.129:10002,192.168.198.131:10002,192.168.198.132:10002" }
{ "_id" : "shard3", "host" : "shard3/192.168.198.129:10003,192.168.198.131:10003,192.168.198.132:10003" }
databases:
{ "_id" : "admin", "partitioned" : false, "primary" : "config" }
{ "_id" : "test", "partitioned" : true, "primary" : "shard1" }
test.data chunks:
shard1 6
shard2 5
shard3 11
too many chunks to print, use verbose if you want to force print
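To see the individual chunk ranges despite the "too many chunks to print" notice, pass the verbose flag, or count them directly in the config database:
mongos> printShardingStatus(db.getSisterDB("config"), true)
mongos> db.getSisterDB("config").chunks.find({ns:"test.data"}).count()
22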
mongos> db.data.stats()
{
"sharded" : true,
"flags" : 1,
"ns" : "test.data",
"count" : 500000,
"numExtents" : 19,
"size" : 22000084,
"storageSize" : 43614208,
"totalIndexSize" : 14062720,
"indexSizes" : {
"_id_" : 14062720
},
"avgObjSize" : 44.000168,
"nindexes" : 1,
"nchunks" : 22,
"shards" : {
"shard1" : {
"ns" : "test.data",
"count" : 112982,
"size" : 4971232,
"avgObjSize" : 44.00021242321786,
"storageSize" : 11182080,
"numExtents" : 6,
"nindexes" : 1,
"lastExtentSize" : 8388608,
"paddingFactor" : 1,
"flags" : 1,
"totalIndexSize" : 3172288,
"indexSizes" : {
"_id_" : 3172288
},
"ok" : 1
},
"shard2" : {
"ns" : "test.data",
"count" : 124978,
"size" : 5499056,
"avgObjSize" : 44.00019203379795,
"storageSize" : 11182080,
"numExtents" : 6,
"nindexes" : 1,
"lastExtentSize" : 8388608,
"paddingFactor" : 1,
"flags" : 1,
"totalIndexSize" : 3499328,
"indexSizes" : {
"_id_" : 3499328
},
"ok" : 1
},
"shard3" : {
"ns" : "test.data",
"count" : 262040,
"size" : 11529796,
"avgObjSize" : 44.000137383605555,
"storageSize" : 21250048,
"numExtents" : 7,
"nindexes" : 1,
"lastExtentSize" : 10067968,
"paddingFactor" : 1,
"flags" : 1,
"totalIndexSize" : 7391104,
"indexSizes" : {
"_id_" : 7391104
},
"ok" : 1
}
},
"ok" : 1
}
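Even though the 500,000 documents are now spread across three shards, mongos still presents a single logical database: a lookup by _id is routed to the one shard whose chunk range contains the key. A hypothetical final check:
mongos> db.data.find({_id:250000})
{ "_id" : 250000, "value" : "www.ttlsa.com" }
mongos> db.data.find({_id:250000}).explain()   // the shards section of the explain output shows which shard served the query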