1. 复制简介
在MongoDB中,创建副本集后就可以使用复制功能了,副本集是一组服务器,其中一个是用于处理写操作的主节点,还有多个用于保存主节点的数据副本的从节点,如果主节点崩溃了,则从节点会从中选举出一个新的主节点
2. 创建副本集
mongod --replSet study --dbpath /home/data/rs1 --port 27018 --bind_ip_all --oplogSize 200
mongod --replSet study --dbpath /home/data/rs2 --port 27019 --bind_ip_all --oplogSize 200
mongod --replSet study --dbpath /home/data/rs3 --port 27020 --bind_ip_all --oplogSize 200
# 可以看到三个mongod进程已启动(此时副本集尚未初始化)
# ps -ef|grep mongod
root 247 228 1 13:13 pts/2 00:00:00 mongod --replSet study --dbpath /home/data/rs1 --port 27018 --bind_ip_all --oplogSize 200
root 283 240 3 13:13 pts/3 00:00:00 mongod --replSet study --dbpath /home/data/rs2 --port 27019 --bind_ip_all --oplogSize 200
root 336 323 9 13:14 pts/4 00:00:00 mongod --replSet study --dbpath /home/data/rs3 --port 27020 --bind_ip_all --oplogSize 200
从MongoDB 3.6开始,mongod默认情况下仅绑定到localhost(127.0.0.1),为了使副本集中每个成员都可以与其他成员进行通信,还必须绑定其他成员可以访问到的IP,如下
mongod --bind_ip localhost,192.168.0.12 --replSet study --dbpath /home/data/rs1 --port 27018 --oplogSize 200
到目前为止,每个mongod都不知道其他mongod的存在。为了能够彼此交互,需要创建一个包含每个成员的配置,并将此配置发送给其中一个mongod进程。它负责将此配置传播给其他成员
# 连接mongod
mongo --port 27018
# 查看当前状态
> rs.status()
{
"operationTime" : Timestamp(0, 0),
"ok" : 0,
"errmsg" : "no replset config has been received",
"code" : 94,
"codeName" : "NotYetInitialized",
"$clusterTime" : {
"clusterTime" : Timestamp(0, 0),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
}
# 创建配置并初始化配置
> rsconf = {_id: "study", members: [{_id:0, host:"localhost:27018"},{_id:1, host:"localhost:27019"},{_id:2, host:"localhost:27020"}]}
> rs.initiate(rsconf)
# 再次查看状态
study:SECONDARY> rs.status()
{
"set" : "study",
"date" : ISODate("2022-07-10T05:36:36.198Z"),
"myState" : 1,
"term" : NumberLong(1),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"heartbeatIntervalMillis" : NumberLong(2000),
"majorityVoteCount" : 2,
"writeMajorityCount" : 2,
"optimes" : {
"lastCommittedOpTime" : {
"ts" : Timestamp(1657431395, 1),
"t" : NumberLong(1)
},
"lastCommittedWallTime" : ISODate("2022-07-10T05:36:35.602Z"),
"readConcernMajorityOpTime" : {
"ts" : Timestamp(1657431395, 1),
"t" : NumberLong(1)
},
"readConcernMajorityWallTime" : ISODate("2022-07-10T05:36:35.602Z"),
"appliedOpTime" : {
"ts" : Timestamp(1657431395, 1),
"t" : NumberLong(1)
},
"durableOpTime" : {
"ts" : Timestamp(1657431395, 1),
"t" : NumberLong(1)
},
"lastAppliedWallTime" : ISODate("2022-07-10T05:36:35.602Z"),
"lastDurableWallTime" : ISODate("2022-07-10T05:36:35.602Z")
},
"lastStableRecoveryTimestamp" : Timestamp(1657431394, 3),
"lastStableCheckpointTimestamp" : Timestamp(1657431394, 3),
"electionCandidateMetrics" : {
"lastElectionReason" : "electionTimeout",
"lastElectionDate" : ISODate("2022-07-10T05:36:34.192Z"),
"electionTerm" : NumberLong(1),
"lastCommittedOpTimeAtElection" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"lastSeenOpTimeAtElection" : {
"ts" : Timestamp(1657431383, 1),
"t" : NumberLong(-1)
},
"numVotesNeeded" : 2,
"priorityAtElection" : 1,
"electionTimeoutMillis" : NumberLong(10000),
"numCatchUpOps" : NumberLong(0),
"newTermStartDate" : ISODate("2022-07-10T05:36:34.225Z"),
"wMajorityWriteAvailabilityDate" : ISODate("2022-07-10T05:36:35.558Z")
},
"members" : [
{
"_id" : 0,
"name" : "localhost:27018",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY", // 主节点
"uptime" : 1369,
"optime" : {
"ts" : Timestamp(1657431395, 1),
"t" : NumberLong(1)
},
"optimeDate" : ISODate("2022-07-10T05:36:35Z"),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "could not find member to sync from",
"electionTime" : Timestamp(1657431394, 1),
"electionDate" : ISODate("2022-07-10T05:36:34Z"),
"configVersion" : 1,
"self" : true,
"lastHeartbeatMessage" : ""
},
{
"_id" : 1,
"name" : "localhost:27019",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY", // 从节点
"uptime" : 12,
"optime" : {
"ts" : Timestamp(1657431383, 1),
"t" : NumberLong(-1)
},
"optimeDurable" : {
"ts" : Timestamp(1657431383, 1),
"t" : NumberLong(-1)
},
"optimeDate" : ISODate("2022-07-10T05:36:23Z"),
"optimeDurableDate" : ISODate("2022-07-10T05:36:23Z"),
"lastHeartbeat" : ISODate("2022-07-10T05:36:34.202Z"),
"lastHeartbeatRecv" : ISODate("2022-07-10T05:36:35.519Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : 1
},
{
"_id" : 2,
"name" : "localhost:27020",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 12,
"optime" : {
"ts" : Timestamp(1657431383, 1),
"t" : NumberLong(-1)
},
"optimeDurable" : {
"ts" : Timestamp(1657431383, 1),
"t" : NumberLong(-1)
},
"optimeDate" : ISODate("2022-07-10T05:36:23Z"),
"optimeDurableDate" : ISODate("2022-07-10T05:36:23Z"),
"lastHeartbeat" : ISODate("2022-07-10T05:36:34.202Z"),
"lastHeartbeatRecv" : ISODate("2022-07-10T05:36:35.521Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : 1
}
],
"ok" : 1,
"$clusterTime" : {
"clusterTime" : Timestamp(1657431395, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
},
"operationTime" : Timestamp(1657431395, 1)
}
3. 观察副本集
study:PRIMARY> use test
switched to db test
study:PRIMARY> for (i=0;i <1000;i++){db.coll.insert({count:i})}
WriteResult({ "nInserted" : 1 })
# 查看主节点
study:PRIMARY> db.isMaster()
{
"hosts" : [
"localhost:27018",
"localhost:27019",
"localhost:27020"
],
"setName" : "study",
"setVersion" : 1,
"ismaster" : true,
"secondary" : false,
"primary" : "localhost:27018",
"me" : "localhost:27018",
"electionId" : ObjectId("7fffffff0000000000000001"),
"lastWrite" : {
"opTime" : {
"ts" : Timestamp(1657431964, 1),
"t" : NumberLong(1)
},
"lastWriteDate" : ISODate("2022-07-10T05:46:04Z"),
"majorityOpTime" : {
"ts" : Timestamp(1657431964, 1),
"t" : NumberLong(1)
},
"majorityWriteDate" : ISODate("2022-07-10T05:46:04Z")
},
"maxBsonObjectSize" : 16777216,
"maxMessageSizeBytes" : 48000000,
"maxWriteBatchSize" : 100000,
"localTime" : ISODate("2022-07-10T05:46:07.631Z"),
"logicalSessionTimeoutMinutes" : 30,
"connectionId" : 1,
"minWireVersion" : 0,
"maxWireVersion" : 8,
"readOnly" : false,
"ok" : 1,
"$clusterTime" : {
"clusterTime" : Timestamp(1657431964, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
},
"operationTime" : Timestamp(1657431964, 1)
}
// 新开一个shell连接从节点
mongo --port 27020
study:SECONDARY> use test
switched to db test
// 执行查询可看到如下报错
study:SECONDARY> db.coll.find({})
Error: error: {
"operationTime" : Timestamp(1657432094, 1),
"ok" : 0,
"errmsg" : "not master and slaveOk=false",
"code" : 13435,
"codeName" : "NotMasterNoSlaveOk",
"$clusterTime" : {
"clusterTime" : Timestamp(1657432094, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
}
从节点可能会落后主节点(延迟)而缺少最新的写入,所以默认情况下从节点会拒绝读请求,以防止读取过期数据,如果想在从节点上执行查询,则可以进行如下设置
study:SECONDARY> db.setSlaveOk()
study:SECONDARY> db.coll.count({})
1000
从节点不接受写操作。从节点只能通过复制功能写入数据,不接受客户端的写请求
study:SECONDARY> db.coll.insert({count: 33})
WriteCommandError({
"operationTime" : Timestamp(1657432434, 1),
"ok" : 0,
"errmsg" : "not master",
"code" : 10107,
"codeName" : "NotMaster",
"$clusterTime" : {
"clusterTime" : Timestamp(1657432434, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
})
让主节点停止,在从节点27020上执行isMaster可以发现从节点27019成为了主节点(见下方输出中的primary字段),实现了自动选举主节点
study:PRIMARY> db.adminCommand({'shutdown': 1})
study:SECONDARY> db.isMaster()
{
"hosts" : [
"localhost:27018",
"localhost:27019",
"localhost:27020"
],
"setName" : "study",
"setVersion" : 1,
"ismaster" : false,
"secondary" : true,
"primary" : "localhost:27019",
"me" : "localhost:27020",
"lastWrite" : {
"opTime" : {
"ts" : Timestamp(1657432711, 1),
"t" : NumberLong(2)
},
"lastWriteDate" : ISODate("2022-07-10T05:58:31Z"),
"majorityOpTime" : {
"ts" : Timestamp(1657432711, 1),
"t" : NumberLong(2)
},
"majorityWriteDate" : ISODate("2022-07-10T05:58:31Z")
},
"maxBsonObjectSize" : 16777216,
"maxMessageSizeBytes" : 48000000,
"maxWriteBatchSize" : 100000,
"localTime" : ISODate("2022-07-10T05:58:34.144Z"),
"logicalSessionTimeoutMinutes" : 30,
"connectionId" : 20,
"minWireVersion" : 0,
"maxWireVersion" : 8,
"readOnly" : false,
"ok" : 1,
"$clusterTime" : {
"clusterTime" : Timestamp(1657432711, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
},
"operationTime" : Timestamp(1657432711, 1)
}
4. 更改副本集配置
// 添加副本集成员
study:PRIMARY> rs.add('localhost:27021')
// 移除副本集成员
study:PRIMARY> rs.remove('localhost:27018')
// 查看配置是否生效
study:PRIMARY> rs.config()
{
"_id" : "study",
"version" : 3,
"protocolVersion" : NumberLong(1),
"writeConcernMajorityJournalDefault" : true,
"members" : [
{
"_id" : 1,
"host" : "localhost:27019",
"arbiterOnly" : false,
"buildIndexes" : true,
"hidden" : false,
"priority" : 1,
"tags" : {
},
"slaveDelay" : NumberLong(0),
"votes" : 1
},
{
"_id" : 2,
"host" : "localhost:27020",
"arbiterOnly" : false,
"buildIndexes" : true,
"hidden" : false,
"priority" : 1,
"tags" : {
},
"slaveDelay" : NumberLong(0),
"votes" : 1
},
{
"_id" : 3,
"host" : "localhost:27021",
"arbiterOnly" : false,
"buildIndexes" : true,
"hidden" : false,
"priority" : 1,
"tags" : {
},
"slaveDelay" : NumberLong(0),
"votes" : 1
}
],
"settings" : {
"chainingAllowed" : true,
"heartbeatIntervalMillis" : 2000,
"heartbeatTimeoutSecs" : 10,
"electionTimeoutMillis" : 10000,
"catchUpTimeoutMillis" : -1,
"catchUpTakeoverDelayMillis" : 30000,
"getLastErrorModes" : {
},
"getLastErrorDefaults" : {
"w" : 1,
"wtimeout" : 0
},
"replicaSetId" : ObjectId("62ca6557fe65de5513877e3c")
}
}
# 修改配置(修改config对象后,需执行rs.reconfig(config)才能使新配置生效)
study:PRIMARY> var config = rs.config()
study:PRIMARY> config.members[0].host = 'localhost:27019'
localhost:27019
5. 如何设计副本集
大多数:选取主节点时需要由大多数决定,主节点只有在得到大多数支持时才能继续作为主节点,写操作被复制到大多数成员时就是安全的写操作。这里的大多数定义为“副本集中一半以上的成员”,由此可见,成员数量一般设置为奇数,如3个节点
6. 如何进行选举
MongoDB在3.2版本中引入了第1版复制协议(protocolVersion: 1)。该协议基于RAFT共识协议,并且包含一些特定于MongoDB的副本集概念,比如仲裁节点、优先级、无投票权成员、写关注(write concern)等。它还通过term ID来防止重复投票。
副本集成员间每隔两秒发送一次心跳(ping),如果某个成员10秒内没有反馈心跳,则其他成员会将该失联成员标记为无法访问。选举算法会尽最大努力让优先级最高的从节点发起选举,优先级越高的成员也越有可能成为主节点。
7. 成员配置选项
7.1 优先级
优先级(priority)为0的成员永远不能成为主节点。这样的成员被称为被动成员。
study:PRIMARY> rs.add({'host': 'localhost:27018', 'priority': 1.5})
// 停掉主节点,然后查看新选取的主节点,可以看到localhost:27018已成为主节点
7.2 隐藏成员
study:PRIMARY> rs.isMaster()
{
"hosts" : [
"localhost:27019",
"localhost:27020",
"localhost:27021",
"localhost:27018"
]
}
study:PRIMARY> var config = rs.config()
study:PRIMARY> config.members[2].hidden=true
true
study:PRIMARY> config.members[2].priority=0
0
study:PRIMARY> rs.reconfig(config)
{
"ok" : 1,
"$clusterTime" : {
"clusterTime" : Timestamp(1657435250, 2),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
},
"operationTime" : Timestamp(1657435250, 2)
}
# 再次执行可以看到localhost:27021已隐藏
study:PRIMARY> rs.isMaster()
{
"hosts" : [
"localhost:27019",
"localhost:27020",
"localhost:27018"
]
}
使用rs.status()和rs.config()能够看到隐藏成员,隐藏成员对isMaster()不可见。当客户端连接到副本集时,会调用isMaster来查看副本集中的成员。因此,隐藏成员永远不会收到客户端的读请求
7.3 选举仲裁者
mongod --replSet study --dbpath /home/data/arb --port 27022 --oplogSize 200
study:PRIMARY> rs.addArb('localhost:27022')
成员一旦以仲裁者身份被添加到副本集,它就永远只能是仲裁者,无法将仲裁者重新配置为非仲裁者
- 最多只能使用一个仲裁者。如果节点数量为奇数,那就不需要仲裁者
- 如果副本集有两个普通成员与一个仲裁者,当一个数据成员停止运行(数据无法恢复)时,那么就需要一个新的从节点,而此时只能由主节点把数据副本复制到新的从节点,主节点服务器压力会很大。仲裁者毕竟不存储数据副本,只参与仲裁
7.4 创建索引
如果从节点仅仅备份数据或脱机批量处理作业,则可以指定buildIndexes: false,此选项防止从节点创建任何索引。这也是永久性设置,不可重新配置为普通的创建索引的成员,同样要求优先级为0
欢迎关注公众号算法小生或沈健的技术博客shenjian.online