由于各种奇葩的原因,导致一个集群中只剩下一个SECONDARY节点,此时该如何快速处理,让DB能尽快对外提供服务?
如下是一次通过强制修改replica set成员的votes属性,将SECONDARY提升为PRIMARY的过程
###############
目前架构 1主2从
###############
repset:SECONDARY> rs.status()
{
"set" : "repset",
"date" : ISODate("2017-07-08T07:36:09.833Z"),
"myState" : 2,
"members" : [
{
"_id" : 0,
"name" : "192.168.16.130:10000",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 22,
"optime" : Timestamp(1499488823, 2),
"optimeDate" : ISODate("2017-07-08T04:40:23Z"),
"lastHeartbeat" : ISODate("2017-07-08T07:36:09.337Z"),
"lastHeartbeatRecv" : ISODate("2017-07-08T07:36:09.378Z"),
"pingMs" : 0,
"electionTime" : Timestamp(1499499337, 1),
"electionDate" : ISODate("2017-07-08T07:35:37Z"),
"configVersion" : 1
},
{
"_id" : 1,
"name" : "192.168.16.130:10001",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 22,
"optime" : Timestamp(1499488823, 2),
"optimeDate" : ISODate("2017-07-08T04:40:23Z"),
"lastHeartbeat" : ISODate("2017-07-08T07:36:09.337Z"),
"lastHeartbeatRecv" : ISODate("2017-07-08T07:36:09.587Z"),
"pingMs" : 0,
"configVersion" : 1
},
{
"_id" : 2,
"name" : "192.168.16.130:10002",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 22,
"optime" : Timestamp(1499488823, 2),
"optimeDate" : ISODate("2017-07-08T04:40:23Z"),
"configVersion" : 1,
"self" : true
}
],
"ok" : 1
}
repset:SECONDARY>
mongodb 9895 1 0 12:59 ? 00:01:01 /comm/mongodb/3.0/bin/mongod --fork --dbpath /comm/mg10002/data --logpath=/comm/mg10002/log/mg10002.log --port 10002 --replSet repset
mongodb 10842 1 0 14:35 ? 00:00:10 /comm/mongodb/3.0/bin/mongod --fork --dbpath /comm/mg10000/data --logpath=/comm/mg10000/log/mg10000.log --port 10000 --replSet repset
mongodb 10897 1 0 14:35 ? 00:00:10 /comm/mongodb/3.0/bin/mongod --fork --dbpath /comm/mg10001/data --logpath=/comm/mg10001/log/mg10001.log --port 10001 --replSet repset
####################################
模拟故障现象 先停掉一个从mg10002,再停掉主mg10000
####################################
repset:SECONDARY> rs.status()
{
"set" : "repset",
"date" : ISODate("2017-07-08T07:38:02.359Z"),
"myState" : 2,
"members" : [
{
"_id" : 0,
"name" : "192.168.16.130:10000",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : Timestamp(0, 0),
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2017-07-08T07:38:01.690Z"),
"lastHeartbeatRecv" : ISODate("2017-07-08T07:37:03.428Z"),
"pingMs" : 0,
"lastHeartbeatMessage" : "Failed attempt to connect to 192.168.16.130:10000; couldn't connect to server 192.168.16.130:10000 (192.168.16.130), connection attempt failed",
"configVersion" : -1
},
{
"_id" : 1,
"name" : "192.168.16.130:10001",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 147,
"optime" : Timestamp(1499488823, 2),
"optimeDate" : ISODate("2017-07-08T04:40:23Z"),
"configVersion" : 1,
"self" : true
},
{
"_id" : 2,
"name" : "192.168.16.130:10002",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : Timestamp(0, 0),
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2017-07-08T07:38:01.690Z"),
"lastHeartbeatRecv" : ISODate("2017-07-08T07:36:49.362Z"),
"pingMs" : 0,
"lastHeartbeatMessage" : "Failed attempt to connect to 192.168.16.130:10002; couldn't connect to server 192.168.16.130:10002 (192.168.16.130), connection attempt failed",
"configVersion" : -1
}
],
"ok" : 1
}
repset:SECONDARY>
########################
现在只剩下一个为secondary的10001
########################
--修改故障节点10000 10002的votes属性为0 不参与选举 保留存活的secondary节点(10001)的votes属性为1
################################
use local
db.system.replset.find()
db.system.replset.remove({"_id":"repset"});
db.system.replset.insert({ "_id" : "repset", "version" : 1, "members" : [
{ "_id" : 0, "host" : "192.168.16.130:10000", "arbiterOnly" : false, "buildIndexes" : true, "hidden" : false, "priority" : 1, "tags" : { }, "slaveDelay" : 0, "votes" : 0 },
{ "_id" : 1, "host" : "192.168.16.130:10001", "arbiterOnly" : false, "buildIndexes" : true, "hidden" : false, "priority" : 1, "tags" : { }, "slaveDelay" : 0, "votes" : 1 },
{ "_id" : 2, "host" : "192.168.16.130:10002", "arbiterOnly" : false, "buildIndexes" : true, "hidden" : false, "priority" : 1, "tags" : { }, "slaveDelay" : 0, "votes" : 0 } ],
"settings" : { "chainingAllowed" : true, "heartbeatTimeoutSecs" : 10, "getLastErrorModes" : { }, "getLastErrorDefaults" : { "w" : 1, "wtimeout" : 0 } } });
###############################
repset:SECONDARY> use local
switched to db local
repset:SECONDARY> db.system.replset.find()
{ "_id" : "repset", "version" : 1, "members" : [ { "_id" : 0, "host" : "192.168.16.130:10000", "arbiterOnly" : false, "buildIndexes" : true, "hidden" : false, "priority" : 1, "tags" : { }, "slaveDelay" : 0, "votes" : 1 }, { "_id" : 1, "host" : "192.168.16.130:10001", "arbiterOnly" : false, "buildIndexes" : true, "hidden" : false, "priority" : 1, "tags" : { }, "slaveDelay" : 0, "votes" : 1 }, { "_id" : 2, "host" : "192.168.16.130:10002", "arbiterOnly" : false, "buildIndexes" : true, "hidden" : false, "priority" : 1, "tags" : { }, "slaveDelay" : 0, "votes" : 1 } ], "settings" : { "chainingAllowed" : true, "heartbeatTimeoutSecs" : 10, "getLastErrorModes" : { }, "getLastErrorDefaults" : { "w" : 1, "wtimeout" : 0 } } }
repset:SECONDARY>
repset:SECONDARY> db.system.replset.remove({"_id":"repset"});
WriteResult({ "nRemoved" : 1 })
repset:SECONDARY>
.......
repset:SECONDARY> db.system.replset.insert({ "_id" : "repset", "version" : 1, "members" : [ { "_id" : 0, "host" : "192.168.16.130:10000", "arbiterOnly" : false, "buildIndexes" : true, "hidden" : false, "priority" : 1, "tags" : { }, "slaveDelay" : 0, "votes" : 0 }, { "_id" : 1, "host" : "192.168.16.130:10001", "arbiterOnly" : false, "buildIndexes" : true, "hidden" : false, "priority" : 1, "tags" : { }, "slaveDelay" : 0, "votes" : 1 }, { "_id" : 2, "host" : "192.168.16.130:10002", "arbiterOnly" : false, "buildIndexes" : true, "hidden" : false, "priority" : 1, "tags" : { }, "slaveDelay" : 0, "votes" : 0 } ], "settings" : { "chainingAllowed" : true, "heartbeatTimeoutSecs" : 10, "getLastErrorModes" : { }, "getLastErrorDefaults" : { "w" : 1, "wtimeout" : 0 } } });
WriteResult({ "nInserted" : 1 })
repset:SECONDARY> exit
bye
######################
重启存活的节点 已经转换为primary
######################
[mongodb@node1 ~]$ mongod --port ${PORT} --shutdown --dbpath=/comm/mg${PORT}/data
killing process with pid: 12349
[mongodb@node1 ~]$ $MONGODB_HOME/bin/mongod --fork --dbpath /comm/mg${PORT}/data --logpath=/comm/mg${PORT}/log/mg${PORT}.log --port ${PORT} --replSet repset
about to fork child process, waiting until server is ready for connections.
forked process: 12544
child process started successfully, parent exiting
[mongodb@node1 ~]$ $MONGODB_HOME/bin/mongo --port ${PORT}
MongoDB shell version: 3.0.15-9-g0513806
connecting to: 127.0.0.1:10001/test
Server has startup warnings:
repset:PRIMARY> rs.status()
{
"set" : "repset",
"date" : ISODate("2017-07-08T07:41:50.053Z"),
"myState" : 1,
"members" : [
{
"_id" : 0,
"name" : "192.168.16.130:10000",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : Timestamp(0, 0),
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2017-07-08T07:41:49.137Z"),
"lastHeartbeatRecv" : ISODate("1970-01-01T00:00:00Z"),
"configVersion" : -1
},
{
"_id" : 1,
"name" : "192.168.16.130:10001",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 14,
"optime" : Timestamp(1499488823, 2),
"optimeDate" : ISODate("2017-07-08T04:40:23Z"),
"electionTime" : Timestamp(1499499699, 1),
"electionDate" : ISODate("2017-07-08T07:41:39Z"),
"configVersion" : 1,
"self" : true
},
{
"_id" : 2,
"name" : "192.168.16.130:10002",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : Timestamp(0, 0),
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2017-07-08T07:41:49.137Z"),
"lastHeartbeatRecv" : ISODate("1970-01-01T00:00:00Z"),
"configVersion" : -1
}
],
"ok" : 1
}
repset:PRIMARY>
#############
后续将有问题的节点重新搭建,并在节点恢复后通过rs.reconfig()把它们的votes属性改回1即可
#############
如下是一次通过强制修改replica set成员的votes属性,将SECONDARY提升为PRIMARY的过程
###############
目前架构 1主2从
###############
repset:SECONDARY> rs.status()
{
"set" : "repset",
"date" : ISODate("2017-07-08T07:36:09.833Z"),
"myState" : 2,
"members" : [
{
"_id" : 0,
"name" : "192.168.16.130:10000",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 22,
"optime" : Timestamp(1499488823, 2),
"optimeDate" : ISODate("2017-07-08T04:40:23Z"),
"lastHeartbeat" : ISODate("2017-07-08T07:36:09.337Z"),
"lastHeartbeatRecv" : ISODate("2017-07-08T07:36:09.378Z"),
"pingMs" : 0,
"electionTime" : Timestamp(1499499337, 1),
"electionDate" : ISODate("2017-07-08T07:35:37Z"),
"configVersion" : 1
},
{
"_id" : 1,
"name" : "192.168.16.130:10001",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 22,
"optime" : Timestamp(1499488823, 2),
"optimeDate" : ISODate("2017-07-08T04:40:23Z"),
"lastHeartbeat" : ISODate("2017-07-08T07:36:09.337Z"),
"lastHeartbeatRecv" : ISODate("2017-07-08T07:36:09.587Z"),
"pingMs" : 0,
"configVersion" : 1
},
{
"_id" : 2,
"name" : "192.168.16.130:10002",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 22,
"optime" : Timestamp(1499488823, 2),
"optimeDate" : ISODate("2017-07-08T04:40:23Z"),
"configVersion" : 1,
"self" : true
}
],
"ok" : 1
}
repset:SECONDARY>
mongodb 9895 1 0 12:59 ? 00:01:01 /comm/mongodb/3.0/bin/mongod --fork --dbpath /comm/mg10002/data --logpath=/comm/mg10002/log/mg10002.log --port 10002 --replSet repset
mongodb 10842 1 0 14:35 ? 00:00:10 /comm/mongodb/3.0/bin/mongod --fork --dbpath /comm/mg10000/data --logpath=/comm/mg10000/log/mg10000.log --port 10000 --replSet repset
mongodb 10897 1 0 14:35 ? 00:00:10 /comm/mongodb/3.0/bin/mongod --fork --dbpath /comm/mg10001/data --logpath=/comm/mg10001/log/mg10001.log --port 10001 --replSet repset
####################################
模拟故障现象 先停掉一个从mg10002,再停掉主mg10000
####################################
repset:SECONDARY> rs.status()
{
"set" : "repset",
"date" : ISODate("2017-07-08T07:38:02.359Z"),
"myState" : 2,
"members" : [
{
"_id" : 0,
"name" : "192.168.16.130:10000",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : Timestamp(0, 0),
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2017-07-08T07:38:01.690Z"),
"lastHeartbeatRecv" : ISODate("2017-07-08T07:37:03.428Z"),
"pingMs" : 0,
"lastHeartbeatMessage" : "Failed attempt to connect to 192.168.16.130:10000; couldn't connect to server 192.168.16.130:10000 (192.168.16.130), connection attempt failed",
"configVersion" : -1
},
{
"_id" : 1,
"name" : "192.168.16.130:10001",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 147,
"optime" : Timestamp(1499488823, 2),
"optimeDate" : ISODate("2017-07-08T04:40:23Z"),
"configVersion" : 1,
"self" : true
},
{
"_id" : 2,
"name" : "192.168.16.130:10002",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : Timestamp(0, 0),
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2017-07-08T07:38:01.690Z"),
"lastHeartbeatRecv" : ISODate("2017-07-08T07:36:49.362Z"),
"pingMs" : 0,
"lastHeartbeatMessage" : "Failed attempt to connect to 192.168.16.130:10002; couldn't connect to server 192.168.16.130:10002 (192.168.16.130), connection attempt failed",
"configVersion" : -1
}
],
"ok" : 1
}
repset:SECONDARY>
########################
现在只剩下一个为secondary的10001
########################
--修改故障节点10000 10002的votes属性为0 不参与选举 保留存活的secondary节点(10001)的votes属性为1
################################
use local
db.system.replset.find()
db.system.replset.remove({"_id":"repset"});
db.system.replset.insert({ "_id" : "repset", "version" : 1, "members" : [
{ "_id" : 0, "host" : "192.168.16.130:10000", "arbiterOnly" : false, "buildIndexes" : true, "hidden" : false, "priority" : 1, "tags" : { }, "slaveDelay" : 0, "votes" : 0 },
{ "_id" : 1, "host" : "192.168.16.130:10001", "arbiterOnly" : false, "buildIndexes" : true, "hidden" : false, "priority" : 1, "tags" : { }, "slaveDelay" : 0, "votes" : 1 },
{ "_id" : 2, "host" : "192.168.16.130:10002", "arbiterOnly" : false, "buildIndexes" : true, "hidden" : false, "priority" : 1, "tags" : { }, "slaveDelay" : 0, "votes" : 0 } ],
"settings" : { "chainingAllowed" : true, "heartbeatTimeoutSecs" : 10, "getLastErrorModes" : { }, "getLastErrorDefaults" : { "w" : 1, "wtimeout" : 0 } } });
###############################
repset:SECONDARY> use local
switched to db local
repset:SECONDARY> db.system.replset.find()
{ "_id" : "repset", "version" : 1, "members" : [ { "_id" : 0, "host" : "192.168.16.130:10000", "arbiterOnly" : false, "buildIndexes" : true, "hidden" : false, "priority" : 1, "tags" : { }, "slaveDelay" : 0, "votes" : 1 }, { "_id" : 1, "host" : "192.168.16.130:10001", "arbiterOnly" : false, "buildIndexes" : true, "hidden" : false, "priority" : 1, "tags" : { }, "slaveDelay" : 0, "votes" : 1 }, { "_id" : 2, "host" : "192.168.16.130:10002", "arbiterOnly" : false, "buildIndexes" : true, "hidden" : false, "priority" : 1, "tags" : { }, "slaveDelay" : 0, "votes" : 1 } ], "settings" : { "chainingAllowed" : true, "heartbeatTimeoutSecs" : 10, "getLastErrorModes" : { }, "getLastErrorDefaults" : { "w" : 1, "wtimeout" : 0 } } }
repset:SECONDARY>
repset:SECONDARY> db.system.replset.remove({"_id":"repset"});
WriteResult({ "nRemoved" : 1 })
repset:SECONDARY>
.......
repset:SECONDARY> db.system.replset.insert({ "_id" : "repset", "version" : 1, "members" : [ { "_id" : 0, "host" : "192.168.16.130:10000", "arbiterOnly" : false, "buildIndexes" : true, "hidden" : false, "priority" : 1, "tags" : { }, "slaveDelay" : 0, "votes" : 0 }, { "_id" : 1, "host" : "192.168.16.130:10001", "arbiterOnly" : false, "buildIndexes" : true, "hidden" : false, "priority" : 1, "tags" : { }, "slaveDelay" : 0, "votes" : 1 }, { "_id" : 2, "host" : "192.168.16.130:10002", "arbiterOnly" : false, "buildIndexes" : true, "hidden" : false, "priority" : 1, "tags" : { }, "slaveDelay" : 0, "votes" : 0 } ], "settings" : { "chainingAllowed" : true, "heartbeatTimeoutSecs" : 10, "getLastErrorModes" : { }, "getLastErrorDefaults" : { "w" : 1, "wtimeout" : 0 } } });
WriteResult({ "nInserted" : 1 })
repset:SECONDARY> exit
bye
######################
重启存活的节点 已经转换为primary
######################
[mongodb@node1 ~]$ mongod --port ${PORT} --shutdown --dbpath=/comm/mg${PORT}/data
killing process with pid: 12349
[mongodb@node1 ~]$ $MONGODB_HOME/bin/mongod --fork --dbpath /comm/mg${PORT}/data --logpath=/comm/mg${PORT}/log/mg${PORT}.log --port ${PORT} --replSet repset
about to fork child process, waiting until server is ready for connections.
forked process: 12544
child process started successfully, parent exiting
[mongodb@node1 ~]$ $MONGODB_HOME/bin/mongo --port ${PORT}
MongoDB shell version: 3.0.15-9-g0513806
connecting to: 127.0.0.1:10001/test
Server has startup warnings:
repset:PRIMARY> rs.status()
{
"set" : "repset",
"date" : ISODate("2017-07-08T07:41:50.053Z"),
"myState" : 1,
"members" : [
{
"_id" : 0,
"name" : "192.168.16.130:10000",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : Timestamp(0, 0),
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2017-07-08T07:41:49.137Z"),
"lastHeartbeatRecv" : ISODate("1970-01-01T00:00:00Z"),
"configVersion" : -1
},
{
"_id" : 1,
"name" : "192.168.16.130:10001",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 14,
"optime" : Timestamp(1499488823, 2),
"optimeDate" : ISODate("2017-07-08T04:40:23Z"),
"electionTime" : Timestamp(1499499699, 1),
"electionDate" : ISODate("2017-07-08T07:41:39Z"),
"configVersion" : 1,
"self" : true
},
{
"_id" : 2,
"name" : "192.168.16.130:10002",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : Timestamp(0, 0),
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2017-07-08T07:41:49.137Z"),
"lastHeartbeatRecv" : ISODate("1970-01-01T00:00:00Z"),
"configVersion" : -1
}
],
"ok" : 1
}
repset:PRIMARY>
#############
后续将有问题的节点重新搭建,并在节点恢复后通过rs.reconfig()把它们的votes属性改回1即可
#############
来自 “ ITPUB博客 ” ,链接:http://blog.itpub.net/26526320/viewspace-2141768/,如需转载,请注明出处,否则将追究法律责任。
转载于:http://blog.itpub.net/26526320/viewspace-2141768/