TiDB 4.0.0:TiKV 的 leader 数量突然降为 0,之后再无 leader 分配
前提:在没有做任何操作的情况下,TiKV 上的 leader 消失
1、问题描述:没有做缩容操作,TiKV 的 leader 数量突然降为 0
分析过程:
1.1、查看 store 状态:该 store 的 "state_name" 为 "Up","region_count" 为 3644,但 "leader_count" 为 0
{
"store": {
"id": 223229,
"address": "192.168.192.27:20160",
"version": "4.0.0",
"status_address": "192.168.192.27:20180",
"git_hash": "198a2cea01734ce8f46d55a29708f123f9133944",
"start_timestamp": 1596157805,
"last_heartbeat": 1596437562910927155,
"state_name": "Up"
},
"status": {
"capacity": "388.2GiB",
"available": "362.3GiB",
"used_size": "18.05GiB",
"leader_count": 0,
"leader_weight": 1,
"leader_score": 0,
"leader_size": 0,
"region_count": 3644,
"region_weight": 1,
"region_score": 95623,
"region_size": 95623,
"start_ts": "2020-07-31T09:10:05+08:00",
"last_heartbeat_ts": "2020-08-03T14:52:42.910927155+08:00",
"uptime": "77h42m37.910927155s"
}
}
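1.2、查看集群 config 情况:在 schedulers-v2 中可以看到一个 evict-leader 调度器,其参数为 store 223229(schedulers-payload 中的 evict-leader-scheduler 也对应该 store),这就是该 store 上 leader 数量为 0 的原因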
tiup ctl pd -u http://192.168.192.32:2379 config show all
Starting component `ctl`: pd -u http://192.168.192.32:2379 config show all
{
"client-urls": "http://0.0.0.0:2379",
"peer-urls": "http://192.168.192.32:2380",
"advertise-client-urls": "http://192.168.192.32:2379",
"advertise-peer-urls": "http://192.168.192.32:2380",
"name": "pd_huirui-32",
"data-dir": "/home/tidb/deploy/data.pd",
"force-new-cluster": false,
"enable-grpc-gateway": true,
"initial-cluster": "pd_huirui-31=http://192.168.192.31:2380,pd_huirui-32=http://192.168.192.32:2380,pd_huirui-33=http://192.168.192.33:2380",
"initial-cluster-state": "new",
"join": "",
"lease": 3,
"log": {
"level": "info",
"format": "text",
"disable-timestamp": false,
"file": {
"filename": "/home/tidb/deploy/log/pd.log",
"max-size": 300,
"max-days": 0,
"max-backups": 0
},
"development": false,
"disable-caller": false,
"disable-stacktrace": false,
"disable-error-verbose": true,
"sampling": null
},
"tso-save-interval": "3s",
"metric": {
"job": "pd_huirui-32",
"address": "",
"interval": "15s"
},
"schedule": {
"max-snapshot-count": 3,
"max-pending-peer-count": 16,
"max-merge-region-size": 20,
"max-merge-region-keys": 200000,
"split-merge-interval": "1h0m0s",
"enable-one-way-merge": "false",
"enable-cross-table-merge": "false",
"patrol-region-interval": "100ms",
"max-store-down-time": "30m0s",
"leader-schedule-limit": 4,
"leader-schedule-policy": "count",
"region-schedule-limit": 4,
"replica-schedule-limit": 8,
"merge-schedule-limit": 8,
"hot-region-schedule-limit": 4,
"hot-region-cache-hits-threshold": 3,
"store-balance-rate": 15,
"tolerant-size-ratio": 5,
"low-space-ratio": 0.8,
"high-space-ratio": 0.6,
"scheduler-max-waiting-operator": 3,
"enable-remove-down-replica": "true",
"enable-replace-offline-replica": "true",
"enable-make-up-replica": "true",
"enable-remove-extra-replica": "true",
"enable-location-replacement": "true",
"enable-debug-metrics": "false",
"schedulers-v2": [
{
"type": "balance-region",
"args": null,
"disable": false,
"args-payload": ""
},
{
"type": "balance-leader",
"args": null,
"disable": false,
"args-payload": ""
},
{
"type": "hot-region",
"args": null,
"disable": false,
"args-payload": ""
},
{
"type": "label",
"args": null,
"disable": false,
"args-payload": ""
},
{
"type": "evict-leader",
"args": [
"223229"
],
"disable": false,
"args-payload": ""
}
],
"schedulers-payload": {
"balance-hot-region-scheduler": "null",
"balance-leader-scheduler": "{\"name\":\"balance-leader-scheduler\",\"ranges\":[{\"start-key\":\"\",\"end-key\":\"\"}]}",
"balance-region-scheduler": "{\"name\":\"balance-region-scheduler\",\"ranges\":[{\"start-key\":\"\",\"end-key\":\"\"}]}",
"evict-leader-scheduler": "{\"store-id-ranges\":{\"223229\":[{\"start-key\":\"\",\"end-key\":\"\"}]}}",
"label-scheduler": "{\"name\":\"label-scheduler\",\"ranges\":[{\"start-key\":\"\",\"end-key\":\"\"}]}"
},
"store-limit-mode": "manual"
},
"replication": {
"max-replicas": 3,
"location-labels": "",
"strictly-match-label": "false",
"enable-placement-rules": "false"
},
"pd-server": {
"use-region-storage": "true",
"max-gap-reset-ts": "24h0m0s",
"key-type": "table",
"runtime-services": "",
"metric-storage": "http://192.168.192.33:9090",
"dashboard-address": "http://192.168.192.33:2379"
},
"cluster-version": "4.0.0",
"quota-backend-bytes": "8GiB",
"auto-compaction-mode": "periodic",
"auto-compaction-retention-v2": "1h",
"TickInterval": "500ms",
"ElectionInterval": "3s",
"PreVote": true,
"security": {
"cacert-path": "",
"cert-path": "",
"key-path": "",
"cert-allowed-cn": null
},
"label-property": {},
"WarningMsgs": [
"Config contains undefined item: namespace-classifier"
],
"DisableStrictReconfigCheck": false,
"HeartbeatStreamBindInterval": "1m0s",
"LeaderPriorityCheckInterval": "1m0s",
"dashboard": {
"tidb_cacert_path": "",
"tidb_cert_path": "",
"tidb_key_path": "",
"public_path_prefix": "/dashboard"
},
"replication-mode": {
"replication-mode": "majority",
"dr-auto-sync": {
"label-key": "",
"primary": "",
"dr": "",
"primary-replicas": 0,
"dr-replicas": 0,
"wait-store-timeout": "1m0s",
"wait-sync-timeout": "1m0s"
}
}
}
1.3、移除针对该 store 的 evict-leader 调度器:evict-leader 调度器的作用是驱逐指定 Store 上的所有 Leader(例如 evict-leader-scheduler-1 对应 Store 1),可以通过 scheduler add/remove 添加或移除。此处移除 store 223229 对应的调度器,如果显示 Success!,那么 leader 就会恢复回来
[tidb@back-paas ~]$ tiup ctl pd -u http://192.168.192.32:2379 scheduler remove evict-leader-scheduler-223229
Starting component `ctl`: pd -u http://192.168.192.32:2379 scheduler remove evict-leader-scheduler-223229
Success!
TiDB 4.0.5 中的执行方式:
[tidb@back-paas ~]$ tiup ctl pd -u http://192.168.192.32:2379 scheduler remove evict-leader-scheduler --key "223229"
Starting component `ctl`: pd -u http://192.168.192.32:2379 scheduler remove evict-leader-scheduler --key "223229"
Success!
2、tiup 遇到报错 Error: manifest has expired at xxx
注意:该问题只会出现在联网环境中,离线环境不存在此问题
[tidb@test4 ~]$ tiup list tidb
Error: failed to fetch component: manifest has expired at: 2020-08-23T15:54:02+08:00
[原因分析]
原因是用户本地的 manifest 签名有过期时间。每次执行 tiup list 都会更新本地的 manifests,所以经常使用不会有问题;但如果长时间不使用,本地的 manifests 没有更新,就会过期。
[解决方案]
执行 rm ~/.tiup/manifests/* 后,再重新执行 tiup 命令即可。
之后的版本 tiup 会修复此问题。