1、部署zookeeper
# Node 192: create the host directories for ZooKeeper data, configuration and
# transaction logs.
mkdir -p /data/zookeeper/{data,conf,log}
# Write the ZooKeeper configuration file. The heredoc delimiter is quoted, so
# the content below is written verbatim (no shell expansion).
#
# autopurge.purgeInterval is expressed in hours and 0 disables the purge task,
# which would leave autopurge.snapRetainCount=3 without any effect and let
# snapshots / transaction logs grow without bound. It is therefore set to 1.
#
# NOTE(review): no server.N entries are defined here, so this node starts in
# standalone mode; confirm whether 192/193/194 are meant to form an ensemble.
cat > /data/zookeeper/conf/zoo.cfg << 'EOF'
#4lw.commands.whitelist=*
4lw.commands.whitelist=mntr,ruok
clientPort=2181
dataDir=/data/zookeeper/data
dataLogDir=/data/zookeeper/log
tickTime=2000
initLimit=5
syncLimit=2
autopurge.snapRetainCount=3
autopurge.purgeInterval=1
maxClientCnxns=60
EOF
# Generate the launcher script for the ZooKeeper container and run it.
# The heredoc delimiter is quoted, so the script text is written verbatim.
#
# zoo.cfg sets dataLogDir=/data/zookeeper/log, so that path is bind-mounted
# too; without the mount the transaction logs would live only in the
# container's writable layer and be lost when the container is removed.
#
# NOTE(review): mounting the host conf directory over /conf hides any files
# the image ships there (e.g. logging configuration) — confirm this is intended.
cat > /data/zookeeper/start.sh << 'EOF'
docker run -d \
--network host \
--restart=always \
-v /data/zookeeper/data:/data/zookeeper/data \
-v /data/zookeeper/log:/data/zookeeper/log \
-v /data/zookeeper/conf:/conf \
-v /etc/localtime:/etc/localtime \
--name zookeeper \
zookeeper:3.6.3
EOF
bash /data/zookeeper/start.sh
2、部署 zookeeper_exporter
# zookeeper_exporter monitoring:
# Create the exporter's working directory on the host.
mkdir /data/zookeeper_exporter/ -p
# Generate the launcher script (quoted heredoc: content is written verbatim).
# The script first force-removes any container named zookeeper_exporter, then
# starts a new one that publishes port 9141 and is given three ZooKeeper
# addresses through --zk-hosts.
# NOTE(review): the image has no tag, so whatever "latest" resolves to is
# pulled — consider pinning a version.
cat >/data/zookeeper_exporter/start.sh<< 'EOF'
docker rm -f zookeeper_exporter
docker run -d \
--name zookeeper_exporter \
--restart=always \
-v /etc/localtime:/etc/localtime \
-p 9141:9141 \
dabealu/zookeeper-exporter \
--zk-hosts="192.168.11.192:2181,192.168.11.193:2181,192.168.11.194:2181"
EOF
# Run the generated script to (re)create the exporter container.
bash /data/zookeeper_exporter/start.sh
3、配置prometheus
# Append a file-based service-discovery scrape job for ZooKeeper to the
# Prometheus configuration (quoted heredoc: "$1" is written literally and is
# interpreted by Prometheus relabeling, not by the shell).
#
# YAML nesting is significant: file_sd_configs / relabel_configs must be
# children of the job entry, and refresh_interval a sibling of "files".
# NOTE(review): this assumes prometheus.yml currently ends inside its
# scrape_configs list and that existing jobs are indented by two spaces —
# confirm before appending.
#
# Relabeling: the first rule copies the discovered address (host:2181) into
# the "instance" label; the second rewrites the scrape address to port 9141
# on the same host.
cat >> /data/prometheus/conf/prometheus.yml << 'EOF'
#zookeeper自动发现
  - job_name: 'zookeeper'
    file_sd_configs:
      - files:
          - /etc/prometheus/sd_config/zookeeper.yaml
        refresh_interval: 5s
    relabel_configs:
      - source_labels: [__address__]
        regex: (.*)
        target_label: instance
        replacement: $1
      - source_labels: [__address__]
        regex: (.*):(.*)
        target_label: __address__
        replacement: $1:9141
EOF
# Service-discovery target file read by the 'zookeeper' scrape job.
# Make sure the directory exists before writing into it.
mkdir -p /data/prometheus/conf/sd_config
# Quoted heredoc: content is written verbatim. "project" must be nested under
# "labels" and the address under "targets" for the entry to be a valid
# file_sd target group.
# NOTE: ">>" appends, so running this twice adds a duplicate target group.
cat >> /data/prometheus/conf/sd_config/zookeeper.yaml << 'EOF'
#zookeeper自动发现
- labels:
    project: 民生zookeeper
  targets:
    - 192.168.11.192:2181
EOF
访问 Prometheus(http://192.168.11.221:9090),查询指标 zk_up,确认已采集到数据。
4、配置grafana
在 Grafana 中导入仪表盘(Dashboard ID:11442)
5、警报规则
# Write the Prometheus alerting rules for ZooKeeper.
# Make sure the rules directory exists before writing into it.
mkdir -p /data/prometheus/conf/rules
# Quoted heredoc: "{{ $labels.* }}" and "{{ $value }}" are written literally
# and expanded by Prometheus, not by the shell.
#
# YAML nesting is significant: each alert is an item of "rules", and
# severity / summary / description are children of labels / annotations.
#
# The two leader alerts aggregate with "sum by (job)": a bare sum() drops all
# labels, which would make any "{{ $labels.instance }}" in the summary render
# as an empty string, so those summaries reference the job label instead.
cat > /data/prometheus/conf/rules/zookeeper.rules << 'EOF'
groups:
- name: zookeeper-监控告警
  rules:
  - alert: 告警! Zookeeper宕机
    expr: zk_up == 0
    for: 0m
    labels:
      severity: 严重告警
    annotations:
      summary: "{{ $labels.instance }} Zookeeper宕机"
      description: "Zookeeper服务宕机\n 当前值 = {{ $value }}"
  - alert: 告警! Zookeeper丢失主节点
    expr: sum by (job) (zk_server_leader) == 0
    for: 0m
    labels:
      severity: 严重告警
    annotations:
      summary: "{{ $labels.job }} Zookeeper丢失主节点"
      description: "Zookeeper当前没有主节点\n 当前值 = {{ $value }}"
  - alert: 告警! Zookeeper脑裂,多主节点
    expr: sum by (job) (zk_server_leader) > 1
    for: 0m
    labels:
      severity: 严重告警
    annotations:
      summary: "{{ $labels.job }} Zookeeper脑裂,多主节点"
      description: "Zookeeper当前太多主节点\n 当前值 = {{ $value }}"
  - alert: 告警! Zookeeper不OK
    expr: zk_ruok == 0
    for: 3m
    labels:
      severity: 一般告警
    annotations:
      summary: "{{ $labels.instance }} Zookeeper不OK"
      description: "Zookeeper实例不正常\n 当前值 = {{ $value }}"
EOF