基于Docker部署Prometheus+Grafana
文章目录
一、Docker部署Prometheus
1.1 启动node-exporter
# Start node-exporter on the host network with the host's /proc, /sys and /
# mounted read-only inside the container.
# The exclude regex is single-quoted on purpose: inside double quotes the shell
# expands "$$" to its own PID ("$$" is docker-compose escaping and is not
# needed on the command line). The flag name is the current one; the older
# --collector.filesystem.ignored-mount-points spelling is deprecated.
docker run -d --name node-exporter --restart always --network host \
  --volume /etc/localtime:/etc/localtime:ro \
  --volume /proc:/host/proc:ro \
  --volume /sys:/host/sys:ro \
  --volume /:/rootfs:ro \
  prom/node-exporter:v1.6.0 \
  --path.procfs=/host/proc \
  --path.sysfs=/host/sys \
  --collector.filesystem.mount-points-exclude='^/(sys|proc|dev|host|etc|rootfs/var/lib/docker)($|/)'
1.2 启动alertmanager
# Create the directory that holds the Alertmanager configuration
# (-p: no error if it already exists, so this step can be re-run)
mkdir -p /root/alertmanager
# Create the configuration file
vim /root/alertmanager/config.yml
global:
  # An alert that has not been updated for 3 minutes is considered resolved
  resolve_timeout: 3m
route:
  group_by: ['alertname']  # group alerts by their alertname label
  group_wait: 0s  # wait before the first notification of a new group; 0s sends immediately
  group_interval: 10s  # wait before notifying about new alerts added to an already-notified group
  repeat_interval: 1h  # re-send the notification if the alert is still firing after this long
  receiver: 'web.hook'
receivers:
  - name: 'web.hook'
    webhook_configs:
      - url: 'http://192.168.1.201:8060/dingtalk/webhook1/send'
        send_resolved: true  # also send a notification when the alert is resolved
# Inhibition rules: a higher-severity alert mutes the matching lower-severity one
inhibit_rules:
  - source_match:
      severity: 'error'
    target_match:
      severity: 'warn'
    equal: ['alertname', 'target', 'job', 'instance']
  - source_match:
      severity: 'warn'
    target_match:
      severity: 'info'
    equal: ['alertname', 'target', 'job', 'instance']
# Run the Alertmanager container on the host network, with the host
# configuration directory mounted at /etc/alertmanager/
docker run --detach \
  --name=alertmanager \
  --network=host \
  --restart=always \
  -v /etc/localtime:/etc/localtime:ro \
  -v /root/alertmanager/:/etc/alertmanager/ \
  prom/alertmanager:v0.25.0 \
  --storage.path=/alertmanager \
  --config.file=/etc/alertmanager/config.yml
1.3 启动blackbox-exporter
# Run blackbox-exporter and publish container port 9115 on host port 9115
docker run --detach --interactive --tty \
  --name=blackbox-exporter \
  --restart=always \
  --publish 9115:9115 \
  bitnami/blackbox-exporter:latest
1.4 启动prometheus-webhook-dingtalk
# Create the directory that holds the prometheus-webhook-dingtalk configuration
# (-p: no error if it already exists, so this step can be re-run)
mkdir -p /root/prometheus-webhook-dingtalk
# Edit the configuration file
vim /root/prometheus-webhook-dingtalk/config.yml
templates:
  - /etc/prometheus-webhook-dingtalk/templates/template.tmpl
targets:  # one entry per receiver; several receivers can be configured
  webhook1:
    url: https://oapi.dingtalk.com/robot/send?access_token=xxxxxx  # DingTalk robot token
    secret: xxxx  # DingTalk robot signing secret
    message:
      text: '{{ template "email.to.message" . }}'
# Create the alert template. The templates/ directory must exist first,
# otherwise the editor cannot save the file.
mkdir -p /root/prometheus-webhook-dingtalk/templates
vim /root/prometheus-webhook-dingtalk/templates/template.tmpl
{{- /*
  DingTalk message template "email.to.message".
  The first section iterates only over firing alerts and the second only over
  resolved alerts, so a notification that carries both kinds lists each alert
  exactly once, under the matching heading.
  Timestamps are shifted by 28800e9 nanoseconds (+8 hours) before formatting.
*/ -}}
{{ define "email.to.message" }}
{{- if gt (len .Alerts.Firing) 0 -}}
{{- range $index, $alert := .Alerts.Firing -}}
<font color=#FF0000>========= **监控告警** ========= </font>
**告警程序:** Alertmanager
**告警类型:** {{ $alert.Labels.alertname }}
**告警级别:** {{ $alert.Labels.severity }} 级
**告警状态:** {{ .Status }}
**故障主机:** {{ $alert.Labels.instance }} {{ $alert.Labels.device }}
**告警主题:** {{ .Annotations.summary }}
**告警详情:** {{ $alert.Annotations.message }}{{ $alert.Annotations.description}}
**主机标签:** {{ range .Labels.SortedPairs }} </br> [{{ .Name }}: {{ .Value | markdown | html }} ]
{{- end }} </br>
**故障时间:** {{ ($alert.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
<font color=#FF0000>========= = **end** = ========= </font>
{{- end }}
{{- end }}
{{- if gt (len .Alerts.Resolved) 0 -}}
{{- range $index, $alert := .Alerts.Resolved -}}
<font color=#008000>========= **告警恢复** ========= </font>
**告警程序:** Alertmanager
**告警主题:** {{ $alert.Annotations.summary }}
**告警主机:** {{ .Labels.instance }}
**告警类型:** {{ .Labels.alertname }}
**告警级别:** {{ $alert.Labels.severity }} 级
**告警状态:** {{ .Status }}
**告警详情:** {{ $alert.Annotations.message }}{{ $alert.Annotations.description}}
**故障时间:** {{ ($alert.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
**恢复时间:** {{ ($alert.EndsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
<font color=#008000>========= = **end** = ========= </font>
{{- end }}
{{- end }}
{{- end }}
# Make the alert template readable by the container user. The file is only
# read, so it does not need to be world-writable or executable.
chmod 644 /root/prometheus-webhook-dingtalk/templates/template.tmpl
# Run the container. With --network host the container shares the host's
# network stack, so the service is reachable on host port 8060 without -p
# (Docker discards published ports in host network mode).
docker run -d --name=dingding \
  --restart always \
  --network host \
  -v /root/prometheus-webhook-dingtalk:/etc/prometheus-webhook-dingtalk \
  timonwong/prometheus-webhook-dingtalk
1.5 启动prometheus
# Create the Prometheus data directory
mkdir -p /root/prometheus/data
# Give the directory to the user the prom/prometheus image runs as
# (nobody, uid/gid 65534) instead of making it world-writable.
# NOTE(review): assumes a default (non user-namespace-remapped) Docker daemon.
chown 65534:65534 /root/prometheus/data
# Create the configuration file
vim /root/prometheus/prometheus.yml
# Global settings
global:
  scrape_interval: 15s  # scrape targets every 15 seconds (default: every 1 minute)
  evaluation_interval: 15s  # evaluate rules every 15 seconds (default: every 1 minute)
# Alertmanager settings
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['192.168.1.201:9093']
# Alerting rule files
rule_files:
  - "rules/*.yml"
# Scrape settings
scrape_configs:
  - job_name: 'prometheus'
    # Per-job override of the global default: scrape this job every 15 seconds
    scrape_interval: 15s
    static_configs:
      - targets: ['192.168.1.201:9090']
  - job_name: 'alertmanager'
    scrape_interval: 15s
    static_configs:
      - targets: ['192.168.1.201:9093']
  - job_name: 'Prometheus-server'
    scrape_interval: 15s
    static_configs:
      - targets: ['192.168.1.201:9100']
        labels:
          instance: Prometheus-server
  # HTTP probe through blackbox-exporter
  - job_name: "blackbox_http"
    metrics_path: /probe
    params:
      module: [http_2xx]
    static_configs:
      - targets:
          - http://www.baidu.com
    relabel_configs:
      # Pass the listed target as the ?target= parameter and keep it as the
      # instance label, then send the scrape itself to blackbox-exporter.
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: '192.168.1.201:9115'
  # TCP probe through blackbox-exporter
  - job_name: "blackbox_tcp"
    metrics_path: /probe
    params:
      module: [tcp_connect]
    static_configs:
      - targets:
          - '192.168.1.201:3306'
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: '192.168.1.201:9115'
# Start the Prometheus container on the host network.
# /root/prometheus/ supplies the configuration and /root/prometheus/data the TSDB storage.
# NOTE(review): --web.enable-lifecycle and --web.enable-admin-api turn on
# management HTTP endpoints; make sure port 9090 is not reachable by untrusted clients.
docker run --detach \
  --name=prometheus \
  --network=host \
  --restart=always \
  -v /etc/localtime:/etc/localtime:ro \
  -v /root/prometheus/:/etc/prometheus/ \
  -v /root/prometheus/data:/prometheus \
  prom/prometheus:v2.37.6 \
  --config.file=/etc/prometheus/prometheus.yml \
  --storage.tsdb.path=/prometheus \
  --storage.tsdb.retention.time=30d \
  --web.console.libraries=/usr/share/prometheus/console_libraries \
  --web.console.templates=/usr/share/prometheus/consoles \
  --web.enable-lifecycle \
  --web.enable-admin-api
1.6 页面访问
http://192.168.1.201:9090 # 改为自己主机的ip
二、Docker部署Grafana
2.1 启动grafana
# Create the Grafana data directory
mkdir -p /root/grafana/data
# Give the directory to the user the grafana/grafana image runs as
# (uid 472, group 0) instead of making it world-writable.
# NOTE(review): assumes a default (non user-namespace-remapped) Docker daemon.
chown 472:0 /root/grafana/data
# Create the environment file with the Grafana settings (used with --env-file).
# Grafana only reads variables with the GF_ prefix, so the admin password
# must be set as GF_SECURITY_ADMIN_PASSWORD.
cat <<-EOF > /root/grafana/config.monitoring
GF_SECURITY_ADMIN_PASSWORD=admin123
GF_USERS_ALLOW_SIGN_UP=false
EOF
# Run the Grafana container on the host network; settings come from the
# environment file and data is kept in /root/grafana/data
docker run --detach \
  --name=grafana \
  --network=host \
  --restart=always \
  --env-file /root/grafana/config.monitoring \
  -v /etc/localtime:/etc/localtime:ro \
  -v /root/grafana/data:/var/lib/grafana \
  -v /root/grafana/provisioning/:/etc/grafana/provisioning/ \
  grafana/grafana:10.4.6-ubuntu
2.2 页面访问
http://192.168.1.201:3000 # 改为自己主机的ip
总结
以上就是基于Docker部署Prometheus+Grafana的实践。