基于Docker部署Prometheus+Grafana

基于Docker部署Prometheus+Grafana



一、Docker部署Prometheus

1.1 启动node-exporter

# Start node-exporter with the host's /proc, /sys and / mounted read-only.
# The exclusion regex is single-quoted so the shell passes it verbatim: inside
# double quotes "$$" expands to the shell's PID ("$$" is only the escape for a
# literal "$" in Compose files). The flag uses its current name; the old
# --collector.filesystem.ignored-mount-points spelling is deprecated.
docker run -d --name node-exporter --restart always --network host \
--volume /etc/localtime:/etc/localtime:ro \
--volume /proc:/host/proc:ro \
--volume /sys:/host/sys:ro \
--volume /:/rootfs:ro \
prom/node-exporter:v1.6.0 \
--path.procfs=/host/proc \
--path.sysfs=/host/sys \
--collector.filesystem.mount-points-exclude='^/(sys|proc|dev|host|etc|rootfs/var/lib/docker)($|/)'

1.2 启动alertmanager

# Create the configuration directory (-p: succeed even if it already exists)
mkdir -p /root/alertmanager

# Create the configuration file
vim /root/alertmanager/config.yml
global:
  # An alert that has not been updated for 3 minutes is treated as resolved
  resolve_timeout: 3m

route:
  # Group alerts by alert name
  group_by: ['alertname']
  # How long to wait before sending the first notification for a new group;
  # 0s sends it immediately
  group_wait: 0s
  # How long to wait before notifying about new alerts added to a group that
  # has already been notified
  group_interval: 10s
  # Re-send the notification if the alert is still firing after this interval
  repeat_interval: 1h
  receiver: 'web.hook'

receivers:
  - name: 'web.hook'
    webhook_configs:
      - url: 'http://192.168.1.201:8060/dingtalk/webhook1/send'
        # Also send a notification when the alert is resolved
        send_resolved: true

# Inhibition rules: a firing higher-severity alert mutes the lower-severity
# alert that shares the labels listed in `equal`. Uses source_matchers /
# target_matchers; source_match / target_match are deprecated.
inhibit_rules:
  - source_matchers:
      - 'severity = "error"'
    target_matchers:
      - 'severity = "warn"'
    equal: ['alertname', 'target', 'job', 'instance']
  - source_matchers:
      - 'severity = "warn"'
    target_matchers:
      - 'severity = "info"'
    equal: ['alertname', 'target', 'job', 'instance']

# Run the Alertmanager container on the host network, with the host config
# directory mounted at /etc/alertmanager/
docker run --detach \
  --name alertmanager \
  --restart=always \
  --network=host \
  -v /etc/localtime:/etc/localtime:ro \
  -v /root/alertmanager/:/etc/alertmanager/ \
  prom/alertmanager:v0.25.0 \
  --config.file=/etc/alertmanager/config.yml \
  --storage.path=/alertmanager

1.3 启动blackbox-exporter

# Run blackbox-exporter and publish its port 9115 on the host
docker run --detach --interactive --tty \
  --name blackbox-exporter \
  --restart=always \
  --publish 9115:9115 \
  bitnami/blackbox-exporter:latest

1.4 启动prometheus-webhook-dingtalk

# Create the configuration directory together with its templates/ subdirectory;
# the alert template is saved there later, and vim cannot write into a
# directory that does not exist
mkdir -p /root/prometheus-webhook-dingtalk/templates

# Edit the configuration file
vim /root/prometheus-webhook-dingtalk/config.yml
templates:
  - '/etc/prometheus-webhook-dingtalk/templates/template.tmpl'

# Receivers; more than one target may be configured here
targets:
  webhook1:
    # DingTalk robot webhook URL, including the robot's access token
    url: 'https://oapi.dingtalk.com/robot/send?access_token=xxxxxx'
    # DingTalk robot signing secret
    secret: 'xxxx'
    message:
      text: '{{ template "email.to.message" . }}'

# Create the alert template
vim /root/prometheus-webhook-dingtalk/templates/template.tmpl
{{/*
  DingTalk message template "email.to.message".
  Renders one markdown section per firing alert, then one per resolved alert.
  Each section ranges over only its own subset (.Alerts.Firing or
  .Alerts.Resolved), so a notification containing both kinds does not list
  resolved alerts under the firing heading or the reverse.
  Timestamps are shifted by 28800e9 ns (8 hours) before formatting.
*/}}
{{ define "email.to.message" }}

{{- if gt (len .Alerts.Firing) 0 -}}
{{- range $index, $alert := .Alerts.Firing -}}

<font color=#FF0000>=========  **监控告警** ========= </font>  

**告警程序:**     Alertmanager   
**告警类型:**    {{ $alert.Labels.alertname }}   
**告警级别:**    {{ $alert.Labels.severity }} 级   
**告警状态:**    {{ .Status }}   
**故障主机:**    {{ $alert.Labels.instance }} {{ $alert.Labels.device }}   
**告警主题:**    {{ .Annotations.summary }}   
**告警详情:**    {{ $alert.Annotations.message }}{{ $alert.Annotations.description}}   
**主机标签:**    {{ range .Labels.SortedPairs  }}  </br> [{{ .Name }}: {{ .Value | markdown | html }} ] 
{{- end }} </br>

**故障时间:**    {{ ($alert.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}  
<font color=#FF0000>========= = **end** =  ========= </font>
{{- end }}
{{- end }}

{{- if gt (len .Alerts.Resolved) 0 -}}
{{- range $index, $alert := .Alerts.Resolved -}}

<font color=#008000>========= **告警恢复** ========= </font>  

**告警程序:**     Alertmanager   
**告警主题:**    {{ $alert.Annotations.summary }}  
**告警主机:**    {{ .Labels.instance }}   
**告警类型:**    {{ .Labels.alertname }}  
**告警级别:**    {{ $alert.Labels.severity }} 级   
**告警状态:**    {{   .Status }}  
**告警详情:**    {{ $alert.Annotations.message }}{{ $alert.Annotations.description}}  
**故障时间:**    {{ ($alert.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}  
**恢复时间:**    {{ ($alert.EndsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}  

<font color=#008000>========= = **end** =  ========= </font>
{{- end }}
{{- end }}
{{- end }}
# Make the alert template readable by the container process; the file is only
# read, so 644 is used instead of world-writable 777
chmod 644 /root/prometheus-webhook-dingtalk/templates/template.tmpl

# Run the container. No -p mapping is given: with --network host the service
# binds directly to the host's port 8060 and Docker discards published ports.
docker run -d --name=dingding \
--restart always \
--network host \
-v /root/prometheus-webhook-dingtalk:/etc/prometheus-webhook-dingtalk \
timonwong/prometheus-webhook-dingtalk

1.5 启动prometheus

# Create the Prometheus data directory
mkdir -p /root/prometheus/data
# Give the directory to the user the prom/prometheus image runs as
# (nobody, UID/GID 65534) instead of making it world-writable with 777
chown -R 65534:65534 /root/prometheus/data

# Create the configuration file
vim /root/prometheus/prometheus.yml
# Global settings
global:
  # Scrape targets every 15 seconds (the default is every 1 minute)
  scrape_interval: 15s
  # Evaluate rules every 15 seconds (the default is every 1 minute)
  evaluation_interval: 15s

# Alertmanager settings
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - '192.168.1.201:9093'

# Alerting rule files
rule_files:
  - 'rules/*.yml'

# Scrape settings
scrape_configs:
  - job_name: 'prometheus'
    # Overrides the global default: scrape this job's targets every 15 seconds
    scrape_interval: 15s
    static_configs:
      - targets:
          - '192.168.1.201:9090'

  - job_name: 'alertmanager'
    scrape_interval: 15s
    static_configs:
      - targets:
          - '192.168.1.201:9093'

  - job_name: 'Prometheus-server'
    scrape_interval: 15s
    static_configs:
      - targets:
          - '192.168.1.201:9100'
        labels:
          instance: 'Prometheus-server'

  # HTTP probe through blackbox-exporter
  - job_name: 'blackbox_http'
    metrics_path: '/probe'
    params:
      module:
        - 'http_2xx'
    static_configs:
      - targets:
          - 'http://www.baidu.com'
    relabel_configs:
      # Pass the listed target to the exporter as the ?target= parameter
      - source_labels:
          - '__address__'
        target_label: '__param_target'
      # Keep the probed target as the instance label
      - source_labels:
          - '__param_target'
        target_label: 'instance'
      # Send the scrape itself to the blackbox-exporter address
      - target_label: '__address__'
        replacement: '192.168.1.201:9115'

  # TCP connect probe through blackbox-exporter
  - job_name: 'blackbox_tcp'
    metrics_path: '/probe'
    params:
      module:
        - 'tcp_connect'
    static_configs:
      - targets:
          - '192.168.1.201:3306'
    relabel_configs:
      - source_labels:
          - '__address__'
        target_label: '__param_target'
      - source_labels:
          - '__param_target'
        target_label: 'instance'
      - target_label: '__address__'
        replacement: '192.168.1.201:9115'

# Start the Prometheus container on the host network, with the host config
# directory and data directory mounted into it
docker run --detach \
  --name prometheus \
  --restart=always \
  --network=host \
  -v /etc/localtime:/etc/localtime:ro \
  -v /root/prometheus/:/etc/prometheus/ \
  -v /root/prometheus/data:/prometheus \
  prom/prometheus:v2.37.6 \
  --config.file=/etc/prometheus/prometheus.yml \
  --storage.tsdb.path=/prometheus \
  --storage.tsdb.retention.time=30d \
  --web.console.libraries=/usr/share/prometheus/console_libraries \
  --web.console.templates=/usr/share/prometheus/consoles \
  --web.enable-lifecycle \
  --web.enable-admin-api

1.6 页面访问

http://192.168.1.201:9090  # 改为自己主机的ip

二、Docker部署Grafana

2.1 启动grafana

# Create the data directory
mkdir -p /root/grafana/data
# Give the directory to the user the official Grafana image runs as
# (UID 472, root group) instead of making it world-writable with 777
chown -R 472:0 /root/grafana/data

# Write the environment file passed to the container with --env-file.
# Grafana only reads variables prefixed with GF_, so the admin password key
# must be spelled GF_SECURITY_ADMIN_PASSWORD.
cat <<-EOF > /root/grafana/config.monitoring
GF_SECURITY_ADMIN_PASSWORD=admin123
GF_USERS_ALLOW_SIGN_UP=false
EOF

# Run the Grafana container on the host network, loading its settings from the
# environment file
docker run --detach \
  --name grafana \
  --restart=always \
  --network=host \
  --env-file /root/grafana/config.monitoring \
  -v /etc/localtime:/etc/localtime:ro \
  -v /root/grafana/data:/var/lib/grafana \
  -v /root/grafana/provisioning/:/etc/grafana/provisioning/ \
  grafana/grafana:10.4.6-ubuntu

2.2 页面访问

http://192.168.1.201:3000  # 改为自己主机的ip

总结

以上就是基于Docker部署Prometheus+Grafana的实践。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值