Prometheus(7)Prometheus + Alertmanager 配置飞书告警

在之前的博客中,已经说明了报警的一般步骤和前置条件。

1 编写Prometheus配置

配置信息:

# my global config
global:
  scrape_interval:     15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
  - static_configs:
    - targets:
       - 192.168.156.135:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"
  - "/opt/prometheus/prometheus-2.6.1.linux-amd64/rules/*.rules"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
    - targets: ['localhost:9090']
  - job_name: 'agent1'
    static_configs: 
    - targets: ['192.168.156.135:9100']
  - job_name: pushgateway
    honor_labels: true
    static_configs:
      - targets: ['192.168.156.135:9091']
        labels:
          instance: pushgateway

实际操作:

[root@localhost prometheus-2.6.1.linux-amd64]# vim prometheus.yml 
# my global config
global:
  scrape_interval:     15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
  - static_configs:
    - targets:
       - 192.168.156.135:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"
  - "/opt/prometheus/prometheus-2.6.1.linux-amd64/rules/*.rules"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
    - targets: ['localhost:9090']
  - job_name: 'agent1'
    static_configs: 
    - targets: ['192.168.156.135:9100']
  - job_name: pushgateway
    honor_labels: true
    static_configs:
      - targets: ['192.168.156.135:9091']
        labels:
          instance: pushgateway
[root@localhost prometheus-2.6.1.linux-amd64]# 

2 编写报警规则

配置信息:

groups:
- name: node-up
  rules:
  - alert: node-up
    expr: up{job="agent1"} == 0
    for: 15s
    labels:
      severity: 1
      team: node
    annotations:
      summary: "{{ $labels.instance }} 已停止运行超过 15s!"
      description: "{{ $labels.instance }} 检测到异常停止!请重点关注!!!"

实际操作:

[root@localhost rules]# vim node-up.rules 
groups:
- name: node-up
  rules:
  - alert: node-up
    expr: up{job="agent1"} == 0
    for: 15s
    labels:
      severity: 1
      team: node
    annotations:
      summary: "{{ $labels.instance }} 已停止运行超过 15s!"
      description: "{{ $labels.instance }} 检测到异常停止!请重点关注!!!"
[root@localhost rules]# 
[root@localhost rules]# pwd
/opt/prometheus/prometheus-2.6.1.linux-amd64/rules
[root@localhost rules]# ls
node-up.rules
[root@localhost rules]# 

3 编写alertmanager配置

配置信息:

global:
  resolve_timeout: 5m
route:
  group_by: ['alertname']
  group_wait: 5s
  group_interval: 5s
  repeat_interval: 5m
  receiver: 'prometheusalert-feishu'
#  receiver: 'web.hook.prometheusalert'
#  routes:
#  - receiver: 'prometheusalert-feishu'
#    group_wait: 10s
#    match:
#      level: '2'
receivers:
#- name: 'web.hook.prometheusalert'
#  webhook_configs:
#  - url: "http://[prometheusalert_url]:8080/prometheusalert/alert"
- name: 'prometheusalert-feishu'
  webhook_configs:
  - url: "http://192.168.156.135:8080/prometheusalert?type=fs&tpl=prometheus-fsv2&fsurl=https://open.feishu.cn/open-apis/bot/v2/hook/xxx-3fba-4903-87d3-xxxx"

实际操作:

[root@localhost alertmanager]# vim  alertmanager2.yml 
global:
  resolve_timeout: 5m
route:
  group_by: ['alertname']
  group_wait: 5s
  group_interval: 5s
  repeat_interval: 5m
  receiver: 'prometheusalert-feishu'
#  receiver: 'web.hook.prometheusalert'
#  routes:
#  - receiver: 'prometheusalert-feishu'
#    group_wait: 10s
#    match:
#      level: '2'
receivers:
#- name: 'web.hook.prometheusalert'
#  webhook_configs:
#  - url: "http://[prometheusalert_url]:8080/prometheusalert/alert"
- name: 'prometheusalert-feishu'
  webhook_configs:
  - url: "http://192.168.156.135:8080/prometheusalert?type=fs&tpl=prometheus-fsv2&fsurl=https://open.feishu.cn/open-apis/bot/v2/hook/xxx-3fba-4903-87d3-xxx"
"alertmanager2.yml" 22L, 660C 已写入                                                                             
[root@localhost alertmanager]# ./amtool check-config  alertmanager2.yml
Checking 'alertmanager2.yml'  SUCCESS
Found:
 - global config
 - route
 - 0 inhibit rules
 - 1 receivers
 - 0 templates

[root@localhost alertmanager]# ls
alertmanager  alertmanager1.yml  alertmanager2.yml  alertmanager.yml  amtool  data  LICENSE  NOTICE  template
[root@localhost alertmanager]# pwd
/opt/prometheus/alertmanager
[root@localhost alertmanager]# 

4 重启服务

4.1 启动PrometheusAlert

在PrometheusAlert的安装目录下面启动

./PrometheusAlert

4.2 重启Prometheus

在安装目录里面如下面操作

[root@localhost prometheus-2.6.1.linux-amd64]# pwd
/opt/prometheus/prometheus-2.6.1.linux-amd64
[root@localhost prometheus-2.6.1.linux-amd64]# ls
console_libraries  consoles  data  LICENSE  NOTICE  prometheus  prometheus.yml  promtool  rules

启动服务

[root@localhost prometheus-2.6.1.linux-amd64]# 
[root@localhost prometheus-2.6.1.linux-amd64]# pkill prometheus
[root@localhost prometheus-2.6.1.linux-amd64]# lsof -i:9090
[root@localhost prometheus-2.6.1.linux-amd64]# ./prometheus --config.file=prometheus.yml & 

4.3 重启alertmanager服务

[root@localhost alertmanager]# pwd
/opt/prometheus/alertmanager
[root@localhost alertmanager]# ls
alertmanager  alertmanager1.yml  alertmanager2.yml  alertmanager.yml  amtool  data  LICENSE  NOTICE  template
[root@localhost alertmanager]# 

启动服务

[root@localhost alertmanager]# ./alertmanager --config.file=alertmanager2.yml

5 关闭node节点制造错误

[root@localhost node_export]# pwd
/opt/node_export
[root@localhost node_export]# ls
LICENSE  node_exporter  nohup.out  NOTICE
[root@localhost node_export]# 

5.1 关闭node

[root@localhost node_export]# lsof -i:9100
COMMAND     PID USER   FD   TYPE  DEVICE SIZE/OFF NODE NAME
prometheu 95405 root   19u  IPv4 1035389      0t0  TCP localhost.localdomain:59670->localhost.localdomain:jetdirect (ESTABLISHED)
node_expo 96011 root    3u  IPv6 1034103      0t0  TCP *:jetdirect (LISTEN)
node_expo 96011 root    5u  IPv6 1035390      0t0  TCP localhost.localdomain:jetdirect->localhost.localdomain:59670 (ESTABLISHED)
[root@localhost node_export]# kill 96011
[root@localhost node_export]# 

查看消息
在这里插入图片描述

5.2 重启node

[root@localhost node_export]# nohup ./node_exporter &
[7] 96267
[6]   已终止               nohup ./node_exporter
[root@localhost node_export]# nohup: 忽略输入并把输出追加到"nohup.out"

[root@localhost node_export]#

查看消息
在这里插入图片描述

评论 33
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

?abc!

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值