c语言开发exporter,prometheus数据采集exporter全家桶

1. exporter介绍

exporter是prometheus监控中重要的组成部分,负责数据指标的采集。上篇文章介绍了prometheus server的相关内容,本文将介绍数据采集插件。官方给出的插件有node_exporter、blackbox_exporter、mysqld_exporter、snmp_exporter等,第三方的插件有redis_exporter,cadvisor等。

下面我将结合实际工作中使用到的插件来分别介绍。

2. node_exporter

node_exporter主要用来采集机器的性能指标数据,包括cpu,内存,磁盘,io等基本信息。上篇文章介绍prometheus server时已详细介绍了node_exporter,这里就不再赘述。

3. mysqld_exporter

mysqld_exporter主要用于监控采集mysql数据库服务器相关指标。

#下载二进制文件

root@db-100-51:~# cd /data/

root@db-100-51:/data# wget https://github.com/prometheus/mysqld_exporter/releases/download/v0.10.0/mysqld_exporter-0.10.0.linux-amd64.tar.gz

root@db-100-51:/data# tar -xf mysqld_exporter-0.10.0.linux-amd64.tar.gz

root@db-100-51:/data# cd mysqld_exporter-0.10.0/

root@db-100-51:/data/mysqld_exporter-0.10.0# ls

LICENSE mysqld_exporter NOTICE

root@db-100-51:/data/mysqld_exporter-0.10.0# mkdir log

#mysqld_exporter需要连接到数据库,创建一个登录数据库的用户

mysql> GRANT PROCESS, REPLICATION CLIENT, SELECT ON *.* TO 'exporter'@'localhost' identified by 'cds8080808!';

mysql> flush privileges;

#创建用于连接数据库的配置文件

root@db-100-51:/data/mysqld_exporter-0.10.0# vim .my.cnf

[client]

user=exporter

password=cds8080808!

#创建supervisor启动mysqld_exporter

root@db-100-51:/data/mysqld_exporter-0.10.0# vim /etc/supervisor/conf.d/mysqld_exporter.conf

[program:mysqld_exporter]

command = /data/mysqld_exporter-0.10.0/mysqld_exporter -config.my-cnf="/data/mysqld_exporter-0.10.0/.my.cnf"

autostart = true

autorestart = true

startsecs = 5

startretries = 3

redirect_stderr = true

stdout_logfile=/data/mysqld_exporter-0.10.0/log/out-mysqld_exporter.log

stderr_logfile=/data/mysqld_exporter-0.10.0/log/err-mysqld_exporter.log

stdout_logfile_maxbytes = 20MB

stdout_logfile_backups = 20

#启动mysqld_exporter

root@db-100-51:/data/mysqld_exporter-0.10.0# supervisorctl update mysqld_exporter

root@db-100-51:/data/mysqld_exporter-0.10.0# supervisorctl status mysqld_exporter

4. redis_exporter

redis_exporter主要用于监控采集redis数据库服务器相关指标。

#下载二进制文件

root@redis-node1-slave:~# cd /data/

root@redis-node1-slave:/data# wget https://github.com/oliver006/redis_exporter/releases/download/v0.24.0/redis_exporter-v0.24.0.linux-amd64.tar.gz

root@redis-node1-slave:/data# tar -xf redis_exporter-v0.24.0.linux-amd64.tar.gz

root@redis-node1-slave:/data# mkdir /data/redis_exporter-v0.24.0

root@redis-node1-slave:/data# mv redis_exporter redis_exporter-v0.24.0

root@redis-node1-slave:/data# cd redis_exporter-v0.24.0/

root@redis-node1-slave:/data/redis_exporter-v0.24.0# mkdir log

#配置supervisor启动redis_exporter

root@redis-node1-slave:/data/redis_exporter-v0.24.0# vim /etc/supervisor/conf.d/redis_exporter.conf

[program:redis_exporter]

command = /data/redis_exporter-v0.24.0/redis_exporter -redis.addr 10.13.0.100:6379 -redis.password cds-china ; 此处ip为redis服务器的ip

autostart = true

startsecs = 5

startretries = 3

redirect_stderr = true

stdout_logfile = /data/redis_exporter-v0.24.0/log/out-redis_exporter.log

stderr_logfile = /data/redis_exporter-v0.24.0/log/err-redis_exporter.log

stdout_logfile_maxbytes = 20MB

stdout_logfile_backups = 20

#启动redis_exporter

root@redis-node1-slave:/data/redis_exporter-v0.24.0# supervisorctl update redis_exporter

root@redis-node1-slave:/data/redis_exporter-v0.24.0# supervisorctl status redis_exporter

5. blackbox_exporter

blackbox_exporter是prometheus社区提供的官方黑盒监控解决方案,其允许用户通过:http、https、dns、tcp以及icmp的方式对网络进行探测。我们利用icmp探针可以检测网络是否通畅,利用http,https可以检测网页是否可以正常访问,利用tcp检测服务端口判断服务是否正常。

#下载二进制文件

root@prometheus:~# cd /data/

root@prometheus:/data# wget https://github.com/prometheus/blackbox_exporter/releases/download/v0.13.0/blackbox_exporter-0.13.0.linux-amd64.tar.gz

root@prometheus:/data# tar -xf blackbox_exporter-0.13.0.linux-amd64.tar.gz

#修改blackbox_exporter配置文件

root@prometheus:/data# cd blackbox_exporter-0.13.0/

root@prometheus:/data/blackbox_exporter-0.13.0# vim blackbox.yml

modules:

http_2xx:

prober: http

timeout: 5s

http:

preferred_ip_protocol: "ip4"

no_follow_redirects: true

valid_http_versions: ["HTTP/1.1", "HTTP/2"]

valid_status_codes: [200,302] # Defaults to 2xx

method: GET

http_post_2xx:

prober: http

http:

method: POST

tcp_connect:

prober: tcp

timeout: 5s

icmp:

prober: icmp

timeout: 5s

#配置systemd启动blackbox_exporter

root@prometheus:/data/blackbox_exporter-0.13.0# vim /usr/lib/systemd/system/blackbox_exporter.service

[Unit]

Description=Prometheus blackbox exporter

After=local-fs.target network-online.target network.target

Wants=local-fs.target network-online.target network.target

[Service]

User=root

Type=simple

WorkingDirectory=/data/blackbox_exporter-0.13.0/

ExecStart=/data/blackbox_exporter-0.13.0/blackbox_exporter --config.file=/data/blackbox_exporter-0.13.0/blackbox.yml

Restart=on-failure

[Install]

WantedBy=multi-user.target

root@prometheus:/data/blackbox_exporter-0.13.0# systemctl start blackbox_exporter

root@prometheus:/data/blackbox_exporter-0.13.0# systemctl enable blackbox_exporter

root@prometheus:/data/blackbox_exporter-0.13.0# systemctl status blackbox_exporter

6. cadvisor

cadvisor是google开源的用于监控容器运行的工具。

#下载二进制文件

root@GZH-vSPC02:~# cd /data/

root@GZH-vSPC02:/data# wget https://github.com/google/cadvisor/releases/download/v0.33.0/cadvisor

root@GZH-vSPC02:/data# mkdir cadvisor-v0.33.0

root@GZH-vSPC02:/data# mv cadvisor cadvisor-v0.33.0/

root@GZH-vSPC02:/data# cd cadvisor-v0.33.0/

root@GZH-vSPC02:/data/cadvisor-v0.33.0# mkdir log

#配置supervisor启动cadvisor

root@GZH-vSPC02:/data/cadvisor-v0.33.0# vim /etc/supervisor/conf.d/cadvisor-server.conf

[program:cadvisor-server]

command = /data/cadvisor-v0.33.0/cadvisor

autostart = true

autorestart = true

startsecs = 5

startretries = 3

redirect_stderr = true

stdout_logfile=/data/cadvisor-v0.33.0/log/out-cadvisor.log

stderr_logfile=/data/cadvisor-v0.33.0/log/err-cadvisor.log

stdout_logfile_maxbytes = 20MB

stdout_logfile_backups = 20

root@GZH-vSPC02:/data/cadvisor-v0.33.0# supervisorctl update cadvisor-server

root@GZH-vSPC02:/data/cadvisor-v0.33.0# supervisorctl status cadvisor-server

7. 配置prometheus服务端

上面布置了exporter,我们需要配置prometheus server来拿到exporter采集到数据。

#修改prometheus配置文件,配置文件上篇文章已经介绍了,这里就不再解释

root@prometheus:~# cd /data/prometheus-2.4.3/

root@prometheus:/data/prometheus-2.4.3# vim prometheus.yml

#my global config

global:

scrape_interval: 30s # Set the scrape interval to every 30 seconds. Default is every 1 minute.

evaluation_interval: 25s # Evaluate rules every 25 seconds. The default is every 1 minute.

scrape_timeout: 25s # Per-scrape timeout of 25 seconds. The global default is 10s.

#Alertmanager configuration

alerting:

alertmanagers:

- static_configs:

- targets:

- 10.13.0.80:9093 #此处ip为alertmanager的ip,有多个alertmanager时填写多个

- 10.13.0.81:9093

#Load rules once and periodically evaluate them according to the global 'evaluation_interval'.

rule_files:

- "/data/prometheus-2.4.3/rules/node_down.yml"

- "/data/prometheus-2.4.3/rules/memory_over.yml"

- "/data/prometheus-2.4.3/rules/disk_over.yml"

- "/data/prometheus-2.4.3/rules/cpu_over.yml"

- "/data/prometheus-2.4.3/rules/http_check.yml"

- "/data/prometheus-2.4.3/rules/tcp_check.yml"

- "/data/prometheus-2.4.3/rules/mysql_check.yml"

- "/data/prometheus-2.4.3/rules/redis_down_check.yml"

- "/data/prometheus-2.4.3/rules/redis_rule_check.yml"

- "/data/prometheus-2.4.3/rules/container_down.yml"

- "/data/prometheus-2.4.3/rules/ping_check.yml"

#A scrape configuration containing exactly one endpoint to scrape:

#Here it's Prometheus itself.

scrape_configs:

#The job name is added as a label `job=` to any timeseries scraped from this config.

- job_name: 'prometheus'

#metrics_path defaults to '/metrics'

#scheme defaults to 'http'.

static_configs:

- targets: ['localhost:9090']

- job_name: 'GICHOST'

file_sd_configs:

- files: ['./node_exporter/host.json']

- job_name: 'blackbox-http'

scrape_interval: 5s

metrics_path: /probe

params:

module: [http_2xx] # Look for a HTTP 200 response.

file_sd_configs:

- files: ['./blackbox_exporter/http.json']

relabel_configs:

- source_labels: [__address__]

target_label: __param_target

- source_labels: [__param_target]

target_label: instance

- target_label: __address__

replacement: 10.13.0.82:9115 #此处ip为blackbox服务的ip

- job_name: 'blackbox-tcp'

scrape_interval: 5s

metrics_path: /probe

params:

module: [tcp_connect]

file_sd_configs:

- files: ['./blackbox_exporter/tcp.json']

relabel_configs:

- source_labels: [__address__]

target_label: __param_target

- source_labels: [__param_target]

target_label: instance

- target_label: __address__

replacement: 10.13.0.82:9115

- job_name: 'mysqld-exporter'

file_sd_configs:

- files: ['./mysqld_exporter/mysqld.json']

- job_name: 'blackbox-ping'

scrape_interval: 5s

metrics_path: /probe

params:

module: [icmp]

file_sd_configs:

- files: ['./blackbox_exporter/ping.json']

relabel_configs:

- source_labels: [__address__]

target_label: __param_target

- source_labels: [__param_target]

target_label: instance

- target_label: __address__

replacement: 10.13.0.82:9115

- job_name: 'redis-exporter'

file_sd_configs:

- files: ['./redis_exporter/redis.json']

- job_name: 'container-exporter'

file_sd_configs:

- files: ['./container_exporter/container.json']

8. 配置prometheus 主机监控文件和告警规则

8.1 主机监控文件

#mysqld_exporter主机监控文件

root@prometheus:/data/prometheus-2.4.3# mkdir mysqld_exporter

root@prometheus:/data/prometheus-2.4.3# cd mysqld_exporter/

root@prometheus:/data/prometheus-2.4.3/mysqld_exporter# vim mysqld.json

[

{

"targets":[

"10.13.0.10:9104"

],

"labels":{

"dbinstance":"db100.51"

}

}

]

#redis_exporter主机监控文件

root@prometheus:/data/prometheus-2.4.3# mkdir redis_exporter

root@prometheus:/data/prometheus-2.4.3# cd redis_exporter/

root@prometheus:/data/prometheus-2.4.3/redis_exporter# vim redis.json

[

{

"targets":[

"10.13.0.11:9121",

"10.13.0.12:9121"

],

"labels":{

"service":"redis"

}

}

]

#blackbox_exporter主机监控文件

root@prometheus:/data/prometheus-2.4.3# mkdir blackbox_exporter

root@prometheus:/data/prometheus-2.4.3# cd blackbox_exporter

root@prometheus:/data/prometheus-2.4.3/blackbox_exporter# vim http.json

[

{

"targets":[

"http://10.13.0.13:6011/health",

"http://10.13.0.14:6011/health"

],

"labels":{

"service":"sre"

}

}

]

root@prometheus:/data/prometheus-2.4.3/blackbox_exporter# vim tcp.json

[

{

"targets":[

"10.13.0.14:13371",

"10.13.0.15:13371"

],

"labels":{

"service":"vspc"

}

}

]

root@prometheus:/data/prometheus-2.4.3/blackbox_exporter# vim ping.json

[

{

"targets":[

"10.13.0.16",

"10.13.0.17"

],

"labels":{

"service":"mysql"

}

}

]

#配置cadvisor主机监控文件

root@prometheus:/data/prometheus-2.4.3# mkdir container_exporter

root@prometheus:/data/prometheus-2.4.3# cd container_exporter/

root@prometheus:/data/prometheus-2.4.3/container_exporter# vim container.json

[

{

"targets":[

"10.13.0.18:8080",

"10.13.0.19:8080"

],

"labels":{

"service":"docker-monitor"

}

}

]

8.2 告警规则

node_exporter监控获取的cpu,磁盘,实例存活规则上篇文章已经介绍,这里不再介绍

root@prometheus:/data/prometheus-2.4.3# cd rules

root@prometheus:/data/prometheus-2.4.3/rules# ls

container_down.yml disk_over.yml mysql_check.yml node_down.yml redis_down_check.yml tcp_check.yml

cpu_over.yml http_check.yml memory_over.yml ping_check.yml redis_rule_check.yml

#mysqld_exporter监控mysql数据告警规则

root@prometheus:/data/prometheus-2.4.3/rules# vim mysql_check.yml

groups:

- name: MySQLStatsAlert

rules:

- alert: MySQL is down

expr: mysql_up == 0

for: 1m

labels:

severity: critical

annotations:

summary: "Instance {{$labels.instance}} MySQL is down"

description: "MySQL database is down. This requires immediate action!(current value is: {{$value}})"

- alert: Mysql_High_QPS

expr: rate(mysql_global_status_questions[5m]) > 8000

for: 1m

labels:

severity: warning

annotations:

summary: "{{$labels.instance}}: Mysql_High_QPS detected"

description: "{{$labels.instance}}: Mysql operation is more than 8000 per second ,(current value is: {{$value}})"

- alert: Mysql_Too_Many_Slow_Query

expr: rate(mysql_global_status_slow_queries[30m]) > 3

for: 1m

labels:

severity: warning

annotations:

summary: "{{$labels.instance}}: Mysql_Too_Many_Slow_Query detected"

description: "{{$labels.instance}}: Mysql current Slow_Query Sql is more than 3 ,(current value is: {{$value}})"

- alert: Mysql_Deadlock

expr: mysql_global_status_innodb_deadlocks > 300

for: 1m

labels:

severity: warning

annotations:

summary: "{{$labels.instance}}: Mysql_Deadlock detected"

description: "{{$labels.instance}}: Mysql Deadlock was found ,(current value is: {{$value}})"

- alert: open files high

expr: mysql_global_status_innodb_num_open_files > (mysql_global_variables_open_files_limit) * 0.75

for: 1m

labels:

severity: warning

annotations:

summary: "Instance {{$labels.instance}} open files high"

description: "Open files is high. Please consider increasing open_files_limit.(current value is: {{$value}})"

- alert: Used more than 80% of max connections limited

expr: mysql_global_status_max_used_connections > mysql_global_variables_max_connections * 0.8

for: 1m

labels:

severity: warning

annotations:

summary: "Instance {{$labels.instance}} Used more than 80% of max connections limited"

description: "Used more than 80% of max connections limited.(current value is: {{$value}})"

- alert: InnoDB Log File size is too small

expr: mysql_global_variables_innodb_log_file_size < 16777216

for: 1m

labels:

severity: warning

annotations:

summary: "Instance {{$labels.instance}} InnoDB Log File size is too small"

description: "The InnoDB Log File size is possibly too small. Choosing a small InnoDB Log File size can have significant performance impacts.(current value is: {{$value}})"

- alert: Binary Log is disabled

expr: mysql_global_variables_log_bin != 1

for: 1m

labels:

severity: warning

annotations:

summary: "Instance {{$labels.instance}} Binary Log is disabled"

description: "Binary Log is disabled. This prohibits you to do Point in Time Recovery (PiTR).(current value is: {{$value}})"

- alert: IO thread stopped

expr: mysql_slave_status_slave_io_running != 1

for: 1m

labels:

severity: critical

annotations:

summary: "Instance {{$labels.instance}} IO thread stopped"

description: "IO thread has stopped. This is usually because it cannot connect to the Master any more.(current value is: {{$value}})"

- alert: SQL thread stopped

expr: mysql_slave_status_slave_sql_running != 1

for: 1m

labels:

severity: critical

annotations:

summary: "Instance {{$labels.instance}} SQL thread stopped"

description: "SQL thread has stopped. This is usually because it cannot apply a SQL statement received from the master.(current value is: {{$value}})"

- alert: Slave lagging behind Master

expr: mysql_slave_status_seconds_behind_master > 30

for: 1m

labels:

severity: warning

annotations:

summary: "Instance {{$labels.instance}} Slave lagging behind Master"

description: "Slave is lagging behind Master. Please check if Slave threads are running and if there are some performance issues!(current value is: {{$value}})"

#redis_exporter监控redis指标告警规则

root@prometheus:/data/prometheus-2.4.3/rules# vim redis_down_check.yml

groups:

- name: redis检测规则

rules:

- alert: redis存活检测

expr: redis_up{job="redis-exporter"} == 0

for: 1m

annotations:

description: "机器:{{ $labels.instance }} 所属 job:{{ $labels.job }} redis宕机,请检查!"

summary: "redis服务"

root@prometheus:/data/prometheus-2.4.3/rules# vim redis_rule_check.yml

groups:

- name: RedisStatsAlert

rules:

- alert: last create rdb failed

expr: redis_rdb_last_bgsave_status != 1

for: 1m

labels:

severity: warning

annotations:

summary: " Instance {{ $labels.instance }} rdb_last_bgsave_status "

description: "last create rdb failed"

- alert: Redis linked too many clients

expr: redis_connected_clients / redis_config_maxclients * 100 > 80

for: 1m

labels:

severity: warning

annotations:

summary: "Instance {{ $labels.instance }} Redis linked clients too many"

description: "Redis linked clients too many. This requires immediate action!"

- alert: master link status failed

expr: redis_master_link_up == 0

for: 1m

labels:

severity: warning

annotations:

summary: "Instance {{ $labels.instance }} link failed"

description: "redis_master_link=0 link failed"

- alert: last AOF failed

expr: redis_aof_last_bgrewrite_status != 1

for: 1m

labels:

severity: warning

annotations:

summary: "Instance {{ $labels.instance }} redis aof last rewrite duration sec"

description: "last AOF failed"

- alert: Redis Cluster State Wrong

expr: redis_cluster_state != 1

for: 1m

labels:

severity: warning

annotations:

summary: "Instance {{ $labels.instance }} redis cluster status wrong"

description: "Redis Cluster State Wrong"

#blackbox_exporter监控指标告警规则

root@prometheus:/data/prometheus-2.4.3/rules# vim ping_check.yml

groups:

- name: 机器网络存活检测

rules:

- alert: 网络检测

expr: probe_success{job="blackbox-ping"} == 0

for: 1m

annotations:

description: "机器:{{ $labels.instance }} 所属 job:{{ $labels.job }} 网络不通或者宕机超过1分钟,请检查!"

summary: "网络检测"

root@prometheus:/data/prometheus-2.4.3/rules# vim http_check.yml

groups:

- name: 服务检测规则

rules:

- alert: http服务检测

expr: probe_success{job="blackbox-http"} == 0

for: 1m

annotations:

description: "机器:{{ $labels.instance }} 所属 job:{{ $labels.job }} http状态码: {{ printf `probe_http_status_code{instance='%s'}` $labels.instance | query | first | value }} http检测失败,请检查!"

summary: "http检测"

root@prometheus:/data/prometheus-2.4.3/rules# vim tcp_check.yml

groups:

- name: 服务检测规则

rules:

- alert: tcp服务检测

expr: probe_success{job="blackbox-tcp"} == 0

for: 1m

annotations:

description: "机器:{{ $labels.instance }} 所属 job:{{ $labels.job }} tcp检测失败,请检查!"

summary: "tcp检测"

#cadvisor监控指标告警规则

root@prometheus:/data/prometheus-2.4.3/rules# vim container_down.yml

groups:

- name: 容器存活报警规则

rules:

- alert: DockerInstanceDown

expr: absent(container_last_seen{name="cacti"}) == 1

for: 1m

annotations:

description: "cacti容器:{{ $labels.name }} (所属主机{{ $labels.instance }}) 已经异常退出超过1分钟,请检查!"

summary: "容器:Instance {{ $labels.name }} 存活检测"

参考文档:

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值