Prometheus
架构图
总体架构简述图
本次案列简述图
Prometheus 安装
在 192.168.17.177 规划以下的目录
[root@localhost fs]# tree -d /opt/fs/
/opt/fs/
├── blackbox_export
├── jvm_export
├── mysql_export
├── node_export
├── prometheus
├── rabbitmq_export
└── redis_export
下载安装包
官网: https://prometheus.io/docs/prometheus/latest/getting_started/
安装的节点:192.168.17.177 规划的目录:/opt/fs/prometheus
cd /opt/fs
mkdir prometheus
cd prometheus
wget https://github.com/prometheus/prometheus/releases/download/v2.45.2/prometheus-2.45.2.linux-amd64.tar.gz
或者直接官网浏览器页面下载
# 解压文件
[root@localhost prometheus]# tar xvf prometheus-2.45.2.linux-amd64.tar.gz
# 重命名文件夹
[root@localhost prometheus]# mv prometheus-2.45.2.linux-amd64 prometheus
# 进入重命名后的文件夹
[root@localhost prometheus]# cd prometheus
#创建 data 目录
[root@localhost prometheus]# mkdir data
# 最后的完整的目录和文件
[root@localhost prometheus]# pwd
/opt/fs/prometheus/prometheus
[root@localhost prometheus]# ll
total 229500
drwxr-xr-x 2 1001 docker 38 Dec 19 22:31 console_libraries
drwxr-xr-x 2 1001 docker 173 Dec 19 22:31 consoles
drwxr-xr-x 4 root root 70 Dec 27 15:51 data
-rw-r--r-- 1 1001 docker 11357 Dec 19 22:31 LICENSE
-rw-r--r-- 1 1001 docker 3773 Dec 19 22:31 NOTICE
-rwxr-xr-x 1 1001 docker 120901527 Dec 19 22:09 prometheus
-rw-r--r-- 1 1001 docker 934 Dec 19 22:31 prometheus.yml
-rwxr-xr-x 1 1001 docker 114085841 Dec 19 22:10 promtool
[root@localhost prometheus]#
注册为服务
cat <<-"EOF" > /etc/systemd/system/prometheus.service
[Unit]
Description="prometheus"
Documentation=https://prometheus.io/
After=network.target
[Service]
Type=simple
ExecStart=/opt/fs/prometheus/prometheus/prometheus --config.file=/opt/fs/prometheus/prometheus/prometheus.yml --storage.tsdb.path=/opt/fs/prometheus/prometheus/data --web.enable-lifecycle
Restart=on-failure
RestartSec=5s
SuccessExitStatus=0
LimitNOFILE=655360
StandardOutput=syslog
StandardError=syslog
SyslogIdentifier=prometheus
[Install]
WantedBy=multi-user.target
EOF
启动prometheus 服务
systemctl daemon-reload
systemctl restart prometheus
systemctl status prometheus
检查prometheus 服务
# 查看端口 进程 日志
ss -ntlp |grep 9090
ps -ef |grep prometheus |grep -v grep
tail -100 /var/log/messages |grep prometheus
测试访问
开放端口
[root@localhost prometheus]# firewall-cmd --permanent --add-port=9090/tcp
[root@localhost prometheus]# firewall-cmd --reload
# 检查端口
[root@localhost prometheus]# firewall-cmd --list-all
public (active)
target: default
icmp-block-inversion: no
interfaces: ens192
sources:
services: ssh dhcpv6-client nfs mountd rpc-bind
ports: 9090/tcp # 已经开放
protocols:
masquerade: no
forward-ports:
source-ports:
icmp-blocks:
rich rules:
http://192.168.17.177:9090
配置 Prometheus数据源到Grafana
创建Dashboard
- 创建一个 folder(baseMonitor),然后创建一个dashboard,将dashboard添加到folder(baseMonitor)中,grafana的权限控制其实是基于 folder的
- 然后再 dashboard中创建多个 row, 每个row中是可以放多个 panel的,具体规划如下
下载所有所需的export
官网下载地址:https://prometheus.io/download/
所有Export 地址:https://prometheus.io/docs/instrumenting/exporters/
Tips :export 有些是官网提供的export,有些是来自第三方的export,如果是第三方的export那么对应的grafana也会有提供
将所有所需要的export提前下载,并上传至之前规划好的目录
[root@localhost fs]# pwd
/opt/fs
[root@localhost fs]#
[root@localhost fs]# tree ./
./
├── blackbox_export
│ └── blackbox_exporter-0.24.0.linux-amd64.tar.gz
├── jvm_export
│ └── jmx_prometheus_httpserver-0.19.0.jar
├── mysql_export
│ └── mysqld_exporter-0.15.1.linux-amd64.tar.gz
├── node_export
│ └── node_exporter-1.7.0.linux-amd64.tar.gz
├── rabbitmq_export
│ └── rabbitmq_exporter_1.0.0-RC19_linux_amd64.tar.gz
└── redis_export
└── redis_exporter-v1.20.0.linux-amd64.tar.gz
20 directories, 42 file
node_export安装配置
计划将节点 192.168.17.177 和 192.168.17.176 两个节点加入探测,node_export 是哪个节点需要探测就在哪个节点上部署 node_exporter
192.168.17.177探测
方式一下载
官网下载地址:https://prometheus.io/download/
所有Export 地址:https://prometheus.io/docs/instrumenting/exporters/
Tips :export 有些是官网提供的export,有些是来自第三方的export,如果是第三方的export那么对应的grafana也会有提供
方式二下载
wget -O /opt/tgzs/node_exporter-1.1.2.linux-amd64.tar.gz https://github.com/prometheus/node_exporter/releases/download/v1.1.2/node_exporter-1.1.2.linux-amd64.tar.gz
上传到 17.177规划好的目录,然后解压
[root@localhost node_export]# pwd
/opt/fs/node_export
[root@localhost node_export]# tar xvf node_exporter-1.7.0.linux-amd64.tar.gz # 解压包
node_exporter-1.7.0.linux-amd64/
node_exporter-1.7.0.linux-amd64/LICENSE
node_exporter-1.7.0.linux-amd64/node_exporter
node_exporter-1.7.0.linux-amd64/NOTICE
[root@localhost node_export]# ll
total 10176
drwxr-xr-x 2 1001 1002 56 Nov 13 08:03 node_exporter-1.7.0.linux-amd64
-rw-r--r-- 1 root root 10419253 Dec 28 10:20 node_exporter-1.7.0.linux-amd64.tar.gz
[root@localhost node_export]# mv node_exporter-1.7.0.linux-amd64 node_exporter # 重命名
[root@localhost node_export]# ll
total 10176
drwxr-xr-x 2 1001 1002 56 Nov 13 08:03 node_exporter
-rw-r--r-- 1 root root 10419253 Dec 28 10:20 node_exporter-1.7.0.linux-amd64.tar.gz
[root@localhost node_export]#
直接启动
# 可以直接启动 --web.config.file=web-config.yml 是指定的配置文件,也可以省略该启动参数,则会默认启动
./node_exporter --web.config.file=web-config.yml
注册服务启动-已默认的方式启动 端口是9100
cat <<EOF> /etc/systemd/system/node_exporter.service
[Unit]
Description=Node Exporter
Wants=network-online.target
After=network-online.target
[Service]
ExecStart=/opt/fs/node_export/node_exporter/node_exporter
StandardOutput=file:/opt/fs/node_export/node_exporter/logs/logfile.log
StandardError=file:/opt/fs/node_export/node_exporter/logs/logfile.log
SyslogIdentifier=node_exporter
[Install]
WantedBy=default.target
EOF
注册启动步骤如下:
[root@localhost node_exporter]# mkdir logs # 创建服务日志目录
[root@localhost node_exporter]# cd logs/
[root@localhost logs]# pwd
/opt/fs/node_export/node_exporter/logs
[root@localhost logs]# cat <<EOF> /etc/systemd/system/node_exporter.service
> [Unit]
> Description=Node Exporter
> Wants=network-online.target
> After=network-online.target
>
> [Service]
> ExecStart=/opt/fs/node_export/node_exporter/node_exporter
> StandardOutput=file:/opt/fs/node_export/node_exporter/logs/logfile.log
> StandardError=file:/opt/fs/node_export/node_exporter/logs/logfile.log
> SyslogIdentifier=node_exporter
> [Install]
> WantedBy=default.target
>
> EOF
[root@localhost logs]#
[root@localhost logs]# systemctl daemon-reload # reload 服务
[root@localhost logs]# systemctl start node_exporter.service # 启动服务
[root@localhost logs]# netstat -tlnp
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name
tcp 0 0 0.0.0.0:111 0.0.0.0:* LISTEN 852/rpcbind
tcp 0 0 0.0.0.0:6099 0.0.0.0:* LISTEN 2057/./cdap_file_ba
tcp 0 0 192.168.122.1:53 0.0.0.0:* LISTEN 3024/dnsmasq
tcp 0 0 0.0.0.0:22 0.0.0.0:* LISTEN 1443/sshd
tcp 0 0 127.0.0.1:631 0.0.0.0:* LISTEN 1432/cupsd
tcp 0 0 127.0.0.1:25 0.0.0.0:* LISTEN 1554/master
tcp 0 0 127.0.0.1:6010 0.0.0.0:* LISTEN 24132/sshd: root@pt
tcp6 0 0 :::9090 :::* LISTEN 7118/prometheus
tcp6 0 0 :::9100 :::* LISTEN 12478/node_exporter
tcp6 0 0 :::111 :::* LISTEN 852/rpcbind
tcp6 0 0 :::22 :::* LISTEN 1443/sshd
tcp6 0 0 ::1:631 :::* LISTEN 1432/cupsd
tcp6 0 0 ::1:25 :::* LISTEN 1554/master
tcp6 0 0 ::1:6010 :::* LISTEN 24132/sshd: root@pt
[root@localhost logs]# ll
# 开放端口
[root@localhost node_exporter]# firewall-cmd --permanent --add-port=9100/tcp
success
[root@localhost node_exporter]# firewall-cmd --reload
success
[root@localhost node_exporter]#
访问页面测试
192.168.17.176探测
重复 192.168.17.177的步骤,注意修改对应的路径即可
将node_export集成到Prometheus
编辑 prometheus.yml
在安装Prometheus的节点17.177编辑prometheus.yml新增以下job配置
- job_name: node_exporter
honor_timestamps: true
scrape_interval: 10s
scrape_timeout: 10s
metrics_path: /metrics
scheme: http
follow_redirects: true
static_configs:
- targets: ["192.168.17.177:9100"]
labels:
nodeName: node-177
- targets: ["192.168.17.176:9100"]
labels:
nodeName: node-176
完整配置
# my global config
global:
scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
# scrape_timeout is set to the global default (10s).
# Alertmanager configuration
alerting:
alertmanagers:
- static_configs:
- targets:
# - alertmanager:9093
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
# - "first_rules.yml"
# - "second_rules.yml"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: "prometheus"
# metrics_path defaults to '/metrics'
# scheme defaults to 'http'.
static_configs:
- targets: ["localhost:9090"]
# node_export ---------------------------------------------------------------------------
- job_name: node_exporter
honor_timestamps: true
scrape_interval: 10s
scrape_timeout: 10s
metrics_path: /metrics
scheme: http
follow_redirects: true
static_configs:
- targets: ["192.168.17.177:9100"]
labels:
nodeName: node-177
- targets: ["192.168.17.176:9100"]
labels:
nodeName: node-176
热更新Prometheus的配置
[root@localhost prometheus]# curl -vvv -X POST localhost:9090/-/reload
将Prometheus集成node_export后的数据集成到Grafana
Dashboard 插件市场:https://grafana.com/grafana/dashboards/
在插件市场找到 dashboard的模版,我这里需用的模版id=9276
mysql_export安装配置
官网:https://github.com/prometheus/mysqld_exporter
流程大致如下:
1、数据库我们装在了 192.168.52.198和199上,创建一个拉取metrics的账号 exporter/123456
2、在192.168.17.177上安装 mysql_export, 并在mysql_export的启动配置文件my.cnf中配置数据库的 user和password等信息
3、在Prometheus.yml中配置mysql_export的job,mysql host,port,以及mysql_export的相关信息
4、在grafana中配置Prometheus拉取到的metrics信息dashboard显示 id=7362
创建账号
CREATE USER 'exporter'@'%' IDENTIFIED BY '123456';
GRANT PROCESS, REPLICATION CLIENT, SELECT ON *.* TO 'exporter'@'%';
FLUSH PRIVILEGES;
以上操作在 192.168.52.199上也创建一个相同的账号
安装配置mysql_export
[root@localhost fs]# cd mysql_export/
[root@localhost mysql_export]# ll
total 8096
-rw-r--r-- 1 root root 8287769 Dec 28 10:20 mysqld_exporter-0.15.1.linux-amd64.tar.gz
# 解压 mysql_export tar 包
[root@localhost mysql_export]# tar zxvf mysqld_exporter-0.15.1.linux-amd64.tar.gz
mysqld_exporter-0.15.1.linux-amd64/
mysqld_exporter-0.15.1.linux-amd64/LICENSE
mysqld_exporter-0.15.1.linux-amd64/mysqld_exporter
mysqld_exporter-0.15.1.linux-amd64/NOTICE
# 重命名目录
[root@localhost mysql_export]# mv mysqld_exporter-0.15.1.linux-amd64 mysqld_exporter
[root@localhost mysql_export]# cd mysqld_exporter/
[root@localhost mysqld_exporter]# ll
total 15460
-rw-r--r-- 1 1001 1002 11357 Dec 12 15:56 LICENSE
-rwxr-xr-x 1 1001 1002 15811157 Dec 12 15:55 mysqld_exporter
-rw-r--r-- 1 1001 1002 65 Dec 12 15:56 NOTICE
[root@localhost mysqld_exporter]# vim my.cnf 创建数据库连接配置文件,具体内容如下
-rw-r--r-- 1 1001 1002 11357 Dec 12 15:56 LICENSE
-rw-r--r-- 1 root root 87 Dec 28 19:33 my.cnf
-rwxr-xr-x 1 1001 1002 15811157 Dec 12 15:55 mysqld_exporter
-rw-r--r-- 1 1001 1002 65 Dec 12 15:56 NOTICE
my.cnf
[client]
user=exporter
password=123456
[client.servers]
user=exporter
password=123456
注册并启动mysql_export服务
[root@localhost mysqld_exporter]# pwd
/opt/fs/mysql_export/mysqld_exporter
[root@localhost mysqld_exporter]# cat <<-"EOF" > /etc/systemd/system/mysql_export.service
[Unit]
Description="mysql_export"
After=network.target
[Service]
Type=simple
ExecStart=/opt/fs/mysql_export/mysqld_exporter/mysqld_exporter --config.my-cnf=/opt/fs/mysql_export/mysqld_exporter/my.cnf
Restart=on-failure
RestartSec=5s
SuccessExitStatus=0
LimitNOFILE=655360
StandardOutput=syslog
StandardError=syslog
SyslogIdentifier=mysql_export
[Install]
WantedBy=multi-user.target
> EOF
[root@localhost mysqld_exporter]# systemctl daemon-reload
[root@localhost mysqld_exporter]# systemctl start mysql_export.service
[root@localhost mysqld_exporter]# netstat -tlnp
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name
tcp 0 0 0.0.0.0:111 0.0.0.0:* LISTEN 852/rpcbind
tcp 0 0 0.0.0.0:6099 0.0.0.0:* LISTEN 2057/./cdap_file_ba
tcp 0 0 192.168.122.1:53 0.0.0.0:* LISTEN 3024/dnsmasq
tcp 0 0 0.0.0.0:22 0.0.0.0:* LISTEN 1443/sshd
tcp 0 0 127.0.0.1:631 0.0.0.0:* LISTEN 1432/cupsd
tcp 0 0 127.0.0.1:25 0.0.0.0:* LISTEN 1554/master
tcp 0 0 127.0.0.1:6010 0.0.0.0:* LISTEN 24132/sshd: root@pt
tcp6 0 0 :::9090 :::* LISTEN 7118/prometheus
tcp6 0 0 :::9100 :::* LISTEN 14084/node_exporter
tcp6 0 0 :::111 :::* LISTEN 852/rpcbind
tcp6 0 0 :::9104 :::* LISTEN 20929/mysqld_export
完整的/etc/systemd/system/mysql_export.service
[Unit]
Description="mysql_export"
After=network.target
[Service]
Type=simple
ExecStart=/opt/fs/mysql_export/mysqld_exporter/mysqld_exporter --config.my-cnf=/opt/fs/mysql_export/mysqld_exporter/my.cnf
Restart=on-failure
RestartSec=5s
SuccessExitStatus=0
LimitNOFILE=655360
StandardOutput=syslog
StandardError=syslog
SyslogIdentifier=mysql_export
[Install]
WantedBy=multi-user.target
页面检测
因为会在192.168.17.177上部署多个export 方便起见直接关闭防火墙
将mysql_export集成到Prometheus
编辑prometheus.yml
# my global config
global:
scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
# scrape_timeout is set to the global default (10s).
# Alertmanager configuration
alerting:
alertmanagers:
- static_configs:
- targets:
# - alertmanager:9093
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
# - "first_rules.yml"
# - "second_rules.yml"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: "prometheus"
# metrics_path defaults to '/metrics'
# scheme defaults to 'http'.
static_configs:
- targets: ["localhost:9090"]
# node_export ---------------------------------------------------------------------------
- job_name: node_exporter
honor_timestamps: true
scrape_interval: 10s
scrape_timeout: 10s
metrics_path: /metrics
scheme: http
follow_redirects: true
static_configs:
- targets: ["192.168.17.177:9100"]
labels:
nodeName: node-177
- targets: ["192.168.17.176:9100"]
labels:
nodeName: node-176
# mysql_export --------------------------------------------------------------------------
- job_name: Mariadb
params:
auth_module: [client.servers]
static_configs:
- targets:
- 192.168.52.198:3306
labels:
dbName: mariadb-198
- targets:
- 192.168.52.199:3306
labels:
dbName: mariadb-199
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
- source_labels: [__param_target]
target_label: instance
- target_label: __address__
# mysql_export 的 metrics 的 host port
replacement: 192.168.17.177:9104
热更新Prometheus的配置
[root@localhost prometheus]# vim prometheus.yml
[root@localhost prometheus]# curl -vvv -X POST localhost:9090/-/reload
* About to connect() to localhost port 9090 (#0)
* Trying ::1...
* Connected to localhost (::1) port 9090 (#0)
> POST /-/reload HTTP/1.1
> User-Agent: curl/7.29.0
> Host: localhost:9090
> Accept: */*
>
< HTTP/1.1 200 OK
< Date: Thu, 28 Dec 2023 12:17:33 GMT
< Content-Length: 0
<
* Connection #0 to host localhost left intact
[root@localhost prometheus]#
验证集成后的Prometheus
将Prometheus集成mysql_export的数据集成到Grafana
结果展示
redis_export安装配置
官网:https://prometheus.io/docs/instrumenting/exporters/
https://github.com/oliver006/redis_exporter
配置并启动服务
切换到下载的tar包目录
[root@localhost redis_export]# pwd
/opt/fs/redis_export
[root@localhost redis_export]# ll
total 3072
-rw-r--r-- 1 root root 3141985 Dec 28 13:41 redis_exporter-v1.20.0.linux-amd64.tar.gz
# 解压tar
[root@localhost redis_export]# tar zxvf redis_exporter-v1.20.0.linux-amd64.tar.gz
redis_exporter-v1.20.0.linux-amd64/
redis_exporter-v1.20.0.linux-amd64/LICENSE
redis_exporter-v1.20.0.linux-amd64/README.md
redis_exporter-v1.20.0.linux-amd64/redis_exporter
# 重命名
[root@localhost redis_export]# mv redis_exporter-v1.20.0.linux-amd64 redis_exporter
[root@localhost redis_export]# cd redis_exporter/
[root@localhost redis_exporter]# ll
total 8264
-rw-r--r-- 1 root root 1063 Mar 22 2021 LICENSE
-rw-r--r-- 1 root root 24053 Mar 22 2021 README.md
-rwxr-xr-x 1 root root 8433664 Mar 22 2021 redis_exporter
[root@localhost redis_exporter]# pwd
/opt/fs/redis_export/redis_exporter
# 注册服务
[root@localhost redis_exporter]# cat <<-"EOF" > /etc/systemd/system/redis_exporter.service
> [Unit]
> Description="redis_export"
> After=network.target
>
> [Service]
> Type=simple
> ExecStart=/opt/fs/redis_export/redis_exporter/redis_exporter
> Restart=on-failure
> RestartSec=5s
> SuccessExitStatus=0
> LimitNOFILE=655360
> StandardOutput=syslog
> StandardError=syslog
> SyslogIdentifier=redis_export
>
>
> [Install]
> WantedBy=multi-user.target
> EOF
[root@localhost redis_exporter]# systemctl daemon-reload
[root@localhost redis_exporter]# systemctl start redis_exporter.service
[root@localhost redis_exporter]# netstat -tlnp
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name
tcp 0 0 0.0.0.0:111 0.0.0.0:* LISTEN 852/rpcbind
tcp 0 0 0.0.0.0:6099 0.0.0.0:* LISTEN 2057/./cdap_file_ba
tcp 0 0 192.168.122.1:53 0.0.0.0:* LISTEN 3024/dnsmasq
tcp 0 0 0.0.0.0:22 0.0.0.0:* LISTEN 1443/sshd
tcp 0 0 127.0.0.1:631 0.0.0.0:* LISTEN 1432/cupsd
tcp 0 0 127.0.0.1:25 0.0.0.0:* LISTEN 1554/master
tcp 0 0 127.0.0.1:6010 0.0.0.0:* LISTEN 24132/sshd: root@pt
tcp 0 0 127.0.0.1:6011 0.0.0.0:* LISTEN 24227/sshd: root@pt
tcp6 0 0 :::9121 :::* LISTEN 29327/redis_exporte
tcp6 0 0 :::9090 :::* LISTEN 7118/prometheus
tcp6 0 0 :::9100 :::* LISTEN 14084/node_exporter
tcp6 0 0 :::111 :::* LISTEN 852/rpcbind
tcp6 0 0 :::9104 :::* LISTEN 20929/mysqld_export
tcp6 0 0 :::22 :::* LISTEN 1443/sshd
tcp6 0 0 ::1:631 :::* LISTEN 1432/cupsd
tcp6 0 0 ::1:25 :::* LISTEN 1554/master
tcp6 0 0 ::1:6010 :::* LISTEN 24132/sshd: root@pt
tcp6 0 0 ::1:6011 :::* LISTEN 24227/sshd: root@pt
[root@localhost redis_exporter]#
页面测试
将redis_exporter集成到Prometheus
编辑prometheus.yml
# my global config
global:
scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
# scrape_timeout is set to the global default (10s).
# Alertmanager configuration
alerting:
alertmanagers:
- static_configs:
- targets:
# - alertmanager:9093
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
# - "first_rules.yml"
# - "second_rules.yml"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: "prometheus"
# metrics_path defaults to '/metrics'
# scheme defaults to 'http'.
static_configs:
- targets: ["localhost:9090"]
# node_export ---------------------------------------------------------------------------
- job_name: node_exporter
honor_timestamps: true
scrape_interval: 10s
scrape_timeout: 10s
metrics_path: /metrics
scheme: http
follow_redirects: true
static_configs:
- targets: ["192.168.17.177:9100"]
labels:
nodeName: node-177
- targets: ["192.168.17.176:9100"]
labels:
nodeName: node-176
# mysql_export --------------------------------------------------------------------------
- job_name: Mariadb
params:
auth_module: [client.servers]
static_configs:
- targets:
- 192.168.52.198:3306
labels:
dbName: mariadb-198
- targets:
- 192.168.52.199:3306
labels:
dbName: mariadb-199
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
- source_labels: [__param_target]
target_label: instance
- target_label: __address__
replacement: 192.168.17.177:9104
# redis_exporter --------------------------------------------------------------------------
- job_name: 'redis_exporter'
static_configs:
- targets:
- 192.168.52.198:6379
labels:
dbName: redis-198
- targets:
- 192.168.52.199:6379
labels:
dbName: redis-199
metrics_path: /scrape
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
- source_labels: [__param_target]
target_label: instance
- target_label: __address__
replacement: 192.168.17.177:9121
热更新Prometheus的配置
[root@localhost prometheus]# curl -vvv -X POST localhost:9090/-/reload
* About to connect() to localhost port 9090 (#0)
* Trying ::1...
* Connected to localhost (::1) port 9090 (#0)
> POST /-/reload HTTP/1.1
> User-Agent: curl/7.29.0
> Host: localhost:9090
> Accept: */*
>
< HTTP/1.1 200 OK
< Date: Fri, 29 Dec 2023 03:36:42 GMT
< Content-Length: 0
<
* Connection #0 to host localhost left intact
将Prometheus集成redis_exporter的数据集成到Grafana
rabbitmq_export安装配置
官网:https://prometheus.io/docs/instrumenting/exporters/
https://github.com/kbudde/rabbitmq_exporter
配置并启动服务
[root@localhost rabbitmq_export]# pwd
/opt/fs/rabbitmq_export
# 解压
[root@localhost rabbitmq_export]# tar zxvf rabbitmq_exporter_1.0.0-RC19_linux_amd64.tar.gz
LICENSE
README.md
rabbitmq_exporter
[root@localhost rabbitmq_export]# ll
total 20200
-rw-r--r-- 1 1001 docker 1074 Jul 22 2022 LICENSE
-rwxr-xr-x 1 1001 docker 13728776 Jul 22 2022 rabbitmq_exporter
-rw-r--r-- 1 root root 6927992 Dec 28 13:41 rabbitmq_exporter_1.0.0-RC19_linux_amd64.tar.gz
-rw-r--r-- 1 1001 docker 16840 Jul 22 2022 README.md
# 编辑 配置文件
[root@localhost rabbitmq_export]# vim myconf.json
myconf.json
{
"rabbit_url": "http://192.168.52.198:15672",
"rabbit_user": "admin",
"rabbit_pass": "CloudSure",
"publish_port": "9419",
"publish_addr": "",
"output_format": "TTY",
"ca_file": "ca.pem",
"cert_file": "client-cert.pem",
"key_file": "client-key.pem",
"insecure_skip_verify": false,
"exlude_metrics": [],
"include_exchanges": ".*",
"skip_exchanges": "^$",
"include_queues": ".*",
"skip_queues": "^$",
"skip_vhost": "^$",
"include_vhost": ".*",
"rabbit_capabilities": "no_sort,bert",
"aliveness_vhost": "/",
"enabled_exporters": [
"exchange",
"node",
"overview",
"queue",
"aliveness"
],
"timeout": 30,
"max_queues": 0
}
注册服务
cat <<-"EOF" > /etc/systemd/system/rabbitmq_exporter.service
[Unit]
Description="rabbitmq_exporter"
After=network.target
[Service]
Type=simple
ExecStart=/opt/fs/rabbitmq_export/rabbitmq_exporter -config-file /opt/fs/rabbitmq_export/myconf.json
Restart=on-failure
RestartSec=5s
SuccessExitStatus=0
LimitNOFILE=655360
StandardOutput=syslog
StandardError=syslog
SyslogIdentifier=rabbitmq_exporter
[Install]
WantedBy=multi-user.target
EOF
启动服务
pwd
/opt/fs/rabbitmq_export
[root@localhost rabbitmq_export]# cat <<-"EOF" > /etc/systemd/system/rabbitmq_exporter.service
> [Unit]
> Description="rabbitmq_exporter"
> After=network.target
>
> [Service]
> Type=simple
> ExecStart=/opt/fs/rabbitmq_export/rabbitmq_exporter -config-file /opt/fs/rabbitmq_export/myconf.json
> Restart=on-failure
> RestartSec=5s
> SuccessExitStatus=0
> LimitNOFILE=655360
> StandardOutput=syslog
> StandardError=syslog
> SyslogIdentifier=rabbitmq_exporter
>
>
> [Install]
> WantedBy=multi-user.target
> EOF
[root@localhost rabbitmq_export]# systemctl daemon-reload
[root@localhost rabbitmq_export]# systemctl start rabbitmq_exporter.service
[root@localhost rabbitmq_export]#
[root@localhost rabbitmq_export]#
[root@localhost rabbitmq_export]#
[root@localhost rabbitmq_export]# netstat -tlnp
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name
tcp 0 0 0.0.0.0:111 0.0.0.0:* LISTEN 852/rpcbind
tcp 0 0 0.0.0.0:6099 0.0.0.0:* LISTEN 2057/./cdap_file_ba
tcp 0 0 192.168.122.1:53 0.0.0.0:* LISTEN 3024/dnsmasq
tcp 0 0 0.0.0.0:22 0.0.0.0:* LISTEN 1443/sshd
tcp 0 0 127.0.0.1:631 0.0.0.0:* LISTEN 1432/cupsd
tcp 0 0 127.0.0.1:25 0.0.0.0:* LISTEN 1554/master
tcp 0 0 127.0.0.1:6010 0.0.0.0:* LISTEN 24132/sshd: root@pt
tcp 0 0 127.0.0.1:6011 0.0.0.0:* LISTEN 24227/sshd: root@pt
tcp6 0 0 :::9121 :::* LISTEN 29327/redis_exporte
tcp6 0 0 :::9090 :::* LISTEN 7118/prometheus
tcp6 0 0 :::9419 :::* LISTEN 30896/rabbitmq_expo
tcp6 0 0 :::9100 :::* LISTEN 14084/node_exporter
tcp6 0 0 :::111 :::* LISTEN 852/rpcbind
tcp6 0 0 :::9104 :::* LISTEN 20929/mysqld_export
tcp6 0 0 :::22 :::* LISTEN 1443/sshd
tcp6 0 0 ::1:631 :::* LISTEN 1432/cupsd
tcp6 0 0 ::1:25 :::* LISTEN 1554/master
tcp6 0 0 ::1:6010 :::* LISTEN 24132/sshd: root@pt
tcp6 0 0 ::1:6011 :::* LISTEN 24227/sshd: root@pt
页面测试
将Rabbitmq_exporter 集成到Prometheus
编辑prometheus.yml
# my global config
global:
scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
# scrape_timeout is set to the global default (10s).
# Alertmanager configuration
alerting:
alertmanagers:
- static_configs:
- targets:
# - alertmanager:9093
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
# - "first_rules.yml"
# - "second_rules.yml"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: "prometheus"
# metrics_path defaults to '/metrics'
# scheme defaults to 'http'.
static_configs:
- targets: ["localhost:9090"]
# node_export ---------------------------------------------------------------------------
- job_name: node_exporter
honor_timestamps: true
scrape_interval: 10s
scrape_timeout: 10s
metrics_path: /metrics
scheme: http
follow_redirects: true
static_configs:
- targets: ["192.168.17.177:9100"]
labels:
nodeName: node-177
- targets: ["192.168.17.176:9100"]
labels:
nodeName: node-176
# mysql_export --------------------------------------------------------------------------
- job_name: Mariadb
params:
auth_module: [client.servers]
static_configs:
- targets:
- 192.168.52.198:3306
labels:
dbName: mariadb-198
- targets:
- 192.168.52.199:3306
labels:
dbName: mariadb-199
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
- source_labels: [__param_target]
target_label: instance
- target_label: __address__
replacement: 192.168.17.177:9104
# redis_exporter --------------------------------------------------------------------------
- job_name: 'redis_exporter'
static_configs:
- targets:
- 192.168.52.198:6379
labels:
dbName: redis-198
- targets:
- 192.168.52.199:6379
labels:
dbName: redis-199
metrics_path: /scrape
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
- source_labels: [__param_target]
target_label: instance
- target_label: __address__
replacement: 192.168.17.177:9121
# rabbitmq_export ---------------------------------------------------------------------
- job_name: 'rabbitmq_exporter'
static_configs:
- targets: ["192.168.17.177:9419"]
labels:
dbName: mq-198
热更新Prometheus的配置
[root@localhost prometheus]# curl -vvv -X POST localhost:9090/-/reload
* About to connect() to localhost port 9090 (#0)
* Trying ::1...
* Connected to localhost (::1) port 9090 (#0)
> POST /-/reload HTTP/1.1
> User-Agent: curl/7.29.0
> Host: localhost:9090
> Accept: */*
>
< HTTP/1.1 200 OK
< Date: Fri, 29 Dec 2023 06:23:08 GMT
< Content-Length: 0
<
* Connection #0 to host localhost left intact
[root@localhost prometheus]#
在Prometheus的控制台查看target
将Prometheus集成rabbitmq_exporter的数据集成到Grafana
https://grafana.com/grafana/dashboards/4279
jmx_export安装配置
官网:https://prometheus.io/docs/instrumenting/exporters/
https://github.com/prometheus/jmx_exporter
JMX Exporter是一个工具,它利用Java的JMX机制读取JVM运行时的一些监控数据,然后将其转换为Prometheus可以理解的metrics格式,以便让Prometheus进行监控采集。JMX代表Java Management Extensions,它是管理Java的一种扩展框架,而JMX Exporter就是基于此框架来读取JVM的运行时状态。
JMX Exporter提供了两种使用方式:
第一种:是启动独立进程, 下载一个 - jmx_prometheus_httpserver-xx.xx.xx.jar 然后单独部署
第二种:下载一个 - jmx_prometheus_javaagent-0.20.0.jar的代理程序,作为被监控应用程序的代理程序启动,需要和被监控的服务部署在同一个节点
流程简述:
1、下载 jmx_prometheus_javaagent-0.19.0.jar并上传到 api-server的规划目录下(我们的服务api-server和core-server是部署在192.168.52.198上,所以将jmx_prometheus_javaagent-0.19.0.jar上传到 /opt/fs/jmx 下)
2、新建配置文件config.yaml (官网有提供)
3、将代理程序jmx_prometheus_javaagent-0.19.0.jar配置到Tomcat的环境中,便于Tomcat启动的时候也启动代理程序
4、配置Prometheus.yml并热加载配置
5、配置 Grafana 可视化显示
上传代理程序
[root@localhost jmx]# pwd
/opt/fs/jmx
[root@localhost jmx]# ll
总用量 556
# 已经代理程序上传到应用服务所在的节点
-rw-r--r--. 1 root root 568919 12月 29 19:56 jmx_prometheus_javaagent-0.19.0.jar
[root@localhost jmx]#
新建 config.yaml
lowercaseOutputLabelNames: true
lowercaseOutputName: true
whitelistObjectNames: ["java.lang:type=OperatingSystem", "Catalina:*"]
blacklistObjectNames: []
rules:
- pattern: 'Catalina<type=Server><>serverInfo: (.+)'
name: tomcat_serverinfo
value: 1
labels:
serverInfo: "$1"
type: COUNTER
- pattern: 'Catalina<type=GlobalRequestProcessor, name=\"(\w+-\w+)-(\d+)\"><>(\w+):'
name: tomcat_$3_total
labels:
port: "$2"
protocol: "$1"
help: Tomcat global $3
type: COUNTER
- pattern: 'Catalina<j2eeType=Servlet, WebModule=//([-a-zA-Z0-9+&@#/%?=~_|!:.,;]*[-a-zA-Z0-9+&@#/%=~_|]), name=([-a-zA-Z0-9+/$%~_-|!.]*), J2EEApplication=none, J2EEServer=none><>(requestCount|processingTime|errorCount):'
name: tomcat_servlet_$3_total
labels:
module: "$1"
servlet: "$2"
help: Tomcat servlet $3 total
type: COUNTER
- pattern: 'Catalina<type=ThreadPool, name="(\w+-\w+)-(\d+)"><>(currentThreadCount|currentThreadsBusy|keepAliveCount|connectionCount|acceptCount|acceptorThreadCount|pollerThreadCount|maxThreads|minSpareThreads):'
name: tomcat_threadpool_$3
labels:
port: "$2"
protocol: "$1"
help: Tomcat threadpool $3
type: GAUGE
- pattern: 'Catalina<type=Manager, host=([-a-zA-Z0-9+&@#/%?=~_|!:.,;]*[-a-zA-Z0-9+&@#/%=~_|]), context=([-a-zA-Z0-9+/$%~_-|!.]*)><>(processingTime|sessionCounter|rejectedSessions|expiredSessions):'
name: tomcat_session_$3_total
labels:
context: "$2"
host: "$1"
help: Tomcat session $3 total
type: COUNTER
将代理程序配置到Tomcat的环境中
[root@localhost bin]# pwd
/usr/local/cdap/7.0/server/web/api_server/tomcat/bin
[root@localhost bin]# cat setenv.sh
# 使用的jdk目录
export JAVA_HOME=/usr/local/cdap/7.0/server/web/jdk
# 使用的Tomcat目录
export CATALINA_HOME=/usr/local/cdap/7.0/server/web/api_server/tomcat
# JAVA_OPTS参数需要CATALINA_PID参数
export CATALINA_PID="$CATALINA_HOME/tomcat.pid"
# Tomcat的JVM参数设置
export JAVA_OPTS="-server -Xmx4G -Xms4G -Xss256k -XX:MaxDirectMemorySize=512M -XX:+UseG1GC -XX:MaxGCPauseMillis=50 -XX:G1ReservePercent=15 -XX:InitiatingHeapOccupancyPercent=40 -XX:MaxInlineLevel=15 -Xlog:gc:$CATALINA_HOME/logs/gc.log -javaagent:/opt/fs/jmx/jmx_prometheus_javaagent-0.19.0.jar=38081:/opt/fs/jmx/config.yaml"
[root@localhost bin]#
启动tomcat服务
查看服务状态
[root@localhost bin]# cs_boot_service status apiserver
mysql : Running
redis : Running
nginx : Running
rabbitmq : Running
zookeeper : Running
apiserver : Running
coreserver : Running
gateway : Running
vnc : Running
# 暂停服务
[root@localhost bin]# cs_boot_service stop apiserver
[root@localhost bin]# cs_boot_service status apiserver
mysql : Running
redis : Running
nginx : Running
rabbitmq : Running
zookeeper : Running
apiserver : Stopped
coreserver : Running
gateway : Running
vnc : Running
# 重新启动已停止的服务
[root@localhost bin]# cs_boot_service start apiserver
[root@localhost bin]# ps -ef |grep tomcat
root 2080074 1 5 16:49 ? 00:11:56 /usr/local/cdap/7.0/server/web/jdk/bin/java -Djava.util.logging.config.file=/usr/local/cdap/7.0/server/web/core_server/tomcat/conf/logging.properties -Djava.util.logging.manager=org.apache.juli.ClassLoaderLogManager -server -Xmx4G -Xms4G -Xss256k -XX:MaxDirectMemorySize=512M -XX:+UseG1GC -XX:MaxGCPauseMillis=50 -XX:G1ReservePercent=15 -XX:InitiatingHeapOccupancyPercent=40 -XX:MaxInlineLevel=15 -Xlog:gc:/usr/local/cdap/7.0/server/web/core_server/tomcat/logs/gc.log -Djdk.tls.ephemeralDHKeySize=2048 -Djava.protocol.handler.pkgs=org.apache.catalina.webresources -Dorg.apache.catalina.security.SecurityListener.UMASK=0027 -Dignore.endorsed.dirs= -classpath /usr/local/cdap/7.0/server/web/core_server/tomcat/bin/bootstrap.jar:/usr/local/cdap/7.0/server/web/core_server/tomcat/bin/tomcat-juli.jar -Dcatalina.base=/usr/local/cdap/7.0/server/web/core_server/tomcat -Dcatalina.home=/usr/local/cdap/7.0/server/web/core_server/tomcat -Djava.io.tmpdir=/usr/local/cdap/7.0/server/web/core_server/tomcat/temp org.apache.catalina.startup.Bootstrap start
root 2195638 1 99 20:09 pts/5 00:00:44 /usr/local/cdap/7.0/server/web/jdk/bin/java -Djava.util.logging.config.file=/usr/local/cdap/7.0/server/web/api_server/tomcat/conf/logging.properties -Djava.util.logging.manager=org.apache.juli.ClassLoaderLogManager -server -Xmx4G -Xms4G -Xss256k -XX:MaxDirectMemorySize=512M -XX:+UseG1GC -XX:MaxGCPauseMillis=50 -XX:G1ReservePercent=15 -XX:InitiatingHeapOccupancyPercent=40 -XX:MaxInlineLevel=15 -Xlog:gc:/usr/local/cdap/7.0/server/web/api_server/tomcat/logs/gc.log -javaagent:/opt/fs/jmx/jmx_prometheus_javaagent-0.19.0.jar=38081:/opt/fs/jmx/config.yaml -Djdk.tls.ephemeralDHKeySize=2048 -Djava.protocol.handler.pkgs=org.apache.catalina.webresources -Dorg.apache.catalina.security.SecurityListener.UMASK=0027 -Dignore.endorsed.dirs= -classpath /usr/local/cdap/7.0/server/web/api_server/tomcat/bin/bootstrap.jar:/usr/local/cdap/7.0/server/web/api_server/tomcat/bin/tomcat-juli.jar -Dcatalina.base=/usr/local/cdap/7.0/server/web/api_server/tomcat -Dcatalina.home=/usr/local/cdap/7.0/server/web/api_server/tomcat -Djava.io.tmpdir=/usr/local/cdap/7.0/server/web/api_server/tomcat/temp org.apache.catalina.startup.Bootstrap start
root 2195861 2191605 0 20:10 pts/5 00:00:00 grep --color=auto tomcat
# 已经成功启动
[root@localhost bin]# netstat -tlnp | grep 38081
tcp6 0 0 :::38081 :::* LISTEN 2195638/java
[root@localhost bin]# cd
测试
# 开放端口
[root@localhost bin]# firewall-cmd --permanent --add-port=38081/tcp
success
[root@localhost bin]# firewall-cmd --reload
将jmx_export集成到Prometheus
编辑prometheus.yml
................................
target_label: instance
- target_label: __address__
replacement: 192.168.17.177:9121
# rabbitmq_export ---------------------------------------------------------------------
- job_name: 'rabbitmq_exporter'
static_configs:
- targets: ["192.168.17.177:9419"]
labels:
dbName: mq-198
# jmx_export ---------------------------------------------------------------------------
- job_name: 'tomcat-198'
static_configs:
- targets: ["192.168.52.198:38081"]
labels:
serverName: apiserver
热更新Prometheus的配置
[root@localhost prometheus]# curl -vvv -X POST localhost:9090/-/reload
* About to connect() to localhost port 9090 (#0)
* Trying ::1...
* Connected to localhost (::1) port 9090 (#0)
> POST /-/reload HTTP/1.1
> User-Agent: curl/7.29.0
> Host: localhost:9090
> Accept: */*
>
< HTTP/1.1 200 OK
< Date: Fri, 29 Dec 2023 12:52:34 GMT
< Content-Length: 0
<
* Connection #0 to host localhost left intact
将Prometheus集成jmx_exporter的数据集成到Grafana
https://grafana.com/grafana/dashboards/14845-jmx-dashboard-basic/
重复以上步骤配置 192.168.52.198的coreserver和192.168.52.199的apiserver 和 coreserver,同一个节点可以共享同一个代理程序 jmx_prometheus_javaagent-0.19.0.jar 和 config.yaml
最终prometheus.yml配置
# my global config
global:
scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
# scrape_timeout is set to the global default (10s).
# Alertmanager configuration
alerting:
alertmanagers:
- static_configs:
- targets:
# - alertmanager:9093
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
# - "first_rules.yml"
# - "second_rules.yml"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: "prometheus"
# metrics_path defaults to '/metrics'
# scheme defaults to 'http'.
static_configs:
- targets: ["localhost:9090"]
# node_export ---------------------------------------------------------------------------
- job_name: node_exporter
honor_timestamps: true
scrape_interval: 10s
scrape_timeout: 10s
metrics_path: /metrics
scheme: http
follow_redirects: true
static_configs:
- targets: ["192.168.17.177:9100"]
labels:
nodeName: node-177
- targets: ["192.168.17.176:9100"]
labels:
nodeName: node-176
# mysql_export --------------------------------------------------------------------------
- job_name: Mariadb
params:
auth_module: [client.servers]
static_configs:
- targets:
- 192.168.52.198:3306
labels:
dbName: mariadb-198
- targets:
- 192.168.52.199:3306
labels:
dbName: mariadb-199
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
- source_labels: [__param_target]
target_label: instance
- target_label: __address__
replacement: 192.168.17.177:9104
# redis_exporter --------------------------------------------------------------------------
- job_name: 'redis_exporter'
static_configs:
- targets:
- 192.168.52.198:6379
labels:
dbName: redis-198
- targets:
- 192.168.52.199:6379
labels:
dbName: redis-199
metrics_path: /scrape
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
- source_labels: [__param_target]
target_label: instance
- target_label: __address__
replacement: 192.168.17.177:9121
# rabbitmq_export ---------------------------------------------------------------------
- job_name: 'rabbitmq_exporter'
static_configs:
- targets: ["192.168.17.177:9419"]
labels:
dbName: mq-198
# jmx_export ---------------------------------------------------------------------------
- job_name: 'Tomcat'
static_configs:
- targets: ["192.168.52.198:38081"]
labels:
serverName: apiserver-198
- targets: ["192.168.52.198:38082"]
labels:
serverName: coreserver-198
- targets: ["192.168.52.199:38081"]
labels:
serverName: apiserver-199
- targets: ["192.168.52.199:38082"]
labels:
serverName: coreserver-199