这里我就是在这个基础上缩减了一下,用了两台机器搭建,把Prometheus和influxdb放在同一台机器上做主备了。
三个ip——
VIP(172.19.100.231)
master(172.19.100.232)
slave(172.19.100.233)
1.安装prometheus
# Download the Prometheus 2.45.3 release tarball (linux-arm64 build).
wget https://github.com/prometheus/prometheus/releases/download/v2.45.3/prometheus-2.45.3.linux-arm64.tar.gz
# Unpack it under /etc and rename the extracted directory to /etc/prometheus,
# which is the path the systemd unit below uses for config and data.
mv prometheus-2.45.3.linux-arm64.tar.gz /etc/
cd /etc
tar -zxvf prometheus-2.45.3.linux-arm64.tar.gz
mv prometheus-2.45.3.linux-arm64 prometheus
rm -rf prometheus-2.45.3.linux-arm64.tar.gz
# Create /etc/prometheus/data, the directory passed as --storage.tsdb.path.
cd prometheus
mkdir data
# Move the binaries to /usr/local/bin, where ExecStart looks for prometheus.
mv prometheus promtool /usr/local/bin/
# Write the systemd unit. The heredoc body contains no shell expansions, so it
# is written to the unit file as-is. The service is not started here; it is
# enabled and started in the later "start prometheus" step.
cat > /etc/systemd/system/prometheus.service << EOF
[Unit]
Description=Prometheus
Documentation=https://prometheus.io/
After=network.target
[Service]
ExecStart=/usr/local/bin/prometheus --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/etc/prometheus/data --web.listen-address=0.0.0.0:9090
WorkingDirectory=/etc/prometheus/
Restart=on-failure
[Install]
WantedBy=multi-user.target
EOF
2.安装node_exporter
# Download node_exporter 1.7.0 (linux-arm64 build), install only the binary
# into /usr/local/bin, then remove the tarball and the extracted directory.
wget https://github.com/prometheus/node_exporter/releases/download/v1.7.0/node_exporter-1.7.0.linux-arm64.tar.gz
tar -xvzf node_exporter-1.7.0.linux-arm64.tar.gz
cp node_exporter-1.7.0.linux-arm64/node_exporter /usr/local/bin/node_exporter
rm -rf node_exporter-1.7.0.linux-arm64*
# Write the systemd unit; node_exporter is started with no extra flags and is
# restarted 20 seconds after a failure (Restart=on-failure, RestartSec=20).
cat > /etc/systemd/system/node_exporter.service << EOF
[Unit]
Description=node_exporter
Documentation=https://prometheus.io/
After=network.target
[Service]
ExecStart=/usr/local/bin/node_exporter
Restart=on-failure
RestartSec=20
[Install]
WantedBy=multi-user.target
EOF
# Pick up the new unit file, (re)start the service, show its state, and
# enable it at boot.
systemctl daemon-reload
systemctl restart node_exporter
systemctl status node_exporter
systemctl enable node_exporter
3.安装keepalived
yum install -y keepalived
systemctl enable keepalived
prometheus01机器Keepalived配置文件
vim /etc/keepalived/keepalived.conf
! Configuration File for keepalived
! Node 172.19.100.232 (prometheus01): the higher-priority member of VRRP
! instance VI_1, which owns the virtual IP 172.19.100.231.
global_defs {
router_id 1
}
! Health check: run check_prome.sh every 1 second. With a negative weight the
! node's priority is reduced by 50 while the script is failing, i.e.
! 100 - 50 = 50, which is below the peer's priority of 80, so the VIP moves.
vrrp_script check_prome {
script "/etc/keepalived/check_prome.sh"
interval 1
weight -50
}
! Both nodes are declared as BACKUP; which one holds the VIP is decided by
! the effective priority (100 here vs 80 on the peer).
vrrp_instance VI_1 {
state BACKUP
interface eth0
virtual_router_id 1
mcast_src_ip 172.19.100.232
priority 100
advert_int 1
! virtual_router_id and the authentication block must be identical on both nodes.
authentication {
auth_type PASS
auth_pass 231
}
track_script {
check_prome
}
virtual_ipaddress {
172.19.100.231
}
! notify.sh is called with the new state name on each state transition.
notify_master "/etc/keepalived/notify.sh master"
notify_backup "/etc/keepalived/notify.sh backup"
notify_fault "/etc/keepalived/notify.sh fault"
}
prometheus02机器Keepalived配置文件
vim /etc/keepalived/keepalived.conf
! Configuration File for keepalived
! Node 172.19.100.233 (prometheus02): the lower-priority member of VRRP
! instance VI_1. This node defines no vrrp_script/track_script, so its
! priority stays fixed at 80.
global_defs {
router_id 2
}
vrrp_instance VI_1 {
state BACKUP
interface eth0
virtual_router_id 1
mcast_src_ip 172.19.100.233
priority 80
advert_int 1
! virtual_router_id and the authentication block must be identical on both nodes.
authentication {
auth_type PASS
auth_pass 231
}
virtual_ipaddress {
172.19.100.231
}
! notify.sh is called with the new state name on each state transition.
notify_master "/etc/keepalived/notify.sh master"
notify_backup "/etc/keepalived/notify.sh backup"
notify_fault "/etc/keepalived/notify.sh fault"
}
4. check_prome.sh脚本
vim /etc/keepalived/check_prome.sh
chmod +x /etc/keepalived/check_prome.sh
#!/bin/bash
# Keepalived health check for the Prometheus server.
# Exit status: 0 if a process named exactly "prometheus" is running,
#              1 otherwise (including when pgrep itself fails).
#
# pgrep -x matches the process name only. Grepping `ps -ef` output instead
# would match the full command line, so unrelated commands that merely
# mention the word (e.g. `systemctl stop prometheus` or
# `influx -database prometheus`) would be counted as a running server.
if pgrep -x prometheus >/dev/null; then
  exit 0
else
  exit 1
fi
4.1 master机器
vim /etc/keepalived/notify.sh
chmod +x /etc/keepalived/notify.sh
#!/bin/bash
# Keepalived notify script for node 172.19.100.232.
# Usage: notify.sh master|backup|fault
#   master       - start Prometheus and create the InfluxDB subscription that
#                  forwards writes from the local InfluxDB to the peer node.
#   backup|fault - stop Prometheus and drop that subscription.
# Any other argument only prints a message.
#
# NOTE(review): the InfluxDB credentials are hard-coded and passed on the
# command line, where they are visible in `ps` output; consider moving them
# out of the script.

local_host=172.19.100.232
peer_host=172.19.100.233

# Run a single InfluxQL statement ($1) against the local InfluxDB.
run_influx() {
  influx -host "$local_host" -username root -password Abzh4rd7 -database prometheus -execute "$1"
}

case "$1" in
  master)
    systemctl start prometheus
    # Identifier quotes are escaped so they reach InfluxDB intact.
    run_influx "CREATE SUBSCRIPTION \"prometheus\" ON \"prometheus\".\"prometheus\" DESTINATIONS ALL 'http://prometheus:prometheus@${peer_host}:8086';"
    ;;
  backup | fault)
    systemctl stop prometheus
    run_influx 'DROP SUBSCRIPTION "prometheus" ON "prometheus"."prometheus";'
    ;;
  *)
    echo "不支持该参数,请检查输入的参数是否正确"
    ;;
esac
4.2 slave机器
#!/bin/bash
# Keepalived notify script for node 172.19.100.233.
# Usage: notify.sh master|backup|fault
#   master       - start Prometheus and create the InfluxDB subscription that
#                  forwards writes from the local InfluxDB to the peer node.
#   backup|fault - stop Prometheus and drop that subscription.
# Any other argument only prints a message.
#
# NOTE(review): the InfluxDB credentials are hard-coded and passed on the
# command line, where they are visible in `ps` output; consider moving them
# out of the script.

# The local address is defined once so every influx call targets the same
# host (172.19.100.233, this node's address in the 172.19.100.0 network).
local_host=172.19.100.233
peer_host=172.19.100.232

# Run a single InfluxQL statement ($1) against the local InfluxDB.
run_influx() {
  influx -host "$local_host" -username root -password Abzh4rd7 -database prometheus -execute "$1"
}

case "$1" in
  master)
    systemctl start prometheus
    # Identifier quotes are escaped so they reach InfluxDB intact.
    run_influx "CREATE SUBSCRIPTION \"prometheus\" ON \"prometheus\".\"prometheus\" DESTINATIONS ALL 'http://prometheus:prometheus@${peer_host}:8086';"
    ;;
  backup | fault)
    systemctl stop prometheus
    run_influx 'DROP SUBSCRIPTION "prometheus" ON "prometheus"."prometheus";'
    ;;
  *)
    echo "不支持该参数,请检查输入的参数是否正确"
    ;;
esac
5. 安装influxdb
# Download and install the InfluxDB 1.8.1 RPM, then enable the service at boot
# and start it immediately (--now), and show its state.
# NOTE(review): this is the x86_64 package, whereas the Prometheus and
# node_exporter downloads in this guide are linux-arm64 builds, and all three
# are meant to run on the same hosts. Confirm the hosts' architecture and use
# matching packages.
wget https://dl.influxdata.com/influxdb/releases/influxdb-1.8.1.x86_64.rpm
sudo yum localinstall influxdb-1.8.1.x86_64.rpm
systemctl enable influxdb --now
systemctl status influxdb
5.1创建管理员用户
influx
# 创建管理员账户
> CREATE USER root WITH PASSWORD 'Abzh4rd7' WITH ALL PRIVILEGES
> show databases;
# 进行认证
> auth
username: root
password:
5.2 创建存储监控数据的库
influx
auth
create database prometheus;
# 创建默认的保留策略
use prometheus
CREATE RETENTION POLICY "prometheus" ON "prometheus" DURATION 1h REPLICATION 1 DEFAULT
show databases;
5.3 创建prometheus账户并设置权限
# 创建Prometheus账户并设置权限
CREATE USER prometheus WITH PASSWORD 'prometheus'
GRANT ALL ON "prometheus" TO "prometheus"
SHOW USERS
//查看用户,两列数据,一列是用户名称,一列是是否为管理员用户
show users
//创建普通用户
create user "influx" with password '123456'
//创建管理员用户
create user "root" with password '123456' with all privileges
//修改用户密码
set password for "root" = 'root'
//通过cli操作influxdb
influx -username root -password root
5.4 Influxdb-master手动创建订阅
use prometheus
CREATE SUBSCRIPTION "prometheus" ON "prometheus"."prometheus" DESTINATIONS ALL 'http://prometheus:prometheus@172.19.100.233:8086'
5.5 Influxdb-slave手动创建订阅
CREATE SUBSCRIPTION "prometheus" ON "prometheus"."prometheus" DESTINATIONS ALL 'http://prometheus:prometheus@172.19.100.232:8086'
删除订阅
drop SUBSCRIPTION "prometheus" ON "prometheus"."prometheus"
5.6检查订阅情况
SHOW SUBSCRIPTIONS
6. 配置Prometheus数据源
6.1 Prometheus01
vim /etc/prometheus/prometheus.yml
# Prometheus configuration for node 172.19.100.232.
global:
  scrape_interval: 15s
  evaluation_interval: 15s
scrape_configs:
  # Scrapes port 9100 on both nodes (node_exporter's default listen port).
  - job_name: "prometheus"
    static_configs:
      - targets: ["172.19.100.232:9100", "172.19.100.233:9100"]
# influxdb
# Samples are written to and read back from the InfluxDB instance on this node.
remote_write:
  - url: "http://172.19.100.232:8086/api/v1/prom/write?db=prometheus&u=prometheus&p=prometheus"
    remote_timeout: 30s
    queue_config:
      capacity: 100000
      max_shards: 1000
      max_samples_per_send: 1000
      batch_send_deadline: 5s
      min_backoff: 30ms
      max_backoff: 100ms
remote_read:
  - url: "http://172.19.100.232:8086/api/v1/prom/read?db=prometheus&u=prometheus&p=prometheus"
    remote_timeout: 10s
    read_recent: true
6.2 Prometheus02
vim /etc/prometheus/prometheus.yml
# Prometheus configuration for node 172.19.100.233.
global:
  scrape_interval: 15s
  evaluation_interval: 15s
scrape_configs:
  # Scrapes port 9100 on both nodes (node_exporter's default listen port).
  - job_name: "prometheus"
    static_configs:
      - targets: ["172.19.100.232:9100", "172.19.100.233:9100"]
# influxdb
# Samples are written to and read back from the InfluxDB instance on this node.
remote_write:
  - url: "http://172.19.100.233:8086/api/v1/prom/write?db=prometheus&u=prometheus&p=prometheus"
    remote_timeout: 30s
    queue_config:
      capacity: 100000
      max_shards: 1000
      max_samples_per_send: 1000
      batch_send_deadline: 5s
      min_backoff: 30ms
      max_backoff: 100ms
remote_read:
  - url: "http://172.19.100.233:8086/api/v1/prom/read?db=prometheus&u=prometheus&p=prometheus"
    remote_timeout: 10s
    read_recent: true
启动prometheus
systemctl daemon-reload
systemctl enable prometheus --now
systemctl status prometheus
启动keepalived并检查VIP(在两台机器上分别执行 systemctl start keepalived,再用 ip addr show eth0 确认 VIP 172.19.100.231 只绑定在当前主节点上)
查看influxdb数据
use prometheus
show measurements