1. 安装 Prometheus
# 官网
https://prometheus.io/download/
[root@promethues ~]# tar zxvf prometheus-2.30.3.linux-amd64.tar.gz -C /usr/local/
[root@promethues ~]# ln -sv /usr/local/prometheus-2.30.3.linux-amd64 /usr/local/prometheus
[root@promethues ~]# mkdir -p /usr/local/prometheus/data
# 添加到系统服务
[root@promethues system]# cat /usr/lib/systemd/system/prometheus.service
[Unit]
Description=Prometheus Monitoring System
Documentation=https://prometheus.io/docs/introduction/overview/
After=network.target
[Service]
Restart=on-failure
Type=simple
ExecStart=/usr/local/prometheus/prometheus --storage.tsdb.path=/usr/local/prometheus/data --config.file=/usr/local/prometheus/prometheus.yml
[Install]
WantedBy=multi-user.target
# 启动并设置开机自启
[root@promethues system]# systemctl enable --now prometheus
[root@promethues system]# systemctl status prometheus
● prometheus.service - Prometheus Monitoring System
Loaded: loaded (/usr/lib/systemd/system/prometheus.service; enabled; vendor preset: disabled)
Active: active (running) since Sun 2021-10-31 11:21:37 CST; 56s ago
Main PID: 1868 (prometheus)
......
# 查看启动端口
[root@promethues system]# ss -anltp|grep 9090
LISTEN 0 128 [::]:9090 [::]:* users:(("prometheus",pid=1868,fd=7))
[root@promethues system]#
# 访问
http://ip:9090
2. Prometheus 配置文件介绍
[root@promethues prometheus]# cat /usr/local/prometheus/prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
# - "first_rules.yml"
# - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ["localhost:9090"]
[root@promethues prometheus]#
-
global: 此片段指定的是 prometheus 的全局配置, 比如采集间隔,抓取超时时间等。
-
alerting: 此片段指定报警配置, 这里主要是指定 prometheus 将报警规则推送到指定的 alertmanager 实例地址。
-
rule_files: 此片段指定报警规则文件, prometheus 根据这些规则信息,会推送报警信息到 alertmanager 中。
-
scrape_configs: 此片段指定抓取配置,prometheus 的数据采集通过此片段配置。
-
remote_write: 指定后端的存储的写入 api 地址。Prometheus 使用的是时序数据库存储监控数据,本地默认保留 15 天(由 --storage.tsdb.retention.time 控制,默认 15d),如果需要长期存储,可以通过 remote_write 写入其他存储系统
-
remote_read: 指定后端的存储的读取 api 地址。
Global 配置参数
# How frequently to scrape targets by default.
[ scrape_interval: <duration> | default = 1m ] # 抓取间隔
# How long until a scrape request times out.
[ scrape_timeout: <duration> | default = 10s ] # 抓取超时时间
# How frequently to evaluate rules.
[ evaluation_interval: <duration> | default = 1m ] # 评估规则间隔
scrape_config 主要参数
一个 scrape_config 片段指定一组目标和参数, 目标就是实例,指定采集的端点, 参数描述如何采集这些实例,主要参数如下
-
scrape_interval: 抓取间隔,默认继承 global 值
-
scrape_timeout: 抓取超时时间,默认继承 global 值
-
metrics_path: 抓取路径, 默认是 /metrics
-
*_sd_configs: 指定服务发现配置
-
static_configs: 静态指定服务 job
-
relabel_configs: relabel 设置