一、 安装prometheus
端口号默认为9090
# Download and unpack the release tarball
wget https://github.com/prometheus/prometheus/releases/download/v2.36.2/prometheus-2.36.2.linux-amd64.tar.gz
tar -zxvf prometheus-2.36.2.linux-amd64.tar.gz
mv prometheus-2.36.2.linux-amd64 /usr/local/prometheus
# Dedicated service account; its home directory is /var/lib/prometheus
groupadd prometheus
useradd -g prometheus -m -d /var/lib/prometheus -s /sbin/nologin prometheus
# OWNER:GROUP is the portable separator (the OWNER.GROUP dot form is obsolescent)
chown -R prometheus:prometheus /usr/local/prometheus
# The contents of every file edited with vim are listed below the command list
# systemd service definition
vim /etc/systemd/system/prometheus.service
# prometheus.yml is the main configuration file; it changes as node_exporter, alertmanager, nginx and influxdb are installed
# Alert rules: create a "rules" directory next to prometheus.yml; sample rules: https://awesome-prometheus-alerts.grep.to/rules
vim /usr/local/prometheus/prometheus.yml
# Validate the configuration (and the rule files it references) before starting
/usr/local/prometheus/promtool check config /usr/local/prometheus/prometheus.yml
systemctl daemon-reload
systemctl start prometheus
# Check that the service came up
systemctl status prometheus
# Start on boot
systemctl enable prometheus
# Verify that the web endpoint answers
curl 127.0.0.1:9090
1. /etc/systemd/system/prometheus.service添加内容
# systemd unit that runs Prometheus under the "prometheus" account
[Unit]
Description=prometheus
After=network.target

[Service]
User=prometheus
Type=simple
Restart=on-failure
# One flag per line: the config file, then the TSDB storage directory
ExecStart=/usr/local/prometheus/prometheus \
    --config.file=/usr/local/prometheus/prometheus.yml \
    --storage.tsdb.path=/var/lib/prometheus/data

[Install]
WantedBy=multi-user.target
2. prometheus.yml 需要修改alerting、rule_files、job_name、influxdb配置
# my global config
global:
  scrape_interval: 15s  # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s  # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            # Must match the address Alertmanager listens on (9093 unless
            # --web.listen-address is set in its service file)
            - '127.0.0.1:9093'

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "rules/*_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ["localhost:9090"]

  # Linux host monitoring (node_exporter)
  - job_name: "nodes"
    static_configs:
      # List every monitored server here
      - targets: ['*.24.*.240:9109', '*.104.*.244:9109']

  # alertmanager
  - job_name: "alertmanager"
    static_configs:
      # More instances can be appended to this list
      - targets: ['127.0.0.1:9093']

  # nginx
  - job_name: "nginx"
    static_configs:
      # More instances can be appended to this list
      - targets: ['*.104.*.244:9113']

# Long-term storage: these URLs point at InfluxDB
remote_write:
  - url: "http://127.0.0.1:8086/api/v1/prom/write?db=prometheus"
remote_read:
  - url: "http://127.0.0.1:8086/api/v1/prom/read?db=prometheus"
二、 安装node_exporter
端口号默认为9100,本文自定义为9109(只需在被监控的服务器上安装)
# Download and unpack the release tarball
wget https://github.com/prometheus/node_exporter/releases/download/v1.3.1/node_exporter-1.3.1.linux-amd64.tar.gz
tar -zxvf node_exporter-1.3.1.linux-amd64.tar.gz
mv node_exporter-1.3.1.linux-amd64 /usr/local/node_exporter
# Create the group only if it does not exist yet
getent group prometheus >/dev/null || groupadd prometheus
# Create the user only if it does not exist yet
id -u prometheus >/dev/null 2>&1 || useradd -g prometheus -m -d /var/lib/prometheus -s /sbin/nologin prometheus
# OWNER:GROUP is the portable separator (the OWNER.GROUP dot form is obsolescent)
chown -R prometheus:prometheus /usr/local/node_exporter
vim /etc/systemd/system/node_exporter.service
systemctl daemon-reload
systemctl start node_exporter
systemctl status node_exporter
systemctl enable node_exporter
# Verify that the exporter answers on the custom port
curl 127.0.0.1:9109
curl 127.0.0.1:9109/metrics
1. /etc/systemd/system/node_exporter.service添加内容
# systemd unit that runs node_exporter under the "prometheus" account
[Unit]
Description=node_exporter
After=network.target

[Service]
Type=simple
User=prometheus
# Listen port and log level. The binary path must match the install step,
# which moves the release directory to /usr/local/node_exporter
ExecStart=/usr/local/node_exporter/node_exporter --web.listen-address=:9109 --log.level=error
Restart=on-failure

[Install]
WantedBy=multi-user.target
三、 安装alertmanager
默认端口号9093
# Download and unpack the release tarball
wget https://github.com/prometheus/alertmanager/releases/download/v0.24.0/alertmanager-0.24.0.linux-amd64.tar.gz
tar -zxvf alertmanager-0.24.0.linux-amd64.tar.gz
mv alertmanager-0.24.0.linux-amd64 /usr/local/alertmanager
# Create the service user only if it does not exist yet
id -u prometheus >/dev/null 2>&1 || useradd -s /sbin/nologin prometheus
# The directory given to --storage.path has to exist before it can be chown'ed
mkdir -p /data/alertmanager
chown -R prometheus:prometheus /usr/local/alertmanager /data/alertmanager/
# Add the systemd service
vim /etc/systemd/system/alertmanager.service
# Edit the configuration
vim /usr/local/alertmanager/alertmanager.yml
# Validate the configuration file (absolute paths, so the working directory does not matter)
/usr/local/alertmanager/amtool check-config /usr/local/alertmanager/alertmanager.yml
systemctl daemon-reload
systemctl start alertmanager
systemctl status alertmanager
systemctl enable alertmanager
# Verify that the web endpoint answers
curl 127.0.0.1:9093/#/alerts
1. /etc/systemd/system/alertmanager.service 添加内容
# systemd unit that runs Alertmanager under the "prometheus" account
[Unit]
Description=alertmanager
After=network.target

[Service]
User=prometheus
Type=simple
Restart=on-failure
# One flag per line: the config file, then the state directory
ExecStart=/usr/local/alertmanager/alertmanager \
    --config.file=/usr/local/alertmanager/alertmanager.yml \
    --storage.path=/data/alertmanager

[Install]
WantedBy=multi-user.target
2. /usr/local/alertmanager/alertmanager.yml 修改内容,修改route和receivers部分
global:
  resolve_timeout: 5m
  # The smtp_* settings are only needed when alerts are sent by email
  smtp_smarthost: 'smtp.exmail.qq.com:465'
  smtp_from: 'develop@company.com'  # your own sender address
  smtp_auth_username: 'develop@company.com'  # your own account
  smtp_auth_password: '***'  # the 16-character QQ mail authorization code is used as the password
  smtp_require_tls: false

route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h  # how often a still-firing alert is re-sent; use 1h in production
  receiver: 'web.hook'  # default receiver: the webhook script
  routes:
    - receiver: 'web.hook'
      group_wait: 10s
      match:  # condition: alerts whose rule carries the label team=node
        team: node
      # Routing stops at the first matching route. Uncomment the next line
      # to let matching continue, otherwise the 'email' route below is
      # never reached because it matches the same label.
      # continue: true
    - receiver: 'email'
      group_wait: 10s
      match:
        team: node

receivers:
  - name: 'email'
    email_configs:
      - to: 'test@company.com'  # address that receives the mail
  - name: 'web.hook'
    webhook_configs:
      # Webhook endpoint: a self-written script that receives the alert
      # payload and can do anything with it
      - url: 'http://**.cn/alertmanager/alert.php'
        send_resolved: false

inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
3. 收到的json数据
{
"receiver": "web\\.hook",
"status": "firing",
"alerts": [{
"status": "firing",
"labels": {
"alertname": "HostOutOfMemory",
"instance": "*.120.*.10:9109",
"job": "nodes",
"severity": "1",
"team": "node"
},
"annotations": {
"description": "Node memory is filling up (\u003c 10% left)\n VALUE = 6.943662692524177\n LABELS = map[instance:*.120.*.10:9109 job:nodes]",
"summary": "Host out of memory (instance *.120.*.10:9109)"
},
"startsAt": "2022-07-24T03:32:46.086Z",
"endsAt": "0001-01-01T00:00:00Z",
"generatorURL": "http://test:9090/graph?g0.expr=node_memory_MemAvailable_bytes+%2F+node_memory_MemTotal_bytes+%2A+100+%3C+10\u0026g0.tab=1",
"fingerprint": "07938f23a68be83e"
}],
"groupLabels": {
"alertname": "HostOutOfMemory"
},
"commonLabels": {
"alertname": "HostOutOfMemory",
"instance": "*.120.*.10:9109",
"job": "nodes",
"severity": "1",
"team": "node"
},
"commonAnnotations": {
"description": "Node memory is filling up (\u003c 10% left)\n VALUE = 6.943662692524177\n LABELS = map[instance:*.120.*.10:9109 job:nodes]",
"summary": "Host out of memory (instance *.120.*.10:9109)"
},
"externalURL": "http://test:9095",
"version": "4",
"groupKey": "{}/{team=\"node\"}:{alertname=\"HostOutOfMemory\"}",
"truncatedAlerts": 0
}
**4. alert.php:本脚本用于发送钉钉和短信告警**
<?php
/**
 * Alertmanager webhook receiver.
 *
 * Appends the raw request body to a log file, then forwards every alert in
 * the payload to DingTalk and, when $needMsg is 1, by SMS as well.
 */
//----- Configuration -----
$needMsg = 0; // send SMS too? 1 = yes; 0 = no
$phones = [
    '13****161'
];
$dingUrl = 'https://oapi.dingtalk.com/robot/send?access_token=*****';
$logUrl = './alertmanagerlog.txt';
//----- End of configuration -----

$input = trim(file_get_contents('php://input'));
$log = date('Y-m-d H:i:s').' 监听到: '.$input.PHP_EOL;
file_put_contents($logUrl, $log, FILE_APPEND);

$alertContent = json_decode($input, true);
// Reject anything that is not a JSON object carrying a list of alerts
if (!is_array($alertContent) || !isset($alertContent['alerts']) || !is_array($alertContent['alerts'])) {
    echo "error";exit;
}

// Initialised up front so an empty alert list simply sends nothing
$data = [];
foreach ($alertContent['alerts'] as $k => $val) {
    $labels = $val['labels'] ?? [];
    $annotations = $val['annotations'] ?? [];
    // "??" binds looser than ".", so each fallback is parenthesised;
    // otherwise it would apply to the already-concatenated string
    $data[$k] = [
        'alertname' => '告警项:' . ($labels['alertname'] ?? ''), // alert name
        'instance' => '实例名称:' . ($labels['instance'] ?? ''), // instance
        'severity' => '告警级别:' . ($labels['severity'] ?? ''), // severity
        'summary' => '摘要:' . ($annotations['summary'] ?? ''),
        'description' => '描述:' . ($annotations['description'] ?? ''),
        'startsAt' => '开始时间:' . ($val['startsAt'] ?? ''),
    ];
}

foreach ($data as $alert) {
    // Separator first: the reversed argument order is no longer accepted by PHP 8
    $message = implode(PHP_EOL, $alert);
    if ($needMsg == 1) {
        sendMsg($message, $phones);
    }
    sendDing($message, $dingUrl);
}
/**
 * Send the alert text by SMS to every number in $phones.
 *
 * @param string   $message alert text
 * @param string[] $phones  recipient phone numbers
 * @return bool always true
 */
function sendMsg($message, $phones)
{
    $endpoint = 'http://**/sendsms&';
    foreach ($phones as $phone) {
        // The timestamp is taken per recipient, right before the request is built
        $text = '当前时间为:' . date('Y-m-d H:i:s') . PHP_EOL . $message;
        $query = http_build_query(['phone' => $phone, 'msg' => $text]);
        request_by_curl($endpoint . $query);
    }
    return true;
}
/**
 * Post the alert as a plain-text message to a DingTalk robot webhook.
 *
 * @param string $msgTip alert text
 * @param string $url    DingTalk robot webhook URL (including the access token)
 * @return mixed whatever request_by_curl() returns
 */
function sendDing($msgTip, $url)
{
    $message = '异常报警'. PHP_EOL;
    $message .= '当前时间为:' . date('Y-m-d H:i:s') . PHP_EOL;
    $message .= $msgTip . PHP_EOL;

    $msg = [
        'msgtype' => "text",
        'text' => ['content' => $message],
        'at' => ['isAtAll' => false],
    ];

    // request_by_curl() accepts only the URL and the body; the four extra
    // arguments passed previously were silently ignored
    return request_by_curl($url, json_encode($msg));
}
/**
 * POST a body to a URL with cURL and return the response.
 *
 * @param string      $remote_server target URL
 * @param string|null $post_string   request body; null sends an empty body
 * @return string|false response body, or false when the request failed
 */
function request_by_curl($remote_server, $post_string=null)
{
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }
    curl_setopt_array($ch, [
        CURLOPT_URL => $remote_server,
        CURLOPT_POST => 1,
        CURLOPT_POSTFIELDS => (string) $post_string,
        CURLOPT_HTTPHEADER => array('Content-Type: application/json;charset=utf-8'),
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_CONNECTTIMEOUT => 5,
        // Bound the whole transfer so a stalled endpoint cannot hang the webhook
        CURLOPT_TIMEOUT => 10,
        // Keep certificate checks on: the webhook URL carries an access token
        CURLOPT_SSL_VERIFYPEER => true,
        CURLOPT_SSL_VERIFYHOST => 2,
    ]);
    $data = curl_exec($ch);
    if ($data === false) {
        // Surface the failure instead of dropping it silently
        error_log('request_by_curl failed: ' . curl_error($ch));
    }
    curl_close($ch);
    return $data;
}
四、 安装grafana
默认端口号 3000
不同的系统选择不同的安装方法,以下为debian的安装方法
# Install the packages the Grafana .deb depends on, then the .deb itself
sudo apt-get install -y adduser libfontconfig1
wget https://dl.grafana.com/enterprise/release/grafana-enterprise_9.0.4_amd64.deb
sudo dpkg -i grafana-enterprise_9.0.4_amd64.deb
# Configure mail (SMTP); it is used when inviting users
vim /etc/grafana/grafana.ini
systemctl daemon-reload
systemctl start grafana-server
systemctl status grafana-server
systemctl enable grafana-server
# The TCP port does not have to be opened; requests can be forwarded through nginx instead
1. /etc/grafana/grafana.ini 修改内容,找到smtp
[smtp]
enabled = true
# Comments are kept on their own lines: whether a trailing "# ..." after a
# value is stripped depends on the ini parser, so it must not share a line
# with a setting.
# Your own SMTP server
host = smtp.exmail.qq.com:465
# Your own account
user = develop@company.com
# If the password contains # or ; you have to wrap it with triple quotes. Ex """#password;"""
password = ******
;cert_file =
;key_file =
;skip_verify = false
from_address = develop@company.com
from_name = Grafana
安装完成grafana后,打开界面重新修改密码。可以邀请用户,赋予不同的角色。
- import导入dashboard。nginx的ID12708 ; node的dashboard id推荐 8919。
- 添加数据源(data source),地址填到端口号即可:http://127.0.0.1:9090 (prometheus的地址)
为了安全,不开放grafana端口的话,可以用nginx做转发
vim nginx/vhost/grafana.conf
# Reverse proxy in front of Grafana, so port 3000 need not be opened
upstream mygrafana {
    server 127.0.0.1:3000;
}

server {
    server_name   grafana.test.com;
    server_tokens off;

    access_log /home/logs/grafana_access.log;
    error_log  /home/logs/grafana_error.log;

    location / {
        gzip                 off;
        client_max_body_size 0;

        proxy_http_version    1.1;
        proxy_redirect        off;
        proxy_connect_timeout 300;
        proxy_read_timeout    300;

        # Hand the original request details on to Grafana
        proxy_set_header Host              $http_host;
        proxy_set_header X-Real-IP         $remote_addr;
        proxy_set_header X-Forwarded-For   $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Forwarded-Ssl   on;

        proxy_pass http://mygrafana;
    }
}
五、 安装nginx-prometheus-exporter
# Check that nginx was built with the stub_status module.
# nginx -V prints to stderr; a build that has the module prints its flag here.
nginx -V 2>&1 | grep -o with-http_stub_status_module
http部分 server里配置,改完记得重启nginx服务
server {
    location /metrics {
        stub_status on;
        # The status page is only read by the exporter on this host,
        # so do not expose it to other clients
        allow 127.0.0.1;
        allow ::1;
        deny  all;
    }
}
配置成功后的样子
curl localhost/metrics
安装nginx-prometheus-exporter
# Download and unpack the release tarball
wget https://github.com/nginxinc/nginx-prometheus-exporter/releases/download/v0.7.0/nginx-prometheus-exporter-0.7.0-linux-amd64.tar.gz
tar -xf nginx-prometheus-exporter-0.7.0-linux-amd64.tar.gz
mv nginx-prometheus-exporter /usr/local/bin
# System account the exporter runs as
useradd -r nginx_exporter
# Create the systemd service file
vim /etc/systemd/system/nginx_prometheus_exporter.service
systemctl daemon-reload
# Start first, then look at the status
systemctl start nginx_prometheus_exporter
systemctl status nginx_prometheus_exporter
# Start on boot, like the other services in this guide
systemctl enable nginx_prometheus_exporter
1./etc/systemd/system/nginx_prometheus_exporter.service需要添加
web.listen-address 可以根据自己的情况加ip
# systemd unit for the NGINX Prometheus exporter
[Unit]
Description=NGINX Prometheus Exporter
After=network.target

[Service]
Type=simple
User=nginx_exporter
Group=nginx_exporter
Restart=always
SyslogIdentifier=nginx_prometheus_exporter
# Listen on :9113 and read the nginx status page from the local /metrics location
ExecStart=/usr/local/bin/nginx-prometheus-exporter -web.listen-address=:9113 -nginx.scrape-uri http://127.0.0.1/metrics

[Install]
WantedBy=multi-user.target
成功后的样子
最后,记得在prometheus配置文件的scrape_configs中加上nginx相关的监控
- job_name: 'nginx'
  static_configs:
    # Address of the host running nginx-prometheus-exporter. 0.0.0.0 is a
    # bind address, not a destination to scrape; use the server's real
    # address (or 127.0.0.1 when the exporter runs next to Prometheus).
    - targets: ['*.104.*.244:9113']
六、 安装influxdb
# Download and install the InfluxDB 1.8.3 Debian package
wget https://dl.influxdata.com/influxdb/releases/influxdb_1.8.3_amd64.deb
sudo dpkg -i influxdb_1.8.3_amd64.deb
# To move the data storage directory, three settings in this file have to be changed
vim /etc/influxdb/influxdb.conf
service influxdb start
# If the service fails to start, a common cause is wrong permissions on the data directory
使用
# Run "influx" to open the database shell
influx
# Create the database that Prometheus writes to
create database prometheus
# List the existing databases
show databases
验证
# Switch to the prometheus database, then run "show measurements"
use prometheus
# Measurements are the equivalent of tables
show measurements
# Query the contents of one measurement
select * from nginx_http_requests_total limit 10
# List the tag keys of that measurement
show tag keys from nginx_http_requests_total