ceph 监控
环境（每个节点有两个网段的地址，下文 prometheus 的抓取目标使用 192.168.48.0/24 网段）
192.168.126.101 ceph01
192.168.126.102 ceph02
192.168.126.103 ceph03
192.168.126.104 ceph04
192.168.126.105 ceph-admin
192.168.48.11 ceph01
192.168.48.12 ceph02
192.168.48.13 ceph03
192.168.48.14 ceph04
192.168.48.15 ceph-admin
192.168.48.57 prometheus
### 所有节点的内核版本要求在 4.5 以上，可用 uname -r 查看
uname -r
5.2.2-1.el7.elrepo.x86_64
安装prometheus
官方下载地址
https://prometheus.io/download/
[root@prometheus ~]# ll -h
total 48M
-rw-r--r-- 1 root root 48M Jul 27 20:36 prometheus-2.11.1.linux-amd64.tar.gz
[root@prometheus ~]# tar xvf prometheus-2.11.1.linux-amd64.tar.gz
[root@prometheus ~]# mv prometheus-2.11.1.linux-amd64 prometheus
[root@prometheus ~]# ll -h
total 48M
drwxr-xr-x 4 3434 3434 132 Jul 10 23:33 prometheus
-rw-r--r-- 1 root root 48M Jul 27 20:36 prometheus-2.11.1.linux-amd64.tar.gz
[root@prometheus ~]# mv prometheus /usr/local/
创建prometheus用户
[root@prometheus prometheus]# useradd -r -d /var/lib/prometheus -m prometheus
[root@prometheus prometheus]# ls -ld /var/lib/prometheus/
drwx------ 2 prometheus prometheus 76 Jul 27 20:46 /var/lib/prometheus/
创建prometheus服务
[root@prometheus ~]# vim /usr/lib/systemd/system/prometheus.service
[Unit]
Description=The Prometheus 2 monitoring system and time series database.
Documentation=https://prometheus.io
After=network.target
[Service]
EnvironmentFile=-/etc/sysconfig/prometheus
User=prometheus
ExecStart=/usr/local/prometheus/prometheus \
--storage.tsdb.path=/var/lib/prometheus \
--config.file=/usr/local/prometheus/prometheus.yml \
--web.listen-address=0.0.0.0:9090 \
--web.external-url=
Restart=on-failure
StartLimitInterval=1
RestartSec=3
[Install]
WantedBy=multi-user.target
启动prometheus
[root@prometheus ~]# systemctl daemon-reload
[root@prometheus ~]# systemctl start prometheus.service
[root@prometheus ~]# ss -ntl
State Recv-Q Send-Q Local Address:Port Peer Address:Port
LISTEN 0 128 *:22 *:*
LISTEN 0 128 :::9090 :::*
LISTEN 0 128 :::22 :::*
[root@prometheus ~]#
在所有节点（包括 prometheus 主机）上安装 node_exporter；prometheus 主机上已经创建过 prometheus 用户，可跳过其中的 useradd 一步
ll -h
total 7.8M
-rw-r--r-- 1 root root 7.8M Jul 27 20:42 node_exporter-0.18.1.linux-amd64.tar.gz
tar xvf node_exporter-0.18.1.linux-amd64.tar.gz
mv node_exporter-0.18.1.linux-amd64 node_exporter
mv node_exporter /usr/local/
useradd -r -d /var/lib/prometheus -m prometheus
[root@ceph01 ~]# vim /usr/lib/systemd/system/node_exporter.service
[Unit]
Description=node_exporter
Documentation=https://prometheus.io/
After=network.target
[Service]
EnvironmentFile=-/etc/sysconfig/node_exporter
Type=simple
User=prometheus
ExecStart=/usr/local/node_exporter/node_exporter
Restart=on-failure
[Install]
WantedBy=multi-user.target
systemctl daemon-reload
systemctl start node_exporter.service
编辑 prometheus 配置文件，为每个节点的 node_exporter（端口 9100）添加抓取任务
[root@prometheus ~]# vim /usr/local/prometheus/prometheus.yml
......
......
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
  - job_name: 'prometheus-node'
    static_configs:
      - targets: ['192.168.48.57:9100']
  - job_name: 'ceph01-node'
    static_configs:
      - targets: ['192.168.48.11:9100']
  - job_name: 'ceph02-node'
    static_configs:
      - targets: ['192.168.48.12:9100']
  - job_name: 'ceph03-node'
    static_configs:
      - targets: ['192.168.48.13:9100']
  - job_name: 'ceph04-node'
    static_configs:
      - targets: ['192.168.48.14:9100']
  - job_name: 'ceph-admin-node'
    static_configs:
      - targets: ['192.168.48.15:9100']
[root@prometheus ~]# systemctl restart prometheus.service
监控ceph
在 ceph 上启用 mgr 的 prometheus 模块
[cephadm@ceph-admin ceph-cluster]$ ceph mgr module enable prometheus
从下面 ceph -s 的输出可以看到，当前活动（active）的 mgr 运行在 ceph04 上，ceph03 为 standby
[cephadm@ceph-admin ceph-cluster]$ ceph -s
cluster:
id: 231d5528-bab4-49fa-9d68-d5382d2e9f6c
health: HEALTH_OK
services:
mon: 3 daemons, quorum ceph01,ceph02,ceph03 (age 5h)
mgr: ceph04(active, since 2m), standbys: ceph03
mds: cephfs:2 {0=ceph02=up:active,1=ceph01=up:active} 1 up:standby
osd: 8 osds: 8 up (since 5h), 8 in (since 5h)
rgw: 1 daemon active (ceph01)
data:
pools: 9 pools, 352 pgs
objects: 251 objects, 14 MiB
usage: 8.1 GiB used, 64 GiB / 72 GiB avail
pgs: 352 active+clean
[root@ceph04 ~]# ss -ntl | grep 9283
LISTEN 0 5 :::9283 :::*
编辑 prometheus 配置文件，在 scrape_configs 下添加 ceph mgr 的抓取目标（mgr 的 prometheus 模块监听 9283 端口）
[root@prometheus ~]# vim /usr/local/prometheus/prometheus.yml
.....
.....
  - job_name: 'ceph04-mgr'
    static_configs:
      - targets: ['192.168.48.14:9283']
[root@prometheus ~]# systemctl restart prometheus.service
安装grafana
wget https://dl.grafana.com/oss/release/grafana-6.2.5-1.x86_64.rpm
yum localinstall grafana-6.2.5-1.x86_64.rpm
systemctl start grafana-server.service
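访问 grafana：在浏览器中打开 http://<grafana 所在主机的 IP>:3000（3000 为 grafana 的默认端口）
使用默认账号密码 admin/admin 登录（首次登录后会提示修改密码）
添加数据源：类型选择 Prometheus，URL 填写 prometheus 的地址 http://192.168.48.57:9090
导入模板：导入 Ceph 集群和 node_exporter 相关的 dashboard 模板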