大家好,我是Linux运维工程师 Linke,技术过硬,从不挖坑。
通过上篇文档中记录的测试方法,便有了制作一个 prometheus 自定义端口监控插件的思路。要监控不同 server 的不同端口,首先我要制作一个配置文件是不。既然 prometheus 的配置文件就是 yaml 格式的,那我为了贴切,也搞个 yaml 格式的配置文件。于是乎,有了下面的故事。
使用和配置报警说明写到了github:
https://github.com/OpYangLin/prometheus_monitor_port.git
配置文件
vim host_port_conf.yaml
# Prometheus monitor server port config.
zookeeper:
host:
- "192.168.7.51"
- "192.168.7.52"
- "192.168.7.53"
port:
- 2181
elasticsearch:
host:
- "192.168.7.41"
- "192.168.7.42"
- "192.168.7.43"
port:
- 9200
- 9300
监控服务器端口客户端启动
# Prometheus monitor server port.
# YangLin
# -*- coding:utf-8 -*-
import socket
import os
import yaml
import prometheus_client
from prometheus_client import Gauge
from prometheus_client.core import CollectorRegistry
from flask import Response, Flask
# Flask application object; the /metrics route below is registered on it.
app = Flask(__name__)
def Getconfigdic():
    """Load ``host_port_conf.yaml`` from the directory containing this script.

    Returns:
        The parsed YAML document as produced by ``yaml.full_load``. For the
        expected configuration this is a dict mapping each service name to a
        dict holding a ``host`` list and a ``port`` list.
    """
    proPath = os.path.dirname(os.path.realpath(__file__))
    yamlPath = os.path.join(proPath, "host_port_conf.yaml")
    # The context manager closes the file even when YAML parsing raises.
    # NOTE(review): full_load can build more than plain mappings/lists;
    # yaml.safe_load is enough for this config format — consider switching.
    with open(yamlPath, "r", encoding="utf-8") as f:
        return yaml.full_load(f)
def Exploreport(sertype, ip, port):
    """Probe one TCP port and report whether a connection could be opened.

    Args:
        sertype: Service name; copied into the result unchanged.
        ip: Host address to connect to.
        port: Port to probe; any value accepted by ``int()``.

    Returns:
        A dict with the keys ``sertype``, ``host``, ``port`` (as a string)
        and ``status`` — 1 if the connection succeeded, 0 otherwise.
    """
    result_dic = {"sertype": sertype, "host": ip, "port": str(port), "status": 0}
    try:
        # The timeout is applied to this socket *before* connecting, so an
        # unreachable host cannot block the probe, and the process-wide
        # default timeout is left untouched. The ``with`` block closes the
        # socket whether or not the connection succeeds.
        with socket.socket() as tel:
            tel.settimeout(0.5)
            tel.connect((ip, int(port)))
    except (OSError, ValueError, TypeError, OverflowError):
        # Refused or timed-out connections, as well as malformed ip/port
        # values, are all reported as "port not reachable".
        return result_dic
    result_dic["status"] = 1
    return result_dic
def Checkport():
    """Probe every host/port combination listed in the configuration.

    The data returned by Getconfigdic() is expected to look like:
        {'zookeeper': {'host': ['192.168.7.51', '192.168.7.52'], 'port': [2181]},
         'elasticsearch': {'host': ['192.168.7.41'], 'port': [9200, 9300]}}

    Returns:
        A list holding one Exploreport() result dict per
        (service, host, port) combination, in configuration order.
    """
    config = Getconfigdic()
    probes = []
    for service in config:
        targets = config.get(service)
        hosts = targets.get("host")
        ports = targets.get("port")
        probes.extend(
            Exploreport(service, host, port) for host in hosts for port in ports
        )
    return probes
@app.route("/metrics")
def ApiResponse():
    """Run the port probes and return the results as a metrics response.

    Each entry returned by Checkport() looks like:
        {"sertype": "zookeeper", "host": "192.168.1.22", "port": "2181", "status": 0}
    and becomes one sample of the ``server_port_up`` gauge, labelled with
    ``sertype``, ``host`` and ``port`` and carrying ``status`` as its value.
    """
    probes = Checkport()
    # A registry created per request, so the output holds only this run.
    registry = CollectorRegistry(auto_describe=False)
    port_gauge = Gauge(
        "server_port_up",
        "Api response stats is:",
        ["sertype", "host", "port"],
        registry=registry,
    )
    for probe in probes:
        label_values = [
            "".join(probe.get(key)) for key in ("sertype", "host", "port")
        ]
        state = probe.get("status")
        port_gauge.labels(*label_values).set(state)
    payload = prometheus_client.generate_latest(registry)
    return Response(payload, mimetype="text/plain")
if __name__ == "__main__":
    # Listen on all interfaces so that a remote scraper can reach /metrics.
    # Debug mode is off: with the port exposed on 0.0.0.0, the Werkzeug
    # interactive debugger would let any client run arbitrary code here.
    app.run(host="0.0.0.0", port=31672, debug=False)
启动方式
nohup python3 export_moniotr_port.py &
报警配置
prometheus数据抓取配置:
scrape_configs:
- job_name: 'monitor-port'
scrape_interval: 10s
static_configs:
- targets:
- "ip:port"
prometheus监控报警配置:
- alert: Zookeeper 端口未探测到
expr: server_port_up{sertype="zookeeper"} != 1
for: 3m
labels:
severity: "非常严重"
annotations:
summary: "{{$labels.host}}:{{$labels.port}} 端口未探测到"
description: "请到服务器查看"