使用prometheus_client监控程序

Prometheus提供了一些常用的exporter,但是在进程监控上,我们需要更加详细的监控指标:程序的存活状态、内存、CPU、句柄数、线程数等等。

下面是一个示例,展示如何在机器上监控指定进程,并通过HTTP端口暴露指标供Prometheus抓取。

 

#!/usr/bin/python
# -*- coding:utf-8 -*-

import glob
import logging
import os
import psutil
import requests
import socket
import struct
import time

import fcntl
from prometheus_client import Gauge, start_http_server

# Static settings for this exporter. Within this file only the "project" key
# is read (to build the /data/<project>_* glob pattern).
# NOTE(review): a credential is hard-coded here -- consider loading it from the
# environment or a secrets store instead of committing it to source.
DataBase = dict(
    user="prometheus",
    password="AgTKZu4RkOZvqKZA",
    project="ht",
    host="127.0.0.1",
)

# Metric-name suffixes; each is appended to a role name to form a gauge name.
monitor = [
    '_cpu',
    '_memory',
    '_threads',
    '_fs',
    '_status',
    '_ctime',
]

# Console logging at DEBUG level with a timestamped record format.
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
ch = logging.StreamHandler()
ch.setFormatter(formatter)
ch.setLevel(logging.DEBUG)

logger = logging.getLogger("LOG FORMAT")
logger.addHandler(ch)
logger.setLevel(logging.DEBUG)


class GameServer(object):
    """Read-only facts about the local host: channel directories, MySQL
    memory usage, and the LAN / WAN IP addresses.
    """

    def __init__(self):
        pass

    @property
    def channel(self):
        """Return the list of channel directories under /data/.

        Directories are matched with the pattern ``/data/<project>_*`` where
        ``<project>`` comes from ``DataBase["project"]``. Each channel holds
        its own login and game servers.
        """
        return glob.glob("/data/%s_*" % DataBase["project"])

    @property
    def mysql(self):
        """Return the resident memory (RSS) of the MySQL process in MiB.

        The pid is read from the first ``*.pid`` file found under
        ``/data/mysql_data/data/``. Returns 0 when no pid file exists, when
        the file cannot be read or parsed, or when the process is not running.
        """
        pid_files = glob.glob("/data/mysql_data/data/*.pid")
        if not pid_files:
            # No pid file at all: MySQL is not running (or lives elsewhere).
            return 0
        try:
            with open(pid_files[0]) as f:
                pid = int(f.read().strip())
            p = psutil.Process(pid)
            if not p.is_running():
                return 0
            return p.memory_info().rss / 1024 / 1024
        except (IOError, OSError, ValueError, psutil.Error) as e:
            # A stale pid file makes psutil.Process raise; report "not
            # running" instead of propagating the error to the caller.
            logging.exception(e)
            return 0

    @property
    def local_ip(self):
        """Return the IPv4 address of interface ``eth0`` as a dotted string.

        Raises whatever ``fcntl.ioctl`` raises when the interface has no
        address (for example when ``eth0`` does not exist).
        """
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        try:
            # A bytes literal keeps struct.pack('256s', ...) valid on both
            # Python 2 and Python 3.
            reply = fcntl.ioctl(
                s.fileno(),
                0x8915,  # SIOCGIFADDR
                struct.pack('256s', b'eth0'[:15])
            )
        finally:
            # The socket is only a handle for the ioctl; always release it.
            s.close()
        # Bytes 20..24 of the reply are handed to inet_ntoa as the packed
        # IPv4 address.
        return socket.inet_ntoa(reply[20:24])

    @property
    def wan_ip(self):
        """Return this host's public IP from the Tencent Cloud metadata service.

        Falls back to ``'0.0.0.0'`` when the request fails, times out
        (5 seconds), or answers with an HTTP error status.
        """
        try:
            r = requests.get('http://metadata.tencentyun.com/meta-data/public-ipv4', timeout=5)
            # Do not mistake an HTTP error page for an IP address.
            r.raise_for_status()
            data = r.content.strip()
        except Exception as e:
            logging.exception(e)
            data = '0.0.0.0'
        return data


class Action:
    """Stateless helpers that discover game-server pids, sample their
    process metrics, and create the Prometheus gauges that expose them.
    """

    @staticmethod
    def _zero_metrics():
        """Return the all-zero metric set reported for a server that is down."""
        return {'cpu': 0, 'memory': 0, 'threads': 0, 'status': 0, 'fs': 0, 'ctime': 0}

    @staticmethod
    def step_1(channels):
        """Map each game-server directory name to the pid from its pid file.

        For every directory ``<channel>/<a>_<b>`` the pid file is expected at
        ``<channel>/<a>_<b>/run/<a><b>.pid``.

        :param channels: iterable of channel directory paths.
        :return: dict ``{server_dir_name: pid}`` where pid is the pid string
            read from the file, or the integer 0 when the file is missing,
            unreadable, or does not contain a number.
        """
        temp_dict = dict()
        for channel in channels:
            # Build paths instead of os.chdir(): changing the working
            # directory is a process-wide side effect.
            for server_path in glob.glob(os.path.join(channel, "*_*")):
                if not os.path.isdir(server_path):
                    continue
                server = os.path.basename(server_path)
                pid_file = os.path.join(
                    server_path, "run", "%s.pid" % "".join(server.split("_")))
                try:
                    if not os.path.exists(pid_file):
                        temp_dict[server] = 0
                        continue
                    with open(pid_file, 'r') as f:
                        pid = f.readline().strip()
                    # An empty or garbage pid file counts as "not running".
                    temp_dict[server] = pid if pid.isdigit() else 0
                except Exception as e:
                    logging.exception(e)
                    temp_dict[server] = 0
        return temp_dict

    @staticmethod
    def step_2(m_data=None):
        """Sample the monitored metrics for every server pid.

        :param m_data: dict ``{server: pid}`` as returned by ``step_1``;
            ``None`` is treated as an empty dict.
        :return: dict ``{server: metrics}`` where metrics has the keys
            ``cpu``, ``memory`` (RSS in MiB), ``threads``, ``fs`` (open file
            descriptors), ``status`` (1 running / 0 not) and ``ctime``
            (process creation time). All values are 0 when the pid is
            missing, invalid, or the process cannot be inspected.
        """
        monitor_data = dict()
        for server, pid in (m_data or {}).items():
            metrics = Action._zero_metrics()
            try:
                pid = int(pid)
            except (TypeError, ValueError):
                # A malformed pid must not take the whole exporter down.
                pid = 0
            if pid > 0:
                try:
                    p = psutil.Process(pid)
                    metrics = {
                        # Always set status so the gauge never receives None.
                        'status': 1 if p.is_running() else 0,
                        'cpu': p.cpu_percent(interval=0.1),
                        'memory': p.memory_info().rss / 1024 / 1024,
                        'threads': p.num_threads(),
                        'fs': p.num_fds(),
                        'ctime': p.create_time(),
                    }
                except Exception as e:
                    # Best effort: a process that vanished or cannot be
                    # inspected is reported as all zeros.
                    logging.exception(e)
                    metrics = Action._zero_metrics()
            monitor_data[server] = metrics
        return monitor_data

    @staticmethod
    def create_series(role=None):
        """Create one Prometheus gauge per monitored metric for a role.

        :param role: role name; the gauge name is the part of ``role`` before
            the first underscore plus the metric suffix from ``monitor``.
        :return: dict ``{role + suffix: Gauge}``; every gauge carries the
            labels ``role``, ``serverid``, ``local_ip`` and ``wan_ip``.
        """
        func = dict()
        for item in monitor:
            gauge = Gauge(role.split("_")[0] + item, role + item, ['role', 'serverid', 'local_ip', 'wan_ip'])
            func[role + item] = gauge
        return func


def main():
    """Run the exporter: serve metrics on port 8111 and refresh them forever.

    Every 60 seconds the game-server pids are re-read, their process metrics
    are sampled, and the matching gauges are updated. Gauges for a role are
    created the first time that role is seen, so servers that appear after
    startup are exported too. Errors during one refresh are logged and the
    loop carries on with the next cycle.
    """
    gs = GameServer()
    wan_ip = gs.wan_ip
    local_ip = gs.local_ip
    channels = gs.channel
    start_http_server(8111)

    series = dict()
    known_roles = set()

    while True:
        try:
            s1 = Action.step_1(channels=channels)
            s2 = Action.step_2(m_data=s1)
            for role, data in s2.items():
                key_role, key_id = role.split("_")[:2]
                if key_role not in known_roles:
                    # Creating a gauge twice raises, so register each role once.
                    series.update(Action.create_series(key_role))
                    known_roles.add(key_role)
                for m in monitor:
                    # A missing metric is exported as 0 rather than None.
                    value = data.get(m.strip("_")) or 0
                    series[key_role + m].labels(
                        role=key_role, serverid=key_id,
                        local_ip=local_ip, wan_ip=wan_ip).set(value)
        except Exception as e:
            logging.exception(e)
        # Sleep outside the try so a failing cycle cannot become a busy loop.
        time.sleep(60)  # refresh the data every 60 seconds


if __name__ == '__main__':
    main()

启动程序,使用ip+端口访问web,就可以得到你想要的数据了。

在Prometheus页面上,我们用采集到的数据检查一下。

可以看到,我们自己的exporter也采集到了数据

这样我们就能在Grafana里进行展示了。

好了,自定义的exporter就完成了。下次我们说说Prometheus如何和Consul搭配来完成自动发现。

转载于:https://my.oschina.net/jastme/blog/1548721

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值