【 PushGateway 】采集Nginx stub_status指标

⭐️欢迎关注⭐️

一、Nginx开启状态页

Nginx 具体指标含义这里不再赘述,详情参考:Nginx配置主动健康检查 - https://www.cnblogs.com/szz1113/p/11772055.html

# Dedicated virtual host that exposes the Nginx status page.
server {
    listen 80;
    server_name status.pyenv.cc;
    location /upstream-status {
        # NOTE(review): the collector script in this article requests
        # /upstream-status?format=json and reads a `servers` object with
        # per-node upstream/status/rise/fall fields. That looks like the
        # output of nginx_upstream_check_module's `check_status` rather than
        # `stub_status` -- confirm which directive is intended here.
        stub_status on;
        # Do not write access-log entries for status polling.
        access_log off;
        # Only localhost may query the status page; all other clients are denied.
        allow 127.0.0.1;
        deny all;
    }
}
  • http://status.pyenv.cc/upstream-status

image.png

  • http://status.pyenv.cc/upstream-status?format=json

image.png

二、将状态信息推送至PushGateway

# cat openresty_status_monitoring.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-

try:
    import sys
    import os
    import json
    import time
    import requests
    import subprocess
    import prometheus_client
    from prometheus_client import Counter, Gauge, push_to_gateway
    from prometheus_client.core import CollectorRegistry
    REGISTRY = CollectorRegistry(auto_describe=False)
except ImportError:
    print("Import Error")
    exit()

class Component_monitor:
    """Scrape an upstream status page and push its figures to a Pushgateway.

    The status endpoint is expected to answer with JSON shaped like:

    // http://FQDN/status?format=json
    {"servers": {
      "total": 2,
      "generation": 2,
      "server": [
        {"index": 0, "upstream": "wt_teams_dev", "name": "10.6.1.211:31231", "status": "down", "rise": 0, "fall": 1860, "type": "http", "port": 0},
        {"index": 1, "upstream": "wt_teams_dev", "name": "10.6.1.211:31232", "status": "up", "rise": 0, "fall": 1860, "type": "http", "port": 0},
        {"index": 2, "upstream": "wt_rd_nodes", "name": "10.6.1.211:31230", "status": "up", "rise": 4053, "fall": 0, "type": "http", "port": 0}
      ]
    }}

    Each supported ``data_type`` is published as one gauge under its own
    Pushgateway job; the job name is ``env`` followed by a fixed suffix.
    """

    # Seconds to wait for the status page before giving up, so that a hung
    # endpoint cannot block the collector forever.
    REQUEST_TIMEOUT = 10

    # data_type -> (metric name, help text, label names, job-name suffix)
    _METRICS = {
        'total': (
            'openresty_total', 'Openresty Upstream Total',
            ['indicators'], '_openresty_total',
        ),
        'upstream_node_sum': (
            'openresty_upstream_node_sum', 'Openresty Upstream Sum',
            ['indicators', 'upstream_name'], '_upstream_node_sum',
        ),
        'upstream_node_status': (
            'openresty_upstream_node_status', 'Openresty Upstream Status',
            ['indicators', 'upstream_name', 'name'], '_upstream_node_status',
        ),
        'upstream_node_rise': (
            'openresty_upstream_node_rise', 'Openresty Upstream Rise',
            ['indicators', 'upstream_name', 'name'], '_upstream_node_rise',
        ),
        'upstream_node_fall': (
            'openresty_upstream_node_fall', 'Openresty Upstream Fall',
            ['indicators', 'upstream_name', 'name'], '_upstream_node_fall',
        ),
    }

    def __init__(self, gateway_ip, gateway_port, status_url, env):
        """Remember where to scrape from and where to push to.

        :param gateway_ip: Pushgateway host or IP.
        :param gateway_port: Pushgateway port; a string or an integer.
        :param status_url: URL of the JSON status page.
        :param env: environment tag used as prefix of every job name.
        """
        self.gateway_ip = gateway_ip
        self.gateway_port = gateway_port
        # format() instead of "+" so that an integer port does not raise.
        self.gateway_url = '{}:{}'.format(gateway_ip, gateway_port)
        self.env = env
        self.status_url = status_url

    @staticmethod
    def _count_upstreams(servers):
        """Return ``{upstream name: number of nodes}`` in first-seen order."""
        counts = {}
        for node in servers:
            upstream_name = node['upstream']
            counts[upstream_name] = counts.get(upstream_name, 0) + 1
        return counts

    @staticmethod
    def _samples(data_type, data):
        """Turn ``data`` into ``(label values, gauge value)`` pairs.

        ``data`` is only read, never modified, so the same node list can be
        handed to several ``data_type`` calls (or to the same one twice).
        """
        if data_type == 'total':
            return [(('total',), data)]
        if data_type == 'upstream_node_sum':
            return [((data_type, name), count) for name, count in data.items()]
        if data_type == 'upstream_node_status':
            # 0 means the node reports "up"; any other status counts as 1.
            return [
                ((data_type, node.get('upstream'), node.get('name')),
                 0 if node.get('status') == 'up' else 1)
                for node in data
            ]
        # upstream_node_rise / upstream_node_fall read the like-named field.
        field = data_type.rsplit('_', 1)[-1]
        return [
            ((data_type, node.get('upstream'), node.get('name')), node.get(field))
            for node in data
        ]

    def post_pushgateway(self, data_type, data,):
        """Push one group of figures to the Pushgateway.

        :param data_type: one of ``total``, ``upstream_node_sum``,
            ``upstream_node_status``, ``upstream_node_rise`` or
            ``upstream_node_fall``.
        :param data: a number for ``total``, a ``{upstream: count}`` dict for
            ``upstream_node_sum``, otherwise the list of node dicts.
        :return: a "Success" or "Error" message string. Failures while
            building or pushing the metric are reported in the returned
            string rather than raised; an unsupported ``data_type`` also
            yields an "Error" string.
        """
        spec = self._METRICS.get(data_type)
        if spec is None:
            return "【 Error 】- {} - unknown data_type".format(data_type)
        metric_name, help_text, label_names, job_suffix = spec

        try:
            # A fresh registry per push keeps each job limited to its own gauge.
            registry = CollectorRegistry()
            gauge = Gauge(metric_name, help_text, label_names, registry=registry)
            for label_values, value in self._samples(data_type, data):
                gauge.labels(*label_values).set(value)
            push_to_gateway(self.gateway_url, job=self.env + job_suffix, registry=registry)
        except Exception as e:
            return "【 Error 】- {} - {}".format(data_type, e)
        return "【 Success 】- {} is ok".format(data_type)

    def http_status_response(self):
        """Fetch the status page and push every metric group.

        :return: dict with a ``date`` stamp and, under ``total``,
            ``node_sum``, ``node_status``, ``node_rise`` and ``node_fall``,
            the message returned by the corresponding push.
        :raises requests.RequestException: on timeout, connection failure or
            a non-2xx answer from the status page.
        :raises ValueError: if the body is not valid JSON.
        :raises KeyError: if the JSON lacks the expected ``servers`` fields.
        """
        with requests.get(url=self.status_url, timeout=self.REQUEST_TIMEOUT) as resp:
            resp.raise_for_status()
            result = resp.json()

        # Read everything needed up front so that a malformed payload fails
        # before anything has been pushed.
        total = result['servers']['total']
        servers = result['servers']['server']

        # e.g. {'wt-ios': 2, 'wt_teams_dev': 2, 'wt_teams_api': 1}
        node_counts = self._count_upstreams(servers)

        response = {
            "date": time.strftime("%Y-%m-%d-%H_%M_%S", time.localtime()),
            "total": self.post_pushgateway(data_type='total', data=total),
            "node_sum": self.post_pushgateway(data_type='upstream_node_sum', data=node_counts),
            "node_status": self.post_pushgateway(data_type='upstream_node_status', data=servers),
            "node_rise": self.post_pushgateway(data_type='upstream_node_rise', data=servers),
            "node_fall": self.post_pushgateway(data_type='upstream_node_fall', data=servers),
        }
        return response

if __name__ == '__main__':
    # Collector settings: status page to scrape, Pushgateway to push to, and
    # the environment identifier that prefixes every job name.
    monitor_settings = dict(
        # Pushgateway Service Address
        gateway_ip="127.0.0.1",
        # Pushgateway Service NodePort Port
        gateway_port="9091",
        status_url="http://status.pyenv.cc/upstream-status?format=json",
        # Environment identifier
        env="alpha",
    )
    # Run one scrape-and-push cycle and show the per-metric outcome.
    print(Component_monitor(**monitor_settings).http_status_response())
# 执行脚本测试
root@yc-dev-k8s-control-plane-01-ningxia:~/xxx_scripts/python_scripts/pushgateway_scripts# python3 openresty_status_monitoring.py
{'date': '2022-01-20-02_50_31', 'total': '【 Success 】- total is ok', 'node_sum': '【 Success 】- upstream_node_sum is ok', 'node_status': '【 Success 】- upstream_node_status is ok', 'node_rise': '【 Success 】- upstream_node_rise is ok', 'node_fall': '【 Success 】- upstream_node_fall is ok'}
  • pushgateway job

image.png

  • prometheus graph

image.png

image.png

image.png

image.png

image.png

三、Grafana Template

模版过于简单,这里不再阐述;

image.png

四、Prometheus Rules

# cat prometheus-rules.yaml
  - name: kubernetes-absent
    rules:
    # openresty is down
    # alpha
    - alert: OpenrestyNodeDownOther01
      annotations:
        message: Alpha Node group {{ $labels.exported_job }}/{{ $labels.upstream_name }} ({{ $labels.name }}) is down;
      # The per-second rate over 5m is scaled by 60 * 5 to approximate the
      # change across the window; the "*" after rate(...) is required, without
      # it the expression does not parse.
      # NOTE(review): the collector script publishes this gauge as
      # `openresty_upstream_node_status` (0 = up, 1 = not up) under job
      # `<env>_upstream_node_status`; confirm that the `alpha_`-prefixed
      # metric name actually exists in Prometheus.
      # NOTE(review): rate() is meant for counters; on a 0/1 gauge this only
      # matches around an up -> down transition. Verify that is the intent,
      # or compare the gauge against 1 directly.
      expr: |
        rate(alpha_openresty_upstream_node_status[5m]) * 60 * 5 > 0
      for: 1m
      labels:
        severity: critical

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
一、prometheus简介 Prometheus是一个开源的系统监控和告警系统,现在已经加入到CNCF基金会,成为继k8s之后第二个在CNCF维护管理的项目,在kubernetes容器管理系统中,通常会搭配prometheus进行监控,prometheus支持多种exporter采集数据,还支持通过pushgateway进行数据上报,Prometheus在性能上可支撑上万台规模的集群。 二、prometheus架构图 三、prometheus组件介绍 1.Prometheus Server: 用于收集和存储时间序列数据。 2.Client Library: 客户端库,检测应用程序代码,当Prometheus抓取实例的HTTP端点时,客户端库会将所有跟踪的metrics指标的当前状态发送到prometheus server端。 3.Exporters: prometheus支持多种exporter,通过exporter可以采集metrics数据,然后发送到prometheus server端。 4.Alertmanager: 从 Prometheus server 端接收到 alerts 后,会进行去重,分组,并路由到相应的接收方,发出报警,常见的接收方式有:电子邮件,微信,钉钉, slack等。 5.Grafana:监控仪表盘 6.pushgateway: 各个目标主机可上报数据到pushgateway,然后prometheus server统一从pushgateway拉取数据。 四、课程亮点 五、效果图展示 六、讲师简介 先超(lucky):高级运维工程师、资深DevOps工程师,在互联网上市公司拥有多年一线运维经验,主导过亿级pv项目的架构设计和运维工作 主要研究方向: 1.云计算方向:容器 (kubernetes、docker),虚拟化(kvm、Vmware vSphere),微服务(istio),PaaS(openshift),IaaS(openstack)等 2.系统/运维方向:linux系统下的常用组件(nginx,tomcat,elasticsearch,zookeeper,kafka等),DevOps(Jenkins+gitlab+sonarqube+nexus+k8s),CI/CD,监控(zabbix、prometheus、falcon)等 七、课程大纲

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值