一 监控原理描述

twemproxy的状态(stats)信息默认通过22222端口提供,可以使用nc等工具直接访问该端口,输出为JSON格式

$ nc 127.0.0.1 22222
{"service":"nutcracker", "source":"gintama-taiwan-redis2", "version":"0.4.0", "uptime":67135, "timestamp":1427100529, "total_connections":376, "curr_connections":7, "gintama": {"client_eof":0, "client_err":311, "client_connections":0, "server_ejects":58, "forward_error":53, "fragments":0, "192.168.100.68:6379": {"server_eof":1, "server_err":12, "server_timedout":0, "server_connections":1, "server_ejected_at":1427074216551863, "requests":553, "request_bytes":333854, "responses":541, "response_bytes":265633, "in_queue":0, "in_queue_bytes":0, "out_queue":0, "out_queue_bytes":0},"192.168.100.68:6380": {"server_eof":1, "server_err":9, "server_timedout":0, "server_connections":1, "server_ejected_at":1427074216552893, "requests":20, "request_bytes":1551, "responses":11, "response_bytes":207, "in_queue":0, "in_queue_bytes":0, "out_queue":0, "out_queue_bytes":0},"192.168.100.68:6381": {"server_eof":1, "server_err":1, "server_timedout":0, "server_connections":1, "server_ejected_at":1427037406770236, "requests":287, "request_bytes":10718, "responses":286, "response_bytes":23430, "in_queue":0, "in_queue_bytes":0, "out_queue":0, "out_queue_bytes":0},"192.168.100.69:6379": {"server_eof":1, "server_err":11, "server_timedout":0, "server_connections":1, "server_ejected_at":1427074216550647, "requests":55, "request_bytes":2633, "responses":44, "response_bytes":220, "in_queue":0, "in_queue_bytes":0, "out_queue":0, "out_queue_bytes":0},"192.168.100.69:6380": {"server_eof":1, "server_err":9, "server_timedout":0, "server_connections":1, "server_ejected_at":1427040015877803, "requests":291, "request_bytes":15563, "responses":282, "response_bytes":1402, "in_queue":0, "in_queue_bytes":0, "out_queue":0, "out_queue_bytes":0},"192.168.100.69:6381": {"server_eof":0, "server_err":11, "server_timedout":0, "server_connections":1, "server_ejected_at":1427059816411041, "requests":193, "request_bytes":17292, "responses":182, "response_bytes":1767, "in_queue":0, "in_queue_bytes":0, "out_queue":0, 
"out_queue_bytes":0}}}


可以使用工具整理一下输出

{
    "service": "nutcracker",
    "source": "gintama-taiwan-redis2",
    "version": "0.4.0",
    "uptime": 64944,
    "timestamp": 1427098338,
    "total_connections": 374,
    "curr_connections": 7,
    "gintama": {
        "client_eof": 0,
        "client_err": 309,
        "client_connections": 0,
        "server_ejects": 58,
        "forward_error": 53,
        "fragments": 0,
        "192.168.100.68:6379": {
            "server_eof": 1,
            "server_err": 12,
            "server_timedout": 0,
            "server_connections": 1,
            "server_ejected_at": 1427074216551863,
            "requests": 552,
            "request_bytes": 333808,
            "responses": 540,
            "response_bytes": 265628,
            "in_queue": 0,
            "in_queue_bytes": 0,
            "out_queue": 0,
            "out_queue_bytes": 0
        },
        "192.168.100.68:6380": {
            "server_eof": 1,
            "server_err": 9,
            "server_timedout": 0,
            "server_connections": 1,
            "server_ejected_at": 1427074216552893,
            "requests": 19,
            "request_bytes": 1498,
            "responses": 10,
            "response_bytes": 202,
            "in_queue": 0,
            "in_queue_bytes": 0,
            "out_queue": 0,
            "out_queue_bytes": 0
        },
        "192.168.100.68:6381": {
            "server_eof": 1,
            "server_err": 1,
            "server_timedout": 0,
            "server_connections": 1,
            "server_ejected_at": 1427037406770236,
            "requests": 286,
            "request_bytes": 10672,
            "responses": 285,
            "response_bytes": 23425,
            "in_queue": 0,
            "in_queue_bytes": 0,
            "out_queue": 0,
            "out_queue_bytes": 0
        },
        "192.168.100.69:6379": {
            "server_eof": 1,
            "server_err": 11,
            "server_timedout": 0,
            "server_connections": 1,
            "server_ejected_at": 1427074216550647,
            "requests": 54,
            "request_bytes": 2580,
            "responses": 43,
            "response_bytes": 215,
            "in_queue": 0,
            "in_queue_bytes": 0,
            "out_queue": 0,
            "out_queue_bytes": 0
        },
        "192.168.100.69:6380": {
            "server_eof": 1,
            "server_err": 9,
            "server_timedout": 0,
            "server_connections": 1,
            "server_ejected_at": 1427040015877803,
            "requests": 291,
            "request_bytes": 15563,
            "responses": 282,
            "response_bytes": 1402,
            "in_queue": 0,
            "in_queue_bytes": 0,
            "out_queue": 0,
            "out_queue_bytes": 0
        },
        "192.168.100.69:6381": {
            "server_eof": 0,
            "server_err": 11,
            "server_timedout": 0,
            "server_connections": 1,
            "server_ejected_at": 1427059816411041,
            "requests": 193,
            "request_bytes": 17292,
            "responses": 182,
            "response_bytes": 1767,
            "in_queue": 0,
            "in_queue_bytes": 0,
            "out_queue": 0,
            "out_queue_bytes": 0
        }
    }
}


由此可知,zabbix需要监控service、total_connections等几个固定字段。同时,由于配置文件中的各个pool以及各个pool下面的redis主机都是可变的、不固定的,所以需要使用zabbix的低级发现(LLD)功能来发现由pool和redis主机构成的键值对。例如,通过zabbix发现

"{#REDIS_SERVER}":"gintamaXXXX192.168.100.69:6381"

这样的键值对。然后再根据键值对获取各个字段的值。


各个字段的含义可以使用nutcracker -D获取

pool stats:
  client_eof          "# eof on client connections"
  client_err          "# errors on client connections"
  client_connections  "# active client connections"
  server_ejects       "# times backend server was ejected"
  forward_error       "# times we encountered a forwarding error"
  fragments           "# fragments created from a multi-vector request"

server stats:
  server_eof          "# eof on server connections"
  server_err          "# errors on server connections"
  server_timedout     "# timeouts on server connections"
  server_connections  "# active server connections"
  server_ejected_at   "timestamp when server was ejected in usec since epoch"
  requests            "# requests"
  request_bytes       "total request bytes"
  responses           "# responses"
  response_bytes      "total response bytes"
  in_queue            "# requests in incoming queue"
  in_queue_bytes      "current request bytes in incoming queue"
  out_queue           "# requests in outgoing queue"
  out_queue_bytes     "current request bytes in outgoing queue"







二 编写twemproxy的pool和redis主机发现脚本


twemproxy_pools_discovery.py

#!/usr/bin/python
"""Zabbix low-level discovery (LLD) script for twemproxy pools.

Reads the nutcracker YAML configuration, treats every top-level key as a
pool name, and prints a Zabbix LLD JSON document containing one
``{#REDIS_POOL}`` entry per pool, sorted by pool name.
"""

import json

import yaml

config_file = '/data/app_platform/twemproxy/conf/nutcracker.yml'

with open(config_file, 'r') as f:
    # safe_load only builds plain Python objects; plain yaml.load can
    # instantiate arbitrary objects from tagged input.
    # An empty config file parses to None, so fall back to "no pools".
    data = yaml.safe_load(f) or {}

pools = [{'{#REDIS_POOL}': pool} for pool in sorted(data)]

# Single-argument print() behaves the same on Python 2 and Python 3.
print(json.dumps({'data': pools}, indent=4, separators=(',', ':')))



执行情况

$ python twemproxy_pools_discovery.py 
{
    "data":[
        {
            "{#REDIS_POOL}":"gintama"
        }
    ]
}



twemproxy_pools_servers_discovery.py 


#!/usr/bin/python
"""Zabbix low-level discovery (LLD) script for twemproxy pool/server pairs.

Reads the nutcracker YAML configuration and prints a Zabbix LLD JSON
document with one ``{#REDIS_SERVER}`` entry per configured server, in the
form ``<pool>XXXX<host>:<port>``.
"""

import json

import yaml

# NOTE(review): /tmp looks like a debugging location -- confirm this is the
# configuration file twemproxy is actually started with.
config_file = '/tmp/nutcracker.yml'

pools_servers = []

with open(config_file, 'r') as f:
    # safe_load only builds plain Python objects; plain yaml.load can
    # instantiate arbitrary objects from tagged input.
    # An empty config file parses to None, so fall back to "no pools".
    data = yaml.safe_load(f) or {}

for pool in sorted(data):
    for server in data[pool]['servers']:
        # Server entries are colon separated; only the first two fields
        # (host and port) are kept, anything after them is dropped.
        host, port = server.split(':')[:2]
        # The macro name must be {#REDIS_SERVER} (singular) to match the
        # documented discovery output and the item keys built from it.
        pools_servers.append({'{#REDIS_SERVER}': pool + 'XXXX' + host + ':' + port})

# Single-argument print() behaves the same on Python 2 and Python 3.
print(json.dumps({'data': pools_servers}, indent=4, separators=(',', ':')))



在编写脚本的过程中需要边编写边调试才能达到自己想要的效果



执行情况如下:


$ python twemproxy_pools_servers_discovery.py 
{
    "data":[
        {
            "{#REDIS_SERVER}":"gintamaXXXX192.168.100.68:6379"
        },
        {
            "{#REDIS_SERVER}":"gintamaXXXX192.168.100.68:6380"
        },
        {
            "{#REDIS_SERVER}":"gintamaXXXX192.168.100.68:6381"
        },
        {
            "{#REDIS_SERVER}":"gintamaXXXX192.168.100.69:6379"
        },
        {
            "{#REDIS_SERVER}":"gintamaXXXX192.168.100.69:6380"
        },
        {
            "{#REDIS_SERVER}":"gintamaXXXX192.168.100.69:6381"
        }
    ]
}



三 编写twemproxy状态信息获取脚本

twemproxy_status.py

这个脚本需要安装argparse模块

Python2.6使用pip install argparse安装

Python 2.7起,argparse模块已包含在标准库中,无需单独安装


#! /usr/bin/env python
import socket
import json
import argparse

#{
#    "service": "nutcracker",
#    "source": "gintama-taiwan-redis1",
#    "version": "0.4.0",
#    "uptime": 136873,
#    "timestamp": 1427168759,
#    "total_connections": 489,
#    "curr_connections": 7,
#    "gintama": {
#        "client_eof": 0,
#        "client_err": 420,
#        "client_connections": 0,
#        "server_ejects": 62,
#        "forward_error": 57,
#        "fragments": 0,
#        "192.168.100.68:6379": {
#            "server_eof": 1,
#            "server_err": 10,
#            "server_timedout": 0,
#            "server_connections": 1,
#            "server_ejected_at": 1427074216548518,
#            "requests": 627,
#            "request_bytes": 329980,
#            "responses": 617,
#            "response_bytes": 280709,
#            "in_queue": 0,
#            "in_queue_bytes": 0,
#            "out_queue": 0,
#            "out_queue_bytes": 0
#        },
#
#

##"{#REDIS_SERVER}":"gintamaXXXX192.168.100.69:6381"
##"{#REDIS_POOL}":"gintama"




class NutcrackerServer(object):
    """Client for the twemproxy (nutcracker) stats endpoint.

    The endpoint sends one JSON document and then closes the connection.
    The parsed document is stored in ``self.data``; the ``*_info`` methods
    refresh it and print a single value to stdout.
    """

    def __init__(self):
        """Point the client at the local stats endpoint (127.0.0.1:22222)."""
        self.server = '127.0.0.1'
        self.port = '22222'

    def nutcracker_status(self):
        """Fetch the stats document and store the parsed JSON in ``self.data``.

        Raises:
            socket.error: if the connection or a read fails.
            ValueError: if the response is not valid JSON.
        """
        conn = socket.create_connection((self.server, self.port))
        chunks = []
        try:
            # Read until the peer closes the connection (recv returns empty).
            while True:
                buf = conn.recv(4096)
                if not buf:
                    break
                chunks.append(buf)
        finally:
            # Always release the socket, even when recv() raises.
            conn.close()
        # Join once and decode so the payload is text on Python 2 and 3 alike.
        self.data = json.loads(b''.join(chunks).decode('utf-8'))

    def nutcracker_info(self, base_metric):
        """Refresh the stats and print the top-level field ``base_metric``.

        Raises:
            KeyError: if ``base_metric`` is not present in the stats.
        """
        self.nutcracker_status()
        print(self.data[base_metric])

    def nutcracker_pool_info(self, pool, pool_metric):
        """Refresh the stats and print ``pool_metric`` of pool ``pool``.

        Raises:
            KeyError: if the pool or the metric is not present in the stats.
        """
        self.nutcracker_status()
        print(self.data[pool][pool_metric])

    def nutcracker_server_info(self, pool, redis_server, server_metric):
        """Refresh the stats and print ``server_metric`` of one backend server.

        ``redis_server`` is the server's key inside the pool section of the
        stats document, e.g. ``192.168.100.68:6379``.

        Raises:
            KeyError: if the pool, server or metric is not present.
        """
        self.nutcracker_status()
        print(self.data[pool][redis_server][server_metric])


def parse_args(argv=None):
    """Parse the command-line options of the status script.

    Args:
        argv: optional list of argument strings; when None, argparse reads
            ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with ``metric``, ``poolname`` and ``poolserver``.
    """
    parser = argparse.ArgumentParser(description='Twemproxy monitoring tool with Zabbix!')
    # --metric is required, so a default value would never be used.
    parser.add_argument('--metric', dest='metric', action='store', required=True,
                        help='the twemproxy metric, such as uptime or version, etc')
    parser.add_argument('--poolname', default='gintama', dest='poolname', action='store', required=False,
                        help='the twemproxy pool discovered by zabbix')
    parser.add_argument('--poolserver', default='gintamaXXXX192.168.100.69:6380', dest='poolserver',
                        action='store', required=False,
                        help='the twemproxy pool and server discovered by zabbix, as <pool>XXXX<host>:<port>')
    return parser.parse_args(argv)

def main():
    """Print the value of the requested twemproxy metric to stdout.

    The metric name decides which other option is used:

    * base metrics (service, uptime, ...) need nothing else;
    * pool metrics use ``--poolname``;
    * server metrics use ``--poolserver`` in the form
      ``<pool>XXXX<host>:<port>``.

    An unknown metric or an unusable pool / pool-server value prints an
    "invalid ..." message instead of a value.
    """
    base_metrics = ("service", "source", "version", "uptime", "timestamp",
                    "total_connections", "curr_connections")
    pool_metrics = ("client_eof", "client_err", "client_connections",
                    "server_ejects", "forward_error", "fragments")
    server_metrics = ("server_eof", "server_err", "server_timedout",
                      "server_connections", "server_ejected_at", "requests",
                      "request_bytes", "responses", "response_bytes",
                      "in_queue", "in_queue_bytes", "out_queue",
                      "out_queue_bytes")

    args = parse_args()
    metric = args.metric
    twemproxy = NutcrackerServer()

    if metric in base_metrics:
        twemproxy.nutcracker_info(metric)
    elif metric in pool_metrics:
        # Pool metrics take the pool from --poolname only; --poolserver
        # always has a default and must not override it.
        if args.poolname:
            twemproxy.nutcracker_pool_info(args.poolname, metric)
        else:
            print("invalid pool name")
    elif metric in server_metrics:
        # The discovery value is "<pool>XXXX<host>:<port>" (four X).
        # partition() never raises, so a value without the separator is
        # reported instead of crashing with IndexError.
        pool, sep, redis_server = (args.poolserver or '').partition('XXXX')
        if sep and pool and redis_server:
            twemproxy.nutcracker_server_info(pool, redis_server, metric)
        else:
            print("invalid pool server")
    else:
        print("invalid metric")



# Run the metric lookup only when executed as a script, not when imported.
if '__main__' == __name__:
    main()


四 添加zabbix子配置文件

twemproxy_status.conf

### Option: UserParameter
#       User-defined parameter to monitor. There can be several user-defined parameters.
#       Format: UserParameter=<key>,<shell command>
#       See 'zabbix_agentd' directory for examples.
#
# Mandatory: no
# Default:
# UserParameter=
UserParameter=redis_pool.discovery,/usr/bin/python /usr/local/zabbix/bin/twemproxy_pools_discovery.py
UserParameter=redis_server.discovery,/usr/bin/python /usr/local/zabbix/bin/twemproxy_pools_servers_discovery.py
UserParameter=nutcracker_info[*],/usr/bin/python /usr/local/zabbix/bin/twemproxy_status.py --metric $1
UserParameter=nutcracker_pool_info[*],/usr/bin/python /usr/local/zabbix/bin/twemproxy_status.py --metric $1 --poolname $2
UserParameter=nutcracker_server_info[*],/usr/bin/python /usr/local/zabbix/bin/twemproxy_status.py --metric $1 --poolserver $2


五 添加zabbix模板

参见附件


参考文档:

http://www.bejson.com/

https://github.com/gfranxman/NutcrackerMonitor/blob/master/ballgazer.py