ganglia与nagios组合使用

1.复制check_ganglia.py到/usr/lib64/nagios/plugins

check_ganglia.py(自行修改的,官方的有BUG)


#!/usr/bin/env python

import sys
import getopt
import socket
import xml.parsers.expat

class GParser:
  """Pull a single metric value for a single host out of an XML document.

  The document is scanned for a HOST element whose NAME attribute equals
  `host`; inside that host, the VAL attribute of the METRIC element whose
  NAME attribute equals `metric` is captured.

  Attributes:
    host: NAME of the HOST element to look for.
    metric: NAME of the METRIC element to look for.
    value: raw VAL attribute of the matching METRIC, or None if not found.
    inhost: 1 while the most recently opened HOST is the wanted one, else 0.
    inmetric: initialised to 0; not otherwise used by this class.
  """

  def __init__(self, host, metric):
    self.inhost = 0
    self.inmetric = 0
    self.value = None
    self.host = host
    self.metric = metric

  def parse(self, file):
    """Parse XML content and return the wanted metric as a float.

    Args:
      file: the XML document itself (text or bytes), not a file object;
        it is passed straight to the expat parser's Parse().

    Returns:
      The matching METRIC's VAL attribute converted with float().

    Raises:
      Exception: no METRIC named `metric` was found under a HOST named
        `host` (message 'Host/value not found').
      ValueError: the VAL attribute is not a valid float literal.
      xml.parsers.expat.ExpatError: the parser rejected the document.
    """
    # Start from a clean slate so a reused instance cannot return a value
    # left over from a previous document.
    self.inhost = 0
    self.value = None

    p = xml.parsers.expat.ParserCreate()
    # Bind the handler to this instance; do not rely on some global object
    # happening to exist under a particular name.
    p.StartElementHandler = self.start_element
    p.Parse(file)
    if self.value is None:
      raise Exception('Host/value not found')
    return float(self.value)

  def start_element(self, name, attrs):
    """Expat start-element callback: track the current host, grab the metric.

    Args:
      name: element name as reported by expat.
      attrs: mapping of attribute names to values for that element.
    """
    if name == "HOST":
      # Re-evaluate on every HOST so that metrics belonging to a later,
      # different host are not mistaken for the wanted host's metrics.
      if attrs.get("NAME") == self.host:
        self.inhost = 1
      else:
        self.inhost = 0
    elif self.inhost == 1 and name == "METRIC":
      if attrs.get("NAME") == self.metric:
        self.value = attrs.get("VAL")


def usage():
  """Print the command-line synopsis to stdout and exit with status 3."""
  # A single parenthesised argument prints the same text whether `print`
  # is the Python 2 statement or the Python 3 function, so the plugin no
  # longer fails to compile when `python` resolves to Python 3.
  print("""Usage: check_ganglia \
-h|--host= -m|--metric= -w|--warning= \
-c|--critical= [-s|--server=] [-p|--port=] """)
  sys.exit(3)

if __name__ == "__main__":
  # Plugin entry point. Exit statuses used below:
  #   0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN (bad usage / no value).
  ganglia_host = '127.0.0.1'   # overridden by -s/--server
  ganglia_port = 8649          # overridden by -p/--port
  host = None
  metric = None
  warning = None
  critical = None

  try:
    options, args = getopt.getopt(
      sys.argv[1:],
      "h:m:w:c:s:p:",
      ["host=", "metric=", "warning=", "critical=", "server=", "port="],
    )
  except getopt.GetoptError as err:
    print("check_gmond: %s" % err)
    usage()
    sys.exit(3)

  try:
    for o, a in options:
      if o in ("-h", "--host"):
        host = a
      elif o in ("-m", "--metric"):
        metric = a
      elif o in ("-w", "--warning"):
        warning = float(a)
      elif o in ("-c", "--critical"):
        critical = float(a)
      elif o in ("-p", "--port"):
        ganglia_port = int(a)
      elif o in ("-s", "--server"):
        ganglia_host = a
  except ValueError as err:
    # A non-numeric threshold/port would otherwise end in a traceback with
    # exit status 1, which Nagios interprets as WARNING. Report bad usage.
    print("check_gmond: %s" % err)
    usage()
    sys.exit(3)

  if critical is None or warning is None or metric is None or host is None:
    usage()
    sys.exit(3)

  s = None
  try:
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    # Bound every blocking socket call so an unresponsive server cannot
    # hang the check indefinitely.
    s.settimeout(10)
    s.connect((ganglia_host, ganglia_port))

    # Read the whole dump as raw bytes until the peer closes the connection.
    # The XML parser copes with multi-line input and with the encoding the
    # document declares, so no per-line text processing is required.
    chunks = []
    while True:
      chunk = s.recv(65536)
      if not chunk:
        break
      chunks.append(chunk)

    # Keep this bound to the module-level name `parser`; the script has
    # always exposed the parser object under that name.
    parser = GParser(host, metric)
    value = parser.parse(b"".join(chunks))
  except Exception as err:
    print("CHECKGANGLIA UNKNOWN: Error while getting value \"%s\"" % (err))
    sys.exit(3)
  finally:
    # Runs on success and on the sys.exit(3) path alike.
    if s is not None:
      s.close()

  # Threshold direction is inferred from the arguments:
  #   critical > warning  -> high values are bad (alert when value >= limit)
  #   otherwise           -> low values are bad  (alert when value <= limit)
  if critical > warning:
    is_critical = value >= critical
    is_warning = value >= warning
  else:
    is_critical = critical >= value
    is_warning = warning >= value

  if is_critical:
    print("CHECKGANGLIA CRITICAL: %s is %.2f" % (metric, value))
    sys.exit(2)
  elif is_warning:
    print("CHECKGANGLIA WARNING: %s is %.2f" % (metric, value))
    sys.exit(1)
  else:
    print("CHECKGANGLIA OK: %s is %.2f" % (metric, value))
    sys.exit(0)

2.创建/etc/nagios/objects/ganglia-services.cfg

define host {
    use linux-server        
    host_name 1.1.1.1   ; 名字随便起,这里监控的是1上的flume,就写1的ip(注意:行内注释必须用分号,#只能写在行首)
    address 1.1.1.1     ; 被监控主机的ip
}

define hostgroup {
    hostgroup_name ganglia-servers
    alias   nagios server
    members *
}

define servicegroup { 
    servicegroup_name ganglia-metrics 
    alias Ganglia Metrics
}

define command {
    command_name check_ganglia
    command_line /usr/lib64/nagios/plugins/check_ganglia.py -h mg -m $ARG1$ -w $ARG2$ -c $ARG3$ ; -h 这个需要在命令行上执行脚本看用ip还是主机名合适
}

define service { 
    use generic-service 
    name ganglia-service 
    hostgroup_name ganglia-servers
    service_groups ganglia-metrics 
    notifications_enabled 0
}

# 监控flume.CHANNEL.memoryChannel.EventPutSuccessCount,其他复制的改两个地方就行【service_description和check_command】
define service{
        max_check_attempts      5       ;
        normal_check_interval   3       ;
        retry_check_interval    2       ;
        check_period            24x7    ;
        notification_interval   60      ;
        notification_period     24x7    ;
        notification_options    w,u,c,r ;
        contact_groups          admins  ;
        use                             ganglia-service
        service_description             FLUME发送event数量 ; 网页上显示用的
        check_command                   check_ganglia!flume.CHANNEL.memoryChannel.EventPutSuccessCount!10!50 ; 直接从ganglia标题上复制就行
} 

3.修改contacts.cfg

vi /etc/nagios/objects/contacts.cfg

define contact{
    contact_name                    nagiosadmin     ; Short name of user
    use             generic-contact     ; Inherit default values from generic-contact template (defined above)
    alias                           Nagios Admin        ; Full name of user
    service_notification_period     workhours                    ;
    host_notification_period        workhours                    ;
    service_notification_options    w,u,c,r                 ;
    host_notification_options       d,u,r                   ;
    service_notification_commands   notify-service-by-email        ;
    host_notification_commands      notify-host-by-email     ;
    email                          12345@qq.com; 【复制以后只改接收邮箱地址就行】
}

define contactgroup{
    contactgroup_name       admins
    alias                   bfire
    members                 nagiosadmin
}

4.修改nagios.cfg

vi /etc/nagios/nagios.cfg

加入cfg_file=/etc/nagios/objects/ganglia-services.cfg

5.重启nagios和apache

service nagios restart
service httpd restart

6.网页设置(http://ip/ganglia)

这里写图片描述

这里写图片描述

7.查看nagios日志

more /var/log/nagios/nagios.log
这里写图片描述
SERVICE NOTIFICATION代表邮件发送成功。
这里写图片描述

8.邮件配置

yum remove sendmail
service postfix restart
## 发送测试邮件
echo "how are you today" | mail -s "test" 12345@qq.com

其他相关文章:

1. ganglia安装和配置

2. nagios安装和配置

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值