nagios 监控内存和CPU,磁盘等使用情况

一、

i.被监控机上监控内存使用情况,在被监控机上操作
cd /usr/local/nagios/libexec
vi check_mem.sh

#!/bin/bash
#
# check_mem.sh - Nagios/NRPE plugin that reports physical memory usage.
#
# Usage:
#   check_mem.sh -w <warnlevel> -c <critlevel>
#
# warnlevel and critlevel are positive integer percentages (no % sign).
# "Used" memory is MemTotal - MemFree - Buffers - Cached, all read from
# /proc/meminfo in a single pass so the figures are mutually consistent and
# do not depend on the column layout of a particular free(1) version.
#
# Exit status (Nagios plugin convention):
#   0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN (bad arguments / no memory data)

STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

# Print the usage text and exit UNKNOWN, so that a misconfigured check is
# never reported to Nagios as OK.
usage() {
        echo "check_mem v1.1"
        echo ""
        echo "Usage:"
        echo "check_mem.sh -w <warnlevel> -c <critlevel>"
        echo ""
        echo "warnlevel and critlevel is percentage value without %"
        echo ""
        echo "Copyright (C) 2012 Lukasz Gogolin (lukasz.gogolin@gmail.com)"
        exit "$STATE_UNKNOWN"
}

# Succeed only when $1 is a decimal integer greater than zero.
is_positive_int() {
        [[ "$1" =~ ^[0-9]+$ ]] && (( 10#$1 > 0 ))
}

if [[ "$1" != "-w" || "$3" != "-c" ]] \
        || ! is_positive_int "$2" || ! is_positive_int "$4"; then
        usage
fi

# Force base 10 so that a threshold such as "08" is not parsed as octal.
warnLevel=$((10#$2))
critLevel=$((10#$4))

# Collect the four figures (in kB) from /proc/meminfo in one read.
memTotal_k=0
memFree_k=0
memBuffer_k=0
memCache_k=0
if [[ -r /proc/meminfo ]]; then
        while read -r key value _; do
                case "$key" in
                        MemTotal:) memTotal_k=$value ;;
                        MemFree:)  memFree_k=$value ;;
                        Buffers:)  memBuffer_k=$value ;;
                        Cached:)   memCache_k=$value ;;
                esac
        done < /proc/meminfo
fi

# Without a usable total the percentage cannot be computed (and would
# divide by zero), so report UNKNOWN instead of guessing.
if ! [[ "$memTotal_k" =~ ^[0-9]+$ ]] || (( memTotal_k == 0 )); then
        echo "Memory: UNKNOWN - could not read memory totals from /proc/meminfo"
        exit "$STATE_UNKNOWN"
fi

memUsed_k=$((memTotal_k - memFree_k - memBuffer_k - memCache_k))
# Guard against a negative result on systems that report unusual figures.
if (( memUsed_k < 0 )); then
        memUsed_k=0
fi

# Bytes are used for the performance data, MB for the human-readable text.
memTotal_b=$((memTotal_k * 1024))
memUsed_b=$((memUsed_k * 1024))
memBuffer_b=$((memBuffer_k * 1024))
memCache_b=$((memCache_k * 1024))
memTotal_m=$((memTotal_k / 1024))
memUsed_m=$((memUsed_k / 1024))

memUsedPrc=$((memUsed_k * 100 / memTotal_k))

perfData="TOTAL=$memTotal_b;;;; USED=$memUsed_b;;;; CACHE=$memCache_b;;;; BUFFER=$memBuffer_b;;;;"

# Critical is tested first so it wins when both thresholds are exceeded.
if (( memUsedPrc >= critLevel )); then
        echo "Memory: CRITICAL Total: $memTotal_m MB - Used: $memUsed_m MB - $memUsedPrc% used!|$perfData"
        exit "$STATE_CRITICAL"
elif (( memUsedPrc >= warnLevel )); then
        echo "Memory: WARNING Total: $memTotal_m MB - Used: $memUsed_m MB - $memUsedPrc% used!|$perfData"
        exit "$STATE_WARNING"
else
        echo "Memory: OK Total: $memTotal_m MB - Used: $memUsed_m MB - $memUsedPrc% used|$perfData"
        exit "$STATE_OK"
fi

ii.chmod +x check_mem.sh
测试check_mem脚本是否能正常使用
./check_mem.sh -w 80 -c 90

iii.修改nrpe.cfg
vi /usr/local/nagios/etc/nrpe.cfg
增加一行(路径需与上面check_mem.sh的实际存放目录一致):command[check_mem]=/usr/local/nagios/libexec/check_mem.sh -w 80 -c 90

iv.重启电脑(也可以只结束旧的nrpe进程,例如 pkill nrpe,这样修改后的nrpe.cfg会在下一步启动nrpe时生效)

v.执行命令启动nrpe:/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d

二、在监控机上添加commands.cfg配置
vi /etc/nagios/objects/commands.cfg

# Command definition named check_mem: runs check_mem.sh from the $USER1$
# directory, passing $ARG1$ as the -w value and $ARG2$ as the -c value.
# NOTE(review): the service definition in this document uses
# check_nrpe!check_mem, which does not reference this command - confirm
# whether this local definition is actually needed.
define command{
        command_name        check_mem
        command_line        $USER1$/check_mem.sh -w $ARG1$ -c $ARG2$
        }

在监控机上,编辑被监控机对应的配置文件并添加以下服务定义:vi /usr/local/nagios/etc/objects/linux37.cfg

# Service for host Nagios_Centos_Client1 that inherits from the
# generic-service template and is checked with check_nrpe, passing
# check_mem as its argument.
define service{
        use                     generic-service
        host_name               Nagios_Centos_Client1
        service_description     Memery Monitoring
        check_command           check_nrpe!check_mem
        }
重启nagios即可:systemctl restart nagios

三、检查CPU,步骤同上。

 把check_cpu.sh 文件放在libexec目录下。

#!/bin/bash
# Filename: check_cpu.sh
#
# Nagios/NRPE plugin: reports the percentage of CPU in use, derived from the
# idle figure printed on the "Cpu(s)" summary line of `top`, and maps it onto
# the OK / WARNING / CRITICAL states.
#
# Usage: check_cpu.sh -w <integer> -c <integer>
#
# Exit status (Nagios plugin convention):
#   0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN (bad arguments / no CPU data)
#
# The script needs bash: it relies on [[ ]], (( )) and echo -e.

STATE_UNKNOWN=3

# Print the help text and exit UNKNOWN.
print_help() {
        echo -e "\n\tThis plugin shows the % of used CPU, as reported by top\n\n\t$0:\n\t\t-c <integer>\tIf the % of used CPU is above <integer>, returns CRITICAL state\n\t\t-w <integer>\tIf the % of used CPU is below CRITICAL and above <integer>, returns WARNING state\n"
        exit "$STATE_UNKNOWN"
}

# Getting parameters:
warning=""
critical=""
while getopts "w:c:h" OPT; do
        case "$OPT" in
                w) warning=$OPTARG ;;
                c) critical=$OPTARG ;;
                h) print_help ;;
                *) print_help ;;   # unknown option or missing argument
        esac
done

# Checking parameters:
if [[ -z "$warning" || -z "$critical" ]]; then
        echo "ERROR: You must specify warning and critical levels"
        print_help
fi
if ! [[ "$warning" =~ ^[0-9]+$ && "$critical" =~ ^[0-9]+$ ]]; then
        echo "ERROR: warning and critical levels must be non-negative integers"
        print_help
fi
# Force base 10 so that a value such as "08" is not parsed as octal.
warning=$((10#$warning))
critical=$((10#$critical))
if (( warning >= critical )); then
        echo "ERROR: critical level must be highter than warning level"
        print_help
fi

# Assuring that the needed tool exists:
if ! command -v top >/dev/null 2>&1; then
        echo "ERROR: top is required to run this plugin"
        exit "$STATE_UNKNOWN"
fi

# Doing the actual check:
# The idle value is located by its "id" label rather than by field position,
# because the summary line differs between top versions
# ("99.5%id," versus "99.5 id," or "ni,100.0 id"). LC_ALL=C keeps the decimal
# separator a dot.
# NOTE(review): the sampling interval behind the first batch-mode frame
# depends on the top version - confirm it is acceptable for this check.
idle=$(LC_ALL=C top -b -n 1 | awk '
        /Cpu\(s\)/ && match($0, /[0-9]+(\.[0-9]+)?[ %]*id/) {
                value = substr($0, RSTART, RLENGTH)
                sub(/[ %]*id$/, "", value)
                print value
                exit
        }')

if ! [[ "$idle" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
        echo "UNKNOWN - could not read the CPU idle percentage from top"
        exit "$STATE_UNKNOWN"
fi

# Used CPU is whatever is not idle, rounded to an integer so it can be
# compared against the integer thresholds.
used=$(awk -v idle="$idle" 'BEGIN { printf "%.0f", 100 - idle }')

# Comparing the result and setting the correct level:
if (( used >= critical )); then
        msg="CRITICAL"
        status=2
elif (( used >= warning )); then
        msg="WARNING"
        status=1
else
        msg="OK"
        status=0
fi

# Printing the results:
echo "$msg - CPU used=$used% idle=$idle% | 'CPU Usage'=$used%;$warning;$critical;"
# Bye!
exit "$status"

四、Nagios服务端check_nt检查命令介绍(用于监控Windows系统)

# Check the total uptime since the last system boot
check_nt -H 192.168.1.121 -p 12489 -s 12345 -v UPTIME

# Check memory usage
check_nt -H 192.168.1.121 -p 12489 -s 12345 -v MEMUSE -w 80 -c 90

# Check the client version information
check_nt -H 192.168.1.121 -p 12489 -s 12345 -v CLIENTVERSION

# Check CPU usage over the last 5 minutes
check_nt -H 192.168.1.121 -p 12489 -s 12345 -v CPULOAD -w 80 -c 90 -l 5,80,90

# Check the usage of disk C
check_nt -H 192.168.1.121 -p 12489 -s 12345 -v USEDDISKSPACE -d SHOWALL -l C

# Check the state of a service
check_nt -H 192.168.1.121 -p 12489 -s 12345 -v SERVICESTATE -l Spooler -d SHOWALL

# Check the state of a process
check_nt -H 192.168.1.121 -p 12489 -s 12345 -v PROCSTATE -l spark.exe -d SHOWALL

# List all process instances
check_nt -H 192.168.1.121 -p 12489 -s 12345 -v INSTANCES -l process

五、一个完整的windows配置文件

[root@nagios objects]# cd winserver
[root@nagios winserver]# vi winhost_172.cfg
# Define the monitored host: host name (must be unique), alias and IP address.
define host{
        use             windows-server
        host_name       winhost_172
        alias           ywzhou_pc
        address         10.188.1.172
        }
# Define the host group. Only one file under the winserver directory should
# define it; do not define it again in the other files.
define hostgroup{
        hostgroup_name  windows-servers
        alias           Windows Servers
        }
# Part 1: services based on the check_nt command.
# A service_description must not be repeated within one configuration file.
# Monitor the NSClient++ client software version.
define service{
        use                     generic-service
        host_name               winhost_172
        service_description     NSClient++ Version
        check_command           check_nt!CLIENTVERSION
        }
# Monitor uptime.
define service{
        use                     generic-service
        host_name               winhost_172
        service_description     Uptime
        check_command           check_nt!UPTIME
        }
# Monitor CPU load: WARNING above 80%, CRITICAL above 90%.
define service{
        use                     generic-service
        host_name               winhost_172
        service_description     CPU Load
        check_command           check_nt!CPULOAD!-l 5,80,90
        }
# Monitor memory usage: WARNING above 80%, CRITICAL above 90%.
define service{
        use                     generic-service
        host_name               winhost_172
        service_description     Memory Usage
        check_command           check_nt!MEMUSE!-w 80 -c 90
        }
# Monitor the usage of drive C; copy this service block to monitor other drives.
define service{
        use                     generic-service
        host_name               winhost_172
        service_description     C:\ Drive Space
        check_command           check_nt!USEDDISKSPACE!-l c -w 80 -c 90
        }
# Monitor the state of a system service (whether it is started). The default
# W3SVC is the IIS service; copy this service block to monitor other system services.
# Look at the Services tab in Task Manager to decide which services are
# important enough to monitor, e.g. IIS or SQL Server.
define service{
        use                     generic-service
        host_name               winhost_172
        service_description     W3SVC
        check_command           check_nt!SERVICESTATE!-d SHOWALL -l W3SVC
        }
# Monitor the state of a program (whether it is running). The default
# Explorer.exe is the desktop process; copy this service block to monitor other programs.
# Look at the Processes tab in Task Manager to decide which processes are
# important enough to monitor.
define service{
        use                     generic-service
        host_name               winhost_172
        service_description     Explorer
        check_command           check_nt!PROCSTATE!-d SHOWALL -l Explorer.exe
        }

# Part 2: services based on the check plugins.
# The "Enable common check plugins" feature was enabled when NSCP was installed.
# The check plugins are the monitoring plugins bundled with Nagios, located
# under /usr/local/nagios/libexec.
# Monitor the FTP service.
define service{
        use                     generic-service
        host_name               winhost_172
        service_description     FTP
        check_command           check_ftp
        }
# Monitor the HTTP service.
define service{
        use                     generic-service
        host_name               winhost_172
        service_description     HTTP
        check_command           check_http
        }
# Monitor the SSH service.
define service{
        use                     generic-service
        host_name               winhost_172
        service_description     SSH
        check_command           check_ssh
        }
# Monitor the DHCP service.
define service{
        use                     generic-service
        host_name               winhost_172
        service_description     DHCP
        check_command           check_dhcp
        }
# Monitor the POP3 service.
define service{
        use                     generic-service
        host_name               winhost_172
        service_description     POP
        check_command           check_pop
        }
# Monitor the IMAP service.
define service{
        use                     generic-service
        host_name               winhost_172
        service_description     IMAP
        check_command           check_imap
        }
# Monitor the SMTP service.
define service{
        use                     generic-service
        host_name               winhost_172
        service_description     SMTP
        check_command           check_smtp
        }
# Monitor a TCP port; commonly used to watch port status when several
# websites are served on different ports.
define service{
        use                     generic-service
        host_name               winhost_172
        service_description     TCP
        check_command           check_tcp!80
        }

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值