Nagios 客户端部署脚本
先把监控脚本(check_cpu)写好,并上传到客户端的 /root/ 目录下
#!/bin/bash
# Nagios client (NRPE) deployment: install the check plugin that was uploaded
# to /root/, register it as an NRPE command, and restart NRPE.
# Kept free of bashisms so it also works when started as `sh a.sh`.
set -eu

# Name of the plugin script; also used as the NRPE command name.
check_a='check_cpu'
# Nagios plugin directory and NRPE configuration directory.
dir='/usr/local/nagios/libexec'
dir2='/usr/local/nagios/etc'

# Copy the plugin into the Nagios plugin directory and make it executable.
cp -- "/root/$check_a" "$dir/"
chmod +x -- "$dir/$check_a"

# Register the command together with its -w / -c arguments.
# Skip the append when an entry for this command is already present, so
# re-running the script does not add the same definition twice.
nrpe_entry="command[$check_a]=$dir/$check_a -w 3 -c 4"
if ! grep -qF -- "command[$check_a]=" "$dir2/nrpe.cfg" 2>/dev/null; then
  printf '%s\n' "$nrpe_entry" >> "$dir2/nrpe.cfg"
fi

# Restart the NRPE service so it picks up the new command.
/etc/init.d/nrpe restart
远程执行
# Copy the deployment script to /root/ on every host listed in ip.txt.
pscp -h ip.txt /root/a.sh /root/
# Run it on every host; -i prints each host's output inline.
# The remote command must be wrapped in plain ASCII quotes so that pssh
# receives it as a single argument.
pssh -h ip.txt -i 'sh /root/a.sh'
nagios服务端
#!/bin/bash
# Nagios server side: define the check command in commands.cfg, append a
# matching NRPE service to the config file of every host listed in
# /root/ip.txt, then restart Nagios.
set -euo pipefail

# Name of the check; used as command name, service description and NRPE command.
check_b='check_cpu'
# Nagios object configuration directory.
dir='/usr/local/nagios/etc/objects'
# Per-host config files live here as <ip>.cfg.
server_dir="$dir/server"
# One host per line.
ip_list='/root/ip.txt'

# Add the command definition (only once, so re-runs do not duplicate it).
# $USER1$, $HOSTADDRESS$ and $ARG1$ are Nagios macros: the dollar signs are
# escaped so they reach the file literally instead of being expanded by the
# shell as (empty) variables.
# NOTE(review): the service below calls check_nrpe!<check>, so this command
# definition may not actually be used - confirm it is needed.
if ! grep -Eq "^[[:space:]]*command_name[[:space:]]+${check_b}[[:space:]]*\$" \
    "$dir/commands.cfg" 2>/dev/null; then
  cat >> "$dir/commands.cfg" <<EOF
define command{
 command_name $check_b
 command_line \$USER1\$/$check_b -H \$HOSTADDRESS\$ -c \$ARG1\$
 }
EOF
fi

# Append the service definition to each host's config file.
while read -r ip _ || [[ -n "${ip:-}" ]]; do
  # Ignore blank lines and comment lines in the host list.
  [[ -n "$ip" && "$ip" != \#* ]] || continue

  host_cfg="$server_dir/$ip.cfg"
  if [[ ! -f "$host_cfg" ]]; then
    printf 'skip %s: %s not found\n' "$ip" "$host_cfg" >&2
    continue
  fi

  # Reuse the first "host_name ..." line of the host's own config verbatim.
  host_line=$(grep -m1 'host_name' "$host_cfg" || true)
  if [[ -z "$host_line" ]]; then
    printf 'skip %s: no host_name in %s\n' "$ip" "$host_cfg" >&2
    continue
  fi

  # Do not append the same service again when the script is re-run.
  if grep -qF -- "check_nrpe!$check_b" "$host_cfg"; then
    continue
  fi

  cat >> "$host_cfg" <<EOF
 define service{
 $host_line
 service_description $check_b
 check_period 24x7
 normal_check_interval 2
 retry_check_interval 1
 max_check_attempts 2
 notification_period 24x7
 notification_options w,u,c,r
 check_command check_nrpe!$check_b
 }
EOF
done < "$ip_list"

# Restart Nagios so the new objects are loaded.
systemctl restart nagios
监控脚本
#!/bin/bash
# Nagios/NRPE plugin: report the 1-minute load average (multiplied by 100).
# Exit status: 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN.
# Thresholds are fixed at 200 (warning) and 300 (critical); command-line
# arguments are not read.

# Take the first number after "load average:" instead of a fixed field
# index, because the number of fields uptime prints before it varies with
# how long the machine has been up. LC_ALL=C keeps "." as decimal separator.
a=$(LC_ALL=C uptime |
  awk -F'load averages?: *' \
    'NR == 1 && NF > 1 { split($2, v, /[, ]+/); printf "%d\n", v[1] * 100 + 0.5 }')

# Without a numeric value no comparison is meaningful: report UNKNOWN.
if ! [[ "$a" =~ ^[0-9]+$ ]]; then
  echo "unknown: cannot read load average from uptime"
  exit 3
fi

# Above 300: critical.
if (( a > 300 )); then
  echo taigao
  exit 2
fi

echo "$a"

# Above 200 and up to and including 300: warning. (A value of exactly 300
# used to fall through to OK.)
if (( a > 200 )); then
  echo haixing
  exit 1
fi

echo meiwent
exit 0
告警信息
Nagios可以识别4种状态返回信息:
0(OK)表示状态正常/绿色;
1(WARNING)表示出现警告/黄色;
2(CRITICAL)表示出现非常严重的错误/红色;
3(UNKNOWN)表示未知错误/深黄色。