1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
|
#!/bin/bash
#监控用户登录
# Usermonitor - check how many users are currently logged in.
#
# Globals written:
#   LoginUser - user count parsed from the output of `uptime`
#   Critical  - alert text, set only when 2 or more users are logged in
#   status    - 0 = critical (2+ users), 2 = OK
# Outputs:
#   "loginuser ok" on stdout when fewer than 2 users are logged in.
# Returns:
#   0 normally; 1 when no user count could be parsed (status is left at 2
#   and a diagnostic is written to stderr).
Usermonitor () {
  # Find the number by its "user"/"users" label instead of a fixed column:
  # the column index changes with the uptime format ("up 5 days, 3:02," vs
  # "up 3:02,"). LC_ALL=C keeps the label untranslated.
  LoginUser=$(LC_ALL=C uptime | awk '{
    for (i = 2; i <= NF; i++) {
      if ($i ~ /^users?,?$/) { print $(i - 1); exit }
    }
  }')

  if ! [[ $LoginUser =~ ^[0-9]+$ ]]; then
    echo "Usermonitor: cannot parse user count from uptime" >&2
    status=2
    return 1
  fi

  if [ "$LoginUser" -ge 2 ]; then
    Critical="当前系统登录用户人数超过1人,具体人数为:$LoginUser 个,请确认操作者人数。"
    status=0
  else
    echo "loginuser ok"
    # 2 is the "OK" code used by the other monitors; 1 would make the
    # dispatcher send a warning for a healthy system.
    status=2
  fi
}
#监控内存
# MemMonitor - check the percentage of memory that is still usable.
#
# Parses `free -m`. Two output layouts are understood:
#   * a header containing an "available" column (value taken from the Mem: row)
#   * a "-/+ buffers/cache:" row (value taken from its 4th field, as before)
#
# Globals written:
#   MemTotal - total memory in MiB
#   MemFree  - usable memory in MiB
#   MemFreeB - usable share formatted with two decimals and a "%" suffix
#   MemFreeS - usable share rounded to an integer (used for the thresholds)
#   Critical - alert text when MemFreeS < 10
#   Warning / WarningT - alert text and title when 10 <= MemFreeS < 20
#   status   - 0 = critical, 1 = warning, 2 = OK
# Outputs:
#   "Mem OK" on stdout when MemFreeS >= 20.
# Returns:
#   0 normally; 1 when the figures could not be parsed (status is left at 2
#   and a diagnostic is written to stderr).
MemMonitor () {
  local parsed
  parsed=$(LC_ALL=C free -m | awk '
    NR == 1 {
      # Data rows carry a leading label, so a header column at position i
      # corresponds to field i + 1 of the "Mem:" row.
      for (i = 1; i <= NF; i++) if ($i == "available") avail_col = i + 1
    }
    $1 == "Mem:" {
      total = $2
      if (avail_col) avail = $avail_col
    }
    /buffers\/cache:/ { avail = $4 }
    END { if (total != "" && avail != "") print total, avail }
  ')
  read -r MemTotal MemFree <<< "$parsed"

  # Refuse to divide by an empty or zero total.
  if ! [[ $MemTotal =~ ^[0-9]+$ && $MemFree =~ ^[0-9]+$ ]] \
      || [ "$MemTotal" -eq 0 ]; then
    echo "MemMonitor: cannot parse memory figures from free" >&2
    status=2
    return 1
  fi

  # Values are handed to awk with -v rather than spliced into the program
  # text; "%%" is the portable way to print a literal percent sign.
  read -r MemFreeB MemFreeS <<< "$(awk -v avail="$MemFree" -v total="$MemTotal" \
    'BEGIN { pct = avail * 100 / total; printf "%.2f%% %.f\n", pct, pct }')"

  if [ "$MemFreeS" -lt 10 ]; then
    Critical="系统可用内存小于10%,实际可用内存为:$MemFreeB ,请处理。"
    status=0
  elif [ "$MemFreeS" -lt 20 ]; then
    Warning="系统可用内存小于20%,实际可用内存为:$MemFreeB ,请查看。"
    WarningT="内存报警"
    status=1
  else
    echo "Mem OK"
    status=2
  fi
}
#监控分区空间大小
# DiskMonitorG - check how full the root filesystem is.
#
# Globals written:
#   DiskGB   - usage of "/" as printed by df, including the "%" suffix
#   DiskGS   - the same value without the "%" (used for the thresholds)
#   Critical - alert text when usage is above 90
#   Warning / WarningT - alert text and title when usage is 81..90
#   status   - 0 = critical, 1 = warning, 2 = OK
# Outputs:
#   "DiskGB Ok" on stdout when usage is 80 or below.
# Returns:
#   0 normally; 1 when the usage could not be parsed (status is left at 2
#   and a diagnostic is written to stderr).
DiskMonitorG () {
  # Root partition. Ask df for "/" explicitly instead of assuming it is the
  # second line of the full listing; -P keeps each filesystem on one line.
  DiskGB=$(LC_ALL=C df -P / | awk 'NR == 2 { print $5 }')
  DiskGS=${DiskGB%\%}

  if ! [[ $DiskGS =~ ^[0-9]+$ ]]; then
    echo "DiskMonitorG: cannot parse root filesystem usage from df" >&2
    status=2
    return 1
  fi

  if [ "$DiskGS" -gt 90 ]; then
    Critical="根分区使用率超过90%,实际已使用 $DiskGB ,请处理。"
    status=0
  elif [ "$DiskGS" -gt 80 ]; then
    # No upper bound here: "-lt 90" used to let exactly 90% slip through
    # both tests and be reported as OK.
    Warning="根分区使用率超过80%,实际已使用 $DiskGB , 请查看。"
    WarningT="根分区报警"
    status=1
  else
    echo "DiskGB Ok"
    status=2
  fi
}
# DiskMonitorA - check how full the application filesystem is.
#
# Arguments:
#   $1 - optional mount point to check. When omitted, the filesystem on the
#        4th line of `df -h` is used, as before.
#        NOTE(review): which mount that is depends on the host's df listing.
# Globals written:
#   ApplyB   - usage as printed by df, including the "%" suffix
#   ApplyS   - the same value without the "%" (used for the thresholds)
#   Critical - alert text when usage is above 90
#   Warning / WarningT - alert text and title when usage is 81..90
#   status   - 0 = critical, 1 = warning, 2 = OK
# Outputs:
#   "Apply ok" on stdout when usage is 80 or below.
# Returns:
#   0 normally; 1 when the usage could not be parsed (status is left at 2
#   and a diagnostic is written to stderr).
DiskMonitorA () {
  # Application partition
  local mount_point=${1:-}

  if [ -n "$mount_point" ]; then
    ApplyB=$(LC_ALL=C df -P -- "$mount_point" | awk 'NR == 2 { print $5 }')
  else
    ApplyB=$(df -h | awk 'NR == 4 { print $5 }')
  fi
  ApplyS=${ApplyB%\%}

  if ! [[ $ApplyS =~ ^[0-9]+$ ]]; then
    echo "DiskMonitorA: cannot parse application filesystem usage from df" >&2
    status=2
    return 1
  fi

  if [ "$ApplyS" -gt 90 ]; then
    Critical="应用分区使用率超过90%,实际已使用 $ApplyB ,请处理."
    status=0
  elif [ "$ApplyS" -gt 80 ]; then
    # No upper bound here: "-lt 90" used to let exactly 90% slip through
    # both tests and be reported as OK.
    Warning="应用分区使用率超过80%,实际已使用 $ApplyB ,请查看。"
    WarningT="应用分区报警"
    status=1
  else
    echo "Apply ok"
    status=2
  fi
}
#监控CPU负载
# CPULoad - check the 1-minute load average reported by `uptime`.
#
# Globals written:
#   CPULoad2 - the full uptime line (included in alert texts)
#   CPULoad1 - integer part of the first load-average figure
#   Critical - alert text when CPULoad1 is above 5 (load of 6.00 or more)
#   Warning / WarningT - alert text and title when CPULoad1 is 4 or 5
#   status   - 0 = critical, 1 = warning, 2 = OK
# Outputs:
#   "CPU OK" on stdout when CPULoad1 is 3 or below.
# Returns:
#   0 normally; 1 when no load average could be parsed (status is left at 2
#   and a diagnostic is written to stderr).
CPULoad () {
  # Anchor on the "load average:" label: the column position of the figure
  # changes with the uptime format. LC_ALL=C keeps the label untranslated
  # and the decimal separator a dot.
  local load_re='load averages?:[[:space:]]*([0-9]+)'

  CPULoad2=$(LC_ALL=C uptime)
  if [[ $CPULoad2 =~ $load_re ]]; then
    CPULoad1=${BASH_REMATCH[1]}
  else
    echo "CPULoad: cannot parse load average from uptime" >&2
    CPULoad1=
    status=2
    return 1
  fi

  if [ "$CPULoad1" -gt 5 ]; then
    Critical="CPU负载过高,请即使处理。 $CPULoad2 "
    status=0
  elif [ "$CPULoad1" -gt 3 ]; then
    # No upper bound here: "-lt 5" used to let a load of 5.xx slip through
    # both tests and be reported as OK. The message carries the uptime line
    # (it previously interpolated $Warning into itself).
    Warning="CPU负载警告, $CPULoad2 "
    WarningT="CPU负载报警"
    status=1
  else
    echo "CPU OK"
    status=2
  fi
}
#监控服务状态
# ServerMonitor - probe the TMS web service over HTTP.
#
# The URL is fetched with wget (one try per attempt, 10 s timeout). The
# service is declared down only after max_fails consecutive failed attempts;
# a mail is then sent with mutt.
#
# Environment:
#   WGET - optional path/name of the wget executable (default /usr/bin/wget)
# Globals written:
#   Critical - alert text when the service is down
#   status   - 0 = critical (service down), 2 = OK
# Returns:
#   0 when the service answered, 1 when it is considered down.
#   The function returns instead of calling exit, so the caller's loop keeps
#   control of the script.
ServerMonitor () {
  # Service status monitoring
  local -r timeout=10
  local -r max_fails=2
  local -r url='http://192.168.20.84/'
  local -r wget_bin=${WGET:-/usr/bin/wget}
  local fails=0

  while [ "$fails" -lt "$max_fails" ]; do
    if "$wget_bin" --timeout="$timeout" --tries=1 "$url" -q -O /dev/null; then
      status=2
      return 0
    fi
    fails=$(( fails + 1 ))
  done

  Critical="TMS应用服务出现故障,请紧急处理!"
  echo "$Critical" | mutt -s "服务down" hao.lulu@chinaebi.com
  status=0
  return 1
}
#发送报警短信、报警邮件
# Run every monitor in turn and dispatch an alert based on the status code
# it leaves in $status: 0 sends a critical alert, 1 sends a warning, anything
# else prints "ok".
for n in Usermonitor MemMonitor DiskMonitorG DiskMonitorA CPULoad ServerMonitor; do
  # Start each check from a clean slate so a monitor that sets nothing
  # cannot cause the previous monitor's alert to be sent again.
  status=
  Critical=
  Warning=
  WarningT=

  "$n"

  # Alert texts contain spaces, "%" and non-ASCII characters, so every query
  # parameter is percent-encoded by curl (-G appends them to the URL).
  if [ "$status" = 0 ]; then
    curl -G \
      --data-urlencode "tranCode=TM0311" \
      --data-urlencode "content=$Critical" \
      "http://172.20.36.118/app/tms.do"
  elif [ "$status" = 1 ]; then
    # content carries the warning text (it used to be the literal word
    # "Warning").
    curl -G \
      --data-urlencode "tranCode=TM0310" \
      --data-urlencode "title=$WarningT" \
      --data-urlencode "content=$Warning" \
      "http://172.20.36.118/app/tms.do"
  else
    echo "ok"
  fi
done
|
shell 监控脚本
最新推荐文章于 2024-08-07 21:52:22 发布