其实也是在easyhadoop做第二次重构的时候用到了这个嵌入式的webserver去做服务器状态的监控,可以单独摘出来写个东西。
思路主要是用python脚本获取linux服务器的各种状态信息,然后用webserver的方式,以json数据发给http,主控节点去读取webserver返回的json,生成系统监控报表。代码简单,开发和部署都很方便。
用到的主要东西是python的第三方嵌入式web模块叫cherrypy,之所以选择cherrypy,主要原因就是开发快速,学习也很快,基本我用了一天多就基本学会了怎么来写了。当然,也可以用python自带的simpleHTTPserver。不过那个确实太simple了。cherrypy的优点在于,多线程,多并发。又不像Tornado和Django那样重量级。因为我们返回的是json,也用不到什么html模板,数据库的功能。当然也可以选择web.py,不过相比还是cherrypy更好一点我认为,web.py也借鉴了cherrypy不少的思想。
其实我的集群里都装了Ganglia, Cacti, Nagios。但是我想看实时的图表生成,就自己写了这样一段程序。做自动化运维的朋友可以当一个参考,添加自己的方法,监控别的也是可以的。
主要流程是这样:
想办法读取Linux系统的各种数据 ---> 解析数据转成json发给http服务器 ---> 监控服务器扫描数据生成图表
#!/usr/bin/python
# -*- coding: utf8 -*-
import sys
import cherrypy
import platform
import os
import time
#python 2.4为simplejson,python 2.6以上为json
try:
import json
except ImportError:
import simplejson as json
#假装做一个index出来
class Index(object):
#下面这句表示修饰index方法,这个index方法是暴露给http server的
@cherrypy.expose
def index(self):
return "hello cherrypy"
class Node(object):
'''
url /node/dist/
'''
#获取目标机器的发行分支,版本号,架构类型,主机名称等等,返回json
@cherrypy.expose
def dist(self):
dist_json = ''
sysinstaller = ''
installer = ''
ostype = platform.dist()
if(ostype[0] in ['Ubuntu','debian','ubuntu','Debian']):
sysinstaller = 'apt-get'
installer = 'dpkg'
elif(ostype[0] in ['SuSE']):
sysinstaller = 'zypper'
installer = 'rpm'
elif(ostype[0] in ['CentOS', 'centos', 'redhat','RedHat']):
sysinstaller = 'yum'
installer = 'rpm'
machine = platform.machine()
hostname = platform.node()
dist_json = {'os.system':ostype[0], 'os.version':ostype[1], 'os.release':ostype[2], 'os.sysinstall':sysinstaller, 'os.installer':installer, 'os.arch':machine, 'os.hostname':hostname}
return json.dumps(dist_json, sort_keys=False, indent=4, separators=(',', ': '))
'''
url /node/GetCpuInfo/
'''
#获取CPU型号等,返回json
@cherrypy.expose
def GetCpuInfo(self):
cpu = []
cpuinfo = {}
f = open("/proc/cpuinfo")
lines = f.readlines()
f.close()
for line in lines:
if line == 'n':
cpu.append(cpuinfo)
cpuinfo = {}
if len(line) < 2: continue
name = line.split(':')[0].strip().replace(' ','_')
var = line.split(':')[1].strip()
cpuinfo[name] = var
return json.dumps(cpuinfo, sort_keys=False, indent=4, separators=(',', ': '))
'''
url /node/GetMemInfo/
'''
#获取内存使用的详细信息
@cherrypy.expose
def GetMemInfo(self):
mem = {}
f = open("/proc/meminfo")
lines = f.readlines()
f.close()
for line in lines:
if len(line) < 2:
continue
name = line.split(':')[0]
var = line.split(':')[1].split()[0]
mem[name] = long(var) * 1024.0
mem['MemUsed'] = mem['MemTotal'] - mem['MemFree'] - mem['Buffers'] - mem['Cached']
return json.dumps(mem, sort_keys=False, indent=4, separators=(',', ': '))
'''
url /node/GetLoadAvg//
'''
#获取系统负载的详细信息
@cherrypy.expose
def GetLoadAvg(self):
loadavg = {}
f = open("/proc/loadavg")
con = f.read().split()
f.close()
loadavg['lavg_1']=con[0]
loadavg['lavg_5']=con[1]
loadavg['lavg_15']=con[2]
loadavg['nr']=con[3]
loadavg['last_pid']=con[4]
return json.dumps(loadavg, sort_keys=False, indent=4, separators=(',', ': '))
'''
url /node/GetIfInfo/eth(x)
'''
获取指定网卡的流量信息,这里面有点复杂
@cherrypy.expose
def GetIfInfo(self, interface):
dist_json = self.dist()
f = open("/proc/net/dev")
lines = f.readlines()
f.close()
intf = {}
for line in lines[2:]:
con = line.split()
#if部分是给centos使用的,centos在流量大的情况下,网卡信息里面字符串会连上,所以需要单独拆分处理,else部分则是ubuntu或者其他系统格式化很好的使用
if con[0][-1].isdigit() == True:
offset = con[0].split(':')
intf['interface'] = str(offset[0])
intf['ReceiveBytes'] = str(offset[1])
intf['ReceivePackets'] = str(con[1])
intf['ReceiveErrs'] = str(con[2])
intf['ReceiveDrop'] = str(con[3])
intf['ReceiveFifo'] = str(con[4])
intf['ReceiveFrames'] = str(con[5])
intf['ReceiveCompressed'] = str(con[6])
intf['ReceiveMulticast'] = str(con[7])
intf['TransmitBytes'] = str(con[8])
intf['TransmitPackets'] = str(con[9])
intf['TransmitErrs'] = str(con[10])
intf['TransmitDrop'] = str(con[11])
intf['TransmitFifo'] = str(con[12])
intf['TransmitFrames'] = str(con[13])
intf['TransmitCompressed'] = str(con[14])
intf['TransmitMulticast'] = str(con[15])
else:
intf['interface'] = str(con[0])
intf['ReceiveBytes'] = str(con[1])
intf['ReceivePackets'] = str(con[2])
intf['ReceiveErrs'] = str(con[3])
intf['ReceiveDrop'] = str(con[4])
intf['ReceiveFifo'] = str(con[5])
intf['ReceiveFrames'] = str(con[6])
intf['ReceiveCompressed'] = str(con[7])
intf['ReceiveMulticast'] = str(con[8])
intf['TransmitBytes'] = str(con[9])
intf['TransmitPackets'] = str(con[10])
intf['TransmitErrs'] = str(con[11])
intf['TransmitDrop'] = str(con[12])
intf['TransmitFifo'] = str(con[13])
intf['TransmitFrames'] = str(con[14])
intf['TransmitCompressed'] = str(con[15])
intf['TransmitMulticast'] = str(con[16])
return json.dumps(intf, sort_keys=False)
#获取全部网卡的接口和流量信息
@cherrypy.expose
def GetIfTraffic(self):
ifs = []
nettraffic = {}
f = open("/proc/net/dev")
lines = f.readlines()
f.close()
for line in lines[2:]:
con = line.split()
ifname = con[0].split(':')
if(ifname[0].strip() != 'lo'):
ifs.append(ifname[0].strip())
else:
continue
for interface in ifs:
nettraffic[interface] = self.GetIfInfo( interface)
return json.dumps(nettraffic)
#获取硬盘的分区信息和使用量
@cherrypy.expose
def GetHddInfo(self):
hdds = []
mount = {}
file_system = []
type = []
size = []
used = []
avail = []
used_percent = []
mounted_on = []
hdds = os.popen('df -lhT | grep -v tmpfs | grep -v boot | grep -v usr | grep -v tmp | sed \'1d;/ /!N;s/\\n//;s/[ ]*[ ]/\\t/g;\'').readlines()
for line in hdds:
file_system.append(line.replace('\\n','').replace('\\t',' ').split()[0])
type.append(line.replace('\\n','').replace('\\t',' ').split()[1])
size.append(line.replace('\\n','').replace('\\t',' ').split()[2])
used.append(line.replace('\\n','').replace('\\t',' ').split()[3])
avail.append(line.replace('\\n','').replace('\\t',' ').split()[4])
used_percent.append(line.replace('\\n','').replace('\\t',' ').split()[5])
mounted_on.append(line.replace('\\n','').replace('\\t',' ').split()[6])
mount['file_system'] = file_system
mount['type'] = type
mount['size'] = size
mount['used'] = used
mount['avail'] = avail
mount['used_percent'] = used_percent
mount['mounted_on'] = mounted_on
dist_json = json.dumps(mount)
return dist_json
#获取CPU的使用量信息,需要系统安装sysstat支持
@cherrypy.expose
def GetCpuDetail(self):
dist_json = self.dist()
dist = json.loads(dist_json)
if(dist['os.system'] in ['CentOS', 'centos', 'redhat', 'RedHat']):
if(int(dist['os.version'].split('.')[0]) < 6): #For CentOS only
cmd = 'mpstat 1 1 | sed \'1d;2d;3d;4d\' | awk \'{print "{\\\"user\\\":\\\"\"$3\"\\\",\\\"nice\\\":\\\"\"$4\"\\\",\\\"sys\\\":\\\"\"$5\"\\\",\\\"iowait\\\":\\\"\"$6\"\\\",\\\"irq\\\":\\\"\"$7\"\\\",\\\"soft\\\":\\\"\"$8\"\\\",\\\"steal\\\":\\\"\"$9\"\\\",\\\"idle\\\":\\\"\"$10\"\\\"}"}\''
else:
cmd = 'mpstat 1 1 | sed \'1d;2d;3d;4d\' | awk \'{print "{\\\"user\\\":\\\"\"$3\"\\\",\\\"nice\\\":\\\"\"$4\"\\\",\\\"sys\\\":\\\"\"$5\"\\\",\\\"iowait\\\":\\\"\"$6\"\\\",\\\"irq\\\":\\\"\"$7\"\\\",\\\"soft\\\":\\\"\"$8\"\\\",\\\"steal\\\":\\\"\"$9\"\\\",\\\"idle\\\":\\\"\"$11\"\\\"}"}\''
else:
cmd = 'mpstat 1 1 | sed \'1d;2d;3d;4d\' | awk \'{print "{\\\"user\\\":\\\"\"$3\"\\\",\\\"nice\\\":\\\"\"$4\"\\\",\\\"sys\\\":\\\"\"$5\"\\\",\\\"iowait\\\":\\\"\"$6\"\\\",\\\"irq\\\":\\\"\"$7\"\\\",\\\"soft\\\":\\\"\"$8\"\\\",\\\"steal\\\":\\\"\"$9\"\\\",\\\"idle\\\":\\\"\"$11\"\\\"}"}\''
cpu = os.popen(cmd).readline().strip()
return cpu
if "__main__" == __name__:
#服务器配置
settings = {
'global': {
#绑定端口
'server.socket_port' : 60090,
#ip地址设置,觉得够安全就用0.0.0.0,否则就单独写那台服务器的ip
'server.socket_host': '0.0.0.0',
'server.socket_file': '',
'server.socket_queue_size': 100,
'server.protocol_version': 'HTTP/1.1',
'server.log_to_screen': True,
'server.log_file': '',
'server.reverse_dns': False,
'server.thread_pool': 200,
'server.environment': 'production',
'engine.timeout_monitor.on': False
}
}
#使用配置和映射路由并启动webserver
cherrypy.config.update(settings)
cherrypy.tree.mount(Index(), '/')
cherrypy.tree.mount(Node(), '/node')
cherrypy.engine.start()
图表生成端随便拿什么语言写就无所谓了,反正数据都是json格式的。
当然,我也用他监控hadoop和hbase。代码加点跟hadoop和hbase相关的就可以了。