Windows其他版本操作系统的服务器安装open-falcon监控agent,详情见《python脚本实现open-falcon监控windows服务器》。
1、安装可执行文件
公司有一台非常老的服务器,操作系统为Windows2003Server,因为太老了,偶尔会重启,但是近期因为各种原因没法迁移,所以打算装个监控上去。
一开始打算和别的服务器一样,丢个exe文件上去,改改配置执行就好,然而,执行了才发现,是我太天真。
哦豁,这会才想起来,这台服务器是32位的,不适用啊。
2、脚本安装监控服务
2.1执行
可执行文件无法使用,那就直接执行脚本,此服务器本来就安装了python3.3.3
python3.3.3下载地址python3.3.3下载
选择x86下载
安装好python后,直接执行
python windows_collect_service.py
2.2报错与解决
2.2.1报错1
找不到psutil模块。(此处应该配图,但我忘记截了,表情包冒充一下)
解决1
- 进入下载页面psutil模块下载
- 找到psutil-3.3.0.win32-py3.3.exe 下载
- 在服务器上直接双击执行,它会自动找到python的安装路径然后安装。
2.2.2报错2
找不到requests模块。(依旧没有截图)
因为服务器没有开外网权限,连不上公网,所以一开始打算使用离线安装的方式安装,结果……百度了一下,发现requests模块的依赖很多,要没完没了的下很多模块,就放弃了。
解决2
给服务器设置了全局代理连接公网,进入python安装目录下的Scripts路径,执行命令
easy_install.exe requests
ok,解决。
2.2.3报错3
找不到 win32api(无图)
引用的模块真的好多……
解决3
easy_install pypiwin32
2.3 改配置
更改配置文件push.ini中的主机名、数据上报url
2.4 安装服务
- 打开终端
进入脚本所在文件夹,shift+鼠标右键选择在此处打开终端。windows2003server没有这个选项……
那就 开始–>运行–>输入cmd–>回车,打开cmd终端。
- 进入脚本所在路径
cd C:\windows\work
- 安装python服务
python windows_collect_service.py --startup auto install
python windows_collect_service.py start
成功
- 打开服务看到服务已启动
打开open-falcon dashboard ,看到数据已经上传。
3、 脚本及配置文件内容
3.1 windows_collect_service.py
#coding=utf8
import psutil
import time
import json
import requests
import copy
import configparser
import sys
import win32api
import win32event
import win32service
import win32serviceutil
import servicemanager
import win32timezone
# Runtime settings are loaded once, at import time, from push.ini.
# Edit _CONFIG_FILE if the configuration file lives somewhere else.
_CONFIG_FILE = "C:/Windows/work/push.ini"
_SECTION = 'config'

config = configparser.ConfigParser()
config.read(_CONFIG_FILE, encoding='utf-8')

# Where metrics are posted, and the endpoint name they are reported under.
# Example values: push_url = "http://10.200.1.2:1988/v1/push",
#                 endpoint = "testlocal"
push_url = config.get(_SECTION, 'push_url')
endpoint = config.get(_SECTION, 'hostname')

# cpu_interval:      interval over which CPU usage is computed (e.g. 10)
# push_interval:     interval between data pushes (e.g. 60)
# python_sleep_time: pause between two collection rounds of the service loop
cpu_interval, push_interval, python_sleep_time = (
    config.getint(_SECTION, option)
    for option in ('cpu_interval', 'push_interval', 'python_sleep_time')
)

# Codec name kept for decoding localized (Chinese) interface names.
zh_decode = "gbk"

# Interfaces whose name contains any of these fragments are not reported.
ignore_interface = ["Loopback", "Teredo Tunneling", "isatap", "6TO4 Adapter"]
#######################################################
class WindowsCollectService(win32serviceutil.ServiceFramework):
    """Windows service that repeatedly runs ``main()`` to push host metrics.

    The service runs one collection round, waits ``python_sleep_time``
    seconds, and repeats until a stop request signals ``stop_event``.
    """

    _svc_name_ = "WindowsCollectService"
    _svc_display_name_ = "Windows Collect Service"
    _svc_description_ = "open-falcon 上传基础项的服务"

    def __init__(self, args):
        """Initialise the framework and create the event used to request a stop."""
        self.log('init')
        win32serviceutil.ServiceFramework.__init__(self, args)
        # Auto-reset event, initially unsignalled; SvcStop sets it.
        self.stop_event = win32event.CreateEvent(None, 0, 0, None)

    def SvcDoRun(self):
        """Run collection rounds until the stop event is signalled.

        A failure inside ``main()`` is logged and the loop carries on, so a
        transient error (e.g. the push URL being unreachable) does not kill
        the service.
        """
        self.log('start')
        # WaitForSingleObject takes milliseconds; the setting is in seconds.
        wait_ms = python_sleep_time * 1000
        while True:
            try:
                main()
            except Exception as e:
                # Previously swallowed silently; record it in the event log.
                self.log('Exception : %s' % e)
            self.log('wait')
            # Wait on the stop event instead of time.sleep() so that a stop
            # request interrupts the pause and ends the loop.
            signalled = win32event.WaitForSingleObject(self.stop_event, wait_ms)
            if signalled == win32event.WAIT_OBJECT_0:
                break
        self.log('done')

    def SvcStop(self):
        """Handle a stop request by signalling the run loop to exit.

        Only SERVICE_STOP_PENDING is reported here.
        NOTE(review): this relies on the pywin32 framework reporting the final
        stopped state once SvcDoRun returns - confirm against the installed
        pywin32 version. A round already in progress finishes first, so the
        stop can take up to one ``main()`` run to complete.
        """
        self.ReportServiceStatus(win32service.SERVICE_STOP_PENDING)
        self.log('stopping')
        self.stop()
        win32event.SetEvent(self.stop_event)

    def start(self):
        """Legacy start hook; not called by SvcDoRun. Sleeps for 10000 seconds."""
        time.sleep(10000)

    def stop(self):
        """Stop hook called from SvcStop; does nothing by default."""
        pass

    def log(self, msg):
        """Write ``msg`` as an informational entry via servicemanager."""
        servicemanager.LogInfoMsg(str(msg))

    def sleep(self, minute):
        """Sleep via win32api.Sleep for ``minute * 1000`` time units.

        NOTE(review): win32api.Sleep takes milliseconds, so despite its name
        the argument is effectively a number of seconds - confirm with callers.
        """
        win32api.Sleep((minute*1000), True)
def main():
    """Collect one round of host metrics and push them to open-falcon.

    Samples CPU, memory, swap, per-partition disk usage, per-disk IO counters
    and per-interface network counters through psutil. Every sample becomes
    one item of the JSON array posted to ``push_url``.

    Uses the module-level settings ``endpoint``, ``push_interval``,
    ``cpu_interval`` and ``push_url``. Exceptions from psutil or requests
    (including a request timeout) propagate to the caller.
    """
    ts = int(time.time())
    payload = []

    def add(metric, value, counter_type, tags=""):
        # Build a fresh dict per sample, so no state (tags, counterType)
        # can leak from one metric group into the next.
        payload.append({
            "endpoint": endpoint,
            "metric": metric,
            "timestamp": ts,
            "step": push_interval,
            "value": value,
            "counterType": counter_type,
            "tags": tags,
        })

    # cpu_times_percent blocks for cpu_interval seconds while it samples.
    cpu_status = psutil.cpu_times_percent(interval=cpu_interval)
    mem_status = psutil.virtual_memory()
    swap_status = psutil.swap_memory()
    disk_io_status = psutil.disk_io_counters(perdisk=True)
    net_io_status = psutil.net_io_counters(pernic=True)

    add("cpu.user", cpu_status.user, "GAUGE")
    add("cpu.system", cpu_status.system, "GAUGE")
    add("cpu.idle", cpu_status.idle, "GAUGE")
    add("mem.memused.percent", mem_status.percent, "GAUGE")
    add("mem.swapused.percent", swap_status.percent, "GAUGE")

    for disk in psutil.disk_partitions():
        # Skip CD-ROM drives and partitions with no filesystem type.
        if 'cdrom' in disk.opts or disk.fstype == '':
            continue
        disk_info = psutil.disk_usage(disk.mountpoint)
        tags = "disk=" + disk.device.split(":")[0]
        add("df.used.percent", disk_info.percent, "GAUGE", tags)
        add("df.byte.total", disk_info.total, "GAUGE", tags)
        add("df.byte.used", disk_info.used, "GAUGE", tags)
        add("df.byte.free", disk_info.free, "GAUGE", tags)

    for key in disk_io_status:
        io = disk_io_status[key]
        tags = "device=" + key
        add("disk.io.read_count", io.read_count, "COUNTER", tags)
        add("disk.io.write_count", io.write_count, "COUNTER", tags)
        add("disk.io.read_bytes", io.read_bytes, "COUNTER", tags)
        add("disk.io.write_bytes", io.write_bytes, "COUNTER", tags)
        add("disk.io.read_time", io.read_time, "COUNTER", tags)
        add("disk.io.write_time", io.write_time, "COUNTER", tags)

    for key in net_io_status:
        if is_interface_ignore(key):
            continue
        nic = net_io_status[key]
        # Tag with the interface name itself. The previous code appended the
        # codec name ("gbk"), which made every interface share one tag.
        tags = "interface=" + key
        add("net.if.in.bytes", nic.bytes_recv, "COUNTER", tags)
        add("net.if.out.bytes", nic.bytes_sent, "COUNTER", tags)
        add("net.if.in.packets", nic.packets_recv, "COUNTER", tags)
        add("net.if.out.packets", nic.packets_sent, "COUNTER", tags)
        add("net.if.in.error", nic.errin, "COUNTER", tags)
        add("net.if.out.error", nic.errout, "COUNTER", tags)
        add("net.if.in.drop", nic.dropin, "COUNTER", tags)
        add("net.if.out.drop", nic.dropout, "COUNTER", tags)

    # Bound the request so an unresponsive push endpoint cannot hang the
    # caller forever; one push interval is used as the upper limit.
    r = requests.post(push_url, data=json.dumps(payload), timeout=push_interval)
    print (r.text)
def is_interface_ignore(key, ignored=None):
    """Return True if the interface name ``key`` should be skipped.

    ``key`` is skipped when it contains any fragment from ``ignored``.
    When ``ignored`` is omitted, the module-level ``ignore_interface`` list
    is used. Always returns a bool (the previous version returned None for
    interfaces that were not ignored).
    """
    if ignored is None:
        ignored = ignore_interface
    return any(fragment in key for fragment in ignored)
if __name__ == "__main__":
    if len(sys.argv) != 1:
        # Arguments were given (install / start / stop / remove ...):
        # let pywin32's command-line helper handle them.
        win32serviceutil.HandleCommandLine(WindowsCollectService)
    else:
        # No arguments: host the service class and hand control to the
        # service control dispatcher.
        servicemanager.Initialize()
        servicemanager.PrepareToHostSingle(WindowsCollectService)
        servicemanager.StartServiceCtrlDispatcher()
3.2 push.ini内容
[config]
push_url = http://10.200.1.1:1988/v1/push
hostname = test-10.200.1.2-windows
#计算cpu使用率的时间间隔
cpu_interval = 30
#数据推送的时间间隔
push_interval = 60
#上报服务循环时间,隔一个python_sleep_time循环一次
python_sleep_time = 30