写一个监测服务,用来监测所有其他服务;一旦某个服务宕机(dump 掉),就立即发邮件通知。
在 main 里首先启动 moniter 服务(unique service):
skynet.uniqueservice ("moniter")
其他服务启动后,向 moniter 注册,加入 moniter 的监测列表。
moniter 定时轮询,用 xpcall 安全调用每个被监测服务的心跳方法(CMD.heart_beat())。如果服务已经宕机,调用会抛出错误,xpcall 捕获后返回 false 和错误信息,此时发邮件通知即可。
监测服务 moniter
moniter.lua
require "functions"
local skynet = require "skynet"
local syslog = require "syslog"
local traceback = debug.traceback
-- Delay between heartbeat rounds, passed to skynet.timeout
-- (skynet timers count in 1/100 s ticks, so this is 5 seconds).
local IntervalTime = 5 * 100
-- Monitored services: key is the address given to CMD.register, value is the service name.
local serTab = {}
-- Handlers for incoming "lua" messages, looked up by command name in the dispatcher.
local CMD = {}
--- Add a service to the monitored list (or overwrite its recorded name).
-- @param source    address of the registering service; used as the key in serTab
-- @param _serName  name recorded for that address; used when reporting a failure
function CMD.register(source, _serName)
    local addr, name = source, _serName
    serTab[addr] = name
end
--- Report that a monitored service is down.
-- The original note says a mail should be sent here; the visible code only
-- writes an error entry through syslog.
-- @param _serName  name the service registered with
local function serviceDump(_serName)
    local fmt = "--- Error: service 【%s】 dump!"
    syslog.errf(fmt, _serName)
end
--- Send a blocking "heart_beat" request to one service.
-- Any error raised by skynet.call propagates to the caller; the reply is discarded.
-- @param _addr  address of the service to probe
local function callService(_addr)
    local proto, command = "lua", "heart_beat"
    skynet.call(_addr, proto, command)
end
--- Run one heartbeat round over every registered service, then schedule the next.
-- Each service is probed with callService wrapped in xpcall; a service whose
-- probe raises is reported through serviceDump and removed from serTab.
--
-- skynet.call yields, so CMD.register can run and insert into serTab while a
-- round is still in progress. Adding keys to a table that is being traversed
-- with pairs is undefined behaviour, so the round walks a snapshot of the
-- addresses instead of serTab itself. Services registered mid-round are picked
-- up by the next round.
local function heartBeatScheduler()
    -- Snapshot the current addresses before any call can yield.
    local addrs = {}
    for addr in pairs(serTab) do
        addrs[#addrs + 1] = addr
    end

    for i = 1, #addrs do
        local addr = addrs[i]
        local name = serTab[addr]
        if name ~= nil then
            local ok, err = xpcall(callService, traceback, addr)
            if not ok then
                serviceDump(name)
                -- Keep the failure reason instead of silently discarding it.
                syslog.errf("--- heart_beat failed, addr:%x, %s", addr, tostring(err))
                serTab[addr] = nil
            end
        end
    end

    -- Re-arm the timer only after the round has finished, so rounds never overlap.
    skynet.timeout(IntervalTime, heartBeatScheduler)
end
-- Service entry point: arm the first heartbeat round and install the "lua"
-- message dispatcher, which routes each command to the matching CMD handler.
skynet.start(function()
    skynet.timeout(IntervalTime, heartBeatScheduler)

    skynet.dispatch("lua", function(_, source, command, ...)
        local handler = CMD[command]
        if handler then
            -- Run the handler protected so a failing command cannot kill the dispatcher.
            local ok, ret = xpcall(handler, traceback, source, ...)
            if ok then
                skynet.retpack(ret)
                return
            end
            syslog.warningf("handle message(%s) failed : %s", command, ret)
            return skynet.ret()
        end

        -- Unknown command: log it and still answer so the caller is not left waiting.
        syslog.warningf("unhandled message(%s)", command)
        return skynet.ret()
    end)
end)
被监测的服务,以 friendserver 为例
friendserver.lua
--- Handle the "open" command: log it, then register this service with the
-- monitor under the name "friendserver".
-- @param source  sender address (unused here)
-- @param conf    optional configuration (unused here)
function CMD.open(source, conf)
    syslog.debugf("--- friend server open")
    -- uniqueservice returns the address of the shared "moniter" instance.
    local monitorAddr = skynet.uniqueservice("moniter")
    skynet.call(monitorAddr, "lua", "register", "friendserver")
end
--- Heartbeat handler probed by the monitor; does nothing and returns no value.
-- Being able to answer at all is the liveness signal.
function CMD.heart_beat()
    -- print("--- heart_beat friendserver")
    return
end
-- Message handler used by xpcall so failures carry a stack trace.
local traceback = debug.traceback

-- Service entry point: install the "lua" message dispatcher, which routes each
-- command to the matching CMD handler.
skynet.start(function()
    -- Uncomment to make this service exit after a short delay and exercise the monitor:
    -- skynet.timeout (800, function() skynet.exit() end)
    skynet.dispatch("lua", function(_, source, command, ...)
        local handler = CMD[command]
        if handler then
            -- Run the handler protected so a failing command cannot kill the dispatcher.
            local ok, ret = xpcall(handler, traceback, source, ...)
            if ok then
                skynet.retpack(ret)
                return
            end
            syslog.warningf("handle message(%s) failed : %s", command, ret)
            return skynet.ret()
        end

        -- Unknown command: log it and still answer so the caller is not left waiting.
        syslog.warningf("unhandled message(%s)", command)
        return skynet.ret()
    end)
end)
main.lua 中启动
-- Start (or look up) the unique "friendserver" service and keep its address.
local friendserver = skynet.uniqueservice ("friendserver")
-- Send it the "open" command; its CMD.open handler registers it with the monitor.
skynet.call (friendserver, "lua", "open")
a