解读:
open-falcon监控系统组件学习之——judge组件_random_w的博客-CSDN博客
策略strategy:
{
"id": 228,
"result": {
"hostStrategies": [{
"hostname": "open-falcon-1",
"strategies": [{
"id": 5,
"metric": "load.1min",
"tags": {},
"func": "all(#3)",
"operator": "\u003e",
"rightValue": 40,
"maxStep": 3,
"priority": 0,
"note": "",
"tpl": {
"id": 3,
"name": "test",
"parentId": 1,
"actionId": 0,
"creator": "root"
}
}]
},
{
"hostname": "open-falcon-2",
"strategies": [{
"id": 4,
"metric": "proc.num",
"tags": {
"name": "mysql"
},
"func": "all(#3)",
"operator": "==",
"rightValue": 0,
"maxStep": 3,
"priority": 0,
"note": "",
"tpl": {
"id": 2,
"name": "com",
"parentId": 0,
"actionId": 3,
"creator": "root"
}
},
{
"id": 2,
"metric": "net.port.listen",
"tags": {
"port": "80"
},
"func": "all(#3)",
"operator": "==",
"rightValue": 0,
"maxStep": 3,
"priority": 0,
"note": "",
"tpl": {
"id": 2,
"name": "com",
"parentId": 0,
"actionId": 3,
"creator": "root"
}
}
]
}
]
},
"error": null
}
表达式expression:
{
"id": 229,
"result": {
"expressions": [{
"id": 1,
"metric": "cpu.idle",
"tags": {
"module": "123"
},
"func": "all(#3)",
"operator": "==",
"rightValue": 0,
"maxStep": 3,
"priority": 0,
"note": "",
"actionId": 2
}]
},
"error": null
}
报警的收敛(最大报警次数、最小报警间隔等),对应的判断逻辑如下:
// sendEventIfNeed decides whether the freshly evaluated event should be
// handed to sendEvent, applying the alarm-convergence rules (maximum number
// of alarms and minimum interval between alarms).
//
// historyData is the set of points the judgement was based on, isTriggered
// says whether the threshold was hit, now is the current timestamp, event is
// the candidate event (its Status and CurrentStep are filled in here), and
// maxStep is the configured maximum number of alarms.
//
// The previous event for the same id is looked up through g.LastEvents.
func sendEventIfNeed(historyData []*model.HistoryData, isTriggered bool, now int64, event *model.Event, maxStep int) {
	prev, found := g.LastEvents.Get(event.Id)

	if !isTriggered {
		// Threshold not hit: report a recovery only when the previous event
		// was a problem (status starting with 'P'); otherwise do nothing.
		if found && prev.Status[0] == 'P' {
			event.Status = "OK"
			event.CurrentStep = 1
			sendEvent(event)
		}
		return
	}

	event.Status = "PROBLEM"

	// No earlier event, or the earlier one has a status starting with 'O':
	// this is the first alarm of a new problem.
	if !found || prev.Status[0] == 'O' {
		event.CurrentStep = 1
		// Some users configure the maximum number of alarms as 0, which
		// effectively mutes the strategy, so nothing is sent in that case.
		if maxStep != 0 {
			sendEvent(event)
		}
		return
	}

	// From here on the previous event was already in the problem state.
	switch {
	case prev.CurrentStep >= maxStep:
		// The maximum number of alarms has been reached; stay silent.
		return
	case historyData[len(historyData)-1].Timestamp <= prev.EventTime:
		// Points that already produced an alarm must not be reused, otherwise
		// an alarm could fire every minute. Only the last element is checked;
		// per the original author's note it carries the oldest timestamp.
		return
	case now-prev.EventTime < g.Config().Alarm.MinInterval:
		// Alarms must be at least MinInterval apart (seconds, per the
		// original author's note).
		return
	}

	event.CurrentStep = prev.CurrentStep + 1
	sendEvent(event)
}