初始化
cmd/prometheus/main.go(调用 rules 包的 NewManager 创建规则管理器)
// Build the rule manager from its dependencies. The explicit rules. qualifier
// shows that this call site is outside package rules.
ruleManager := rules.NewManager(&rules.ManagerOptions{
Appendable: fanoutStorage,
// Presumably the notifier that Group.sendAlerts later reaches through
// g.opts.Notifier — confirm that g.opts is this options struct.
Notifier: notifier,
// Handed to rule.Eval on every group evaluation (see Group.Eval).
QueryEngine: queryEngine,
Context: ctx,
ExternalURL: cfg.web.ExternalURL,
// Tag every log line emitted by the manager with its component name.
Logger: log.With(logger, "component", "rule manager"),
})
// Start the manager's Run method in a separate goroutine.
go ruleManager.Run()
配置
rules/manager.go
func (m *Manager) ApplyConfig(conf *config.Config) error {
var files []string
for _, pat := range conf.RuleFiles {
fs, err := filepath.Glob(pat)
files = append(files, fs...)
}
groups, errs := m.loadGroups(time.Duration(conf.GlobalConfig.EvaluationInterval), files...)
for _, newg := range groups {
go func(newg *Group) {
go func() {
newg.run()
}
}
}
}
主要做了如下几件事情:
- 列出配置的所有rules文件
- 解析rules文件
- 按groupName分组
- 调用 group.run(),此处会启动定时任务,按照配置的频率 evaluation_interval 执行告警规则或者汇总(recording)规则
运行规则
运行group
rules/manager.go 中的 group.run() 方法负责运行 group:先立即执行一次,之后按 g.interval 定时重复执行
// iter performs one evaluation pass of the group, stamped with the current time.
iter := func() {
start := time.Now()
g.Eval(start)
}
// Evaluate once immediately instead of waiting for the first tick.
iter()
// From now on the ticker fires every g.interval.
tick := time.NewTicker(g.interval)
for {
// NOTE(review): with only a default case this outer select never blocks and
// falls straight through to the inner one. Further cases (for example a stop
// signal) were presumably elided from this excerpt — confirm against the
// full source. As shown, the loop never exits and the ticker is never stopped.
select {
default:
select {
case <-tick.C: // re-run the group at the configured interval
iter()
}
}
}
运行规则
rules/manager.go 中的 Eval 方法会遍历当前 group 的所有 rule,并逐个执行 eval:
// Eval evaluates every rule of the group once for the timestamp ts and, for
// rules that are alerting rules, passes them on to sendAlerts.
func (g *Group) Eval(ts time.Time) {
for i, rule := range g.rules {
// Each rule is handled inside an immediately-invoked closure that receives
// the loop variables as arguments.
func(i int, rule Rule) {
// NOTE(review): vector and err are not used in this excerpt; their handling
// (error check, use of the result) was presumably elided — confirm.
vector, err := rule.Eval(g.opts.Context, ts, g.opts.QueryEngine, g.opts.ExternalURL)
// Only alerting rules have alerts to send; other rule types are skipped.
if ar, ok := rule.(*AlertingRule); ok {
// The error returned by sendAlerts is not inspected here.
g.sendAlerts(ar)
}
}(i, rule)
}
}
调用rules/alerting.go的Eval()方法,判断是否需要进行alert:
// Eval runs the rule's query for the timestamp ts, records an Alert in r.active
// for the result samples, and then advances the state of every tracked alert.
//
// NOTE(review): query, h and resultFPs are not defined in this excerpt and the
// function has no return statement; that code was presumably elided — confirm
// against the full source.
func (r *AlertingRule) Eval(ctx context.Context, ts time.Time, engine *promql.Engine, externalURL *url.URL) (promql.Vector, error) {
res, err := query.Exec(ctx).Vector() // run the query
for _, smpl := range res {
// h is presumably a key derived from the sample's labels — TODO confirm.
r.active[h] = &Alert{ // cache the alert
ActiveAt: ts,
State: StatePending,
}
}
// Adjust the state of every tracked alert.
for fp, a := range r.active {
// The alert's key is absent from resultFPs for this evaluation.
if _, ok := resultFPs[fp]; !ok {
// Drop the entry if it was still pending, or if it has been resolved for
// longer than resolvedRetention.
if a.State == StatePending || (!a.ResolvedAt.IsZero() && ts.Sub(a.ResolvedAt) > resolvedRetention) {
delete(r.active, fp)
}
// Mark the alert inactive and stamp the resolution time; an alert that is
// already inactive keeps its original ResolvedAt.
if a.State != StateInactive {
a.State = StateInactive
a.ResolvedAt = ts
}
continue
}
// Promote a pending alert to firing once it has been active for at least
// the rule's hold duration.
if a.State == StatePending && ts.Sub(a.ActiveAt) >= r.holdDuration {
a.State = StateFiring
}
}
}
发送告警
rules/manager.go 中的 sendAlerts 方法,最终调用 Notifier.Send() 方法将 alert 发送给 Alertmanager:
// sendAlerts walks the rule's current alerts, skips those still pending, and
// sends the collected alerts through g.opts.Notifier.Send when there are any.
//
// NOTE(review): in this excerpt nothing is ever appended to alerts, so Send is
// never reached, and the function lacks a return statement although it declares
// an error result. The conversion of each alert into a *notifier.Alert and the
// final return were presumably elided — confirm against the full source.
func (g *Group) sendAlerts(rule *AlertingRule) error {
var alerts []*notifier.Alert
for _, alert := range rule.currentAlerts() {
// Pending alerts are not sent.
if alert.State == StatePending {
continue
}
}
// Avoid calling the notifier with an empty batch.
if len(alerts) > 0 {
g.opts.Notifier.Send(alerts...)
}
}