对Prometheus感兴趣的朋友请加入QQ群:70860761 一起探讨
Prometheus启动过程
启动入口源码
// Main runs the Prometheus server: it parses the command line, builds the
// storage, notifier, target manager, query engine, rule manager and web
// handler, loads the configuration, starts all components and then blocks
// until a termination signal, a web quit request or a web listen error occurs.
//
// The returned value is intended as the process exit code:
//   - 2 if the command-line arguments could not be parsed,
//   - 1 if the initial configuration load or the storage start failed,
//   - 0 otherwise (including after printing the version).
func Main() int {
	if err := parse(os.Args[1:]); err != nil {
		log.Error(err)
		return 2
	}

	if cfg.printVersion {
		fmt.Fprintln(os.Stdout, version.Print("prometheus"))
		return 0
	}

	log.Infoln("Starting prometheus", version.Info())
	log.Infoln("Build context", version.BuildContext())

	var reloadables []Reloadable

	var (
		memStorage     = local.NewMemorySeriesStorage(&cfg.storage)
		remoteStorage  = remote.New(&cfg.remote)
		sampleAppender = storage.Fanout{memStorage}
	)
	// Remote storage is optional; when present it receives samples too and
	// takes part in configuration reloads.
	if remoteStorage != nil {
		sampleAppender = append(sampleAppender, remoteStorage)
		reloadables = append(reloadables, remoteStorage)
	}

	var (
		notifier      = notifier.New(&cfg.notifier)
		targetManager = retrieval.NewTargetManager(sampleAppender)
		queryEngine   = promql.NewEngine(memStorage, &cfg.queryEngine)
	)

	ruleManager := rules.NewManager(&rules.ManagerOptions{
		SampleAppender: sampleAppender,
		Notifier:       notifier,
		QueryEngine:    queryEngine,
		ExternalURL:    cfg.web.ExternalURL,
	})

	// Snapshot every command-line flag (name -> current value) for the web
	// handler.
	flags := map[string]string{}
	cfg.fs.VisitAll(func(f *flag.Flag) {
		flags[f.Name] = f.Value.String()
	})

	// Current version information.
	version := &web.PrometheusVersion{
		Version:   version.Version,
		Revision:  version.Revision,
		Branch:    version.Branch,
		BuildUser: version.BuildUser,
		BuildDate: version.BuildDate,
		GoVersion: version.GoVersion,
	}

	webHandler := web.New(memStorage, queryEngine, targetManager, ruleManager, version, flags, &cfg.web)

	reloadables = append(reloadables, targetManager, ruleManager, webHandler, notifier)

	// The initial configuration load must succeed, otherwise we bail out.
	if !reloadConfig(cfg.configFile, reloadables...) {
		return 1
	}

	// Wait for reload or termination signals. Start the handler for SIGHUP as
	// early as possible, but do not act on it until we are ready to handle
	// reloading our config.
	//
	// The channel is buffered because package signal never blocks when
	// delivering: with an unbuffered channel a SIGHUP arriving while the
	// goroutine below is not receiving (before hupReady is closed, or during
	// a reload) would be silently dropped.
	hup := make(chan os.Signal, 1)
	hupReady := make(chan bool)
	signal.Notify(hup, syscall.SIGHUP)
	go func() {
		<-hupReady
		for {
			select {
			case <-hup:
			case <-webHandler.Reload():
			}
			// reloadConfig logs failures itself, so its result is not
			// inspected here.
			reloadConfig(cfg.configFile, reloadables...)
		}
	}()

	// Start all components. The order is NOT arbitrary.

	if err := memStorage.Start(); err != nil {
		log.Errorln("Error opening memory series storage:", err)
		return 1
	}
	defer func() {
		if err := memStorage.Stop(); err != nil {
			log.Errorln("Error stopping storage:", err)
		}
	}()

	if remoteStorage != nil {
		prometheus.MustRegister(remoteStorage)

		go remoteStorage.Run()
		defer remoteStorage.Stop()
	}
	// The storage has to be fully initialized before registering.
	prometheus.MustRegister(memStorage)
	prometheus.MustRegister(notifier)
	prometheus.MustRegister(configSuccess)
	prometheus.MustRegister(configSuccessTime)

	// The notifier is a dependency of the rule manager. It has to be
	// started before and torn down afterwards.
	go notifier.Run()
	defer notifier.Stop()

	go ruleManager.Run()
	defer ruleManager.Stop()

	go targetManager.Run()
	defer targetManager.Stop()

	// Shutting down the query engine before the rule manager will cause pending queries
	// to be canceled and ensures a quick shutdown of the rule manager.
	defer queryEngine.Stop()

	go webHandler.Run()

	// Wait for reload or termination signals.
	close(hupReady) // Unblock SIGHUP handler.

	// Buffered for the same reason as hup: a termination signal delivered
	// before the select below starts receiving must not be lost.
	term := make(chan os.Signal, 1)
	signal.Notify(term, os.Interrupt, syscall.SIGTERM)
	select {
	case <-term:
		log.Warn("Received SIGTERM, exiting gracefully...")
	case <-webHandler.Quit():
		log.Warn("Received termination request via web service, exiting gracefully...")
	case err := <-webHandler.ListenError():
		log.Errorln("Error starting web server, exiting gracefully:", err)
	}

	// The deferred Stop calls above run, in reverse order, when we return.
	log.Info("See you next time!")
	return 0
}
// Reloadable is implemented by components that can change their internal
// state to match a new config and handle failure gracefully.
type Reloadable interface {
// ApplyConfig applies the given configuration to the component. The boolean
// result is treated by reloadConfig as "applied successfully".
ApplyConfig(*config.Config) bool
}
// reloadConfig loads the configuration file at filename and applies it, in
// order, to each of the given Reloadables.
//
// It returns true only if the file loaded and every Reloadable accepted the
// configuration. Once one ApplyConfig call reports failure, the remaining
// Reloadables are not called. On return the configSuccess gauge is set to 1
// or 0 accordingly, and configSuccessTime is updated only on success.
func reloadConfig(filename string, rls ...Reloadable) (success bool) {
	log.Infof("Loading configuration file %s", filename)

	// Publish the outcome through the metrics once the final value of the
	// named result is known.
	defer func() {
		if !success {
			configSuccess.Set(0)
			return
		}
		configSuccess.Set(1)
		configSuccessTime.Set(float64(time.Now().Unix()))
	}()

	loaded, loadErr := config.LoadFile(filename)
	if loadErr != nil {
		log.Errorf("Couldn't load configuration (-config.file=%s): %v", filename, loadErr)
		return false
	}

	// Assume success until a component rejects the configuration.
	success = true
	for _, component := range rls {
		if !component.ApplyConfig(loaded) {
			success = false
			break
		}
	}
	return success
}
- 2行:parse方法解析命令行参数
- 18~25行:根据所解析的命令行参数构造指标存储引擎(本地模式或者远程模式[opentsdb,influxdb…])。
- 27~38行:构造规则管理服务(ruleManager),包括规则的执行和告警的发送等;
- 54行:构造Web管理服务
- 56~60行:加载配置文件,并将其应用到各个可重载的服务(targetManager、ruleManager、webHandler、notifier 等);加载失败则直接退出
- 65~77行:配置动态重载处理,通过监听 Linux 信号(SIGHUP)或 Web 服务的重载请求来触发;
- 81~96行:启动指标存储引擎以及程序退出引擎析构处理;
- 89~101行:注册prometheus组件内部所输出的系统指标;