背景
基于supervisord服务管理进程
使用supervisord托管进程时,进程启动失败默认重试三次后就不再尝试启动;程序挂掉后,无法及时了解其运行状况。
方案:
自建Prometheus采集器(exporter),在程序挂掉时能够及时发现,并将告警信息发送到企业微信。
package main
import (
"fmt"
"net/http"
"os"
"os/exec"
"strings"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
// monitorSvc is the gauge vector exported as "supervisor_service". It carries
// one series per value of the "service" label; buildMetrics writes 1 for a
// program whose state is RUNNING and 0 otherwise.
var monitorSvc = prometheus.NewGaugeVec(
	prometheus.GaugeOpts{
		Name: "supervisor_service",
		Help: "This is supervisor_service",
	},
	[]string{"service"},
)

// monitorTime is the gauge vector exported as "supervisor_run_time". It
// carries one series per value of the "service" label and holds the run time,
// in seconds, that buildMetrics computes for that program.
var monitorTime = prometheus.NewGaugeVec(
	prometheus.GaugeOpts{
		Name: "supervisor_run_time",
		Help: "This is supervisor_run_time",
	},
	[]string{"service"},
)
// init registers both gauge vectors with the default Prometheus registry.
// MustRegister panics if a collector cannot be registered.
func init() {
	for _, collector := range []prometheus.Collector{monitorSvc, monitorTime} {
		prometheus.MustRegister(collector)
	}
}
// kll runs `supervisorctl status` and passes every program status line, split
// into whitespace-separated fields, to buildMetrics.
//
// Lines that do not look like a program status entry (blank lines, error
// messages printed by supervisorctl) are skipped so they neither crash the
// exporter nor create bogus series.
func kll() {
	output, err := exec.Command("supervisorctl", "status").Output()
	if err != nil {
		// Output reports a non-zero exit code as *exec.ExitError while still
		// returning whatever was written to stdout. supervisorctl is expected
		// to exit non-zero when some program is not RUNNING (verify against the
		// installed supervisor version), which is exactly the case this
		// exporter must report, so the captured table is still parsed. Any
		// other failure (binary missing, nothing captured) is fatal for this
		// scrape.
		if _, exited := err.(*exec.ExitError); !exited || len(output) == 0 {
			fmt.Println("Error executing command:", err)
			return
		}
	}

	for _, line := range strings.Split(string(output), "\n") {
		fields := strings.Fields(line)
		// A status entry is "<name> <STATE> ...". Anything else is not a
		// program line and must not reach buildMetrics.
		if len(fields) < 2 || !isSupervisorState(fields[1]) {
			continue
		}
		buildMetrics(fields)
	}
}

// isSupervisorState reports whether s is one of the process state names that
// supervisord documents for `supervisorctl status`.
func isSupervisorState(s string) bool {
	switch s {
	case "STOPPED", "STARTING", "RUNNING", "BACKOFF",
		"STOPPING", "EXITED", "FATAL", "UNKNOWN":
		return true
	}
	return false
}
func buildMetrics(lines []string) {
var metricValue float64
serviceName := lines[0]
if lines[1] == "RUNNING" {
metricValue = 1
pid := strings.Replace(lines[3], ",", "", -1)
stat, err := os.Lstat(fmt.Sprintf("/proc/%s", pid))
if err != nil {
fmt.Println("Error getting process start time:", err)
return
}
startTime := stat.ModTime().Unix()
runTime := time.Now().Unix() - startTime
monitorTime.WithLabelValues(serviceName).Set(float64(runTime))
} else {
metricValue = 0
}
monitorSvc.WithLabelValues(serviceName).Set(metricValue)
}
func main() {
http.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) {
fmt.Printf("Received request from %s for %s\n", r.RemoteAddr, r.URL.Path)
kll()
promhttp.Handler().ServeHTTP(w, r) // 使用Prometheus的处理器直接服务于监控指标
})
http.ListenAndServe(":9077", nil)
}