背景
因公司项目需要,需要在自研运维平台中通过程序调用的方式,实现node_exporter的自动安装与自动发现
整体流程
通过运维平台后端服务,对目标主机执行node_exporter的部署流程;部署成功后,将该主机node_exporter的监控信息注册到consul上,并在VictoriaMetrics(公司选择的是vm,它是prometheus的一种长期(持久化)存储方案,配置及其他相关内容均与prometheus一致)上配置consul的自动发现
具体实现
node_exporter部署
运维平台后端使用golang进行开发,前端页面通过发起一个websocket请求来触发部署(使用websocket的方式是为了在多节点部署时,能很好地实时展示进度信息)
(PS:楼主是刚从java转go的小白,golang某些使用不合理的地方欢迎指正)
服务端(使用的是gin框架):
// Register the WebSocket endpoint. A plain GET route is enough: the handler
// itself upgrades the GET request to a WebSocket connection.
r.GET("/websocket", NodeDeploy)
常量及参数类
package tools
// Commands and package names used when deploying node_exporter.
const (
	// ServerTypeCmd is executed on the target host to detect its CPU
	// architecture. `uname -m` is used instead of `uname -i`: the -i flag is
	// documented as non-portable and prints "unknown" on many distributions,
	// while -m reports the machine name (e.g. x86_64, aarch64).
	ServerTypeCmd = "uname -m"

	// NodeExporterPackageNameAMD64 is the tarball name (without .tar.gz) for
	// x86-64 hosts.
	// NOTE(review): this pins 1.4.0-rc.0 while the ARM64 package pins 1.5.0 —
	// confirm the version mismatch is intentional.
	NodeExporterPackageNameAMD64 = "node_exporter-1.4.0-rc.0.linux-amd64"

	// NodeExporterPackageNameARM64 is the tarball name (without .tar.gz) for
	// ARM64 hosts.
	NodeExporterPackageNameARM64 = "node_exporter-1.5.0.linux-arm64"
)
// SocketResult is the JSON message written back to the WebSocket client for
// each deployment step.
type SocketResult struct {
// Type is the outcome of the step; the deploy handler sets it to "success" or "error".
Type string `json:"type"`
// Host is the target host the message refers to ("all" for request-level errors).
Host string `json:"host"`
// Message is the human-readable description of the step result.
Message string `json:"message"`
}
// SocketsParam is one element of the JSON array sent by the WebSocket client;
// it describes a single host to deploy to.
type SocketsParam struct {
// Host is the address of the target host; the deploy handler connects to it over SSH.
Host string `json:"host"`
// Pass is the SSH password used for that host.
Pass string `json:"pass"`
}
// Consul is the request body sent to Consul when registering a service.
type Consul struct {
// Id is the service ID to register.
Id string `json:"id"`
// Name is the service name to register.
Name string `json:"name"`
// Address is the address of the registered service.
Address string `json:"address"`
// Port is the port of the registered service.
Port int `json:"port"`
}
ssh工具类
package ssh
import (
"fmt"
"github.com/pkg/sftp"
"golang.org/x/crypto/ssh"
"io"
"io/ioutil"
"os"
"time"
)
/*
@Author : wyx
@Desc : ssh连接工具类
*/
// Client holds the SSH credentials for one host together with the SSH and
// SFTP connections that Connect establishes.
type Client struct {
// user and pwd are the SSH login name and password.
user string
pwd string
// ip and port form the "ip:port" address that Connect dials.
ip string
port string
// sshClient and sftpClient are nil until Connect succeeds.
sshClient *ssh.Client
sftpClient *sftp.Client
}
// NewSSHClient builds a Client from the given credentials and address. No
// connection is opened here; the connection fields are left nil.
func NewSSHClient(user, pwd, ip, port string) Client {
	var c Client
	c.user, c.pwd = user, pwd
	c.ip, c.port = ip, port
	return c
}
// getConfigNoKey returns the SSH client configuration for password
// authentication with a 30 second dial timeout.
//
// NOTE(review): ssh.InsecureIgnoreHostKey accepts any host key, so the remote
// host's identity is not verified. Consider a known_hosts based callback
// before using this outside a trusted network.
func (c *Client) getConfigNoKey() *ssh.ClientConfig {
	auth := []ssh.AuthMethod{ssh.Password(c.pwd)}
	return &ssh.ClientConfig{
		User:            c.user,
		Auth:            auth,
		Timeout:         30 * time.Second,
		HostKeyCallback: ssh.InsecureIgnoreHostKey(),
	}
}
// Connect dials the host over SSH and opens an SFTP session on top of that
// connection. The connections are stored on the Client only when both steps
// succeed; if the SFTP session cannot be created, the SSH connection is closed
// again so it does not leak.
func (c *Client) Connect() error {
	sshConn, err := ssh.Dial("tcp", c.ip+":"+c.port, c.getConfigNoKey())
	if err != nil {
		return fmt.Errorf("连接主机失败: %w", err)
	}

	sftpConn, err := sftp.NewClient(sshConn)
	if err != nil {
		// Best-effort cleanup: the SFTP error is the one worth reporting.
		_ = sshConn.Close()
		return fmt.Errorf("创建一个sftp连接失败: %w", err)
	}

	c.sshClient = sshConn
	c.sftpClient = sftpConn
	return nil
}

// Close releases the SFTP session and the SSH connection, if any, and resets
// the Client so a later Connect can start fresh. It returns the first error
// encountered while closing and is safe to call on an unconnected Client.
func (c *Client) Close() error {
	var firstErr error
	if c.sftpClient != nil {
		firstErr = c.sftpClient.Close()
		c.sftpClient = nil
	}
	if c.sshClient != nil {
		if err := c.sshClient.Close(); err != nil && firstErr == nil {
			firstErr = err
		}
		c.sshClient = nil
	}
	return firstErr
}
// Run executes cmd on the remote host and returns the command's combined
// stdout and stderr. The output is returned even when the command fails, so
// callers can inspect it together with the error.
//
// The connection is opened lazily on first use. The receiver is a pointer so
// that the connection is stored on the caller's Client and reused; with a
// value receiver it was stored on a copy, so every call dialed a new
// connection that was never closed.
func (c *Client) Run(cmd string) (string, error) {
	if c.sshClient == nil {
		if err := c.Connect(); err != nil {
			return "", err
		}
	}

	session, err := c.sshClient.NewSession()
	if err != nil {
		return "", fmt.Errorf("创建ssh连接异常: %w", err)
	}
	defer session.Close()

	out, err := session.CombinedOutput(cmd)
	return string(out), err
}
// DownloadFile copies remoteFile from the remote host to localFile over SFTP,
// creating or truncating the local file. It returns the number of bytes
// copied, or -1 and an error.
//
// The connection is opened lazily on first use; the pointer receiver keeps it
// on the caller's Client so it is reused instead of being re-dialed (and
// leaked) on every call.
func (c *Client) DownloadFile(remoteFile, localFile string) (int, error) {
	if c.sshClient == nil {
		if err := c.Connect(); err != nil {
			return -1, err
		}
	}

	source, err := c.sftpClient.Open(remoteFile)
	if err != nil {
		return -1, fmt.Errorf("sftp连接打开文件异常: %w", err)
	}
	defer source.Close()

	target, err := os.OpenFile(localFile, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		return -1, fmt.Errorf("打开本地文件异常: %w,文件路径:%v", err, localFile)
	}

	n, err := io.Copy(target, source)
	if err != nil {
		_ = target.Close() // the copy error is the one worth reporting
		return -1, fmt.Errorf("write file error: %w", err)
	}
	// Close explicitly: a failed close can mean the data never reached disk.
	if err := target.Close(); err != nil {
		return -1, fmt.Errorf("close local file failed: %w", err)
	}
	return int(n), nil
}
// UploadFile writes the contents of localFile to remoteFileName on the remote
// host over SFTP. It returns the number of bytes written, or -1 and an error.
//
// The local file is read completely into memory before the remote file is
// created, so a local read failure cannot leave a truncated remote file.
// The connection is opened lazily on first use; the pointer receiver keeps it
// on the caller's Client so it is reused instead of being re-dialed (and
// leaked) on every call.
func (c *Client) UploadFile(localFile, remoteFileName string) (int, error) {
	if c.sshClient == nil {
		if err := c.Connect(); err != nil {
			return -1, err
		}
	}

	file, err := os.Open(localFile)
	if err != nil {
		return -1, fmt.Errorf("open local file failed: %w", err)
	}
	defer file.Close()

	fileByte, err := ioutil.ReadAll(file)
	if err != nil {
		return -1, fmt.Errorf("read local file failed: %w", err)
	}

	ftpFile, err := c.sftpClient.Create(remoteFileName)
	if err != nil {
		return -1, fmt.Errorf("Create remote path failed: %w", err)
	}

	n, err := ftpFile.Write(fileByte)
	if err != nil {
		_ = ftpFile.Close() // the write error is the one worth reporting
		return -1, fmt.Errorf("write remote file failed: %w", err)
	}
	if err := ftpFile.Close(); err != nil {
		return -1, fmt.Errorf("close remote file failed: %w", err)
	}
	return n, nil
}
部署逻辑
/*
@Author : wyx
@Desc : node_exporter自动部署逻辑
*/
// The websocket package used here is "github.com/gorilla/websocket".
//
// upgrader upgrades incoming HTTP requests to WebSocket connections using
// 1024-byte read and write buffers.
var upgrader = websocket.Upgrader{
ReadBufferSize: 1024,
WriteBufferSize: 1024,
// CheckOrigin always returns true, so requests are accepted regardless of
// their Origin header.
// NOTE(review): this permits cross-site WebSocket connections — restrict the
// allowed origins before exposing this endpoint publicly.
CheckOrigin: func(r *http.Request) bool {
return true
},
}
func NodeDeploy(c *gin.Context){
conn, err := upgrader.Upgrade(c.Writer, c.Request, nil)
resultChan := make(chan interface{})
defer conn.Close()
defer close(resultChan)
for {
var (
res tools.SocketResult
params []tools.SocketsParam
)
_, message, err := conn.ReadMessage()
if err != nil {
log.Infof("Failed to read WebSocket message:", err)
break
}
err = json.Unmarshal(message, ¶ms)
if err != nil {
res = tools.SocketResult{
Type: "error",
Host: "all",
Message: "参数解析失败" + err.Error()}
log.Errorf("解析webSocket参数失败:%v", err)
err = conn.WriteJSON(res)
return
}
for _, param := range params {
go func(param tools.SocketsParam) {
var (
backInfo string
errs error
fileName string
packageName string
resStr string
res tools.SocketResult
port int
)
client := ssh.NewSSHClient("root", param.Pass, param.Host, "22")
fileName = "deploy.sh"
backInfo, errs = client.Run(tools.ServerTypeCmd)
if errs != nil {
resStr = fmt.Sprintf("【%s】执行命令【%s】时发送异常:%s",
param.Host,
tools.ServerTypeCmd,
errs)
res.Type = "error"
res.Host = param.Host
res.Message = resStr
log.Infof("【%v】执行【%v】时异常:%v", param.Host, tools.ServerTypeCmd, errs)
resultChan <- res
return
} else {
resStr = fmt.Sprintf("【%s】判断服务器类型成功", param.Host)
res.Type = "success"
res.Host = param.Host
res.Message = resStr
resultChan <- res
}
if strings.Contains(backInfo, tools.ServerTypeX86) {
packageName = tools.NodeExporterPackageNameAMD64
} else if strings.Contains(backInfo, tools.ServerTypeARM) {
packageName = tools.NodeExporterPackageNameARM64
}
//将部署脚本文件上传至目标服务器
_, errs = client.UploadFile(tools.LocalScriptPath,tools.DeployScriptPath+fileName)
if errs != nil {
resStr = fmt.Sprintf("【%s】上传部署文件时发生异常:%s", param.Host, errs)
res.Type = "error"
res.Host = param.Host
res.Message = resStr
resultChan <- res
log.Errorf("【%v】上传文件失败:%v", param.Host, errs)
return
} else {
resStr = fmt.Sprintf("【%s】部署脚本上传成功", param.Host)
res.Type = "success"
res.Host = param.Host
res.Message = resStr
resultChan <- res
}
// 执行部署命令
if viper.GetString("settings.consul.port") == "" {
port = tools.NodeExporterExportPort
} else {
port = viper.GetInt("settings.consul.port")
}
// 构造部署命令
s := fmt.Sprintf("cd %s ;sh %s %s %s %d",
tools.DeployScriptPath,
fileName,
viper.GetString("settings.consul.packageUrl"),
packageName,
port)
// 执行部署脚本
backInfo, errs = client.Run(s)
if errs != nil {
resStr = fmt.Sprintf("【%s】部署时发生异常:%s", param.Host, errs)
res.Type = "error"
res.Host = param.Host
resultChan <- res
log.Errorf("【%v】node_exporter部署时发生异常:%v", param.Host, errs)
return
}
// 部署成功向consul注册节点信息
if strings.Contains(backInfo, "running") {
resStr = fmt.Sprintf("【%s】部署脚本执行成功", param.Host)
res.Type = "success"
res.Host = param.Host
res.Message = resStr
resultChan <- res
//向consul发起注册请求
https := resty.New()
_, errs = https.R().SetBody(
tools.Consul{
Name: "node-exporter",
Id: param.Host,
Address: param.Host,
Port: port}).
Put("htp://"+viper.GetString("settings.consul.clusterAddr")+"/v1/agent/service/register")
if errs != nil {
resStr = fmt.Sprintf("节点【%s】注册consul时发生异常:%s",param.Host,errs)
res.Host = param.Host
res.Type = "error"
res.Message = resStr
resultChan <- res
log.Errorf("节点【%v】向consul注册时发生异常:%v", param.Host, errs)
return
} else {
resStr = fmt.Sprintf("【%s】注册consul节点成功", param.Host)
res.Type = "success"
res.Host = param.Host
res.Message = resStr
resultChan <- res
}
}
}(param)
}
for i:= 0; i < len(params) * 5; i++{
select {
case data := <- resultChan:
err = conn.WriteJSON(data.(tools.SocketResult))
log.Info(data)
}
}
}
}
脚本文件
#!/bin/bash
# Installs node_exporter as a systemd service and starts it.
#
# Usage: deploy.sh <base-url> <tar-name> <port>
#   $1  base URL the tarball is downloaded from
#   $2  tarball name without the .tar.gz suffix
#   $3  port node_exporter listens on
#
# Only POSIX sh features are used so the script also works when started as
# `sh deploy.sh ...`. Any failing step aborts the script with a non-zero exit
# status instead of continuing with a half-finished install.
set -eu

if [ "$#" -ne 3 ]; then
    echo "usage: $0 <base-url> <tar-name> <port>" >&2
    exit 1
fi

# Base URL to fetch the tarball from
ip=$1
# Tarball name
tarName=$2
# Port to expose
port=$3

cd /opt
# -O overwrites an earlier download; without it wget saves a re-download as
# "<name>.tar.gz.1" and the stale archive would be extracted.
wget -O "${tarName}.tar.gz" "${ip}/${tarName}.tar.gz"
# No -v: stay quiet on success but keep error messages visible.
tar -zxf "${tarName}.tar.gz"
mv "${tarName}/node_exporter" /usr/local/bin/
# The heredoc delimiter is unquoted on purpose so ${port} is expanded.
cat > /etc/systemd/system/node_exporter.service <<EOF
[Unit]
Description=node_exporter
After=network.target
[Service]
Type=simple
User=root
# 指定启动的端口和日志级别
ExecStart=/usr/local/bin/node_exporter --web.listen-address=:${port} --log.level=error
MemoryLimit=300M
CPUQuota=100%
Restart=on-failure
[Install]
WantedBy=multi-user.target
EOF
systemctl daemon-reload
# restart (not start) so a re-deploy picks up a changed binary or port.
systemctl restart node_exporter && systemctl enable node_exporter && systemctl status node_exporter
前端逻辑
前端使用的是vue框架,具体的样式及标签文件就不写了,只贴几个socket连接的方法
initWebSocketConnect(){
if (typeof WebSocket === 'undefined') {
//这里用的是element的message,需要引入
this.$message('当前浏览器不支持webSocket,建议更换浏览器')
}else{
const socketApi = 'localhost:8000/websoket'
this.webSocket = new WebSocket('ws://' + socketApi)
this.webSocket.onopen = this.webSocketOnOpen
this.webSocket.onmessage = this.websocketOnMessage
this.webSocket.onclose = this.websocketClose
}
}
// onopen handler: logs that the WebSocket connection has been established.
webSocketOnOpen() {
console.log('webSocket已连接')
},
websocketOnMessage() {
console.log('socket返回数据:', e.data)
}
websocketClose(){
console.log('webSocket连接已经关闭')
}
//向后端发送数据
sendMessage(){
const deployInfo = []
deployInfo.push({
host: '192.168.1.1',
pass: 'xxxx'
})
deployInfo.push({
host: '192.168.1.2',
pass: 'xxxx'
})
this.webSocket.send(JSON.stringify(deployInfo))
}
//socket连接需要显示的调用.close()方法,否则连接建立后,会一直存活
closeConn(){
this.webSocket.close()
}
consul配置
consul其实没啥需要配置的,只要跑一个单节点或者集群的consul即可(建议跑集群,稳定性高一点)
向consul注册节点信息使用的是/v1/agent/service/register
这个接口
销毁节点用的是/v1/agent/service/deregister/:id
id就是你要销毁的节点id
当注册节点被调用后,我们在consul的ui页面上可以看到对应的节点信息
vm(prometheus)配置
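对于vm,我们只需要在它的配置文件中加上consul自动发现的job即可,例如(job_name和consul地址请按实际情况填写):
scrape_configs:
  - job_name: 'consul-node-exporter'
    consul_sd_configs:
      - server: '<consul地址>:8500'
        services: ['node-exporter']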
我们在ui页面上可以通过这个job_name来查询到我们自动发现的监控信息了