1. reportStart()函数上报本机信息给ams,从该函数开始
func main() {
...
// Start the periodic host report only when reporting is enabled in the config.
if config.Config.Enable.Report {
reportStart()
}
...
2. 定时循环上报
// reportStart launches report.LoopReport in its own goroutine and returns
// immediately, so the caller is not blocked by the reporting loop.
func reportStart() {
go report.LoopReport()
}
// LoopReport runs the host report forever, once per configured interval.
//
// The interval is config.Config.Report.Interval interpreted as seconds. Each
// cycle sleeps first and reports afterwards, so the first report is sent one
// full interval after the loop starts. A failed report is only logged; the
// loop never returns.
func LoopReport() {
	interval := time.Second * time.Duration(config.Config.Report.Interval)
	for {
		time.Sleep(interval)

		err := report()
		if err == nil {
			continue
		}
		logger.Error("report occur error: ", err)
	}
}
根据前面配置文件解析内容可知config.Config.Report.Interval是从agent.yml获取到的数据,即每10秒上报一次
report:
# 调用ams的接口上报数据,需要ams的token
token: ams-builtin-token
# 上报周期,单位是秒
interval: 10
3. 上报任务
// report (excerpt) reads the host name and gathers the configured custom
// fields; the rest of the function is elided here. It returns an error when
// the host name cannot be read or when gatherFields reports one.
func report() error {
name, err := os.Hostname()
if err != nil {
return fmt.Errorf("cannot get hostname: %s", err)
}
// Gather the values to report from config.Config.Report.Fields, i.e. the
// report:fields section of agent.yml. Reporting a custom value therefore
// only requires adding an entry to that section.
fields, err := gatherFields(config.Config.Report.Fields)
if err != nil {
return err
}
...
}
// gatherFields runs the command configured for every field in m and returns
// the outputs keyed by field name.
//
// A field whose command fails is logged and left out of the result; the other
// fields are still collected. The returned error is always nil.
func gatherFields(m map[string]string) (map[string]string, error) {
	collected := make(map[string]string, len(m))
	for field, command := range m {
		// Resolve the concrete value for this field.
		value, execErr := exec(command)
		if execErr != nil {
			logger.Errorf("get %s by exec %v err:%v", field, command, execErr)
			continue
		}
		collected[field] = value
	}
	return collected, nil
}
// exec runs shell through `sh -c` via sys.CmdOutTrim and returns what that
// helper yields. On failure it returns an empty string and an error that
// names the command.
//
// The report:fields values in agent.yml look like shell commands; this
// function confirms that they are executed by a shell.
func exec(shell string) (string, error) {
	output, runErr := sys.CmdOutTrim("sh", "-c", shell)
	if runErr == nil {
		return output, nil
	}
	return "", fmt.Errorf("cannot exec `%s', error: %v", shell, runErr)
}
4. 根据前面内容已知添加自定义字段需要在agent.yml的report:fields字段添加,key为字段名,value为shell语句
fields:
cpu: cat /proc/cpuinfo | grep processor | wc -l
mem: cat /proc/meminfo | grep MemTotal | awk '{printf "%dGi", $2/1024/1024}'
disk: df -m | grep '/dev/' | grep -v '/var/lib' | grep -v tmpfs | awk '{sum += $2};END{printf "%dGi", sum/1024}'
# 模拟需要添加的自定义字段: 获取本机架构(注意: uname -i 不可移植,部分发行版上会输出 unknown,此时可改用 uname -m)
arch: uname -i
5. 上报url
// report (excerpt) builds the host registration form, signs it with an MD5
// digest and posts it to an ams instance. The beginning of the function is
// elided.
//
// It returns an error when the HTTP call fails, when ams answers with a
// non-empty error message, or when no ams address is available.
func report() error {
...
	// Assemble the payload in the shape the register endpoint expects.
	form := hostRegisterForm{
		SN:      SN,
		IP:      IP,
		Ident:   Ident,
		Name:    name,
		Cate:    config.Config.Report.Cate,
		UniqKey: config.Config.Report.UniqKey,
		Fields:  fields,
	}

	// The digest covers the identity fields followed by every custom field
	// value. Values are visited in sorted key order so the digest does not
	// depend on Go's randomized map iteration. Only keys go into the sorted
	// list: mixing values into it would hash a value twice whenever it happens
	// to equal another field's name.
	content := form.SN + form.IP + form.Ident + form.Name + form.Cate + form.UniqKey
	keys := make([]string, 0, len(fields))
	for key := range fields {
		keys = append(keys, key)
	}
	sort.Strings(keys)
	for _, key := range keys {
		content += fields[key]
	}
	form.Digest = str.MD5(content)

	// Look up the ams HTTP addresses and walk them in random order. Every path
	// through the loop body returns, so only the first (randomly chosen)
	// instance is contacted per call; the trailing error is reached only when
	// no address is available.
	servers := address.GetHTTPAddresses("ams")
	for _, i := range rand.Perm(len(servers)) {
		// The receiving side is the ams route /v1/ams-ce/hosts/register.
		url := fmt.Sprintf("http://%s/v1/ams-ce/hosts/register", servers[i])
		logger.Debugf("report: %+v", form)

		var body errRes
		err := httplib.Post(url).JSONBodyQuiet(form).Header("X-Srv-Token", config.Config.Report.Token).SetTimeout(time.Second * 5).ToJSON(&body)
		if err != nil {
			// Best-effort dump of the payload for diagnosis; a marshal
			// failure only leaves the logged payload empty.
			// NOTE(review): this writes the service token to the log in
			// clear text — confirm that is acceptable.
			js, _ := json.Marshal(form)
			logger.Errorf("report payload: %s, token: %s", string(js), config.Config.Report.Token)
			return fmt.Errorf("curl %s fail: %v", url, err)
		}

		if body.Err != "" {
			// body.Err is server-supplied text: pass it as an argument, not
			// as the format string, so a '%' in it cannot garble the message.
			return fmt.Errorf("%s", body.Err)
		}
		return nil
	}
	return fmt.Errorf("all server instance is dead")
}
6. ams接收数据
// Go straight to the routing code; everything else is left aside for now.
// main (ams, excerpt) calls http.Start.
func main() {
...
http.Start()
...
}
// Start (excerpt) passes r to Config, which registers the routes.
func Start() {
...
Config(r)
...
}
// Config (excerpt) registers routes on r. The part shown creates the
// /v1/ams-ce group with shouldBeService() attached via Use, and maps
// POST /v1/ams-ce/hosts/register to v1HostRegister.
func Config(r *gin.Engine) {
...
v1 := r.Group("/v1/ams-ce").Use(shouldBeService())
{
v1.POST("/hosts/register", v1HostRegister)
}
...
}
// v1HostRegister handles the registration info that an agent reports on its
// own initiative. The excerpt looks the host up by the reported unique key,
// strips custom fields that are not whitelisted, and then either creates the
// host or updates the existing record.
func v1HostRegister(c *gin.Context) {
...
// Check whether the host already exists: create it if not, update it if so.
host, err := models.HostGet(f.UniqKey+" = ?", uniqValue)
dangerous(err)
// hFixed is the set of custom field names that are allowed to be stored.
hFixed := map[string]struct{}{
"cpu": struct{}{},
"mem": struct{}{},
"disk": struct{}{},
// As mapKeyClear shows, a newly added custom field must be listed here,
// otherwise it cannot be saved.
"arch": struct{}{},
}
// Keys that are not in hFixed are dropped, even if the agent reported them.
mapKeyClear(f.Fields, hFixed)
if host == nil {
msg := "create host failed"
host, err = models.HostNew(f.SN, f.IP, f.Ident, f.Name, f.Cate, f.Fields)
if err != nil {
logger.Error(err)
renderMessage(c, msg)
return
}
// HostNew returned neither an error nor a host: treat it as a failed create.
if host == nil {
logger.Errorf("%s, report info:%v", msg, f)
renderMessage(c, msg)
return
}
} else {
// Existing host: merge the identity fields and a fresh timestamp into the
// field map so that a single Update call writes everything.
f.Fields["sn"] = f.SN
f.Fields["ip"] = f.IP
f.Fields["ident"] = f.Ident
f.Fields["name"] = f.Name
f.Fields["cate"] = f.Cate
f.Fields["clock"] = time.Now().Unix()
err = host.Update(f.Fields)
if err != nil {
logger.Error(err)
msg := "update host err"
renderMessage(c, msg)
return
}
}
...
}
// mapKeyClear removes from src every key that is not present in save,
// modifying src in place.
//
// A nil or empty save removes all keys; a nil src is a no-op.
func mapKeyClear(src map[string]interface{}, save map[string]struct{}) {
	// Deleting entries while ranging over the same map is well-defined in Go,
	// so no intermediate list of keys is needed.
	for k := range src {
		if _, keep := save[k]; !keep {
			delete(src, k)
		}
	}
}
// HostNew inserts a new host row and, when fields is non-empty, applies those
// fields to the freshly inserted row, all inside one transaction on the "ams"
// database.
//
// It returns (nil, err) when the transaction cannot be started or when either
// write fails (the transaction is rolled back first). Otherwise it returns the
// host together with the result of Commit.
func HostNew(sn, ip, ident, name, cate string, fields map[string]interface{}) (*Host, error) {
	host := &Host{}
	host.SN, host.IP, host.Ident = sn, ip, ident
	host.Name, host.Cate = name, cate
	host.Clock = time.Now().Unix()

	tx := DB["ams"].NewSession()
	defer tx.Close()

	if beginErr := tx.Begin(); beginErr != nil {
		return nil, beginErr
	}

	// First insert the row without the custom fields.
	_, err := tx.Insert(host)
	if err != nil {
		tx.Rollback()
		return nil, err
	}

	// When custom fields were supplied, update the row inserted above.
	if len(fields) != 0 {
		// Original author's note: xorm's Update only writes keys of fields
		// that exist on Host, so the arch field used in this walkthrough must
		// be added to Host. Because n9e does not create its tables by syncing
		// structs to the database, the arch column must also be added to the
		// host table in the ams-xxx-.sql file.
		_, err = tx.Table(new(Host)).ID(host.Id).Update(fields)
		if err != nil {
			tx.Rollback()
			return nil, err
		}
	}

	return host, tx.Commit()
}
// Host (excerpt) is the record type that HostNew inserts; the other fields are
// elided.
type Host struct {
...
// Arch is the custom field added in this walkthrough; its JSON name is "arch".
Arch string `json:"arch"`
}
7. 更新ams数据库表结构,可在sql/update/下新建ams升级文件(由sql/n9e_ams.sql拷贝得来),比如n9e_ams-v3.5.0.sql, 加入arch行
CREATE TABLE `host`
(
`id` int unsigned not null AUTO_INCREMENT,
...
`arch` char(15) not null default '',
...
)
8. 同步到数据库
mysql -uroot -p1234 < n9e_ams-v3.5.0.sql
9. 重新build ams,重启ams和agent即可获取到数据