zabbix源码之zabbix alerter.c报警逻辑

最新推荐文章于 2024-05-08 07:48:04 发布

Zhao_S

最新推荐文章于 2024-05-08 07:48:04 发布

阅读量1.9k

点赞数

分类专栏： zabbix 文章标签： zabbix 源码

zabbix 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

首先需要介绍的是 execute_action 函数。这个函数负责处理与 action 相关联的 mediatype，以及向 db 反馈发送成功还是失败的状态。

它会根据当初配置的 media 做不同的动作。比如是 MEDIA_TYPE_EMAIL 的话，那就连接 smtp，发送邮件。如果是 EXEC 的话，那就是 fork 进程，也就是第三方的系统调用。

 
Python
 
         1 
       
         2 
       
         3 
       
         4 
       
         5 
       
         6 
       
         7 
       
         8 
       
         9 
       
         10 
       
         11 
       
         12 
       
         13 
       
         14 
       
         15 
       
         16 
       
         17 
       
         18 
       
         19 
       
         20 
       
         21 
       
         22 
       
         23 
       
         24 
       
         25 
       
         26 
       
         27 
       
         28 
       
        if 
          
        ( 
        MEDIA_TYPE_EMAIL 
          
        == 
          
        mediatype 
        -> 
        type 
        ) 
       
        { 
       
        alarm 
        ( 
        ALARM_ACTION_TIMEOUT 
        ) 
        ; 
       
        res 
          
        = 
          
        send_email 
        ( 
        mediatype 
        -> 
        smtp_server 
        , 
          
        mediatype 
        -> 
        smtp_helo 
        , 
          
        mediatype 
        -> 
        smtp_email 
        , 
       
        alert 
        -> 
        sendto 
        , 
          
        alert 
        -> 
        subject 
        , 
          
        alert 
        -> 
        message 
        , 
          
        error 
        , 
          
        max_error_len 
        ) 
        ; 
       
        alarm 
        ( 
        0 
        ) 
        ; 
       
        } 
       
        #ifdef HAVE_JABBER 
       
        else 
          
        if 
          
        ( 
        MEDIA_TYPE_JABBER 
          
        == 
          
        mediatype 
        -> 
        type 
        ) 
       
        { 
       
        / 
        * 
          
        Jabber  
        uses  
        its  
        own  
        timeouts 
          
        * 
        / 
       
        res 
          
        = 
          
        send_jabber 
        ( 
        mediatype 
        -> 
        username 
        , 
          
        mediatype 
        -> 
        passwd 
        , 
       
        alert 
        -> 
        sendto 
        , 
          
        alert 
        -> 
        subject 
        , 
          
        alert 
        -> 
        message 
        , 
          
        error 
        , 
          
        max_error_len 
        ) 
        ; 
       
        } 
       
        #endif 
       
        else 
          
        if 
          
        ( 
        MEDIA_TYPE_SMS 
          
        == 
          
        mediatype 
        -> 
        type 
        ) 
       
        { 
       
        / 
        * 
          
        SMS  
        uses  
        its  
        own  
        timeouts 
          
        * 
        / 
       
        res 
          
        = 
          
        send_sms 
        ( 
        mediatype 
        -> 
        gsm_modem 
        , 
          
        alert 
        -> 
        sendto 
        , 
          
        alert 
        -> 
        message 
        , 
          
        error 
        , 
          
        max_error_len 
        ) 
        ; 
       
        } 
       
        else 
          
        if 
          
        ( 
        MEDIA_TYPE_EZ_TEXTING 
          
        == 
          
        mediatype 
        -> 
        type 
        ) 
       
        { 
       
        / 
        * 
          
        Ez  
        Texting  
        uses  
        its  
        own  
        timeouts 
          
        * 
        / 
       
        res 
          
        = 
          
        send_ez_texting 
        ( 
        mediatype 
        -> 
        username 
        , 
          
        mediatype 
        -> 
        passwd 
        , 
       
        alert 
        -> 
        sendto 
        , 
          
        alert 
        -> 
        message 
        , 
          
        mediatype 
        -> 
        exec_path 
        , 
          
        error 
        , 
          
        max_error_len 
        ) 
        ; 
       
        } 
       
        else 
          
        if 
          
        ( 
        MEDIA_TYPE_EXEC 
          
        == 
          
        mediatype 
        -> 
        type 
        )

这下面是zabbix里面具体调用scripts脚本的过程。

Python
 
         1 
       
         2 
       
         3 
       
         4 
       
         5 
       
         6 
       
         7 
       
         8 
       
         9 
       
         10 
       
         11 
       
         12 
       
         13 
       
         14 
       
         15 
       
         16 
       
         17 
       
         18 
       
         19 
       
         20 
       
         21 
       
         22 
       
         23 
       
         24 
       
         25 
       
         26 
       
        if 
          
        ( 
        0 
          
        == 
          
        access 
        ( 
        cmd 
        , 
          
        X_OK 
        ) 
        ) 
       
        { 
       
        send_to 
          
        = 
          
        zbx_dyn_escape_string 
        ( 
        alert 
        -> 
        sendto 
        , 
          
        "\"\\" 
        ) 
        ; 
       
        subject 
          
        = 
          
        zbx_dyn_escape_string 
        ( 
        alert 
        -> 
        subject 
        , 
          
        "\"\\" 
        ) 
        ; 
       
        message 
          
        = 
          
        zbx_dyn_escape_string 
        ( 
        alert 
        -> 
        message 
        , 
          
        "\"\\" 
        ) 
        ; 
       
        zbx_snprintf_alloc 
        ( 
        & 
        cmd 
        , 
          
        & 
        cmd_alloc 
        , 
          
        & 
        cmd_offset 
        , 
          
        " \"%s\" \"%s\" \"%s\"" 
        , 
       
        send_to 
        , 
          
        subject 
        , 
          
        message 
        ) 
        ; 
       
        zbx_free 
        ( 
        send_to 
        ) 
        ; 
       
        zbx_free 
        ( 
        subject 
        ) 
        ; 
       
        zbx_free 
        ( 
        message 
        ) 
        ; 
       
        if 
          
        ( 
        SUCCEED 
          
        == 
          
        ( 
        res 
          
        = 
          
        zbx_execute 
        ( 
        cmd 
        , 
          
        & 
        output 
        , 
          
        error 
        , 
          
        max_error_len 
        , 
          
        ALARM_ACTION_TIMEOUT 
        ) 
        ) 
        ) 
       
        { 
       
        zabbix_log 
        ( 
        LOG_LEVEL_DEBUG 
        , 
          
        "%s output:\n%s" 
        , 
          
        mediatype 
        -> 
        exec_path 
        , 
          
        output 
        ) 
        ; 
       
        zbx_free 
        ( 
        output 
        ) 
        ; 
       
        } 
       
        else 
       
        res 
          
        = 
          
        FAIL 
        ; 
       
        } 
       
        else 
       
        zbx_snprintf 
        ( 
        error 
        , 
          
        max_error_len 
        , 
          
        "%s: %s" 
        , 
          
        cmd 
        , 
          
        zbx_strerror 
        ( 
        errno 
        ) 
        ) 
        ; 
       
        zbx_free 
        ( 
        cmd 
        ) 
        ;

上面是触发 action 的相关函数，那肯定还有一个一直在调用 execute_action 函数的主循环函数吧。它的函数名字是 main_alerter_loop。既然是 loop，那就知道它是做啥的了，逻辑很简单：zabbix_server 启动后，fork 出运行 main_alerter_loop 函数的进程，让它独立负责报警这件事情。

关于zabbix日志记录逻辑:

 
Python
 
         1 
       
         2 
       
         3 
       
        zabbix_log 
        ( 
        LOG_LEVEL_INFORMATION 
        , 
          
        "%s #%d started [%s #%d]" 
        , 
          
        get_daemon_type_string 
        ( 
        daemon_type 
        ) 
        , 
       
        server_num 
        , 
          
        get_process_type_string 
        ( 
        process_type 
        ) 
        , 
          
        process_num 
        ) 
        ;

创建一个DB连接的对象

Python
 
         1 
       
         2 
       
        DBconnect 
        ( 
        ZBX_DB_CONNECT_NORMAL 
        ) 
        ;

通过Mysql查询alerts未发送的任务，通过media查到行为的方式。

 
Python
 
         1 
       
         2 
       
         3 
       
         4 
       
         5 
       
         6 
       
         7 
       
         8 
       
         9 
       
         10 
       
         11 
       
         12 
       
        result 
          
        = 
          
        DBselect 
        ( 
       
        "select a.alertid,a.mediatypeid,a.sendto,a.subject,a.message,a.status,mt.mediatypeid," 
       
        "mt.type,mt.description,mt.smtp_server,mt.smtp_helo,mt.smtp_email,mt.exec_path," 
       
        "mt.gsm_modem,mt.username,mt.passwd,a.retries" 
       
        " from alerts a,media_type mt" 
       
        " where a.mediatypeid=mt.mediatypeid" 
       
        " and a.status=%d" 
       
        " and a.alerttype=%d" 
       
        " order by a.alertid" 
        , 
       
        ALERT_STATUS_NOT_SENT 
        , 
       
        ALERT_TYPE_MESSAGE 
        ) 
        ;

他是一次性的把没有发送，也就是未执行的报警任务，都给取出来，然后传递给execute_action去处理报警的逻辑。

Python
 
         1 
       
         2 
       
         3 
       
         4 
       
         5 
       
         6 
       
         7 
       
         8 
       
         9 
       
         10 
       
         11 
       
         12 
       
         13 
       
         14 
       
         15 
       
         16 
       
         17 
       
         18 
       
         19 
       
         20 
       
         21 
       
         22 
       
         23 
       
        ZBX_STR2UINT64 
        ( 
        alert 
        . 
        alertid 
        , 
          
        row 
        [ 
        0 
        ] 
        ) 
        ; 
       
        ZBX_STR2UINT64 
        ( 
        alert 
        . 
        mediatypeid 
        , 
          
        row 
        [ 
        1 
        ] 
        ) 
        ; 
       
        alert 
        . 
        sendto 
          
        = 
          
        row 
        [ 
        2 
        ] 
        ; 
       
        alert 
        . 
        subject 
          
        = 
          
        row 
        [ 
        3 
        ] 
        ; 
       
        alert 
        . 
        message 
          
        = 
          
        row 
        [ 
        4 
        ] 
        ; 
       
        alert 
        . 
        status 
          
        = 
          
        atoi 
        ( 
        row 
        [ 
        5 
        ] 
        ) 
        ; 
       
        ZBX_STR2UINT64 
        ( 
        mediatype 
        . 
        mediatypeid 
        , 
          
        row 
        [ 
        6 
        ] 
        ) 
        ; 
       
        mediatype 
        . 
        type 
          
        = 
          
        atoi 
        ( 
        row 
        [ 
        7 
        ] 
        ) 
        ; 
       
        mediatype 
        . 
        description 
          
        = 
          
        row 
        [ 
        8 
        ] 
        ; 
       
        mediatype 
        . 
        smtp_server 
          
        = 
          
        row 
        [ 
        9 
        ] 
        ; 
       
        mediatype 
        . 
        smtp_helo 
          
        = 
          
        row 
        [ 
        10 
        ] 
        ; 
       
        mediatype 
        . 
        smtp_email 
          
        = 
          
        row 
        [ 
        11 
        ] 
        ; 
       
        mediatype 
        . 
        exec_path 
          
        = 
          
        row 
        [ 
        12 
        ] 
        ; 
       
        mediatype 
        . 
        gsm_modem 
          
        = 
          
        row 
        [ 
        13 
        ] 
        ; 
       
        mediatype 
        . 
        username 
          
        = 
          
        row 
        [ 
        14 
        ] 
        ; 
       
        mediatype 
        . 
        passwd 
          
        = 
          
        row 
        [ 
        15 
        ] 
        ; 
       
        alert 
        . 
        retries 
          
        = 
          
        atoi 
        ( 
        row 
        [ 
        16 
        ] 
        ) 
        ; 
       
        * 
        error 
          
        = 
          
        '\0' 
        ; 
       
        res 
          
        = 
          
        execute_action 
        ( 
        & 
        alert 
        , 
          
        & 
        mediatype 
        , 
          
        error 
        , 
          
        sizeof 
        ( 
        error 
        ) 
        ) 
        ;

虽然有不同的触发动作，但是返回值的状态都一样。下面的逻辑是判断返回状态，把结果记录到 debug 日志中，并更新数据库里对应 alert 的状态。

 
Python
 
         1 
       
         2 
       
         3 
       
         4 
       
         5 
       
         6 
       
         7 
       
         8 
       
         9 
       
         10 
       
         11 
       
         12 
       
         13 
       
         14 
       
         15 
       
         16 
       
         17 
       
         18 
       
         19 
       
         20 
       
         21 
       
         22 
       
         23 
       
         24 
       
         25 
       
         26 
       
         27 
       
         28 
       
         29 
       
         30 
       
         31 
       
         32 
       
         33 
       
         34 
       
        if 
          
        ( 
        SUCCEED 
          
        == 
          
        res 
        ) 
       
        { 
       
        zabbix_log 
        ( 
        LOG_LEVEL_DEBUG 
        , 
          
        "alert ID [" 
          
        ZBX_FS 
        _UI64 
          
        "] was sent successfully" 
        , 
       
        alert 
        . 
        alertid 
        ) 
        ; 
       
        DBexecute 
        ( 
        "update alerts set status=%d,error='' where alertid=" 
          
        ZBX_FS_UI64 
        , 
       
        ALERT_STATUS_SENT 
        , 
          
        alert 
        . 
        alertid 
        ) 
        ; 
       
        alerts_success 
        ++ 
        ; 
       
        } 
       
        else 
       
        { 
       
        zabbix_log 
        ( 
        LOG_LEVEL_DEBUG 
        , 
          
        "error sending alert ID [" 
          
        ZBX_FS 
        _UI64 
          
        "]" 
        , 
          
        alert 
        . 
        alertid 
        ) 
        ; 
       
        error_esc 
          
        = 
          
        DBdyn_escape_string_len 
        ( 
        error 
        , 
          
        ALERT_ERROR_LEN 
        ) 
        ; 
       
        alert 
        . 
        retries 
        ++ 
        ; 
       
        if 
          
        ( 
        ALERT_MAX_RETRIES 
          
        > 
          
        alert 
        . 
        retries 
        ) 
       
        { 
       
        DBexecute 
        ( 
        "update alerts set retries=%d,error='%s' where alertid=" 
          
        ZBX_FS_UI64 
        , 
       
        alert 
        . 
        retries 
        , 
          
        error_esc 
        , 
          
        alert 
        . 
        alertid 
        ) 
        ; 
       
        } 
       
        else 
       
        { 
       
        DBexecute 
        ( 
        "update alerts set status=%d,retries=%d,error='%s' where alertid=" 
          
        ZBX_FS_UI64 
        , 
       
        ALERT_STATUS_FAILED 
        , 
          
        alert 
        . 
        retries 
        , 
          
        error_esc 
        , 
          
        alert 
        . 
        alertid 
        ) 
        ; 
       
        } 
       
        zbx_free 
        ( 
        error_esc 
        ) 
        ; 
       
        alerts_fail 
        ++ 
        ; 
       
        } 
       
        }

最后这几行代码的意思是，统计本轮耗时，sleep 30 秒（CONFIG_SENDER_FREQUENCY）后，再继续下一轮。

Python
 
         1 
       
         2 
       
         3 
       
         4 
       
         5 
       
         6 
       
         7 
       
         8 
       
        sec 
          
        = 
          
        zbx_time 
        ( 
        ) 
          
        - 
          
        sec 
        ; 
       
        zbx_setproctitle 
        ( 
        "%s [sent alerts: %d success, %d fail in " 
          
        ZBX_FS 
        _DBL 
          
        " sec, idle %d sec]" 
        , 
       
        get_process_type_string 
        ( 
        process_type 
        ) 
        , 
          
        alerts_success 
        , 
          
        alerts_fail 
        , 
          
        sec 
        , 
       
        CONFIG_SENDER_FREQUENCY 
        ) 
        ; 
       
        zbx_sleep_loop 
        ( 
        CONFIG_SENDER_FREQUENCY 
        ) 
        ;

通过server.c确定zabbix 每次alert间隔的时间了。

 
Python
 
         1 
       
         2 
       
         3 
       
         4 
       
        [ 
        xiaorui 
        @ 
        devops  
        zabbix 
        - 
        2.4.2 
          
        ] 
        $ 
          
        grep 
           
        'CONFIG_SENDER_FREQUENCY' 
           
        src 
        / 
        zabbix_server 
        / 
        server 
        . 
        c 
       
        int 
          
        CONFIG_SENDER_FREQUENCY 
          
        = 
          
        30 
        ; 
       
        [ 
        xiaorui 
        @ 
        devops  
        zabbix 
        - 
        2.4.2 
          
        ] 
        $

我先前一直好奇，它报警的时候是不是串行的，先前也看了官方的介绍，说是串行执行的。奇怪的是，看它函数定义的地方，发现有多线程的逻辑，不知道为啥没有应用上，而是用 while 一直遍历数据库返回的列表数据。

有时间把zabbix的有关alert的代码做个patch，真的很想知道，把zabbix做成http报警后，在报警多的时候，会不会能力很突出。希望我的这篇文章，能对那些做zabbix二次开发的有所帮助。

Zhao_S

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
1
评论
zabbix源码之zabbix alerter.c报警逻辑

首先需要介绍的是 execute_action 函数。这个函数负责处理与 action 相关联的 mediatype，以及向 db 反馈发送成功还是失败的状态。它会根据当初配置的 media 做不同的动作。比如是 MEDIA_TYPE_EMAIL 的话，那就连接 smtp，发送邮件。如果是 EXEC 的话，那就是 fork 进程，也就是第三方的系统调用。
复制链接

扫一扫

专栏目录