Apache Eagle上线问题解决

之前研究了一下apache eagle,并在测试环境进行了部署测试,这次整理一下正式上线出现的问题和解决办法。

1、新增短信告警。
原生的eagle的告警方式包括邮件、kafka以及系统中记录,我自己在源码中新加了发短信的功能,重新编译后使用。新增和修改的代码如下:
(1)修改了NotificationConstants.java文件,目录在源码包下的
apache-eagle-0.4.0-incubating-src/eagle-core/eagle-alert/eagle-alert-notification-plugin/src/main/java/org/apache/eagle/notification/base中。修改完的代码如下:

 /**
  * String constants used as keys and values in notification configuration.
  *
  * <p>The phone-message entries ({@link #PHONEMESSAGE_NOTIFICATION} and
  * {@link #RECEIVE}) are the additions made for the SMS alert plugin.
  */
 public class NotificationConstants {

    /** Configuration key whose value names the notification type. */
    public static final String NOTIFICATION_TYPE = "notificationType";

    // ---- notification type values ----
    public static final String EMAIL_NOTIFICATION = "email";
    public static final String KAFKA_STORE = "kafka";
    public static final String EAGLE_STORE = "eagleStore";
    public static final String PHONEMESSAGE_NOTIFICATION = "phonemessage";

    // ---- email specific configuration keys ----
    public static final String SUBJECT = "subject";
    public static final String SENDER = "sender";
    public static final String RECIPIENTS = "recipients";
    public static final String TPL_FILE_NAME = "tplFileName";

    // ---- kafka specific configuration keys ----
    public static final String TOPIC = "topic";
    public static final String BROKER_LIST = "kafka_broker";

    // ---- phonemessage specific configuration keys ----
    /** Configuration key holding the receiver of the phone message. */
    public static final String RECEIVE = "receive";
}

(2)新增类AlertPhoneMessagePlugin.java。放在apache-eagle-0.4.0-incubating-src/eagle-core/eagle-alert/eagle-alert-notification-plugin/src/main/java/org/apache/eagle/notification/plugin下。完整代码如下:

import java.util.List;
import java.util.Map;
import java.util.Vector;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.eagle.alert.entity.AlertAPIEntity;
import org.apache.eagle.alert.entity.AlertDefinitionAPIEntity;
import org.apache.eagle.notification.base.NotificationConstants;
import org.apache.eagle.notification.base.NotificationStatus;
import org.apache.eagle.notification.phoneMessage.AlertPhoneMessageGenerator;
import org.apache.eagle.notification.phoneMessage.AlertPhoneMessageGeneratorBuilder;
import org.apache.eagle.notification.utils.NotificationPluginUtils;
import org.apache.eagle.policy.common.Constants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.typesafe.config.Config;

/**
 * Notification plugin that forwards alerts as phone messages.
 *
 * <p>For every policy it keeps a list of {@link AlertPhoneMessageGenerator}s, one per
 * notification configuration whose type is {@code phonemessage} (or is not set at all).
 * When an alert arrives, each generator registered for the alert's policy is asked to
 * send the message and the outcome is appended to the status list.
 */
public class AlertPhoneMessagePlugin implements NotificationPlugin{

    private static final Logger LOG = LoggerFactory.getLogger(AlertPhoneMessagePlugin.class);

    /** Outcome of every send attempt; entries are appended and never cleared by this class. */
    private final List<NotificationStatus> statusList = new Vector<>();

    /** Generators keyed by policy id. */
    private final Map<String, List<AlertPhoneMessageGenerator>> phoneMessageGenerators = new ConcurrentHashMap<>();

    private Config config;

    /**
     * Stores the configuration and registers generators for all initial alert definitions.
     *
     * @param config        configuration; its {@code eagleProps} object is handed to each generator
     * @param initAlertDefs alert definitions whose notification settings are loaded at start-up
     * @throws Exception if a notification definition cannot be deserialized or registered
     */
    @Override
    public void init(Config config, List<AlertDefinitionAPIEntity> initAlertDefs) throws Exception {
        LOG.info(" Creating PhoneMessage Generator... ");
        this.config = config;
        for (AlertDefinitionAPIEntity entity : initAlertDefs) {
            List<Map<String, String>> configMaps =
                    NotificationPluginUtils.deserializeNotificationConfig(entity.getNotificationDef());
            this.update(entity.getTags().get(Constants.POLICY_ID), configMaps, false);
        }
    }

    /**
     * Creates, replaces or removes the generators of one policy.
     *
     * @param policyId                   id of the policy being changed
     * @param notificationConfCollection notification configurations of that policy
     * @param isPolicyDelete             {@code true} to drop the policy's generators and return
     * @throws Exception declared by the plugin interface
     */
    @Override
    public void update(String policyId, List<Map<String, String>> notificationConfCollection, boolean isPolicyDelete)
            throws Exception {
        if (isPolicyDelete) {
            LOG.info(" Policy been deleted.. Removing reference from Notification Plugin ");
            this.phoneMessageGenerators.remove(policyId);
            return;
        }

        List<AlertPhoneMessageGenerator> generators = new Vector<>();
        for (Map<String, String> notificationConf : notificationConfCollection) {
            String notificationType = notificationConf.get(NotificationConstants.NOTIFICATION_TYPE);
            // A configuration without an explicit type is also handled by this plugin.
            if (notificationType == null
                    || notificationType.equalsIgnoreCase(NotificationConstants.PHONEMESSAGE_NOTIFICATION)) {
                generators.add(createPhoneMessageGenerator(notificationConf));
            }
        }
        // Nothing is registered when no configuration matched, so onAlert must cope with
        // a policy that has no entry in the map.
        if (!generators.isEmpty()) {
            this.phoneMessageGenerators.put(policyId, generators);
            LOG.info("created/updated phonemessage generators for policy {}", policyId);
        }
    }

    /**
     * Sends the alert through every generator registered for the alert's policy and
     * records one {@link NotificationStatus} per attempt.
     *
     * <p>If the alert carries no policy id, or no generators are registered for it,
     * the alert is skipped with a warning instead of failing with a
     * {@link NullPointerException}.
     *
     * @param alertEntity the alert to deliver
     * @throws Exception declared by the plugin interface
     */
    @Override
    public void onAlert(AlertAPIEntity alertEntity) throws Exception {
        String policyId = alertEntity.getTags().get(Constants.POLICY_ID);
        // ConcurrentHashMap rejects null keys, so guard before the lookup.
        List<AlertPhoneMessageGenerator> generators =
                policyId == null ? null : this.phoneMessageGenerators.get(policyId);
        if (generators == null) {
            LOG.warn("No phonemessage generators registered for policy {}, alert is not sent", policyId);
            return;
        }

        for (AlertPhoneMessageGenerator generator : generators) {
            boolean isSuccess = generator.sendAlertPhoneMessage(alertEntity);
            NotificationStatus status = new NotificationStatus();
            status.successful = isSuccess;
            status.errorMessage = isSuccess ? "" : "Failed to send phone message";
            this.statusList.add(status);
        }
    }

    /**
     * @return the live list of recorded send outcomes (not a copy)
     */
    @Override
    public List<NotificationStatus> getStatusList() {
        return this.statusList;
    }

    /**
     * Builds a generator from one notification configuration.
     *
     * @param notificationConfig configuration map; the receiver is read from the
     *                           {@code receive} key
     * @return a generator configured with the receiver and the {@code eagleProps} object
     */
    private AlertPhoneMessageGenerator createPhoneMessageGenerator(Map<String, String> notificationConfig) {
        // "withReceove" is the builder's existing (misspelled) method name.
        return AlertPhoneMessageGeneratorBuilder.newBuilder()
                .withEagleProps(this.config.getObject("eagleProps"))
                .withReceove(notificationConfig.get(NotificationConstants.RECEIVE))
                .build();
    }

    /** Hash is derived from the class name only, so all instances share one hash code. */
    @Override
    public int hashCode(){
        return new HashCodeBuilder().append(getClass().getCanonicalName()).toHashCode();
    }

    /** Any two instances of this plugin class are considered equal. */
    @Override
    public boolean equals(Object o){
        return o instanceof AlertPhoneMessagePlugin;
    }

}

(3)新建文件夹phoneMessage。放在apache-eagle-0.4.0-incubating-src/eagle-core/eagle-alert/eagle-alert-notification-plugin/src/main/java/org/apache/eagle/notification目录下。进入新建文件夹下新增两个类,分别是AlertPhoneMessageGeneratorBuilder.java和AlertPhoneMessageGenerator.java。完整代码如下:
AlertPhoneMessageGeneratorBuilder.java:

import com.typesafe.config.ConfigObject;

/**
 * Fluent builder for {@link AlertPhoneMessageGenerator}.
 *
 * <p>Each builder wraps a single generator instance created up front; the
 * {@code with...} methods set properties on it and {@link #build()} returns that
 * same instance.
 */
public class AlertPhoneMessageGeneratorBuilder {

    private final AlertPhoneMessageGenerator generator;

    private AlertPhoneMessageGeneratorBuilder(){
        generator = new AlertPhoneMessageGenerator();
    }

    /**
     * @return a new builder holding a fresh generator
     */
    public static AlertPhoneMessageGeneratorBuilder newBuilder(){
        return new AlertPhoneMessageGeneratorBuilder();
    }

    /**
     * Sets the receiver of the phone message.
     *
     * @param receive receiver value passed on to the generator
     * @return this builder
     */
    public AlertPhoneMessageGeneratorBuilder withReceive(String receive){
        generator.setReceive(receive);
        return this;
    }

    /**
     * Misspelled original name of {@link #withReceive(String)}, kept so existing
     * callers continue to compile.
     *
     * @param receive receiver value passed on to the generator
     * @return this builder
     * @deprecated use {@link #withReceive(String)}
     */
    @Deprecated
    public AlertPhoneMessageGeneratorBuilder withReceove(String receive){
        return withReceive(receive);
    }

    /**
     * Sets the Eagle properties object on the generator.
     *
     * @param eagleProps configuration object passed on to the generator
     * @return this builder
     */
    public AlertPhoneMessageGeneratorBuilder withEagleProps(ConfigObject eagleProps) {
        generator.setEagleProps(eagleProps);
        return this;
    }

    /**
     * @return the generator configured so far (the same instance on every call)
     */
    public AlertPhoneMessageGenerator build(){
        return this.generator;
    }
}

AlertPhoneMessageGenerator.java:

import org.apache.eagle.alert.entity.AlertAPIEntity;
import org.apache.eagle.notification.plugin.SendAlert;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.typesafe.config.ConfigObject;

/**
 * Sends the message part of an alert as a phone message via {@link SendAlert}.
 *
 * <p>The message text is cut out of the alert context string: it starts at the
 * {@code "alertMessage"} marker and ends just before the {@code "alertEvent"} marker.
 */
public class AlertPhoneMessageGenerator {

    private final static Logger LOG = LoggerFactory.getLogger(AlertPhoneMessageGenerator.class);

    /** Marker at which the extracted message starts (inclusive). */
    private static final String MESSAGE_START_MARKER = "\"alertMessage\"";

    /** Marker before which the extracted message ends. */
    private static final String MESSAGE_END_MARKER = "\"alertEvent\"";

    private ConfigObject eagleProps;
    private String receive;

    public ConfigObject getEagleProps() {
        return eagleProps;
    }

    public void setEagleProps(ConfigObject eagleProps) {
        this.eagleProps = eagleProps;
    }

    public String getReceive() {
        return receive;
    }

    public void setReceive(String receive) {
        this.receive = receive;
    }

    /**
     * Sends the alert to the configured receiver.
     *
     * @param alertEntity alert whose context supplies the message text
     * @return the result reported by {@code SendAlert.send}, or {@code false} when the
     *         alert has no context to send
     */
    public boolean sendAlertPhoneMessage(AlertAPIEntity alertEntity) {
        return sendAlertPhoneMessage(alertEntity, receive);
    }

    /**
     * Extracts the message from the alert context and hands it to {@link SendAlert}.
     */
    private boolean sendAlertPhoneMessage(AlertAPIEntity alertEntity, String receive) {
        LOG.info("going to send message.....");
        String alertContext = alertEntity.getAlertContext();
        if (alertContext == null) {
            // Without a context there is no text to send; report failure instead of throwing.
            LOG.warn("alert context is null, phone message is not sent");
            return false;
        }
        SendAlert send = new SendAlert();
        // NOTE(review): the meaning of the "monitor" argument is defined by SendAlert,
        // which is not part of this file.
        return send.send(receive, "monitor", extractAlertMessage(alertContext));
    }

    /**
     * Cuts the message section out of the alert context.
     *
     * <p>When both markers are present the result equals
     * {@code substring(indexOf(start), indexOf(end) - 1)}. When the start marker is
     * missing the whole context is returned; when only the end marker is missing,
     * everything from the start marker onwards is returned. This avoids the
     * {@link StringIndexOutOfBoundsException} an unchecked {@code indexOf} result
     * of {@code -1} would cause.
     *
     * @param alertContext non-null alert context string
     * @return the text to send
     */
    private static String extractAlertMessage(String alertContext) {
        int start = alertContext.indexOf(MESSAGE_START_MARKER);
        if (start < 0) {
            return alertContext;
        }
        int endMarker = alertContext.indexOf(MESSAGE_END_MARKER, start);
        if (endMarker < 0) {
            return alertContext.substring(start);
        }
        // The -1 drops the single character in front of the end marker
        // (presumably the separating comma - TODO confirm against the context format).
        return alertContext.substring(start, endMarker - 1);
    }

}

在AlertPhoneMessageGenerator.java类的sendAlertPhoneMessage方法中调用了SendAlert类的send方法发送短信,其中对alertEntity.getAlertContext()得到的字符串做了截取处理(从"alertMessage"字段开始,到"alertEvent"字段之前),截取结果作为发送短信的内容。由于发短信方式各公司不同,所以就不贴出SendAlert类的代码了。

(4)在eagle所使用的数据库中找到alertnotifications_alertnotifications这个表,新增一条记录。
uuid:WSdQ7H_____62aP_YA4exQXAReU
notificationType:phoneMessage
enable:1
description:send alert to phone
className:org.apache.eagle.notification.plugin.AlertPhoneMessagePlugin
fields:[{"name":"receive"}] (与NotificationConstants类中新增的RECEIVE字段值对应)
这样添加完后,在eagle页面中的增加告警模块就增加了发送短信的设置,可在页面中填写短信发送的号码。
这里写图片描述

2、hive任务监控告警storm任务出错。
现象就是时不时在storm页面中,任务的Spouts报错,查看storm日志,报错是connection refused

2017-04-21T10:09:31.538+0800 o.a.e.j.c.RMResourceFetcher [INFO] Going to fetch job detail information for application_1492370390007_208973 , url: http://rm1:50030/proxy/application_1492370390007_208973/ws/v1/mapreduce/jobs?anonymous=true
2017-04-21T10:09:31.541+0800 o.a.e.j.c.RunningJobCrawlerImpl [ERROR] Got an exception when fetching resource, jobId: job_1492370390007_208973
java.net.ConnectException: Connection refused
        at java.net.PlainSocketImpl.socketConnect(Native Method) ~[na:1.7.0_80]
        at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:339) ~[na:1.7.0_80]
        at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:200) ~[na:1.7.0_80]
        at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:182) ~[na:1.7.0_80]
        at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392) ~[na:1.7.0_80]
        at java.net.Socket.connect(Socket.java:579) ~[na:1.7.0_80]
        at sun.net.NetworkClient.doConnect(NetworkClient.java:175) ~[na:1.7.0_80]
        at sun.net.www.http.HttpClient.openServer(HttpClient.java:432) ~[na:1.7.0_80]
        at sun.net.www.http.HttpClient.openServer(HttpClient.java:527) ~[na:1.7.0_80]
        at sun.net.www.http.HttpClient.<init>(HttpClient.java:211) ~[na:1.7.0_80]
        at sun.net.www.http.HttpClient.New(HttpClient.java:308) ~[na:1.7.0_80]
        at sun.net.www.http.HttpClient.New(HttpClient.java:326) ~[na:1.7.0_80]
        at sun.net.www.protocol.http.HttpURLConnection.getNewHttpClient(HttpURLConnection.java:997) ~[na:1.7.0_80]
        at sun.net.www.protocol.http.HttpURLConnection.plainConnect(HttpURLConnection.java:933) ~[na:1.7.0_80]
        at sun.net.www.protocol.http.HttpURLConnection.connect(HttpURLConnection.java:851) ~[na:1.7.0_80]
        at sun.net.www.protocol.http.HttpURLConnection.followRedirect(HttpURLConnection.java:2411) ~[na:1.7.0_80]
        at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1558) ~[na:1.7.0_80]
        at org.apache.eagle.jobrunning.util.InputStreamUtils.openGZIPInputStream(InputStreamUtils.java:39) ~[stormjar.jar:na]
        at org.apache.eagle.jobrunning.util.InputStreamUtils.getInputStream(InputStreamUtils.java:51) ~[stormjar.jar:na]
        at org.apache.eagle.jobrunning.util.InputStreamUtils.getInputStream(InputStreamUtils.java:59) ~[stormjar.jar:na]
        at org.apache.eagle.jobrunning.crawler.RMResourceFetcher.doFetchRunningJobInfo(RMResourceFetcher.java:140) ~[stormjar.jar:na]
        at org.apache.eagle.jobrunning.crawler.RMResourceFetcher.getResource(RMResourceFetcher.java:257) ~[stormjar.jar:na]
        at org.apache.eagle.jobrunning.crawler.RunningJobCrawlerImpl.crawl(RunningJobCrawlerImpl.java:328) ~[stormjar.jar:na]
        at org.apache.eagle.jobrunning.storm.JobRunningSpout.nextTuple(JobRunningSpout.java:124) [stormjar.jar:na]
        at org.apache.eagle.datastream.storm.SpoutProxy.nextTuple(SpoutProxy.scala:42) [stormjar.jar:na]
        at backtype.storm.daemon.executor$fn__6579$fn__6594$fn__6623.invoke(executor.clj:565) [storm-core-0.9.5.jar:0.9.5]
        at backtype.storm.util$async_loop$fn__459.invoke(util.clj:463) [storm-core-0.9.5.jar:0.9.5]
        at clojure.lang.AFn.run(AFn.java:24) [clojure-1.5.1.jar:na]
        at java.lang.Thread.run(Thread.java:745) [na:1.7.0_80]

原因是,有些任务运行时间很短暂。当eagle去请求 http://rm1:50030/proxy/application_1492370390007_208973/ws/v1/mapreduce/jobs?anonymous=true 这个地址时,如果该任务已经运行完了,任务信息会转移到jobhistory上,该请求会被直接重定向到jobhistory地址,而且重定向走的是hostname而不是ip;该计算节点没有配置jobhistory的hostname地址,所以无法请求到信息。如果这个任务还没运行完,则直接返回xml文件,不会重定向到jobhistory。所以出现了时不时出错的现象。

评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值