flume组件HTTPSource解析

HTTPSource是Flume中专门监听HTTP请求的组件,主要负责在机器上打开某个端口,接收日志请求,并将日志发送到channel中。
HTTPSource的源码如下

package org.apache.flume.source.http;

import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import org.apache.flume.ChannelException;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.EventDrivenSource;
import org.apache.flume.conf.Configurable;
import org.apache.flume.instrumentation.SourceCounter;
import org.apache.flume.source.AbstractSource;
import org.apache.flume.tools.HTTPServerConstraintUtil;
import org.mortbay.jetty.Connector;
import org.mortbay.jetty.Server;
import org.mortbay.jetty.nio.SelectChannelConnector;
import org.mortbay.jetty.security.SslSocketConnector;
import org.mortbay.jetty.servlet.ServletHolder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.net.ssl.SSLServerSocket;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.net.ServerSocket;
import java.util.*;

/**
 * A source which accepts Flume Events by HTTP POST and GET. GET should be used
 * for experimentation only. HTTP requests are converted into flume events by a
 * pluggable "handler" which must implement the
 * {@linkplain HTTPSourceHandler} interface. This handler takes a
 * {@linkplain HttpServletRequest} and returns a list of flume events.
 *
 * The source accepts the following parameters: <p> <tt>port</tt>: port to which
 * the server should bind. Mandatory <p> <tt>handler</tt>: the class that
 * deserializes a HttpServletRequest into a list of flume events. This class
 * must implement HTTPSourceHandler. Default:
 * {@linkplain JSONHandler}. <p> <tt>handler.*</tt> Any configuration
 * to be passed to the handler. <p>
 *
 * All events deserialized from one Http request are committed to the channel in
 * one transaction, thus allowing for increased efficiency on channels like the
 * file channel. If the handler throws an exception this source will return
 * a HTTP status of 400. If the channel is full, or the source is unable to
 * append events to the channel, the source will return a HTTP 503 - Temporarily
 * unavailable status.
 *
 * A JSON handler which converts JSON objects to Flume events is provided.
 *
 */
public class HTTPSource extends AbstractSource implements
        EventDrivenSource, Configurable {
  /*
   * There are 2 ways of doing this:
   * a. Have a static server instance and use connectors in each source
   *    which binds to the port defined for that source.
   * b. Each source starts its own server instance, which binds to the source's
   *    port.
   *
   * b is more efficient than a because Jetty does not allow binding a
   * servlet to a connector. So each request will need to go through
   * each of the handlers/servlet till the correct one is found.
   *
   */

  private static final Logger LOG = LoggerFactory.getLogger(HTTPSource.class);

  /** Port the server connector listens on; mandatory configuration value. */
  private volatile Integer port;
  /** Embedded Jetty server; {@code null} until {@link #start()} and again after a clean stop. */
  private volatile Server srv;
  /** Host name or address the server connector binds to. */
  private volatile String host;
  /** Handler that converts an incoming HTTP request into a list of Flume events. */
  private HTTPSourceHandler handler;
  /** Counter recording received and accepted batches/events for this source. */
  private SourceCounter sourceCounter;

  // SSL configuration variables
  /** Location of the keystore handed to the SSL connector. */
  private volatile String keyStorePath;
  /** Password handed to the SSL connector for the keystore. */
  private volatile String keyStorePassword;
  /** Whether the source listens through an SSL connector instead of a plain one. */
  private volatile Boolean sslEnabled;
  /** Protocol names removed from the SSL server socket's enabled protocols. */
  private final List<String> excludedProtocols = new LinkedList<String>();

  /**
   * Reads the bind address, port, SSL settings and handler class from the
   * context, then instantiates and configures the handler.
   *
   * Any failure is logged and rethrown through {@code Throwables.propagate}.
   *
   * @param context configuration of this source; keys are defined in
   *                {@code HTTPSourceConfigurationConstants}
   */
  @Override
  public void configure(Context context) {
    try {
      // SSL related config
      sslEnabled = context.getBoolean(HTTPSourceConfigurationConstants.SSL_ENABLED, false);
      port = context.getInteger(HTTPSourceConfigurationConstants.CONFIG_PORT);
      host = context.getString(HTTPSourceConfigurationConstants.CONFIG_BIND,
        HTTPSourceConfigurationConstants.DEFAULT_BIND);
      checkHostAndPort();

      // Name of the handler class; falls back to the default handler.
      String handlerClassName = context.getString(
              HTTPSourceConfigurationConstants.CONFIG_HANDLER,
              HTTPSourceConfigurationConstants.DEFAULT_HANDLER).trim();

      // The list is an instance field: reset it so that calling configure()
      // more than once does not keep appending the same protocol names.
      excludedProtocols.clear();
      if (sslEnabled) {
        LOG.debug("SSL configuration enabled");
        keyStorePath = context.getString(HTTPSourceConfigurationConstants.SSL_KEYSTORE);
        Preconditions.checkArgument(keyStorePath != null && !keyStorePath.isEmpty(),
                                        "Keystore is required for SSL Conifguration" );
        keyStorePassword = context.getString(HTTPSourceConfigurationConstants.SSL_KEYSTORE_PASSWORD);
        Preconditions.checkArgument(keyStorePassword != null,
          "Keystore password is required for SSL Configuration");
        String excludeProtocolsStr = context.getString(HTTPSourceConfigurationConstants
          .EXCLUDE_PROTOCOLS);
        if (excludeProtocolsStr != null) {
          excludedProtocols.addAll(Arrays.asList(excludeProtocolsStr.split(" ")));
        }
        // SSLv3 is always excluded, whether or not the user listed it.
        if (!excludedProtocols.contains("SSLv3")) {
          excludedProtocols.add("SSLv3");
        }
      }

      // Load and instantiate the handler class through its no-arg constructor.
      @SuppressWarnings("unchecked")
      Class<? extends HTTPSourceHandler> clazz =
              (Class<? extends HTTPSourceHandler>)
              Class.forName(handlerClassName);
      handler = clazz.getDeclaredConstructor().newInstance();
      //ref: http://docs.codehaus.org/display/JETTY/Embedding+Jetty
      //ref: http://jetty.codehaus.org/jetty/jetty-6/apidocs/org/mortbay/jetty/servlet/Context.html
      // Everything under the handler prefix is passed on to the handler itself.
      Map<String, String> subProps =
              context.getSubProperties(
              HTTPSourceConfigurationConstants.CONFIG_HANDLER_PREFIX);
      handler.configure(new Context(subProps));
    } catch (ClassNotFoundException ex) {
      LOG.error("Error while configuring HTTPSource. Exception follows.", ex);
      Throwables.propagate(ex);
    } catch (ClassCastException ex) {
      LOG.error("Deserializer is not an instance of HTTPSourceHandler."
              + "Deserializer must implement HTTPSourceHandler.", ex);
      Throwables.propagate(ex);
    } catch (Exception ex) {
      LOG.error("Error configuring HTTPSource!", ex);
      Throwables.propagate(ex);
    }
    if (sourceCounter == null) {
      sourceCounter = new SourceCounter(getName());
    }
  }

  /**
   * Verifies that a non-empty host and a port have been configured.
   *
   * @throws IllegalStateException if the host is null or empty
   * @throws NullPointerException if no port was configured
   */
  private void checkHostAndPort() {
    Preconditions.checkState(host != null && !host.isEmpty(),
      "HTTPSource hostname specified is empty");
    Preconditions.checkNotNull(port, "HTTPSource requires a port number to be"
      + " specified");
  }

  /**
   * Creates the Jetty server with a single connector (SSL or plain, depending
   * on configuration), registers the servlet on "/" and starts listening.
   *
   * @throws IllegalStateException if a server instance already exists
   */
  @Override
  public void start() {
    Preconditions.checkState(srv == null,
            "Running HTTP Server found in source: " + getName()
            + " before I started one."
            + "Will not attempt to start.");
    srv = new Server();

    // Exactly one connector is used: SSL when enabled, plain NIO otherwise.
    Connector[] connectors = new Connector[1];

    if (sslEnabled) {
      SslSocketConnector sslSocketConnector = new HTTPSourceSocketConnector(excludedProtocols);
      sslSocketConnector.setKeystore(keyStorePath);
      sslSocketConnector.setKeyPassword(keyStorePassword);
      sslSocketConnector.setReuseAddress(true);
      connectors[0] = sslSocketConnector;
    } else {
      SelectChannelConnector connector = new SelectChannelConnector();
      connector.setReuseAddress(true);
      connectors[0] = connector;
    }
    connectors[0].setHost(host);
    connectors[0].setPort(port);
    srv.setConnectors(connectors);
    try {
      org.mortbay.jetty.servlet.Context root =
        new org.mortbay.jetty.servlet.Context(
          srv, "/", org.mortbay.jetty.servlet.Context.SESSIONS);
      // All requests on "/" are served by the Flume servlet.
      root.addServlet(new ServletHolder(new FlumeHTTPServlet()), "/");
      HTTPServerConstraintUtil.enforceConstraints(root);
      srv.start();
      Preconditions.checkArgument(srv.getHandler().equals(root));
    } catch (Exception ex) {
      LOG.error("Error while starting HTTPSource. Exception follows.", ex);
      Throwables.propagate(ex);
    }
    Preconditions.checkArgument(srv.isRunning());
    sourceCounter.start();
    super.start();
  }

  /**
   * Stops the Jetty server (if one was started), stops the counter and logs
   * the final metrics. Errors while stopping the server are logged, not thrown.
   */
  @Override
  public void stop() {
    Server server = srv;
    // Nothing to shut down when start() never created a server.
    if (server != null) {
      try {
        server.stop();
        server.join();
        srv = null;
      } catch (InterruptedException ex) {
        // Keep the interrupt visible to the caller instead of swallowing it.
        Thread.currentThread().interrupt();
        LOG.error("Error while stopping HTTPSource. Exception follows.", ex);
      } catch (Exception ex) {
        LOG.error("Error while stopping HTTPSource. Exception follows.", ex);
      }
    }
    sourceCounter.stop();
    LOG.info("Http source {} stopped. Metrics: {}", getName(), sourceCounter);
    // Counterpart of the super.start() call made in start().
    super.stop();
  }

  /**
   * Servlet that performs the actual request handling: it asks the handler to
   * turn the request into events and hands them to the channel processor.
   */
  private class FlumeHTTPServlet extends HttpServlet {

    private static final long serialVersionUID = 4891924863218790344L;

    /**
     * Converts the request into events and delivers them as one batch.
     *
     * Responds with 400 when the handler reports a bad request, 503 when the
     * channel rejects the batch, 500 on any other failure and 200 on success.
     *
     * @param request the incoming HTTP request
     * @param response the response used to report the outcome
     * @throws IOException if writing the response fails
     */
    @Override
    public void doPost(HttpServletRequest request, HttpServletResponse response)
            throws IOException {
      List<Event> events = Collections.emptyList(); //create empty list
      try {
        // Let the configured handler parse the request into events.
        events = handler.getEvents(request);
      } catch (HTTPBadRequestException ex) {
        LOG.warn("Received bad request from client. ", ex);
        response.sendError(HttpServletResponse.SC_BAD_REQUEST,
                "Bad request from client. "
                + ex.getMessage());
        return;
      } catch (Exception ex) {
        LOG.warn("Deserializer threw unexpected exception. ", ex);
        response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR,
                "Deserializer threw unexpected exception. "
                + ex.getMessage());
        return;
      }
      sourceCounter.incrementAppendBatchReceivedCount();
      sourceCounter.addToEventReceivedCount(events.size());
      try {
        // Hand the whole batch to the channel processor for the next stage.
        getChannelProcessor().processEventBatch(events);
      } catch (ChannelException ex) {
        LOG.warn("Error appending event to channel. "
                + "Channel might be full. Consider increasing the channel "
                + "capacity or make sure the sinks perform faster.", ex);
        response.sendError(HttpServletResponse.SC_SERVICE_UNAVAILABLE,
                "Error appending event to channel. Channel might be full."
                + ex.getMessage());
        return;
      } catch (Exception ex) {
        LOG.warn("Unexpected error appending event to channel. ", ex);
        response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR,
                "Unexpected error while appending event to channel. "
                + ex.getMessage());
        return;
      }
      // The batch was handed over without error, so count it as accepted
      // before writing the response: a failing flush (for example a client
      // that already disconnected) must not make the metrics under-report.
      sourceCounter.incrementAppendBatchAcceptedCount();
      sourceCounter.addToEventAcceptedCount(events.size());
      // Report success to the client, echoing the request's character encoding.
      response.setCharacterEncoding(request.getCharacterEncoding());
      response.setStatus(HttpServletResponse.SC_OK);
      response.flushBuffer();
    }

    /**
     * Handles GET exactly like POST.
     *
     * @param request the incoming HTTP request
     * @param response the response used to report the outcome
     * @throws IOException if writing the response fails
     */
    @Override
    public void doGet(HttpServletRequest request, HttpServletResponse response)
            throws IOException {
      doPost(request, response);
    }
  }

  /**
   * SSL connector that strips the excluded protocols from every server socket
   * it creates.
   */
  private static class HTTPSourceSocketConnector extends SslSocketConnector {

    private final List<String> excludedProtocols;

    /**
     * @param excludedProtocols protocol names to disable on created sockets
     */
    HTTPSourceSocketConnector(List<String> excludedProtocols) {
      this.excludedProtocols = excludedProtocols;
    }

    /**
     * Creates the server socket through the parent connector and re-enables
     * only those protocols that are not in the exclusion list.
     *
     * @param host host to bind to
     * @param port port to bind to
     * @param backlog connection backlog passed through to the parent
     * @return the SSL server socket with the filtered protocol set
     * @throws IOException if the parent fails to create the socket
     */
    @Override
    public ServerSocket newServerSocket(String host, int port,
      int backlog) throws IOException {
      SSLServerSocket socket = (SSLServerSocket)super.newServerSocket(host,
        port, backlog);
      String[] protocols = socket.getEnabledProtocols();
      List<String> newProtocols = new ArrayList<String>(protocols.length);
      for(String protocol: protocols) {
        if (!excludedProtocols.contains(protocol)) {
          newProtocols.add(protocol);
        }
      }
      socket.setEnabledProtocols(
        newProtocols.toArray(new String[newProtocols.size()]));
      return socket;
    }
  }
}

HTTPSource默认配置保存在HTTPSourceConfigurationConstants

package org.apache.flume.source.http;

/**
 * Configuration keys and default values understood by the HTTP source.
 */
public class HTTPSourceConfigurationConstants {

  // ---- Listener settings ----

  /** Key of the port the source listens on. */
  public static final String CONFIG_PORT = "port";

  /** Key of the address the source binds to. */
  public static final String CONFIG_BIND = "bind";

  /** Bind address used when none is configured. */
  public static final String DEFAULT_BIND = "0.0.0.0";

  // ---- Handler settings ----

  /** Key of the handler class name. */
  public static final String CONFIG_HANDLER = "handler";

  /** Prefix of the keys whose values are passed on to the handler. */
  public static final String CONFIG_HANDLER_PREFIX = CONFIG_HANDLER + ".";

  /** Handler class used when none is configured. */
  public static final String DEFAULT_HANDLER =
          "org.apache.flume.source.http.JSONHandler";

  // ---- SSL settings ----

  /** Key that switches SSL on or off. */
  public static final String SSL_ENABLED = "enableSSL";

  /** Key of the keystore location. */
  public static final String SSL_KEYSTORE = "keystore";

  /** Key of the keystore password. */
  public static final String SSL_KEYSTORE_PASSWORD = "keystorePassword";

  /** Key of the list of protocols to exclude. */
  public static final String EXCLUDE_PROTOCOLS = "excludeProtocols";

}

HTTPSourceHandler是HTTPSource的解析器接口,自定义解析器只需要实现getEvents(HttpServletRequest request)方法(以及继承自Configurable的configure方法)即可

package org.apache.flume.source.http;

import java.util.List;
import javax.servlet.http.HttpServletRequest;
import org.apache.flume.Event;
import org.apache.flume.conf.Configurable;

/**
 * Contract for the pluggable component that turns an HTTP request into Flume
 * events. Implementations are also {@link Configurable}.
 */
public interface HTTPSourceHandler extends Configurable {

  /**
   * Converts an {@linkplain HttpServletRequest} into a list of Flume events.
   * <p>
   * An exception is thrown when the request cannot be parsed into events in
   * the format the implementation expects, and may also be thrown for any
   * other kind of error.
   *
   * @param request The request to be parsed into Flume events.
   * @return List of Flume events generated from the request.
   * @throws HTTPBadRequestException If the request was not parsed correctly
   * into events because it was not in the expected format.
   * @throws Exception If there was an unexpected error.
   */
  List<Event> getEvents(HttpServletRequest request)
          throws HTTPBadRequestException, Exception;

}

HTTPSourceHandler 的默认实现类是JSONHandler

package org.apache.flume.source.http;

import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonSyntaxException;
import com.google.gson.reflect.TypeToken;
import java.io.BufferedReader;
import java.lang.reflect.Type;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;
import java.util.List;
import javax.servlet.http.HttpServletRequest;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.event.EventBuilder;
import org.apache.flume.event.JSONEvent;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 *
 * JSONHandler for HTTPSource that accepts an array of events.
 *
 * This handler throws exception if the deserialization fails because of bad
 * format or any other reason.
 *
 *
 * Each event must be encoded as a map with two key-value pairs. <p> 1. headers
 * - the key for this key-value pair is "headers". The value for this key is
 * another map, which represent the event headers. These headers are inserted
 * into the Flume event as is. <p> 2. body - The body is a string which
 * represents the body of the event. The key for this key-value pair is "body".
 * All key-value pairs are considered to be headers. An example: <p> [{"headers"
 * : {"a":"b", "c":"d"},"body": "random_body"}, {"headers" : {"e": "f"},"body":
 * "random_body2"}] <p> would be interpreted as the following two flume events:
 * <p> * Event with body: "random_body" (in UTF-8/UTF-16/UTF-32 encoded bytes)
 * and headers : (a:b, c:d) <p> *
 * Event with body: "random_body2" (in UTF-8/UTF-16/UTF-32 encoded bytes) and
 * headers : (e:f) <p>
 *
 * The charset of the body is read from the request and used. If no charset is
 * set in the request, then the charset is assumed to be JSON's default - UTF-8.
 * The JSON handler supports UTF-8, UTF-16 and UTF-32.
 *
 * To set the charset, the request must have content type specified as
 * "application/json; charset=UTF-8" (replace UTF-8 with UTF-16 or UTF-32 as
 * required).
 *
 * One way to create an event in the format expected by this handler, is to
 * use {@linkplain JSONEvent} and use {@linkplain Gson} to create the JSON
 * string using the
 * {@linkplain Gson#toJson(java.lang.Object, java.lang.reflect.Type) }
 * method. The type token to pass as the 2nd argument of this method
 * for list of events can be created by: <p>
 *
 * Type type = new TypeToken<List<JSONEvent>>() {}.getType(); <p>
 *
 */

public class JSONHandler implements HTTPSourceHandler {

  private static final Logger LOG = LoggerFactory.getLogger(JSONHandler.class);
  /** Target type for Gson: a list of {@link JSONEvent}. */
  private final Type listType =
          new TypeToken<List<JSONEvent>>() {
          }.getType();
  private final Gson gson;

  /**
   * Creates a handler whose Gson instance has HTML escaping disabled.
   */
  public JSONHandler() {
    gson = new GsonBuilder().disableHtmlEscaping().create();
  }

  /**
   * Parses the request body as a JSON array of events and returns them as
   * plain Flume events whose bodies are encoded with the request's charset.
   * A request without any JSON content yields an empty list.
   *
   * @param request The request to be parsed into Flume events.
   * @return List of Flume events generated from the request; never null.
   * @throws HTTPBadRequestException If the body is not syntactically valid JSON.
   * @throws UnsupportedCharsetException If the request declares a charset
   * other than UTF-8, UTF-16 or UTF-32.
   * @throws Exception If there was an unexpected error.
   */
  @Override
  public List<Event> getEvents(HttpServletRequest request) throws Exception {
    BufferedReader reader = request.getReader();
    String charset = request.getCharacterEncoding();
    //UTF-8 is default for JSON. If no charset is specified, UTF-8 is to
    //be assumed.
    if (charset == null) {
      LOG.debug("Charset is null, default charset of UTF-8 will be used.");
      charset = "UTF-8";
    } else if (!(charset.equalsIgnoreCase("utf-8")
            || charset.equalsIgnoreCase("utf-16")
            || charset.equalsIgnoreCase("utf-32"))) {
      LOG.error("Unsupported character set in request {}. "
              + "JSON handler supports UTF-8, "
              + "UTF-16 and UTF-32 only.", charset);
      throw new UnsupportedCharsetException("JSON handler supports UTF-8, "
              + "UTF-16 and UTF-32 only.");
    }

    /*
     * A syntax error is reported as a bad request; any other exception Gson
     * throws is left for the caller to handle.
     */
    List<Event> eventList;
    try {
      eventList = gson.fromJson(reader, listType);
    } catch (JsonSyntaxException ex) {
      throw new HTTPBadRequestException("Request has invalid JSON Syntax.", ex);
    }

    // Gson hands back null instead of a list when the body holds no JSON
    // document (empty body) or the JSON literal null; treat both as
    // "no events" rather than failing with a NullPointerException below.
    if (eventList == null) {
      LOG.debug("Request contained no JSON events.");
      return new ArrayList<Event>(0);
    }

    // Single pass: apply the charset, then copy into a plain event so that
    // the body is materialised as bytes in that charset.
    List<Event> newEvents = new ArrayList<Event>(eventList.size());
    for (Event e : eventList) {
      ((JSONEvent) e).setCharset(charset);
      newEvents.add(EventBuilder.withBody(e.getBody(), e.getHeaders()));
    }
    return newEvents;
  }

  /**
   * This handler has no settings of its own; the context is ignored.
   *
   * @param context unused
   */
  @Override
  public void configure(Context context) {
  }
}

source和channel交换数据的默认类型是Event接口

package org.apache.flume;

import java.util.Map;

/**
 * Basic representation of a data object in Flume.
 * Provides access to data as it flows through the system.
 */
public interface Event {

  /**
   * Returns a map of name-value pairs describing the data stored in the body.
   */
  public Map<String, String> getHeaders();

  /**
   * Set the event headers
   * @param headers Map of headers to replace the current headers.
   */
  public void setHeaders(Map<String, String> headers);

  /**
   * Returns the raw byte array of the data contained in this event.
   */
  public byte[] getBody();

  /**
   * Sets the raw byte array of the data contained in this event.
   * @param body The data.
   */
  public void setBody(byte[] body);

}

HTTPSource默认的Event实现类是JSONEvent

package org.apache.flume.event;

import java.io.UnsupportedEncodingException;
import java.util.Map;
import org.apache.flume.Event;
import org.apache.flume.FlumeException;

/**
 * {@link Event} implementation that keeps its body as a string and converts
 * it to and from bytes using a configurable charset (UTF-8 unless changed).
 */
public class JSONEvent implements Event{
  // NOTE(review): these field names presumably double as the JSON keys when
  // instances are (de)serialized reflectively, so they are left unchanged.
  private Map<String, String> headers;
  private String body;
  private transient String charset = "UTF-8";

  /**
   * @return the headers as last set; may be null if never set
   */
  @Override
  public Map<String, String> getHeaders() {
    return headers;
  }

  /**
   * @param headers Map of headers to replace the current headers.
   */
  @Override
  public void setHeaders(Map<String, String> headers) {
    this.headers = headers;
  }

  /**
   * Encodes the body string with the current charset.
   *
   * @return the encoded body, or an empty array when there is no body
   * @throws FlumeException if the current charset is not supported
   */
  @Override
  public byte[] getBody() {
    if (body == null) {
      return new byte[0];
    }
    try {
      return body.getBytes(charset);
    } catch (UnsupportedEncodingException ex) {
      throw unsupportedCharset(ex);
    }
  }

  /**
   * Decodes the given bytes with the current charset and stores the result,
   * so that a later {@link #getBody()} round-trips the same bytes instead of
   * depending on the platform default charset.
   *
   * @param body The data; null is stored as an empty string.
   * @throws FlumeException if the current charset is not supported
   */
  @Override
  public void setBody(byte[] body) {
    if (body == null) {
      this.body = "";
      return;
    }
    try {
      this.body = new String(body, charset);
    } catch (UnsupportedEncodingException ex) {
      throw unsupportedCharset(ex);
    }
  }

  /**
   * Sets the charset used to convert between the body string and bytes.
   *
   * @param charset name of the charset
   */
  public void setCharset(String charset) {
    this.charset = charset;
  }

  /** Builds the exception reported when the current charset is unknown. */
  private FlumeException unsupportedCharset(UnsupportedEncodingException cause) {
    return new FlumeException(String.format("%s encoding not supported", charset), cause);
  }

}

接口Source是所有source必须实现的接口,它继承自接口LifecycleAware和NamedComponent

package org.apache.flume;

import org.apache.flume.annotations.InterfaceAudience;
import org.apache.flume.annotations.InterfaceStability;
import org.apache.flume.channel.ChannelProcessor;
import org.apache.flume.lifecycle.LifecycleAware;

/**
 * <p>
 * A source produces {@linkplain Event events} and persists them by calling
 * the configured {@link ChannelProcessor}, which delivers them to the
 * configured {@linkplain Channel channels}.
 * </p>
 *
 * <p>
 * Every source has a unique {@linkplain NamedComponent name}, which can be
 * used to keep configuration and working namespaces apart.
 * </p>
 *
 * <p>
 * Thread safe access is not guaranteed.
 * </p>
 *
 * @see org.apache.flume.Channel
 * @see org.apache.flume.Sink
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public interface Source extends LifecycleAware, NamedComponent {

  /**
   * Sets the channel processor that will handle this source's events.
   *
   * @param channelProcessor the processor to hand events to
   */
  void setChannelProcessor(ChannelProcessor channelProcessor);

  /**
   * Gets the channel processor that will handle this source's events.
   *
   * @return the channel processor in use
   */
  ChannelProcessor getChannelProcessor();

}

使用httpsource可以很方便的满足客户监听日志的需求,只需要实现接口HTTPSourceHandler即可,所以如果想快速开发就可以使用httpsource来完成日常的开发!

  • 2
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值