HTTPSource是Flume中专门监听HTTP请求的组件,主要负责在机器上打开某个端口,接收日志请求,并将日志发送到channel中。
HTTPSource的源码如下
package org.apache.flume.source.http;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import org.apache.flume.ChannelException;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.EventDrivenSource;
import org.apache.flume.conf.Configurable;
import org.apache.flume.instrumentation.SourceCounter;
import org.apache.flume.source.AbstractSource;
import org.apache.flume.tools.HTTPServerConstraintUtil;
import org.mortbay.jetty.Connector;
import org.mortbay.jetty.Server;
import org.mortbay.jetty.nio.SelectChannelConnector;
import org.mortbay.jetty.security.SslSocketConnector;
import org.mortbay.jetty.servlet.ServletHolder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.net.ssl.SSLServerSocket;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.net.ServerSocket;
import java.util.*;
/**
* A source which accepts Flume Events by HTTP POST and GET. GET should be used
* for experimentation only. HTTP requests are converted into flume events by a
* pluggable "handler" which must implement the
* {@linkplain HTTPSourceHandler} interface. This handler takes a
* {@linkplain HttpServletRequest} and returns a list of flume events.
*
* The source accepts the following parameters: <p> <tt>port</tt>: port to which
* the server should bind. Mandatory <p> <tt>handler</tt>: the class that
* deserializes a HttpServletRequest into a list of flume events. This class
* must implement HTTPSourceHandler. Default:
* {@linkplain JSONHandler}. <p> <tt>handler.*</tt> Any configuration
* to be passed to the handler. <p>
*
* All events deserialized from one Http request are committed to the channel in
* one transaction, thus allowing for increased efficiency on channels like the
* file channel. If the handler throws an exception this source will return
* a HTTP status of 400. If the channel is full, or the source is unable to
* append events to the channel, the source will return a HTTP 503 - Temporarily
* unavailable status.
*
* A JSON handler which converts JSON objects to Flume events is provided.
*
*/
public class HTTPSource extends AbstractSource implements
EventDrivenSource, Configurable {
/*
* There are 2 ways of doing this:
* a. Have a static server instance and use connectors in each source
* which binds to the port defined for that source.
* b. Each source starts its own server instance, which binds to the source's
* port.
*
* b is more efficient than a because Jetty does not allow binding a
* servlet to a connector. So with a, each request would need to go through
* each of the handlers/servlets till the correct one is found.
*
*/
private static final Logger LOG = LoggerFactory.getLogger(HTTPSource.class);
/**
* Port the HTTP server listens on; read from the configuration in configure().
*/
private volatile Integer port;
/**
* Embedded Jetty server; created in start() and cleared in stop().
*/
private volatile Server srv;
/**
* Host/address the HTTP server binds to.
*/
private volatile String host;
/**
* Handler that converts an incoming HTTP request into Flume events.
*/
private HTTPSourceHandler handler;
/**
* Metrics counter for this source (batches/events received and accepted).
*/
private SourceCounter sourceCounter;
// SSL configuration
/**
* Location of the keystore file; only read when SSL is enabled.
*/
private volatile String keyStorePath;
/**
* Password used to open the keystore; only read when SSL is enabled.
*/
private volatile String keyStorePassword;
/**
* Whether the server should accept connections over SSL.
*/
private volatile Boolean sslEnabled;
// Protocol names removed from the SSL server socket's enabled protocols;
// configure() always puts "SSLv3" in here when SSL is enabled.
private final List<String> excludedProtocols = new LinkedList<String>();
/**
 * Reads this source's settings from the given context and instantiates the
 * configured request handler.
 *
 * <p>Settings read: SSL on/off, port (mandatory), bind host, handler class
 * name, and, when SSL is enabled, the keystore path, keystore password and
 * the list of excluded protocols (space separated; "SSLv3" is always
 * excluded). Every property under the handler prefix is passed to the
 * handler's own {@code configure}.
 *
 * <p>Any failure is logged and rethrown via {@code Throwables.propagate}.
 *
 * @param context configuration of this source
 */
@Override
public void configure(Context context) {
  try {
    // SSL related config
    sslEnabled = context.getBoolean(HTTPSourceConfigurationConstants.SSL_ENABLED, false);
    port = context.getInteger(HTTPSourceConfigurationConstants.CONFIG_PORT);
    host = context.getString(HTTPSourceConfigurationConstants.CONFIG_BIND,
        HTTPSourceConfigurationConstants.DEFAULT_BIND);
    // Same host/port validation as before, now through the shared helper.
    checkHostAndPort();

    // Fall back to the default handler when none is configured.
    String handlerClassName = context.getString(
        HTTPSourceConfigurationConstants.CONFIG_HANDLER,
        HTTPSourceConfigurationConstants.DEFAULT_HANDLER).trim();

    // Start from a clean list so that calling configure() again neither
    // duplicates entries nor keeps exclusions from an earlier configuration.
    excludedProtocols.clear();
    if (sslEnabled) {
      LOG.debug("SSL configuration enabled");
      keyStorePath = context.getString(HTTPSourceConfigurationConstants.SSL_KEYSTORE);
      Preconditions.checkArgument(keyStorePath != null && !keyStorePath.isEmpty(),
          "Keystore is required for SSL Configuration");
      keyStorePassword =
          context.getString(HTTPSourceConfigurationConstants.SSL_KEYSTORE_PASSWORD);
      Preconditions.checkArgument(keyStorePassword != null,
          "Keystore password is required for SSL Configuration");
      String excludeProtocolsStr =
          context.getString(HTTPSourceConfigurationConstants.EXCLUDE_PROTOCOLS);
      if (excludeProtocolsStr != null) {
        excludedProtocols.addAll(Arrays.asList(excludeProtocolsStr.split(" ")));
      }
      // SSLv3 is excluded regardless of what the user listed.
      if (!excludedProtocols.contains("SSLv3")) {
        excludedProtocols.add("SSLv3");
      }
    }

    // Instantiate the handler reflectively. A class that does not implement
    // HTTPSourceHandler surfaces as a ClassCastException on assignment.
    @SuppressWarnings("unchecked")
    Class<? extends HTTPSourceHandler> clazz =
        (Class<? extends HTTPSourceHandler>) Class.forName(handlerClassName);
    handler = clazz.getDeclaredConstructor().newInstance();
    //ref: http://docs.codehaus.org/display/JETTY/Embedding+Jetty
    //ref: http://jetty.codehaus.org/jetty/jetty-6/apidocs/org/mortbay/jetty/servlet/Context.html
    Map<String, String> subProps = context.getSubProperties(
        HTTPSourceConfigurationConstants.CONFIG_HANDLER_PREFIX);
    handler.configure(new Context(subProps));
  } catch (ClassNotFoundException ex) {
    LOG.error("Error while configuring HTTPSource. Exception follows.", ex);
    Throwables.propagate(ex);
  } catch (ClassCastException ex) {
    LOG.error("Deserializer is not an instance of HTTPSourceHandler. "
        + "Deserializer must implement HTTPSourceHandler.", ex);
    Throwables.propagate(ex);
  } catch (Exception ex) {
    LOG.error("Error configuring HTTPSource!", ex);
    Throwables.propagate(ex);
  }
  // The counter is created once and survives later reconfiguration.
  if (sourceCounter == null) {
    sourceCounter = new SourceCounter(getName());
  }
}
/**
 * Validates the bind settings: the host must be a non-empty string and a
 * port must have been provided. Violations are reported through Guava's
 * {@code Preconditions}.
 */
private void checkHostAndPort() {
  boolean hostMissing = host == null || host.isEmpty();
  Preconditions.checkState(!hostMissing, "HTTPSource hostname specified is empty");
  Preconditions.checkNotNull(port,
      "HTTPSource requires a port number to be specified");
}
/**
 * Creates the embedded Jetty server, attaches a single connector (SSL or
 * plain, depending on configuration) bound to the configured host and port,
 * registers the servlet on "/" and starts serving.
 *
 * <p>If startup fails the partially started server is stopped and the
 * {@code srv} field is cleared before the failure is rethrown, so that a
 * later {@code start()} is not rejected by the "server already running"
 * guard.
 *
 * @throws IllegalStateException if a server instance already exists
 */
@Override
public void start() {
  Preconditions.checkState(srv == null,
      "Running HTTP Server found in source: " + getName()
      + " before I started one."
      + "Will not attempt to start.");
  srv = new Server();

  // Exactly one connector per server: SSL or plain NIO.
  Connector[] connectors = new Connector[1];
  if (sslEnabled) {
    SslSocketConnector sslSocketConnector = new HTTPSourceSocketConnector(excludedProtocols);
    sslSocketConnector.setKeystore(keyStorePath);
    sslSocketConnector.setKeyPassword(keyStorePassword);
    sslSocketConnector.setReuseAddress(true);
    connectors[0] = sslSocketConnector;
  } else {
    SelectChannelConnector connector = new SelectChannelConnector();
    connector.setReuseAddress(true);
    connectors[0] = connector;
  }
  connectors[0].setHost(host);
  connectors[0].setPort(port);
  srv.setConnectors(connectors);

  try {
    org.mortbay.jetty.servlet.Context root =
        new org.mortbay.jetty.servlet.Context(
            srv, "/", org.mortbay.jetty.servlet.Context.SESSIONS);
    // All requests are served by the Flume servlet.
    root.addServlet(new ServletHolder(new FlumeHTTPServlet()), "/");
    HTTPServerConstraintUtil.enforceConstraints(root);
    srv.start();
    Preconditions.checkArgument(srv.getHandler().equals(root));
  } catch (Exception ex) {
    LOG.error("Error while starting HTTPSource. Exception follows.", ex);
    discardFailedServer();
    Throwables.propagate(ex);
  }
  Preconditions.checkArgument(srv.isRunning());
  sourceCounter.start();
  super.start();
}

/**
 * Best-effort shutdown of a server whose startup failed; always clears
 * {@code srv} so that start() can be attempted again.
 */
private void discardFailedServer() {
  Server failed = srv;
  srv = null;
  if (failed == null) {
    return;
  }
  try {
    failed.stop();
  } catch (Exception stopEx) {
    LOG.warn("Error while cleaning up HTTP server after failed start.", stopEx);
  }
}
/**
 * Stops the embedded HTTP server (if one was started), waits for it to
 * finish, stops the metrics counter and logs the final metrics.
 *
 * <p>Calling this before {@code start()} is a no-op for the server part.
 * The {@code srv} field is cleared even when stopping fails, so the source
 * can be started again afterwards.
 */
@Override
public void stop() {
  Server server = srv;
  if (server != null) {
    try {
      server.stop();
      server.join();
    } catch (InterruptedException ex) {
      // Preserve the interrupt for whoever is shutting this source down.
      Thread.currentThread().interrupt();
      LOG.error("Error while stopping HTTPSource. Exception follows.", ex);
    } catch (Exception ex) {
      LOG.error("Error while stopping HTTPSource. Exception follows.", ex);
    } finally {
      srv = null;
    }
  }
  sourceCounter.stop();
  LOG.info("Http source {} stopped. Metrics: {}", getName(), sourceCounter);
  // Counterpart of the super.start() call at the end of start().
  super.stop();
}
//实际监听处理类
private class FlumeHTTPServlet extends HttpServlet {
private static final long serialVersionUID = 4891924863218790344L;
@Override
public void doPost(HttpServletRequest request, HttpServletResponse response)
throws IOException {
List<Event> events = Collections.emptyList(); //create empty list
try {
//处理监听取得的对象
events = handler.getEvents(request);
} catch (HTTPBadRequestException ex) {
LOG.warn("Received bad request from client. ", ex);
response.sendError(HttpServletResponse.SC_BAD_REQUEST,
"Bad request from client. "
+ ex.getMessage());
return;
} catch (Exception ex) {
LOG.warn("Deserializer threw unexpected exception. ", ex);
response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR,
"Deserializer threw unexpected exception. "
+ ex.getMessage());
return;
}
sourceCounter.incrementAppendBatchReceivedCount();
sourceCounter.addToEventReceivedCount(events.size());
try {
//将取得的处理结果放入chanel中,交给下一个节点
getChannelProcessor().processEventBatch(events);
} catch (ChannelException ex) {
LOG.warn("Error appending event to channel. "
+ "Channel might be full. Consider increasing the channel "
+ "capacity or make sure the sinks perform faster.", ex);
response.sendError(HttpServletResponse.SC_SERVICE_UNAVAILABLE,
"Error appending event to channel. Channel might be full."
+ ex.getMessage());
return;
} catch (Exception ex) {
LOG.warn("Unexpected error appending event to channel. ", ex);
response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR,
"Unexpected error while appending event to channel. "
+ ex.getMessage());
return;
}
//设置返回给客户端的消息
response.setCharacterEncoding(request.getCharacterEncoding());
response.setStatus(HttpServletResponse.SC_OK);
response.flushBuffer();
sourceCounter.incrementAppendBatchAcceptedCount();
sourceCounter.addToEventAcceptedCount(events.size());
}
@Override
public void doGet(HttpServletRequest request, HttpServletResponse response)
throws IOException {
doPost(request, response);
}
}
/**
 * SSL connector whose server sockets have the given protocols removed from
 * their set of enabled protocols.
 */
private static class HTTPSourceSocketConnector extends SslSocketConnector {
  private final List<String> excludedProtocols;

  HTTPSourceSocketConnector(List<String> excludedProtocols) {
    this.excludedProtocols = excludedProtocols;
  }

  /**
   * Creates the server socket via the superclass, then re-enables only those
   * protocols that are not in the exclusion list.
   */
  @Override
  public ServerSocket newServerSocket(String host, int port, int backlog)
      throws IOException {
    SSLServerSocket sslSocket =
        (SSLServerSocket) super.newServerSocket(host, port, backlog);
    List<String> allowed =
        new ArrayList<String>(Arrays.asList(sslSocket.getEnabledProtocols()));
    for (Iterator<String> it = allowed.iterator(); it.hasNext();) {
      if (excludedProtocols.contains(it.next())) {
        it.remove();
      }
    }
    sslSocket.setEnabledProtocols(allowed.toArray(new String[allowed.size()]));
    return sslSocket;
  }
}
}
HTTPSource默认配置保存在HTTPSourceConfigurationConstants
package org.apache.flume.source.http;
/**
 * Configuration keys and default values understood by the HTTP source.
 */
public class HTTPSourceConfigurationConstants {

  // ---- Binding -----------------------------------------------------------
  /** Key of the port to listen on. */
  public static final String CONFIG_PORT = "port";
  /** Key of the address to bind to. */
  public static final String CONFIG_BIND = "bind";
  /** Bind address used when none is configured. */
  public static final String DEFAULT_BIND = "0.0.0.0";

  // ---- Handler -----------------------------------------------------------
  /** Key of the handler class name. */
  public static final String CONFIG_HANDLER = "handler";
  /** Prefix of the properties that are passed on to the handler. */
  public static final String CONFIG_HANDLER_PREFIX = CONFIG_HANDLER + ".";
  /** Handler class used when none is configured. */
  public static final String DEFAULT_HANDLER = "org.apache.flume.source.http.JSONHandler";

  // ---- SSL ---------------------------------------------------------------
  /** Key of the flag that switches SSL on. */
  public static final String SSL_ENABLED = "enableSSL";
  /** Key of the keystore location. */
  public static final String SSL_KEYSTORE = "keystore";
  /** Key of the keystore password. */
  public static final String SSL_KEYSTORE_PASSWORD = "keystorePassword";
  /** Key of the list of protocols to exclude. */
  public static final String EXCLUDE_PROTOCOLS = "excludeProtocols";
}
HTTPSourceHandler是HTTPSource的请求解析接口,自定义解析逻辑时主要实现getEvents(HttpServletRequest request)方法即可(该接口继承自Configurable,因此还需要提供configure方法,可以为空实现)
package org.apache.flume.source.http;
import java.util.List;
import javax.servlet.http.HttpServletRequest;
import org.apache.flume.Event;
import org.apache.flume.conf.Configurable;
/**
 * Pluggable converter used by the HTTP source to turn an incoming HTTP
 * request into Flume events. Implementations are also {@link Configurable}.
 */
public interface HTTPSourceHandler extends Configurable {

  /**
   * Parses an {@linkplain HttpServletRequest} into a list of Flume events.
   *
   * @param request the request to be parsed into Flume events
   * @return the Flume events generated from the request
   * @throws HTTPBadRequestException if the request could not be parsed into
   *         events because it was not in the expected format
   * @throws Exception if any other, unexpected error occurred
   */
  List<Event> getEvents(HttpServletRequest request)
      throws HTTPBadRequestException, Exception;
}
HTTPSourceHandler 的默认实现类是JSONHandler
package org.apache.flume.source.http;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonSyntaxException;
import com.google.gson.reflect.TypeToken;
import java.io.BufferedReader;
import java.lang.reflect.Type;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;
import java.util.List;
import javax.servlet.http.HttpServletRequest;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.event.EventBuilder;
import org.apache.flume.event.JSONEvent;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * JSONHandler for HTTPSource that accepts an array of events.
 *
 * <p>This handler throws an exception if the deserialization fails because
 * of bad format or any other reason.
 *
 * <p>Each event must be encoded as a map with two key-value pairs:
 * <ol>
 * <li>headers - the key for this key-value pair is "headers". The value is
 * another map, which represents the event headers. These headers are
 * inserted into the Flume event as is.</li>
 * <li>body - the key for this key-value pair is "body". The value is a string
 * which represents the body of the event.</li>
 * </ol>
 * An example:
 * <p> [{"headers" : {"a":"b", "c":"d"},"body": "random_body"},
 * {"headers" : {"e": "f"},"body": "random_body2"}]
 * <p> would be interpreted as the following two flume events:
 * <p> * Event with body: "random_body" (in UTF-8/UTF-16/UTF-32 encoded bytes)
 * and headers : (a:b, c:d)
 * <p> * Event with body: "random_body2" (in UTF-8/UTF-16/UTF-32 encoded
 * bytes) and headers : (e:f)
 *
 * <p>The charset of the body is read from the request and used. If no charset
 * is set in the request, then the charset is assumed to be JSON's default -
 * UTF-8. The JSON handler supports UTF-8, UTF-16 and UTF-32.
 *
 * <p>To set the charset, the request must have content type specified as
 * "application/json; charset=UTF-8" (replace UTF-8 with UTF-16 or UTF-32 as
 * required).
 *
 * <p>One way to create an event in the format expected by this handler is to
 * use {@linkplain JSONEvent} and use {@linkplain Gson} to create the JSON
 * string using the
 * {@linkplain Gson#toJson(java.lang.Object, java.lang.reflect.Type) }
 * method. The type token to pass as the 2nd argument of this method
 * for list of events can be created by:
 *
 * <p>{@code Type type = new TypeToken<List<JSONEvent>>() {}.getType();}
 */
public class JSONHandler implements HTTPSourceHandler {

  private static final Logger LOG = LoggerFactory.getLogger(JSONHandler.class);

  /** Target type handed to Gson: a list of {@link JSONEvent}. */
  private final Type listType = new TypeToken<List<JSONEvent>>() {
  }.getType();
  private final Gson gson;

  public JSONHandler() {
    gson = new GsonBuilder().disableHtmlEscaping().create();
  }

  /**
   * Reads the request body as a JSON array of events.
   *
   * @param request the request whose body holds the JSON array
   * @return one simple Flume event per array element
   * @throws HTTPBadRequestException if the body is not valid JSON, is empty
   *         (or the JSON literal {@code null}), or contains a {@code null}
   *         array element
   * @throws UnsupportedCharsetException if the request declares a charset
   *         other than UTF-8, UTF-16 or UTF-32
   * @throws Exception on any other failure, e.g. while obtaining the reader
   */
  @Override
  public List<Event> getEvents(HttpServletRequest request) throws Exception {
    BufferedReader reader = request.getReader();
    String charset = request.getCharacterEncoding();
    //UTF-8 is default for JSON. If no charset is specified, UTF-8 is to
    //be assumed.
    if (charset == null) {
      LOG.debug("Charset is null, default charset of UTF-8 will be used.");
      charset = "UTF-8";
    } else if (!(charset.equalsIgnoreCase("utf-8")
        || charset.equalsIgnoreCase("utf-16")
        || charset.equalsIgnoreCase("utf-32"))) {
      LOG.error("Unsupported character set in request {}. "
          + "JSON handler supports UTF-8, "
          + "UTF-16 and UTF-32 only.", charset);
      throw new UnsupportedCharsetException("JSON handler supports UTF-8, "
          + "UTF-16 and UTF-32 only.");
    }

    // Syntax errors are translated into a bad-request error; any other Gson
    // failure propagates to the caller unchanged.
    List<Event> eventList;
    try {
      eventList = gson.fromJson(reader, listType);
    } catch (JsonSyntaxException ex) {
      throw new HTTPBadRequestException("Request has invalid JSON Syntax.", ex);
    }

    // Gson yields null for an empty body or the literal "null". Report that
    // as a malformed request instead of failing with a NullPointerException.
    if (eventList == null) {
      throw new HTTPBadRequestException(
          "Request body is empty. Expected a JSON array of events.");
    }
    for (Event e : eventList) {
      if (e == null) {
        throw new HTTPBadRequestException(
            "Request contains a null event. Expected a JSON array of events.");
      }
      // The body bytes are produced using the charset of the request.
      ((JSONEvent) e).setCharset(charset);
    }
    return getSimpleEvents(eventList);
  }

  /** This handler has no settings of its own. */
  @Override
  public void configure(Context context) {
  }

  /**
   * Copies body and headers of each parsed event into an event built by
   * {@link EventBuilder}.
   */
  private List<Event> getSimpleEvents(List<Event> events) {
    List<Event> newEvents = new ArrayList<Event>(events.size());
    for (Event e : events) {
      newEvents.add(EventBuilder.withBody(e.getBody(), e.getHeaders()));
    }
    return newEvents;
  }
}
source和channel之间交换数据的类型是Event接口
package org.apache.flume;
import java.util.Map;
/**
 * Basic representation of a data object in Flume: a set of string headers
 * plus an opaque byte-array body. Provides access to data as it flows
 * through the system.
 */
public interface Event {

  /**
   * @return the name-value pairs describing the data stored in the body
   */
  Map<String, String> getHeaders();

  /**
   * Replaces the current headers.
   *
   * @param headers map of headers to use from now on
   */
  void setHeaders(Map<String, String> headers);

  /**
   * @return the raw bytes of the data contained in this event
   */
  byte[] getBody();

  /**
   * Replaces the raw bytes of the data contained in this event.
   *
   * @param body the data
   */
  void setBody(byte[] body);
}
HTTPSource默认的Event实现类是JSONEvent
package org.apache.flume.event;
import java.io.UnsupportedEncodingException;
import java.util.Map;
import org.apache.flume.Event;
import org.apache.flume.FlumeException;
/**
 * {@link Event} implementation that keeps its body as a string and converts
 * it to and from bytes using a configurable charset (UTF-8 unless changed
 * via {@link #setCharset(String)}). The charset field is transient.
 */
public class JSONEvent implements Event {
  private Map<String, String> headers;
  private String body;
  private transient String charset = "UTF-8";

  @Override
  public Map<String, String> getHeaders() {
    return headers;
  }

  @Override
  public void setHeaders(Map<String, String> headers) {
    this.headers = headers;
  }

  /**
   * @return the body encoded with the current charset, or an empty array
   *         if no body has been set
   * @throws FlumeException if the current charset is not supported
   */
  @Override
  public byte[] getBody() {
    if (body == null) {
      return new byte[0];
    }
    try {
      return body.getBytes(charset);
    } catch (UnsupportedEncodingException ex) {
      throw new FlumeException(String.format("%s encoding not supported", charset), ex);
    }
  }

  /**
   * Stores the given bytes as the body, decoding them with the current
   * charset so that {@link #getBody()} encodes with the same charset it was
   * decoded with. A {@code null} argument results in an empty body.
   *
   * @param body the encoded body, may be {@code null}
   * @throws FlumeException if the current charset is not supported
   */
  @Override
  public void setBody(byte[] body) {
    if (body == null) {
      this.body = "";
      return;
    }
    try {
      // Decode with the event's charset, not the platform default.
      this.body = new String(body, charset);
    } catch (UnsupportedEncodingException ex) {
      throw new FlumeException(String.format("%s encoding not supported", charset), ex);
    }
  }

  /**
   * @param charset name of the charset used to convert the body to and from
   *        bytes
   */
  public void setCharset(String charset) {
    this.charset = charset;
  }
}
Source接口是所有source必须实现的接口,它继承自接口LifecycleAware和NamedComponent
package org.apache.flume;
import org.apache.flume.annotations.InterfaceAudience;
import org.apache.flume.annotations.InterfaceStability;
import org.apache.flume.channel.ChannelProcessor;
import org.apache.flume.lifecycle.LifecycleAware;
/**
 * <p>
 * A source generates {@linkplain Event events} and calls methods on the
 * configured {@link ChannelProcessor} to persist those events into the
 * configured {@linkplain Channel channels}.
 * </p>
 *
 * <p>
 * Sources are associated with unique {@linkplain NamedComponent names} that
 * can be used for separating configuration and working namespaces.
 * </p>
 *
 * <p>
 * No guarantees are given regarding thread safe access.
 * </p>
 *
 * @see org.apache.flume.Channel
 * @see org.apache.flume.Sink
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public interface Source extends LifecycleAware, NamedComponent {

  /**
   * Specifies which channel processor will handle this source's events.
   *
   * @param channelProcessor the processor to hand events to
   */
  void setChannelProcessor(ChannelProcessor channelProcessor);

  /**
   * @return the channel processor that will handle this source's events
   */
  ChannelProcessor getChannelProcessor();
}
使用httpsource可以很方便的满足客户监听日志的需求,只需要实现接口HTTPSourceHandler即可,所以如果想快速开发就可以使用httpsource来完成日常的开发!