目录
创建LogETLInterceptor类实现Interceptor接口,进行初步清洗
创建LogTypeInterceptor实现Interceptor,用于将不同类型的日志写到不同kafka topic中
项目架构
创建maven项目,引入依赖
<dependency>
<groupId>org.apache.flume</groupId>
<artifactId>flume-ng-core</artifactId>
<version>1.8.0</version>
<scope>provided</scope>
</dependency>
-
创建LogETLInterceptor类实现Interceptor接口,进行初步清洗
public class LogETLInterceptor implements Interceptor {

    @Override
    public void initialize() {
        // Stateless validator — nothing to initialize.
    }

    /**
     * Validates a single log event and drops malformed lines.
     * Start logs are a bare {json} body; event logs are "timestamp|{json}".
     *
     * @param event the incoming Flume event
     * @return the event unchanged when its body passes validation,
     *         or {@code null} so Flume discards the malformed event
     */
    @Override
    public Event intercept(Event event) {
        // Decode explicitly as UTF-8 — never rely on the platform default charset.
        String log = new String(event.getBody(), java.nio.charset.StandardCharsets.UTF_8);
        if (log.contains("start")) {
            // Start (app-launch) log
            if (LogUtil.validateStartLog(log)) {
                return event;
            }
        } else {
            // Event log
            if (LogUtil.validateEventLog(log)) {
                return event;
            }
        }
        // Failed validation: drop the event.
        return null;
    }

    /**
     * Validates a batch of events, keeping only those that pass
     * {@link #intercept(Event)}.
     *
     * @param events the incoming batch
     * @return a new list containing only the valid events (possibly empty)
     */
    @Override
    public List<Event> intercept(List<Event> events) {
        // Presize to the batch size; invalid events are simply omitted.
        List<Event> validEvents = new ArrayList<>(events.size());
        for (Event event : events) {
            Event validEvent = intercept(event);
            if (validEvent != null) {
                validEvents.add(validEvent);
            }
        }
        return validEvents;
    }

    @Override
    public void close() {
        // No resources held.
    }

    /** Builder referenced from the Flume agent configuration. */
    public static class Builder implements Interceptor.Builder {
        @Override
        public Interceptor build() {
            return new LogETLInterceptor();
        }

        @Override
        public void configure(Context context) {
            // No configurable properties.
        }
    }
}
-
创建LogTypeInterceptor实现Interceptor,用于将不同类型的日志写到不同kafka topic中
public class LogTypeInterceptor implements Interceptor {

    @Override
    public void initialize() {
        // Stateless router — nothing to initialize.
    }

    /**
     * Tags each event with a {@code topic} header so the multiplexing
     * channel selector can route start logs and event logs to different
     * Kafka topics.
     *
     * @param event the incoming Flume event
     * @return the same event with its {@code topic} header set to
     *         {@code topic_start} or {@code topic_event}
     */
    @Override
    public Event intercept(Event event) {
        Map<String, String> headers = event.getHeaders();
        // Decode explicitly as UTF-8 — never rely on the platform default charset.
        String log = new String(event.getBody(), java.nio.charset.StandardCharsets.UTF_8);
        if (log.contains("start")) {
            // Start (app-launch) log
            headers.put("topic", "topic_start");
        } else {
            // Event log
            headers.put("topic", "topic_event");
        }
        return event;
    }

    /**
     * Tags every event in the batch; no events are dropped here.
     *
     * @param events the incoming batch
     * @return a new list with every event tagged by {@link #intercept(Event)}
     */
    @Override
    public List<Event> intercept(List<Event> events) {
        // Presize to the batch size — every event is kept.
        List<Event> taggedEvents = new ArrayList<>(events.size());
        for (Event event : events) {
            taggedEvents.add(intercept(event));
        }
        return taggedEvents;
    }

    @Override
    public void close() {
        // No resources held.
    }

    /** Builder referenced from the Flume agent configuration. */
    public static class Builder implements Interceptor.Builder {
        @Override
        public Interceptor build() {
            return new LogTypeInterceptor();
        }

        @Override
        public void configure(Context context) {
            // No configurable properties.
        }
    }
}
-
打包后放到flume/lib/目录下
-
编写conf文件
# Name the components on this agent
a1.sources = r1
a1.channels = c1 c2
# Describe/configure the source
a1.sources.r1.type = TAILDIR
a1.sources.r1.positionFile = /soft/flume/data/taildir_position.json
a1.sources.r1.filegroups = f1
a1.sources.r1.filegroups.f1 = /tmp/flume/.*log
a1.sources.r1.maxBatchCount = 1000
# Describe c1
a1.channels.c1.type = org.apache.flume.channel.kafka.KafkaChannel
a1.channels.c1.kafka.bootstrap.servers = cdh01:9092,cdh02:9092,cdh03:9092
a1.channels.c1.kafka.topic = topic_start
# Describe c2
a1.channels.c2.type = org.apache.flume.channel.kafka.KafkaChannel
a1.channels.c2.kafka.bootstrap.servers = cdh01:9092,cdh02:9092,cdh03:9092
a1.channels.c2.kafka.topic = topic_event
# Describe interceptor
a1.sources.r1.interceptors = i1 i2
a1.sources.r1.interceptors.i1.type = com.spark.interceptors.LogETLInterceptor$Builder
a1.sources.r1.interceptors.i2.type = com.spark.interceptors.LogTypeInterceptor$Builder
# Describe selector
a1.sources.r1.selector.type = multiplexing
a1.sources.r1.selector.header = topic
a1.sources.r1.selector.mapping.topic_start = c1
a1.sources.r1.selector.mapping.topic_event = c2
# Bind the source and sink to the channel
a1.sources.r1.channels = c1 c2
启动flume
/soft/flume/bin/flume-ng agent -n a1 -f /soft/flume/conf/file-flume-kafka.conf -c /soft/flume/conf/ -Dflume.root.logger=INFO,console > /soft/flume/logs/test.log 2>&1 &