- 项目架构
- 创建maven项目,引入依赖
<dependency> <groupId>org.apache.flume</groupId> <artifactId>flume-ng-core</artifactId> <version>1.8.0</version> <scope>provided</scope> </dependency>
- 创建LogETLInterceptor类实现Interceptor接口,进行初步清洗
public class LogETLInterceptor implements Interceptor { @Override public void initialize() { } @Override public Event intercept(Event event) { // 主页日志{json}、 时间日志 时间戳|{json} byte[] body = event.getBody(); String log = new String(body, Charset.forName("UTF-8")); if (log.contains("start")) { // 启动日志 if (LogUtil.validateStartLog(log)) { return event; } } else { // 时间日志 if (LogUtil.validateEventLog(log)) { return event; } } return null; } @Override public List<Event> intercept(List<Event> events) { ArrayList<Event> eventList = new ArrayList<>(); for (Event event : events) { Event validEvent = intercept(event); if (validEvent != null) { eventList.add(validEvent); } } return eventList; } @Override public void close() { } public static class Builder implements Interceptor.Builder { @Override public Interceptor build() { return new LogETLInterceptor(); } @Override public void configure(Context context) { } } }
- 创建LogTypeInterceptor实现Interceptor,用于将不同类型的日志写到不同kafka topic中
public class LogTypeInterceptor implements Interceptor { @Override public void initialize() { } @Override public Event intercept(Event event) { byte[] body = event.getBody(); Map<String, String> headers = event.getHeaders(); String log = new String(body, Charset.forName("UTF-8")); if (log.contains("start")) { // 启动日志 headers.put("topic", "topic_start"); } else { // 事件日志 headers.put("topic", "topic_event"); } return event; } @Override public List<Event> intercept(List<Event> events) { ArrayList<Event> eventList = new ArrayList<>(); for (Event event : events) { Event interceptedEvent = intercept(event); eventList.add(interceptedEvent); } return eventList; } @Override public void close() { } public static class Builder implements Interceptor.Builder { @Override public Interceptor build() { return new LogTypeInterceptor(); } @Override public void configure(Context context) { } } }
- 打包后放到flume/lib/目录下
- 编写conf文件
# Name the components on this agent.
# No sinks: the KafkaChannel writes directly to Kafka.
a1.sources = r1
a1.channels = c1 c2

# Describe/configure the source: tail all *.log files under /tmp/flume,
# checkpointing read positions so restarts do not re-read files.
a1.sources.r1.type = TAILDIR
a1.sources.r1.positionFile = /soft/flume/data/taildir_position.json
a1.sources.r1.filegroups = f1
a1.sources.r1.filegroups.f1 = /tmp/flume/.*log
# FIX: was "a1.sources.ri.maxBatchCount" — the "ri" typo meant the property
# was silently ignored and the source ran with the default batch count.
a1.sources.r1.maxBatchCount = 1000

# Describe c1: Kafka channel for start logs.
# NOTE(review): KafkaChannel defaults to parseAsFlumeEvent=true, which wraps
# records in Flume's Avro envelope; if plain Kafka consumers read these
# topics, set a1.channels.cX.parseAsFlumeEvent = false — confirm downstream.
a1.channels.c1.type = org.apache.flume.channel.kafka.KafkaChannel
a1.channels.c1.kafka.bootstrap.servers = cdh01:9092,cdh02:9092,cdh03:9092
a1.channels.c1.kafka.topic = topic_start

# Describe c2: Kafka channel for event logs.
a1.channels.c2.type = org.apache.flume.channel.kafka.KafkaChannel
a1.channels.c2.kafka.bootstrap.servers = cdh01:9092,cdh02:9092,cdh03:9092
a1.channels.c2.kafka.topic = topic_event

# Describe interceptors: i1 drops invalid logs, i2 tags the "topic" header.
a1.sources.r1.interceptors = i1 i2
a1.sources.r1.interceptors.i1.type = com.spark.interceptors.LogETLInterceptor$Builder
a1.sources.r1.interceptors.i2.type = com.spark.interceptors.LogTypeInterceptor$Builder

# Describe selector: route by the "topic" header set by i2.
a1.sources.r1.selector.type = multiplexing
a1.sources.r1.selector.header = topic
a1.sources.r1.selector.mapping.topic_start = c1
a1.sources.r1.selector.mapping.topic_event = c2

# Bind the source to both channels.
a1.sources.r1.channels = c1 c2
- 启动flume
# Launch the agent in the background. FIX: added nohup — without it the
# backgrounded agent receives SIGHUP and dies when the terminal session ends.
nohup /soft/flume/bin/flume-ng agent -n a1 -f /soft/flume/conf/file-flume-kafka.conf -c /soft/flume/conf/ -Dflume.root.logger=INFO,console > /soft/flume/logs/test.log 2>&1 &
flume自定义拦截器进行日志采集,并写入kafka
最新推荐文章于 2022-12-01 17:27:36 发布