自定义flume拦截器--简单测试

pom文件


  <dependencies>

      <!-- Flume core API (Interceptor, Event, Context).
           Scope is "provided" because the interceptor jar is dropped into Flume's own lib
           directory, where the Flume runtime already supplies these classes. -->
      <dependency>
          <groupId>org.apache.flume</groupId>
          <artifactId>flume-ng-core</artifactId>
          <version>1.5.2</version>
          <scope>provided</scope>
      </dependency>

      <!-- fastjson: used to parse the JSON event body.
           This jar must be copied into Flume's lib directory together with the interceptor jar. -->
      <dependency>
          <groupId>com.alibaba</groupId>
          <artifactId>fastjson</artifactId>
          <version>1.2.32</version>
      </dependency>

      <!-- Logging libraries -->
      <dependency>
          <groupId>ch.qos.logback</groupId>
          <artifactId>logback-classic</artifactId>
          <version>1.1.3</version>
      </dependency>
      <dependency>
          <groupId>org.logback-extensions</groupId>
          <artifactId>logback-ext-spring</artifactId>
          <version>0.1.2</version>
      </dependency>
      <dependency>
          <groupId>org.slf4j</groupId>
          <artifactId>jcl-over-slf4j</artifactId>
          <version>1.7.12</version>
      </dependency>

  </dependencies>


    <build>
        <plugins>
            <!-- Packaging plugin: builds the interceptor jar.
                 The manifest gets a Class-Path whose entries are prefixed with "lib/". -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-jar-plugin</artifactId>
                <version>2.4</version>
                <configuration>
                    <archive>
                        <manifest>
                            <addClasspath>true</addClasspath>
                            <classpathPrefix>lib/</classpathPrefix>
                            <!-- Left empty: no main class is configured for this jar. -->
                            <mainClass></mainClass>
                        </manifest>
                    </archive>
                </configuration>
            </plugin>
            <!-- Compiler plugin: compile for Java 1.8 with UTF-8 source encoding. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <encoding>utf-8</encoding>
                </configuration>
            </plugin>
        </plugins>
    </build>

代码

package com.flume.interceptor;

import com.alibaba.fastjson.JSONObject;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.interceptor.Interceptor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

/**
 * Flume interceptor that unwraps the {@code message} field of a JSON event body.
 *
 * <p>Each event body is parsed as a JSON object and its {@code message} string is extracted.
 * Events whose message contains {@code "Create"} are filtered out; for every other event the
 * body is replaced with the message text. Events that cannot be processed (empty body,
 * unparseable JSON, or no {@code message} field) are filtered out with a warning instead of
 * throwing out of the interceptor.
 *
 * <p>Bodies are decoded and re-encoded as UTF-8 (assumed to be the encoding of the upstream
 * JSON producer) rather than the JVM's platform default charset.
 *
 * <p>Created 2021/1/9 14:45.
 *
 * @author unique
 * @version 1.0
 */
public class TestInterceptor implements Interceptor {

    public static final Logger logger = LoggerFactory.getLogger(TestInterceptor.class);

    /** Name of the JSON field that carries the payload to keep. */
    private static final String MESSAGE_FIELD = "message";

    /** Events whose message contains this marker are filtered out. */
    private static final String FILTER_MARKER = "Create";

    /** Logs that the interceptor is being initialized; no other setup is performed. */
    @Override
    public void initialize() {
        logger.info("----------自定义拦截器的initialize方法执行");
    }

    /**
     * Processes a single event.
     *
     * @param event the event whose body is expected to be a JSON object with a string
     *              {@code message} field
     * @return the same event with its body replaced by the {@code message} text, or
     *         {@code null} when the event is filtered out (message contains {@code "Create"})
     *         or cannot be processed
     */
    @Override
    public Event intercept(Event event) {

        logger.info("----------intercept(Event event)方法执行,处理单个event");

        byte[] body = event.getBody();
        if (body == null || body.length == 0) {
            logger.warn("Dropping event: body is empty");
            return null;
        }

        String message = extractMessage(new String(body, StandardCharsets.UTF_8));
        if (message == null || message.contains(FILTER_MARKER)) {
            return null;
        }

        event.setBody(message.getBytes(StandardCharsets.UTF_8));
        return event;
    }

    /**
     * Processes a batch of events by applying {@link #intercept(Event)} to each one.
     *
     * @param events the events to process
     * @return a new list holding, in the original order, only the events for which
     *         {@link #intercept(Event)} returned a non-null result
     */
    @Override
    public List<Event> intercept(List<Event> events) {

        logger.info("----------intercept(List<Event> events)方法执行");

        List<Event> list = new ArrayList<>(events.size());
        for (Event event : events) {
            Event interceptedEvent = intercept(event);
            if (interceptedEvent != null) {
                list.add(interceptedEvent);
            }
        }
        return list;
    }

    /** Logs that the interceptor is being closed; there are no resources to release. */
    @Override
    public void close() {
        logger.info("----------自定义拦截器close方法执行");
    }

    /**
     * Parses {@code data} as a JSON object and returns its {@code message} field.
     *
     * @param data the decoded event body
     * @return the message text, or {@code null} if {@code data} is not a JSON object or has
     *         no {@code message} field
     */
    private static String extractMessage(String data) {
        try {
            JSONObject jsonObject = JSONObject.parseObject(data);
            if (jsonObject == null) {
                logger.warn("Dropping event: body did not parse to a JSON object");
                return null;
            }
            String message = jsonObject.getString(MESSAGE_FIELD);
            if (message == null) {
                logger.warn("Dropping event: JSON body has no '{}' field", MESSAGE_FIELD);
            }
            return message;
        } catch (RuntimeException e) {
            // fastjson reports malformed input with unchecked exceptions; one bad record
            // must not fail the whole batch, so it is logged and dropped here.
            logger.warn("Dropping event: body is not a valid JSON object", e);
            return null;
        }
    }

    /**
     * Builder referenced from the Flume agent configuration
     * ({@code com.flume.interceptor.TestInterceptor$Builder}).
     */
    public static class Builder implements Interceptor.Builder {

        /**
         * Creates a new interceptor instance.
         *
         * @return a new {@link TestInterceptor}
         */
        @Override
        public Interceptor build() {
            logger.info("----------build方法执行");
            return new TestInterceptor();
        }

        /**
         * Logs that configuration was invoked; this interceptor reads no settings.
         *
         * @param context the interceptor configuration (unused)
         */
        @Override
        public void configure(Context context) {
            logger.info("----------configure方法执行");
        }
    }
}

 

将代码打成 jar 包,并把该 jar 包和 fastjson 的 jar 包一起放到 flume 的 lib 目录下

flume 配置  读取kafka数据到hdfs

## Components of agent a1
a1.sources=r1
a1.channels=c1
a1.sinks=k1


## source1: consume the "systemlog" topic from Kafka
a1.sources.r1.type = org.apache.flume.source.kafka.KafkaSource
a1.sources.r1.batchSize = 5000
a1.sources.r1.batchDurationMillis = 2000
a1.sources.r1.kafka.bootstrap.servers = 172.0.0.1:8888
a1.sources.r1.kafka.topics=systemlog
a1.sources.r1.kafka.consumer.group.id=kafka_flume_hdfs_systemLog

# source1 interceptor: custom interceptor, referenced through its nested Builder class
a1.sources.r1.interceptors=i1
a1.sources.r1.interceptors.i1.type=com.flume.interceptor.TestInterceptor$Builder


## channel1
a1.channels.c1.type = memory

# maximum number of events held in the channel
a1.channels.c1.capacity = 100000
# maximum number of events per transaction
a1.channels.c1.transactionCapacity = 10000


## sink1
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = /data/log/
a1.sinks.k1.hdfs.filePrefix = log-
a1.sinks.k1.hdfs.fileSuffix = .log


## Avoid producing a large number of small files:
## roll at 64 MiB or after 14400 s (4 hours); never roll by event count.
## (rollInterval was previously also set to 0 a few lines earlier; the later value
## 14400 overrode it, so only the effective setting is kept.)
a1.sinks.k1.hdfs.rollSize = 67108864
a1.sinks.k1.hdfs.rollCount = 0
a1.sinks.k1.hdfs.rollInterval = 14400
a1.sinks.k1.hdfs.threadsPoolSize = 30
a1.sinks.k1.hdfs.minBlockReplicas=1



## Write output as plain text files (DataStream) instead of the default SequenceFile
a1.sinks.k1.hdfs.fileType = DataStream
a1.sinks.k1.hdfs.writeFormat = Text


## Wire source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel= c1

在flume目录下启动

bin/flume-ng agent -n a1 -c conf -f job/kafka-flume-interceptors-hdfs.conf -Dflume.root.logger=INFO,console

启动过程中

 

  

原始数据是通过 logstash 采集到 kafka 的

kafka中的数据是这样的

{"@timestamp":"2021-01-08T20:00:02.433Z","path":"/opt/spider/sigar_jar/log/log.log","message":"{\"time\":\"1610136001548\",\"ip\":\"192.168.1.200\",\"mac\":\"****\",\"cpulog\":{\"usage\":\"11\",\"corenum\":\"8\"},\"disklog\":{\"/dev/mapper/centos-root\":\"488209960960,476295784448,0\",\"/dev/sda1\":\"48186880,2359808,0\"},\"memorylog\":{\"used\":\"16045748224\",\"total\":\"16637550592\"},\"internetlog\":{\"rpackets\":\"312097715\",\"tpackets\":\"186182528\",\"rbytes\":\"25964917706\",\"tbytes\":\"102518768869\"},\"vmstat\":{\"us\":\"2\",\"sy\":\"10\"},\"uptime\":\"0.27,0.70,2.24\",\"iostat\":[{\"device\":\"sda\",\"util\":\"1.68\"},{\"device\":\"scd0\",\"util\":\"0.00\"},{\"device\":\"dm-0\",\"util\":\"1.34\"},{\"device\":\"dm-1\",\"util\":\"0.84\"}]}","host":"hdp13","type":"log","@version":"1"}
{"@timestamp":"2021-01-08T20:00:02.433Z","path":"/opt/spider/sigar_jar/log/log.log","message":"{\"time\":\"1610136001882\",\"ip\":\"192.168.1.200\",\"mac\":\"****\",\"cpulog\":{\"usage\":\"4\",\"corenum\":\"8\"},\"disklog\":{\"/dev/mapper/centos-root\":\"488209960960,476298598400,0\",\"/dev/sda1\":\"48186880,2359808,0\"},\"memorylog\":{\"used\":\"16031997952\",\"total\":\"16637550592\"},\"internetlog\":{\"rpackets\":\"312097761\",\"tpackets\":\"186182564\",\"rbytes\":\"25964926803\",\"tbytes\":\"102518774582\"},\"vmstat\":{\"us\":\"2\",\"sy\":\"10\"},\"uptime\":\"0.27,0.70,2.24\",\"iostat\":[{\"device\":\"sda\",\"util\":\"1.68\"},{\"device\":\"scd0\",\"util\":\"0.00\"},{\"device\":\"dm-0\",\"util\":\"1.34\"},{\"device\":\"dm-1\",\"util\":\"0.84\"}]}","host":"hdp13","type":"log","@version":"1"}

到hdfs的数据是这样的

{"time":"1610178255696","ip":"192.168.1.200","mac":"*****","cpulog":{"usage":"0","corenum":"8"},"disklog":{"/dev/mapper/centos-root":"490210693120,486625612800,0","/dev/sda1":"48186880,2380288,0"},"memorylog":{"used":"15882878976","total":"16637550592"},"internetlog":{"rpackets":"315253834","tpackets":"189356169","rbytes":"26328208815","tbytes":"103859485655"},"vmstat":{"us":"2","sy":"9"},"uptime":"0.40,0.95,0.99","iostat":[{"device":"sda","util":"1.69"},{"device":"scd0","util":"0.00"},{"device":"dm-0","util":"1.34"},{"device":"dm-1","util":"0.85"}]}

{"time":"1610178255696","ip":"192.168.1.200","mac":"*****","cpulog":{"usage":"0","corenum":"8"},"disklog":{"/dev/mapper/centos-root":"490210693120,486625612800,0","/dev/sda1":"48186880,2380288,0"},"memorylog":{"used":"15882878976","total":"16637550592"},"internetlog":{"rpackets":"315253834","tpackets":"189356169","rbytes":"26328208815","tbytes":"103859485655"},"vmstat":{"us":"2","sy":"9"},"uptime":"0.40,0.95,0.99","iostat":[{"device":"sda","util":"1.69"},{"device":"scd0","util":"0.00"},{"device":"dm-0","util":"1.34"},{"device":"dm-1","util":"0.85"}]}

简单的测试一下 flume的拦截器,有什么问题还请大佬多多指教

 

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值