Flume configuration chaining and custom Flume interceptors

Multi-source configuration


  • Commands

    vi exec-nc-logger.conf
    a1.sources = r1 r2
    a1.sinks = k1
    a1.channels = c1
    
    a1.sources.r1.type = exec
    a1.sources.r1.command = tail -F /home/hadoop/data/flume/exec.log
    
    a1.sources.r2.type = netcat
    a1.sources.r2.bind = 0.0.0.0
    a1.sources.r2.port = 44444
    
    a1.channels.c1.type = memory
    
    a1.sinks.k1.type = logger
    
    a1.sources.r1.channels = c1
    a1.sources.r2.channels = c1
    
    a1.sinks.k1.channel = c1
    
    flume-ng agent \
    --name a1 \
    --conf $FLUME_HOME/conf \
    --conf-file $FLUME_HOME/script/exec-nc-logger.conf \
    -Dflume.root.logger=INFO,console
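
    To smoke-test both sources once the agent is running (a sketch; it assumes nc is installed and the commands are run on the agent host):

    # append a line to the file tailed by the exec source r1
    echo "hello from exec source" >> /home/hadoop/data/flume/exec.log
    # send a line to the netcat source r2
    echo "hello from netcat source" | nc localhost 44444

    Both lines should then appear on the agent console via the logger sink.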
    

Multi-sink configuration


  • Commands

    vi nc-logger-hdfs.conf
    a1.sources = r1
    a1.sinks = k1 k2
    a1.channels = c1
    
    a1.sources.r1.type = netcat
    a1.sources.r1.bind = 0.0.0.0
    a1.sources.r1.port = 44444
    
    a1.channels.c1.type = memory
    
    a1.sinks.k1.type = logger
    
    a1.sinks.k2.type = hdfs
    a1.sinks.k2.hdfs.path = hdfs://hadoop01:8020/flume/twosink
    # number of events written to the file before it is flushed to HDFS
    a1.sinks.k2.hdfs.batchSize = 10
    # file type on HDFS; if SequenceFile is used, a compression codec must be configured
    a1.sinks.k2.hdfs.fileType = DataStream
    # write format of the HDFS files; use Text in production
    a1.sinks.k2.hdfs.writeFormat = Text
    
    a1.sources.r1.channels = c1
    
    a1.sinks.k1.channel = c1
    a1.sinks.k2.channel = c1
    
    flume-ng agent \
    --name a1 \
    --conf $FLUME_HOME/conf \
    --conf-file $FLUME_HOME/script/nc-logger-hdfs.conf \
    -Dflume.root.logger=INFO,console
    

    Note: with a single channel feeding two sinks, each event is taken by only one of the sinks, so the data is split between the logger and HDFS rather than copied.
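
    One way to see the split (a sketch; assumes nc and the HDFS client are available on the agent host):

    # send a few test events to the netcat source
    for i in 1 2 3 4 5; do echo "event-$i" | nc localhost 44444; done
    # events that did not show up on the logger console should be in HDFS
    # (files may still carry a .tmp suffix until they are rolled)
    hdfs dfs -cat /flume/twosink/*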

Flume Channel Selectors

Flume supports two channel selector types: replicating and multiplexing. If the type is not specified, it defaults to "replicating".

replicating


  • Commands

    vi nc-channel-repli.conf
    
    a1.sources = r1
    a1.sinks = k1 k2
    a1.channels = c1 c2
    
    a1.sources.r1.type = netcat
    a1.sources.r1.bind = 0.0.0.0
    a1.sources.r1.port = 44444
    
    a1.channels.c1.type = memory
    a1.channels.c2.type = memory
    
    a1.sinks.k1.type = logger
    
    a1.sinks.k2.type = hdfs
    a1.sinks.k2.hdfs.path = hdfs://hadoop01:8020/flume/channelrepli
    # number of events written per flush to HDFS; the roll settings below are the recommended values
    a1.sinks.k2.hdfs.batchSize = 100
    a1.sinks.k2.hdfs.rollInterval = 3600
    a1.sinks.k2.hdfs.rollSize = 134217728
    a1.sinks.k2.hdfs.rollCount = 0
    
    # file type on HDFS; if SequenceFile is used, a compression codec must be configured
    a1.sinks.k2.hdfs.fileType = DataStream
    # write format of the HDFS files; use Text in production
    a1.sinks.k2.hdfs.writeFormat = Text
    
    a1.sources.r1.channels = c1 c2
    a1.sources.r1.selector.type = replicating
    
    a1.sinks.k1.channel = c1
    a1.sinks.k2.channel = c2
    
    flume-ng agent \
    --name a1 \
    --conf $FLUME_HOME/conf \
    --conf-file $FLUME_HOME/script/nc-channel-repli.conf \
    -Dflume.root.logger=INFO,console
    

    Note: with the replicating selector, both sinks receive a full copy of every event.
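
    A quick check (sketch): every line sent to port 44444 should show up both on the logger console and in the HDFS directory.

    echo "replicated event" | nc localhost 44444
    hdfs dfs -cat /flume/channelrepli/*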

multiplexing


  • Commands

    vi multi1.conf
    
    a1.sources = r1
    a1.sinks = k1
    a1.channels = c1
    
    a1.sources.r1.type = netcat
    a1.sources.r1.bind = hadoop01
    a1.sources.r1.port = 44441
    
    a1.sources.r1.interceptors = i1
    a1.sources.r1.interceptors.i1.type = static
    a1.sources.r1.interceptors.i1.key = state
    a1.sources.r1.interceptors.i1.value = US
    
    a1.channels.c1.type = memory
    
    a1.sinks.k1.type = avro
    a1.sinks.k1.hostname = hadoop01
    a1.sinks.k1.port = 55555
    
    a1.sources.r1.channels = c1
    a1.sinks.k1.channel = c1
    
    flume-ng agent \
    --name a1 \
    --conf $FLUME_HOME/conf \
    --conf-file $FLUME_HOME/script/multi1.conf \
    -Dflume.root.logger=INFO,console
    ------------------------------------------------------------
    vi multi2.conf
    
    a1.sources = r1
    a1.sinks = k1
    a1.channels = c1
    
    a1.sources.r1.type = netcat
    a1.sources.r1.bind = hadoop02
    a1.sources.r1.port = 44442
    
    a1.sources.r1.interceptors = i1
    a1.sources.r1.interceptors.i1.type = static
    a1.sources.r1.interceptors.i1.key = state
    a1.sources.r1.interceptors.i1.value = CN
    
    a1.channels.c1.type = memory
    
    a1.sinks.k1.type = avro
    a1.sinks.k1.hostname = hadoop01
    a1.sinks.k1.port = 55555
    
    a1.sources.r1.channels = c1
    a1.sinks.k1.channel = c1
    
    flume-ng agent \
    --name a1 \
    --conf $FLUME_HOME/conf \
    --conf-file $FLUME_HOME/script/multi2.conf \
    -Dflume.root.logger=INFO,console
    
    ----------------------------------------------------
    vi multi3.conf
    
    a1.sources = r1
    a1.sinks = k1
    a1.channels = c1
    
    a1.sources.r1.type = netcat
    a1.sources.r1.bind = hadoop03
    a1.sources.r1.port = 44443
    
    a1.sources.r1.interceptors = i1
    a1.sources.r1.interceptors.i1.type = static
    a1.sources.r1.interceptors.i1.key = state
    a1.sources.r1.interceptors.i1.value = UK
    
    a1.channels.c1.type = memory
    
    a1.sinks.k1.type = avro
    a1.sinks.k1.hostname = hadoop01
    a1.sinks.k1.port = 55555
    
    a1.sources.r1.channels = c1
    a1.sinks.k1.channel = c1
    
    flume-ng agent \
    --name a1 \
    --conf $FLUME_HOME/conf \
    --conf-file $FLUME_HOME/script/multi3.conf \
    -Dflume.root.logger=INFO,console
    
    -----------------------------------------------------
    vi multi.conf
    
    a1.sources = r1
    a1.sinks = k1 k2 k3
    a1.channels = c1 c2 c3
    
    a1.sources.r1.type = avro
    a1.sources.r1.bind = hadoop01
    a1.sources.r1.port = 55555
    
    
    a1.sources.r1.selector.type = multiplexing
    a1.sources.r1.selector.header = state
    a1.sources.r1.selector.mapping.US = c1
    a1.sources.r1.selector.mapping.CN = c2
    a1.sources.r1.selector.default = c3
    
    a1.channels.c1.type = memory
    a1.channels.c2.type = memory
    a1.channels.c3.type = memory
    
    a1.sinks.k1.type = logger
    
    a1.sinks.k2.type = hdfs
    a1.sinks.k2.hdfs.path = hdfs://hadoop01:8020/flume/channelmulti/k2
    # number of events written to the file before it is flushed to HDFS
    a1.sinks.k2.hdfs.batchSize = 10
    # file type on HDFS; if SequenceFile is used, a compression codec must be configured
    a1.sinks.k2.hdfs.fileType = DataStream
    # write format of the HDFS files; use Text in production
    a1.sinks.k2.hdfs.writeFormat = Text
    
    a1.sinks.k3.type = hdfs
    a1.sinks.k3.hdfs.path = hdfs://hadoop01:8020/flume/channelmulti/k3
    # number of events written to the file before it is flushed to HDFS
    a1.sinks.k3.hdfs.batchSize = 10
    # file type on HDFS; if SequenceFile is used, a compression codec must be configured
    a1.sinks.k3.hdfs.fileType = DataStream
    # write format of the HDFS files; use Text in production
    a1.sinks.k3.hdfs.writeFormat = Text
    
    a1.sources.r1.channels = c1 c2 c3
    a1.sinks.k1.channel = c1
    a1.sinks.k2.channel = c2
    a1.sinks.k3.channel = c3
    
    flume-ng agent \
    --name a1 \
    --conf $FLUME_HOME/conf \
    --conf-file $FLUME_HOME/script/multi.conf \
    -Dflume.root.logger=INFO,console
    

    Note: events are routed to a channel (and therefore a sink) based on the value of the state header: US goes to c1 (logger), CN to c2 (HDFS k2), and anything else falls through to the default channel c3 (HDFS k3).
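
    To exercise the routing (a sketch; the three upstream agents tag events with state=US/CN/UK, so data sent to each of them should end up in a different place):

    # tagged state=US by multi1 -> c1 -> logger console of the collector agent
    echo "us event" | nc hadoop01 44441
    # tagged state=CN by multi2 -> c2 -> hdfs://hadoop01:8020/flume/channelmulti/k2
    echo "cn event" | nc hadoop02 44442
    # tagged state=UK by multi3 -> no mapping, falls back to default c3 -> .../channelmulti/k3
    echo "uk event" | nc hadoop03 44443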

Flume Sink Processors

Flume provides a Failover Sink Processor and a Load balancing Sink Processor; only the failover processor is demonstrated below.

failover


  • Commands

    vi sink.conf    # on hadoop02
    
    a1.sources = r1
    a1.sinks = k1
    a1.channels = c1
    
    a1.sources.r1.type = avro
    a1.sources.r1.bind = hadoop02
    a1.sources.r1.port = 44444
    
    a1.channels.c1.type = memory
    a1.sinks.k1.type = logger
    
    
    a1.sources.r1.channels = c1
    a1.sinks.k1.channel = c1
    
    flume-ng agent \
    --name a1 \
    --conf $FLUME_HOME/conf \
    --conf-file $FLUME_HOME/script/sink.conf \
    -Dflume.root.logger=INFO,console
    
    vi sink.conf    # on hadoop03
    
    a1.sources = r1
    a1.sinks = k1
    a1.channels = c1
    
    a1.sources.r1.type = avro
    a1.sources.r1.bind = hadoop03
    a1.sources.r1.port = 44444
    
    a1.channels.c1.type = memory
    a1.sinks.k1.type = logger
    
    
    a1.sources.r1.channels = c1
    a1.sinks.k1.channel = c1
    
    flume-ng agent \
    --name a1 \
    --conf $FLUME_HOME/conf \
    --conf-file $FLUME_HOME/script/sink.conf \
    -Dflume.root.logger=INFO,console
    
    
    ---------------------------------------------
    vi failover.conf
    
    a1.sources = r1
    a1.sinks = k1 k2
    a1.channels = c1
    
    
    a1.sinkgroups = g1
    a1.sinkgroups.g1.sinks = k1 k2
    a1.sinkgroups.g1.processor.type = failover
    a1.sinkgroups.g1.processor.priority.k1 = 5
    a1.sinkgroups.g1.processor.priority.k2 = 10
    a1.sinkgroups.g1.processor.maxpenalty = 10000
    
    a1.sources.r1.type = netcat
    a1.sources.r1.bind = hadoop01
    a1.sources.r1.port = 44444
    
    a1.channels.c1.type = memory
    
    a1.sinks.k1.type = avro
    a1.sinks.k1.hostname = hadoop02
    a1.sinks.k1.port = 44444
    
    a1.sinks.k2.type = avro
    a1.sinks.k2.hostname = hadoop03
    a1.sinks.k2.port = 44444
    
    
    a1.sources.r1.channels = c1
    a1.sinks.k1.channel = c1
    a1.sinks.k2.channel = c1
    
    flume-ng agent \
    --name a1 \
    --conf $FLUME_HOME/conf \
    --conf-file $FLUME_HOME/script/failover.conf \
    -Dflume.root.logger=INFO,console
    

    a1.sinkgroups.g1.processor.priority.k1: the larger the value, the higher the priority (so k2, with priority 10, is used first); if the active sink goes down, traffic automatically fails over to the other sink.
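
    A simple failover test (sketch), assuming both downstream agents are running:

    # goes to k2 (hadoop03), the sink with the higher priority
    echo "event before failover" | nc hadoop01 44444
    # stop the agent on hadoop03 (Ctrl+C), then send again;
    # the event should now appear on the hadoop02 console
    echo "event after failover" | nc hadoop01 44444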

Custom Flume Interceptor

  • Implement the MultiplexingInterceptor functionality

    Maven dependency
    <dependency>
        <groupId>org.apache.flume</groupId>
        <artifactId>flume-ng-core</artifactId>
        <version>1.6.0-cdh5.16.2</version>
    </dependency>
    --------------------------------------
    package com.fei.bigdata.hadoop.flume;
    
    import org.apache.flume.Context;
    import org.apache.flume.Event;
    import org.apache.flume.interceptor.Interceptor;
    
    import java.util.List;
    
    public class MultiplexingInterceptor implements Interceptor {
        private Integer flagfield = 0;
        private Integer timestampfield = 0;
    
        public MultiplexingInterceptor(Integer flagfield, Integer timestampfield) {
            this.flagfield = flagfield;
            this.timestampfield = timestampfield;
        }
    
        /**
         * Initialization work after the interceptor instance has been constructed
         */
        @Override
        public void initialize() {
    
        }
    
        /**
         * Log format: u01,ev1,mall,1568738583468
         * @param event
         * @return
         */
        @Override
        public Event intercept(Event event) {
            // derive header values from the event body, using the field indexes supplied as parameters
            byte[] body = event.getBody();
            String line = new String(body);
            String[] split = line.split(",");
            // extract the business flag and add it to the header
            event.getHeaders().put("flag", split[flagfield]);
            // extract the event timestamp and add it to the header
            event.getHeaders().put("timestamp", split[timestampfield]);
            return event;
        }
    
        @Override
        public List<Event> intercept(List<Event> events) {
            for (Event event : events) {
                intercept(event);
            }
            return events;
        }
    
        /**
         * Cleanup work before the interceptor is destroyed
         */
        @Override
        public void close() {
    
        }
    
        public static class MultiplexingInterceptorBuilder implements Interceptor.Builder {
    
            Integer flagfield = 0;
            Integer timestampfield = 0;
    
            /**
             * Build an interceptor instance
             * @return
             */
            @Override
            public Interceptor build() {
                return new MultiplexingInterceptor(flagfield, timestampfield);
            }
    
            /**
             * Entry point for reading the configuration parameters
             * @param context
             */
            @Override
            public void configure(Context context) {
                flagfield = context.getInteger("flagfield");
                timestampfield = context.getInteger("timestampfield");
            }
        }
    }
    

    Package the project and copy the resulting jar to the $FLUME_HOME/lib directory.
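
    Roughly (a sketch; the jar name below depends on your pom and is only an assumed example):

    mvn clean package -DskipTests
    cp target/flume-interceptor-1.0.jar $FLUME_HOME/lib/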

  • Test with one Kafka sink and one HDFS sink

    a1.sources = r1
    a1.channels = c1 c2
    a1.sinks = k1 k2
    
    a1.sources.r1.channels = c1 c2
    a1.sources.r1.type = TAILDIR
    a1.sources.r1.positionFile = /home/hadoop/data/flume/mall_position.json
    a1.sources.r1.filegroups = g1
    a1.sources.r1.filegroups.g1 = /home/hadoop/data/flume/mall.log
    a1.sources.r1.fileHeader = false
    
    a1.sources.r1.interceptors = i1
    a1.sources.r1.interceptors.i1.type = com.fei.bigdata.hadoop.flume.MultiplexingInterceptor$MultiplexingInterceptorBuilder
    a1.sources.r1.interceptors.i1.flagfield = 2
    a1.sources.r1.interceptors.i1.timestampfield = 3
    
    a1.sources.r1.selector.type = multiplexing
    a1.sources.r1.selector.header = flag
    a1.sources.r1.selector.mapping.mall = c1
    a1.sources.r1.selector.mapping.waimai = c2
    a1.sources.r1.selector.default = c2
    
    a1.channels.c1.type = memory
    a1.channels.c1.capacity = 2000
    a1.channels.c1.transactionCapacity = 1000
    
    a1.channels.c2.type = memory
    a1.channels.c2.capacity = 2000
    a1.channels.c2.transactionCapacity = 1000
    
    a1.sinks.k1.channel = c1
    a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
    a1.sinks.k1.kafka.bootstrap.servers = hadoop02:9092
    a1.sinks.k1.kafka.topic = mall
    a1.sinks.k1.kafka.producer.acks = 1
    
    a1.sinks.k2.channel = c2
    a1.sinks.k2.type = hdfs
    a1.sinks.k2.hdfs.path = hdfs://hadoop01:9000/waimai/%Y-%m-%d/%H
    a1.sinks.k2.hdfs.filePrefix = fei-log
    a1.sinks.k2.hdfs.fileSuffix = .log
    a1.sinks.k2.hdfs.rollSize = 268435456
    a1.sinks.k2.hdfs.rollInterval = 120
    a1.sinks.k2.hdfs.rollCount = 0
    a1.sinks.k2.hdfs.batchSize = 1000
    a1.sinks.k2.hdfs.fileType = DataStream
    # when false, use the timestamp placed in the event header by the interceptor above, not the local time
    a1.sinks.k2.hdfs.useLocalTimeStamp = false
    -----------------------------------
    flume-ng agent \
    --name a1 \
    --conf $FLUME_HOME/conf \
    --conf-file $FLUME_HOME/script/interceptor.conf \
    -Dflume.root.logger=INFO,console
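
    To drive the test (a sketch), append lines in the u01,ev1,mall,1568738583468 format to the tailed file; lines whose third field is mall should reach the Kafka topic, while waimai and anything else go to the HDFS sink:

    echo "u01,ev1,mall,1568738583468" >> /home/hadoop/data/flume/mall.log
    echo "u02,ev2,waimai,1568738583500" >> /home/hadoop/data/flume/mall.log
    echo "u03,ev3,other,1568738583600" >> /home/hadoop/data/flume/mall.log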
    
  • Kafka consumer: kafka-console-consumer.sh --bootstrap-server hadoop02:9092 --topic mall --from-beginning

  • Data on HDFS: files are written under hdfs://hadoop01:9000/waimai/<date>/<hour>, rolled every 120 seconds or 256 MB.
