Flume自定义组件(上篇)


在聊自定义组件前,先了解下Flume的事务。

Flume事务

在这里插入图片描述

自定义拦截器

案例: 自定义拦截器,将不同内容发往不同Channel,然后分发到不同的分析系统Flume2 和Flume3。

在这里插入图片描述

1.创建一个 maven 项目,并引入以下依赖。

<dependency>

 <groupId>org.apache.flume</groupId>

 <artifactId>flume-ng-core</artifactId>

 <version>1.7.0</version>

</dependency>

2.定义 CustomInteceptor 类(类名需与下文配置中的 com.xiaomao.flume.CustomInteceptor$Builder 保持一致)并实现 Interceptor 接口,打包后放入 Flume 的 lib 目录。

package com.xiaomao.flume;/**
 * CreateBy zxmao on  2020/9/30 0030 10:20
 */

import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.interceptor.Interceptor;

import java.util.List;

/**
 * Copyright (C) zhongda
 *
 * @author zx
 * @date 2020/9/30 0030 10:20
 * @description:
 */
public class CustomInteceptor implements Interceptor {
    public void initialize() {
    }

    public Event intercept(Event event) {
        //处理业务逻辑
        String body = new String(event.getBody());
        if(body.contains("xiaomao")){
            event.getHeaders().put("type","xiaomao");
        }else{
            event.getHeaders().put("type","other");
        }
        return event;
    }

    public List<Event> intercept(List<Event> list) {
        for(Event event:list){
            intercept(event);
        }
        return list;
    }
    public void close() {
    }

    public static class Builder implements Interceptor.Builder{

        public Interceptor build() {
            return new CustomInteceptor();
        }

        public void configure(Context context) {

        }
    }
}

3.Flume1的配置文件:

agent11.sources = r1
agent11.sinks = k1 k2
agent11.channels = c1 c2
# Describe/configure the source
agent11.sources.r1.type = netcat
agent11.sources.r1.bind = localhost
agent11.sources.r1.port = 44444

#channel selector
agent11.sources.r1.selector.type = multiplexing
#拦截器
agent11.sources.r1.interceptors = i1
agent11.sources.r1.interceptors.i1.type = com.xiaomao.flume.CustomInteceptor$Builder
agent11.sources.r1.selector.header = type
agent11.sources.r1.selector.mapping.xiaomao = c1
agent11.sources.r1.selector.mapping.other = c2
# Describe the sink
agent11.sinks.k1.type = avro
agent11.sinks.k1.hostname = slaver01
agent11.sinks.k1.port = 4242

agent11.sinks.k2.type = avro
agent11.sinks.k2.hostname = slaver02
agent11.sinks.k2.port = 4243

# Use a channel which buffers events in memory
agent11.channels.c1.type = memory
agent11.channels.c1.capacity = 1000
agent11.channels.c1.transactionCapacity = 100

#Use a channel which buffers events in memory
agent11.channels.c2.type = memory
agent11.channels.c2.capacity = 1000
agent11.channels.c2.transactionCapacity = 100

# Bind the source and sink to the channel
agent11.sources.r1.channels = c1 c2
agent11.sinks.k1.channel = c1
agent11.sinks.k2.channel = c2

Flume2配置文件:

agent1.sources = r1
agent1.sinks = k1
agent1.channels = c1
# Describe/configure the source
agent1.sources.r1.type = avro
agent1.sources.r1.bind = slaver01
agent1.sources.r1.port = 4242

# Describe the sink
agent1.sinks.k1.type = logger
# Use a channel which buffers events in memory
agent1.channels.c1.type = memory
agent1.channels.c1.capacity = 1000
agent1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
agent1.sources.r1.channels = c1
agent1.sinks.k1.channel = c1

Flume3配置文件:

agent1.sources = r1
agent1.sinks = k1
agent1.channels = c1
# Describe/configure the source
agent1.sources.r1.type = avro
agent1.sources.r1.bind = slaver02
agent1.sources.r1.port = 4243

# Describe the sink
agent1.sinks.k1.type = logger
# Use a channel which buffers events in memory
agent1.channels.c1.type = memory
agent1.channels.c1.capacity = 1000
agent1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
agent1.sources.r1.channels = c1
agent1.sinks.k1.channel = c1

先启动 Flume2 和 Flume3,然后启动 Flume1,最后在 Flume1 所在机器上通过 nc localhost 44444(与 Flume1 配置中的端口一致)连接 Flume1 并发送数据进行测试。

自定义Source

简单示例:

1.编写类,并打包

package com.xiaomao.source;/**
 * CreateBy zxmao on  2020/9/30 0030 14:34
 */

import org.apache.flume.Context;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.PollableSource;
import org.apache.flume.conf.Configurable;
import org.apache.flume.event.SimpleEvent;
import org.apache.flume.source.AbstractSource;

import java.util.HashMap;

/**
 * Copyright (C) zhongda
 *
 * @author zx
 * @date 2020/9/30 0030 14:34
 * @description:
 */
public class MySource extends AbstractSource implements Configurable, PollableSource {
    //定义配置文件将来要读取的字段
    private Long delay;
    private String field;

    //初始化配置信息
    public void configure(Context context) {
        delay = context.getLong("delay");
        field = context.getString("field", "Hello!");
    }

    public Status process() throws EventDeliveryException {
        try {
            //创建事件头信息
            HashMap<String, String> hearderMap = new HashMap();
            //创建事件
            SimpleEvent event = new SimpleEvent();
            //循环封装事件
            for (int i = 0; i < 5; i++) {
                //给事件设置头信息
                event.setHeaders(hearderMap);
                //给事件设置内容
                event.setBody((field + i).getBytes());
                //将事件写入 channel
                getChannelProcessor().processEvent(event);
                Thread.sleep(delay);
            }
        } catch (Exception e) {
            e.printStackTrace();
            return Status.BACKOFF;
        }
        return Status.READY;
    }

    public long getBackOffSleepIncrement() {
        return 0;
    }

    public long getMaxBackOffSleepInterval() {
        return 0;
    }
}

2.配置文件:

# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = com.xiaomao.source.MySource
a1.sources.r1.delay = 1000
#a1.sources.r1.field = xiaomao
# Describe the sink
a1.sinks.k1.type = logger
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
自定义Sink

Sink 不断地轮询 Channel 中的事件且批量地移除它们,并将这些事件批量写入到存储或索引系统,或者发送到另一个 Flume Agent。

Sink 是完全事务性的。在从 Channel 批量删除数据之前,每个 Sink 用 Channel 启动一个事务。批量事件一旦成功写出到存储系统或下一个 Flume Agent,Sink 就利用 Channel 提交事务。事务一旦被提交,该 Channel 就从自己的内部缓冲区删除这些事件。

1.编写自定义类:

添加依赖

	<dependencies>
        <dependency>
            <groupId>org.apache.flume</groupId>
            <artifactId>flume-ng-core</artifactId>
            <version>1.7.0</version>
        </dependency>
    </dependencies>
package com.xiaomao.sink;/**
 * CreateBy zxmao on  2020/9/30 0030 15:43
 */

import org.apache.flume.*;
import org.apache.flume.conf.Configurable;
import org.apache.flume.sink.AbstractSink;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * Copyright (C) zhongda
 *
 * @author zx
 * @date 2020/9/30 0030 15:43
 * @description:
 */
public class MySink extends AbstractSink implements Configurable {
    //创建 Logger 对象
    private static final Logger LOG =
            LoggerFactory.getLogger(AbstractSink.class);
    private String prefix;
    private String suffix;
    public Status process() throws EventDeliveryException {
        //声明返回值状态信息
        Status status;
        //获取当前 Sink 绑定的 Channel
        Channel ch = getChannel();
        //获取事务
        Transaction txn = ch.getTransaction();
        //声明事件
        Event event;
        //开启事务
        txn.begin();
        //读取 Channel 中的事件,直到读取到事件结束循环
        while (true) {
            event = ch.take();
            if (event != null) {
                break;
            }
        }
        try {
            //处理事件(打印)
            LOG.info(prefix + new String(event.getBody()) + suffix);
            //事务提交
            txn.commit();
            status = Status.READY;
        } catch (Exception e) {
            //遇到异常,事务回滚
            txn.rollback();
            status = Status.BACKOFF;
        } finally {
            //关闭事务
            txn.close();
        }
        return status;
    }

    public void configure(Context context) {
        //读取配置文件内容,有默认值
        prefix = context.getString("prefix", "hello:");
        //读取配置文件内容,无默认值
        suffix = context.getString("suffix");
    }
}

打包后放入flume lib目录

2.配置文件:
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = netcat
a1.sources.r1.bind = localhost
a1.sources.r1.port = 44444
# Describe the sink
a1.sinks.k1.type = com.xiaomao.sink.MySink
#a1.sinks.k1.prefix = xiaomao:
a1.sinks.k1.suffix = :xiaomao
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值