flume自定义sink

一、配置文件taildir.conf

# Flume agent "a1": one TAILDIR source (r1) -> one memory channel (c1) -> one custom sink (k1).
a1.sources = r1
a1.channels = c1
a1.sinks = k1

# Describe/configure the source
# The source type is TAILDIR; per the original author the value is accepted in either upper or lower case.
a1.sources.r1.type = taildir
# NOTE(review): this channel binding is repeated verbatim at the bottom of the file.
a1.sources.r1.channels = c1
# File in which the last read (tail) position is stored.
a1.sources.r1.positionFile = /opt/hui/taildir_position.json
# Names of the file groups to tail, separated by spaces.
a1.sources.r1.filegroups = f1
# Absolute path of each file group.
# Matches files with the csv suffix, e.g. hehe.csv.
# NOTE(review): the '.' before "csv" is an unescaped regex wildcard, so it matches any character - confirm this is intended.
a1.sources.r1.filegroups.f1 = /opt/hui/files/.*.csv
a1.sources.r1.fileHeader = true

# Describe the sink
# Fully qualified class name of the custom sink; username/password/batchSize are read by its configure() method.
a1.sinks.k1.type = com.hui.yilianzhong.YwdataTask
a1.sinks.k1.username = hehe
# NOTE(review): the credential is stored here in plain text.
a1.sinks.k1.password = haha@213
a1.sinks.k1.batchSize = 5

# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

 

二、maven的pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the custom Flume sink; the build produces flume-gauss-sink.jar (see finalName). -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>cn.itcast.demo</groupId>
    <artifactId>flume-gauss-sink</artifactId>
    <version>1.0.0-SNAPSHOT</version>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!-- Compile for Java 8 source and bytecode level. -->
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <!-- Single Flume version shared by both Flume dependencies below. -->
        <version.flume>1.8.0</version.flume>
    </properties>

    <dependencies>

        <!-- Flume APIs used by the sink code (AbstractSink, Channel, Event, Transaction, Context, Configurable). -->
        <dependency>
            <groupId>org.apache.flume</groupId>
            <artifactId>flume-ng-core</artifactId>
            <version>${version.flume}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flume</groupId>
            <artifactId>flume-ng-configuration</artifactId>
            <version>${version.flume}</version>
        </dependency>

        <!-- Presumably the Huawei GaussDB JDBC driver; NOTE(review): likely not on Maven Central, so confirm
             it is installed in the local/private repository. The article states this driver jar must also be
             copied into Flume's lib directory at runtime. -->
        <dependency>
            <groupId>com.huawei.gaussDb</groupId>
            <artifactId>gsjdbc4</artifactId>
            <version>1.0.0</version>
        </dependency>
    </dependencies>

    <build>
        <finalName>flume-gauss-sink</finalName>
        <plugins>
            <!-- No explicit plugin configuration: the compiler level comes from the maven.compiler.* properties above. -->
            <plugin>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>2.3.2</version>
            </plugin>
        </plugins>
    </build>
</project>

 

三、代码

package com.hui.yilianzhong;

import com.google.common.base.Preconditions;
import org.apache.flume.Channel;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.Transaction;
import org.apache.flume.conf.Configurable;
import org.apache.flume.sink.AbstractSink;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.sql.Connection;
import java.sql.SQLException;

/**
 * Custom Flume sink that takes pipe-delimited events from the channel in
 * batches and inserts them into the {@code shbxhj_jcsj} base table through
 * {@code GaussUttils}, then runs the follow-up business SQL.
 *
 * <p>Each event body must contain at least 11 {@code |}-separated fields. The
 * first nine are written as quoted SQL strings, the last two are written
 * unquoted (numeric columns).
 *
 * <p>Failure policy:
 * <ul>
 *   <li>A malformed event is logged and skipped; the rest of the batch is
 *       still inserted.</li>
 *   <li>A database failure is logged and the batch is dropped (the channel
 *       transaction is still committed) so that a permanently failing batch
 *       cannot block the sink forever. This keeps the original
 *       "commit to drop bad event" intent.</li>
 *   <li>A channel failure rolls the transaction back and backs off.</li>
 * </ul>
 *
 * Created by HuiQ on 2020-03-06.
 */
public class YwdataTask extends AbstractSink implements Configurable {

    private static final Logger LOG = LoggerFactory.getLogger(YwdataTask.class);

    /** Minimum number of pipe-separated fields an event body must carry. */
    private static final int FIELD_COUNT = 11;

    /** Fields [0, 9) are emitted as quoted strings; fields [9, 11) are emitted bare. */
    private static final int QUOTED_FIELD_COUNT = 9;

    /** Batch size used when the sink configuration does not set {@code batchSize}. */
    private static final int DEFAULT_BATCH_SIZE = 100;

    private String username;

    private String password;

    private int batchSize;

    /**
     * Takes up to {@code batchSize} events from the channel inside one
     * transaction and inserts them into the database as a single batch.
     *
     * @return {@link Status#READY} when at least one event was taken and the
     *         transaction committed; {@link Status#BACKOFF} when the channel
     *         was empty or the transaction had to be rolled back
     */
    @Override
    public Status process() {
        Channel channel = getChannel();
        Transaction tx = channel.getTransaction();
        tx.begin();
        try {
            // Built per call: state must not leak from a failed batch into the next one.
            StringBuilder rows = new StringBuilder();
            int taken = 0;
            for (int i = 0; i < batchSize; i++) {
                Event event = channel.take();
                if (event == null) {
                    break; // channel drained
                }
                taken++;
                appendRow(rows, event);
            }
            if (rows.length() > 0) {
                rows.setLength(rows.length() - 1); // drop the trailing comma
                insertRows(rows.toString());
            }
            tx.commit();
            // Back off on an empty channel so the sink runner does not busy-loop.
            return taken > 0 ? Status.READY : Status.BACKOFF;
        } catch (Throwable t) {
            LOG.error("Failed to process batch, rolling back channel transaction", t);
            try {
                tx.rollback();
            } catch (Exception rollbackFailure) {
                LOG.error("Failed to roll back channel transaction", rollbackFailure);
            }
            if (t instanceof Error) {
                throw (Error) t;
            }
            return Status.BACKOFF;
        } finally {
            tx.close();
        }
    }

    /**
     * Converts one event into a {@code ('a','b',...,n1,n2),} VALUES tuple and
     * appends it to {@code rows}. Malformed events are logged and skipped.
     *
     * @param rows  accumulator for the VALUES tuples of the current batch
     * @param event event whose body is a pipe-delimited record
     */
    private void appendRow(StringBuilder rows, Event event) {
        byte[] body = event.getBody();
        // NOTE(review): decoded with the platform default charset, as before;
        // confirm it matches the encoding of the tailed files.
        String line = body == null ? "" : new String(body);
        String[] fields = line.split("\\|");
        if (fields.length < FIELD_COUNT) {
            LOG.error("这行插入gauss失败请排查-->{}", line);
            return;
        }
        rows.append('(');
        for (int i = 0; i < FIELD_COUNT; i++) {
            if (i > 0) {
                rows.append(',');
            }
            if (i < QUOTED_FIELD_COUNT) {
                // Double embedded single quotes so a value cannot terminate the SQL literal.
                rows.append('\'').append(fields[i].replace("'", "''")).append('\'');
            } else {
                // NOTE(review): numeric fields are still concatenated verbatim; a
                // parameterized statement inside GaussUttils would be the proper fix.
                rows.append(fields[i]);
            }
        }
        rows.append("),");
    }

    /**
     * Inserts the batch into the base table and runs the business SQL. The
     * connection is always closed; any failure is logged and the batch is
     * dropped rather than retried.
     *
     * @param values comma-separated VALUES tuples without a trailing comma
     */
    private void insertRows(String values) {
        try (Connection conn = GaussUttils.getConnection(username, password)) {
            // Insert the business data into the base table.
            GaussUttils.insertAdd_shbxhj_jcsj(conn, values);
            // Run the follow-up business SQL statements.
            GaussUttils.executeSqls(conn);
        } catch (Exception e) {
            LOG.error("这行插入gauss失败请排查-->" + values, e);
        }
    }

    /**
     * Reads the sink settings from the Flume configuration; called when the
     * sink is initialized.
     *
     * @param context sink configuration; {@code username} and {@code password}
     *                are required, {@code batchSize} is optional and must be
     *                positive when present
     * @throws NullPointerException     if {@code username} or {@code password} is missing
     * @throws IllegalArgumentException if {@code batchSize} is not positive
     */
    @Override
    public void configure(Context context) {
        username = context.getString("username");
        Preconditions.checkNotNull(username, "username must be set");

        password = context.getString("password");
        Preconditions.checkNotNull(password, "password must be set");

        batchSize = context.getInteger("batchSize", DEFAULT_BATCH_SIZE);
        Preconditions.checkArgument(batchSize > 0, "batchSize must be a positive number!!");
    }

    /** Called when the sink is started. */
    @Override
    public synchronized void start() {
        super.start();
        LOG.info("GaussSink start...");
    }

    /** Called when the sink is stopped. */
    @Override
    public synchronized void stop() {
        super.stop();
        LOG.info("GaussSink stop...");
    }
}

然后在idea中执行mvn clean compile package打包并上传到flume安装目录下的lib目录下,执行flume命令即可运行

bin/flume-ng agent -c conf -f conf/taildir.conf -n a1 -Dflume.root.logger=INFO,console

注意:得把所需要的依赖包上传到flume的lib目录下,我这里的代码只用到了一个数据库的驱动包

备注:后来把数据处理那部分改成下面这样写会更好一些,虽然这样batchSize参数就没什么用了。因为上面的代码会导致只有进入的数据行数达到batchSize的整数倍时,才会执行插入数据库的操作;没达到batchSize整数倍的数据会一直留在flume中,实时性可能差一点。

//            for(int i=0;i<batchSize;i++)
//            {
                // 使用take方法尽可能的以批量的方式从Channel中读取事件,直到没有更多的事件
                Event event =  channel.take();
                if(event == null)
                {
                    if (!"".equals(str.toString())) { // 判断str是否为空
                    	//创建数据库连接。
                    	Connection conn_gauss = GaussUttils.getConnection(username, password);
                        str.delete(str.length()-1, str.length()); // 去除最后一行末尾的逗号
                        String datas = str.toString();
                        // 将业务数据插入到gauss库中的基础表
                        GaussUttils.insertAdd_shbxhj_jcsj(conn_gauss, datas);
                        str.delete(0, str.length());
                        // 执行业务SQL语句
                        GaussUttils.executeSqls(conn_gauss);
                        //关闭数据库连接。
                        try {
                        	conn_gauss.close();
                        } catch (SQLException e) {
                        	e.printStackTrace();
                        }
                    }
                } else{
                    byte[] body = event.getBody();
                    eventData = new String(body);
                    String[] yilianzhongData= new String(body).split("\\|");
                    String wyid = yilianzhongData[0];
                    String dwdm = yilianzhongData[1];
                    String dwmc = yilianzhongData[2];
                    String sqrxm = yilianzhongData[3];
                    String sqrhm = yilianzhongData[4];
                    String sqrsj = yilianzhongData[5];
                    String sqsj = yilianzhongData[6];
                    String jfqx = yilianzhongData[7];
                    String shlx = yilianzhongData[8];
                    String jdyysr = yilianzhongData[9];
                    String yfyysr = yilianzhongData[10];
                    str.append("('" + wyid + "','" + dwdm + "','" + dwmc + "','" + sqrxm + "','" + sqrhm + "','" + sqrsj + "','" + sqsj + "','" + jfqx + "','" + shlx + "'," + jdyysr + "," + yfyysr + "),");
                }
//            }
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

小强签名设计

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值