Flink HBase Sink

Yesterday I cleaned up a SinkFunction for writing from Flink to Elasticsearch that supports insert, update, and delete writes. While I was at it, I also reworked the HBase SinkFunction so that it supports insert and delete writes. Feel free to give it a try if you need it~

1. Source Code

1.1. HBaseSinkFunction

```java
package com.dengdz.sink;

import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;

import static com.dengdz.sink.HbaseRow.OperationType.DELETE;
import static com.dengdz.sink.HbaseRow.OperationType.INSERT;

/**
 * @author dengdz
 */
public class HBaseSinkFunction extends RichSinkFunction<HbaseRow> {
    private static final Logger LOG = LoggerFactory.getLogger(HBaseSinkFunction.class);

    private String hTableName;
    private String coreSitePath;
    private String hbaseSitePath;

    // Based on the default buffering configuration of Flink's HBase connector.
    private final long bufferFlushMaxSizeInBytes = 2097152; // 2 MB write buffer
    private final long bufferFlushMaxMutations = 1000;      // flush every 1000 mutations

    // Declared for reference; time-based flushing is not wired up in this class.
    private final long bufferFlushIntervalMillis = 5000;

    private transient Connection connection;
    private transient BufferedMutator mutator;

    private transient AtomicLong numPendingRequests;

    
    public HBaseSinkFunction(
            String hTableName,
            String coreSitePath,
            String hbaseSitePath) {
        this.hTableName = hTableName;
        this.coreSitePath = coreSitePath;
        this.hbaseSitePath = hbaseSitePath;
    }


    @Override
    public void open(Configuration parameters) {
        LOG.info("start open ...");
        org.apache.hadoop.conf.Configuration config = new org.apache.hadoop.conf.Configuration();

        try {
            // Load the cluster settings from the provided core-site.xml and hbase-site.xml.
            config.addResource(new Path(coreSitePath));
            config.addResource(new Path(hbaseSitePath));

            this.numPendingRequests = new AtomicLong(0);

            if (null == connection) {
                this.connection = ConnectionFactory.createConnection(config);
            }
            // Create a BufferedMutator for the target table with the configured write buffer size.
            BufferedMutatorParams params =
                    new BufferedMutatorParams(TableName.valueOf(hTableName));
            params.writeBufferSize(bufferFlushMaxSizeInBytes);
            this.mutator = connection.getBufferedMutator(params);

        } catch (TableNotFoundException tnfe) {
            LOG.error("The table " + hTableName + " not found ", tnfe);
            throw new RuntimeException("HBase table '" + hTableName + "' not found.", tnfe);
        } catch (IOException ioe) {
            LOG.error("Exception while creating connection to HBase.", ioe);
            throw new RuntimeException("Cannot create connection to HBase.", ioe);
        }
        LOG.info("end open.");
    }


    @Override
    public void invoke(HbaseRow value, Context context) throws Exception {
        Mutation mutation = convertToMutation(value);
        // createPutMutation/createDeleteMutation return null for rows without a usable row key.
        if (mutation == null) {
            return;
        }
        mutator.mutate(mutation);

        // Flush once the number of buffered mutations reaches the configured maximum.
        if (numPendingRequests.incrementAndGet() >= bufferFlushMaxMutations) {
            flush();
        }
    }


    public Mutation convertToMutation(HbaseRow hbaseRow) throws Exception {

        HbaseRow.OperationType type = hbaseRow.getType();
        if (type == INSERT) {
            return createPutMutation(hbaseRow);
        } else if (type == DELETE) {
            return createDeleteMutation(hbaseRow);
        } else {
            throw new Exception("Unsupported operation type: " + type);
        }
    }

    private void flush() throws IOException {
        mutator.flush();
        numPendingRequests.set(0);
    }

    @Override
    public void close() throws Exception {

        if (mutator != null) {
            try {
                mutator.close();
            } catch (IOException e) {
                LOG.warn("Exception occurs while closing HBase BufferedMutator.", e);
            }
            this.mutator = null;
        }

        if (connection != null) {
            try {
                connection.close();
            } catch (IOException e) {
                LOG.warn("Exception occurs while closing HBase Connection.", e);
            }
            this.connection = null;
        }

    }


    public Put createPutMutation(HbaseRow rowData) {
        String rowKey = rowData.getRowKey();
        // Skip rows without a usable row key; invoke() drops null mutations.
        if (rowKey == null || rowKey.isEmpty()) {
            return null;
        }

        Put put = new Put(Bytes.toBytes(rowKey));
        byte[] familyBytes = Bytes.toBytes(rowData.getFamily());

        // Every value is written as its string representation.
        for (Map.Entry<String, Object> kv : rowData.getData().entrySet()) {
            put.addColumn(familyBytes, Bytes.toBytes(kv.getKey()), Bytes.toBytes(String.valueOf(kv.getValue())));
        }

        return put;
    }


    public Delete createDeleteMutation(HbaseRow rowData) {
        String rowKey = rowData.getRowKey();
        if (rowKey == null || rowKey.isEmpty()) {
            return null;
        }

        // Delete only the listed columns of the row, not the entire row.
        Delete delete = new Delete(Bytes.toBytes(rowKey));
        byte[] familyBytes = Bytes.toBytes(rowData.getFamily());

        for (Map.Entry<String, Object> kv : rowData.getData().entrySet()) {
            delete.addColumn(familyBytes, Bytes.toBytes(kv.getKey()));
        }
        return delete;
    }


}
```
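One thing to note: bufferFlushIntervalMillis is declared but never used above, so the buffer is only flushed when the mutation count reaches bufferFlushMaxMutations (or when the mutator's own byte buffer fills). Below is a minimal sketch of how a time-based flush could be wired in; Flink's bundled HBase connector uses the same periodic-flush pattern. The flushScheduler field and the statements are hypothetical additions shown as a fragment, not part of the original class, and they need the java.util.concurrent imports Executors, ScheduledExecutorService, and TimeUnit.

```java
// 1) Hypothetical extra field next to 'mutator':
private transient ScheduledExecutorService flushScheduler;

// 2) At the end of open(), schedule a periodic flush:
this.flushScheduler = Executors.newSingleThreadScheduledExecutor();
this.flushScheduler.scheduleWithFixedDelay(() -> {
    try {
        // Runs on the scheduler thread; verify that your HBase version's
        // BufferedMutator tolerates concurrent flushes, or synchronize with invoke().
        flush();
    } catch (IOException e) {
        LOG.error("Periodic flush to HBase failed.", e);
    }
}, bufferFlushIntervalMillis, bufferFlushIntervalMillis, TimeUnit.MILLISECONDS);

// 3) At the start of close(), stop the scheduler before closing the mutator:
if (flushScheduler != null) {
    flushScheduler.shutdown();
}
```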


1.2. HbaseRow

```java
package com.dengdz.sink;

import com.alibaba.fastjson.JSONObject;

/**
 * @author dengdz
 */
public class HbaseRow {
    private OperationType type;
    private String rowKey;
    private String family;
    private JSONObject data;

    public HbaseRow(OperationType type, String rowKey, String family, JSONObject data) {
        this.type = type;
        this.rowKey = rowKey;
        this.family = family;
        this.data = data;
    }

    public OperationType getType() {
        return type;
    }

    public void setType(OperationType type) {
        this.type = type;
    }

    public String getRowKey() {
        return rowKey;
    }

    public void setRowKey(String rowKey) {
        this.rowKey = rowKey;
    }

    public String getFamily() {
        return family;
    }

    public void setFamily(String family) {
        this.family = family;
    }

    public JSONObject getData() {
        return data;
    }

    public void setData(JSONObject data) {
        this.data = data;
    }

    public enum OperationType {
        INSERT, DELETE
    }
}
```
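Before moving on, here is a minimal usage sketch for wiring the sink above into a job. The table name, config file paths, and the upstream stream are illustrative assumptions, not part of the source above:

```java
// Build a row: insert {name=dengdz} under row key "rk_001" into column family "cf".
JSONObject data = new JSONObject();
data.put("name", "dengdz");
HbaseRow row = new HbaseRow(HbaseRow.OperationType.INSERT, "rk_001", "cf", data);

// Attach the sink to a stream of HbaseRow; the paths must point to real config files.
DataStream<HbaseRow> rows = ...; // e.g. mapped from a Kafka changelog source
rows.addSink(new HBaseSinkFunction(
        "user_table",                        // illustrative table name
        "/etc/hadoop/conf/core-site.xml",    // illustrative config paths
        "/etc/hbase/conf/hbase-site.xml"));
```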
2. Batch Writing

If you want Flink to write data to HBase in batches, you can define a custom batch sink function. Here is a simple example:

```java
public class HBaseBatchSinkFunction extends RichSinkFunction<List<Tuple2<String, String>>> {

    private transient Connection connection;
    private transient BufferedMutator bufferedMutator;

    @Override
    public void open(Configuration parameters) throws Exception {
        // Fully qualified to avoid clashing with Flink's Configuration in the method signature.
        org.apache.hadoop.conf.Configuration config = HBaseConfiguration.create();
        config.set("hbase.zookeeper.quorum", "localhost");
        config.set("hbase.zookeeper.property.clientPort", "2181");
        config.set("zookeeper.znode.parent", "/hbase");
        config.set("hbase.client.write.buffer", "10000000");
        config.set("hbase.client.retries.number", "3");

        connection = ConnectionFactory.createConnection(config);
        TableName tableName = TableName.valueOf("my_table");
        BufferedMutatorParams params = new BufferedMutatorParams(tableName);
        params.writeBufferSize(1024 * 1024);
        bufferedMutator = connection.getBufferedMutator(params);
    }

    @Override
    public void invoke(List<Tuple2<String, String>> values, Context context) throws Exception {
        List<Put> puts = new ArrayList<>();
        for (Tuple2<String, String> value : values) {
            Put put = new Put(Bytes.toBytes(value.f0));
            put.addColumn(Bytes.toBytes("my_cf"), Bytes.toBytes("my_col"), Bytes.toBytes(value.f1));
            puts.add(put);
        }
        bufferedMutator.mutate(puts);
    }

    @Override
    public void close() throws Exception {
        if (bufferedMutator != null) {
            bufferedMutator.flush();
            bufferedMutator.close();
        }
        if (connection != null) {
            connection.close();
        }
    }
}
```

In this custom sink, a BufferedMutator batches the writes: open() creates the HBase connection and the mutator, invoke() converts each element into a Put and hands it to the mutator, and close() flushes the buffer and closes the connection.

In your Flink program you can then attach this sink. Note that it consumes List<Tuple2<String, String>>, so the stream must already be batched into lists:

```java
DataStream<List<Tuple2<String, String>>> batchedStream = ...;
batchedStream.addSink(new HBaseBatchSinkFunction());
```

With that, data is written to HBase in batches.
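The snippet above leaves the batching step implicit. Below is a minimal sketch of one way to produce those List batches with a count window, using AllWindowFunction, GlobalWindow, and Collector from flink-streaming-java. The window size (1000) and the variable names are illustrative assumptions; also note that countWindowAll runs with parallelism 1 and only fires when the window is full, so a trailing partial batch stays buffered:

```java
DataStream<Tuple2<String, String>> source = ...; // illustrative upstream stream

// Group every 1000 elements into one List and hand each List to the sink.
DataStream<List<Tuple2<String, String>>> batchedStream = source
        .countWindowAll(1000)
        .apply(new AllWindowFunction<Tuple2<String, String>, List<Tuple2<String, String>>, GlobalWindow>() {
            @Override
            public void apply(GlobalWindow window,
                              Iterable<Tuple2<String, String>> values,
                              Collector<List<Tuple2<String, String>>> out) {
                List<Tuple2<String, String>> batch = new ArrayList<>();
                for (Tuple2<String, String> v : values) {
                    batch.add(v);
                }
                out.collect(batch);
            }
        });

batchedStream.addSink(new HBaseBatchSinkFunction());
```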