FlinkSQL Custom TableSink

Using a custom Huawei Cloud DLI sink as an example, let's write a custom Sink.
Start with a StreamTableSinkFactory, which declares the sink and specifies the connector parameters we want to support.

import com.qsc.sql.sinks.DLITableSink;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.descriptors.DescriptorProperties;
import org.apache.flink.table.factories.StreamTableSinkFactory;
import org.apache.flink.table.sinks.StreamTableSink;
import org.apache.flink.types.Row;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.apache.flink.table.descriptors.Schema.*;

/**
 * Factory that creates a {@link DLITableSink} for tables declared with connector.type = 'dli'.
 *
 * @author chunlong_liu
 * @date 2021/9/29
 */
public class DLISinkFactory implements StreamTableSinkFactory<Row> {

    @Override
    public StreamTableSink<Row> createStreamTableSink(Map<String, String> properties) {
        DescriptorProperties params = new DescriptorProperties();
        params.putProperties(properties);
        TableSchema tableSchema = params.getTableSchema(SCHEMA);
        DLITableSink dliTableSink = new DLITableSink(properties);
        return (DLITableSink) dliTableSink.configure(tableSchema.getFieldNames(), tableSchema.getFieldTypes());
    }

    @Override
    public Map<String, String> requiredContext() {
        Map<String, String> context = new HashMap<>();
        context.put("connector.type", "dli");
        return context;
    }

    @Override
    public List<String> supportedProperties() {
        List<String> list = new ArrayList<>();
        list.add("connector.type");
        // accessid
        list.add("connector.dli.accessid");
        // accesskey
        list.add("connector.dli.accesskey");
        // region name
        list.add("connector.dli.region.name");
        // project id
        list.add("connector.dli.projectid");
        // queue name
        list.add("connector.dli.queue.name");
        // database
        list.add("connector.dli.database");
        // table
        list.add("connector.dli.table");
        // flush max size
        list.add("connector.dli.flush.max.size");
        // flush max time
        list.add("connector.dli.flush.max.time");
        // schema
        list.add(SCHEMA + ".#." + SCHEMA_TYPE);
        list.add(SCHEMA + ".#." + SCHEMA_NAME);
        return list;
    }
}
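
To see how requiredContext and supportedProperties drive factory matching, here is a minimal sketch that looks the factory up through Flink's TableFactoryService, the same SPI lookup the planner performs (the credential values and the single-column schema are placeholders):

import org.apache.flink.table.factories.StreamTableSinkFactory;
import org.apache.flink.table.factories.TableFactoryService;

import java.util.HashMap;
import java.util.Map;

public class FactoryLookupDemo {
    public static void main(String[] args) {
        Map<String, String> props = new HashMap<>();
        // requiredContext: this key/value pair selects DLISinkFactory
        props.put("connector.type", "dli");
        // every remaining key must appear in supportedProperties, or matching fails
        props.put("connector.dli.accessid", "<access-id>");
        props.put("connector.dli.accesskey", "<access-key>");
        props.put("connector.dli.region.name", "<region>");
        props.put("connector.dli.projectid", "<project-id>");
        props.put("connector.dli.queue.name", "<queue>");
        props.put("connector.dli.database", "<database>");
        props.put("connector.dli.table", "<table>");
        // one VARCHAR column, matching the schema.#.name / schema.#.type wildcards
        props.put("schema.0.name", "message");
        props.put("schema.0.type", "VARCHAR");

        StreamTableSinkFactory factory =
                TableFactoryService.find(StreamTableSinkFactory.class, props);
        System.out.println(factory.getClass()); // our DLISinkFactory
    }
}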

Next, write DLITableSink, which parses the configuration from the incoming properties and attaches the actual sink to the stream:

import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSink;
import org.apache.flink.table.sinks.AppendStreamTableSink;
import org.apache.flink.table.sinks.TableSink;
import org.apache.flink.table.types.DataType;
import org.apache.flink.types.Row;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Map;

import static org.apache.flink.table.types.utils.TypeConversions.fromLegacyInfoToDataType;

/**
 * AppendStreamTableSink that reads the connector properties and wires a {@link DLISink} into the stream.
 *
 * @author chunlong_liu
 * @date 2021/9/29
 */
public class DLITableSink implements AppendStreamTableSink<Row> {

    private static final Logger logger = LoggerFactory.getLogger(DLITableSink.class);

    private String[] fieldNames;
    private TypeInformation<?>[] fieldTypes;

    private final Map<String, String> properties;

    public DLITableSink(Map<String, String> properties) {
        this.properties = properties;
    }

    @Override
    public void emitDataStream(DataStream<Row> dataStream) {
        consumeDataStream(dataStream);
    }

    @Override
    public TableSink<Row> configure(String[] fieldNames, TypeInformation<?>[] fieldTypes) {
        DLITableSink configuredSink = new DLITableSink(properties);
        configuredSink.fieldNames = fieldNames;
        configuredSink.fieldTypes = fieldTypes;
        return configuredSink;
    }

    @Override
    public String[] getFieldNames() {
        return fieldNames;
    }

    @Override
    public TypeInformation<?>[] getFieldTypes() {
        return fieldTypes;
    }

    @Override
    public TypeInformation<Row> getOutputType() {
        return Types.ROW_NAMED(fieldNames, fieldTypes);
    }

    @Override
    public DataType getConsumedDataType() {
        final RowTypeInfo rowTypeInfo = new RowTypeInfo(getFieldTypes(), getFieldNames());
        return fromLegacyInfoToDataType(rowTypeInfo);
    }

    @Override
    public DataStreamSink<?> consumeDataStream(DataStream<Row> dataStream) {
        // access id
        String accessId = this.properties.get("connector.dli.accessid");
        // access key
        String accessKey = this.properties.get("connector.dli.accesskey");
        // region name
        String dliRegionName = this.properties.get("connector.dli.region.name");
        // project id
        String dliProjectId = this.properties.get("connector.dli.projectid");
        // queue name
        String dliQueueName = this.properties.get("connector.dli.queue.name");
        // database
        String dliDatabase = this.properties.get("connector.dli.database");
        // table
        String dliTable = this.properties.get("connector.dli.table");
        // flush-related configuration
        String flushMaxSizeStr = this.properties.get("connector.dli.flush.max.size");
        Long flushMaxSize = 1L;
        if (StringUtils.isNotBlank(flushMaxSizeStr)) {
            flushMaxSize = Long.valueOf(flushMaxSizeStr);
        }
        String flushMaxTimeStr = this.properties.get("connector.dli.flush.max.time");
        Long flushMaxTime = 0L;
        if (StringUtils.isNotBlank(flushMaxTimeStr)) {
            flushMaxTime = Long.valueOf(flushMaxTimeStr);
        }
        return dataStream.addSink(new DLISink(accessId, accessKey, dliRegionName, dliProjectId, dliQueueName, dliDatabase, dliTable, flushMaxSize, flushMaxTime));
    }

}
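
As an aside, getConsumedDataType() bridges the legacy TypeInformation-based schema into the newer DataType system. A minimal sketch of what it returns for the one-column table used throughout this post:

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.utils.TypeConversions;

public class TypeBridgeDemo {
    public static void main(String[] args) {
        // the same shape the sink sees for a single-column table: message VARCHAR
        RowTypeInfo rowTypeInfo = new RowTypeInfo(
                new TypeInformation<?>[]{Types.STRING},
                new String[]{"message"});
        DataType consumed = TypeConversions.fromLegacyInfoToDataType(rowTypeInfo);
        // prints a legacy ROW data type wrapping the field "message"
        System.out.println(consumed);
    }
}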

Finally, write DLISink, the RichSinkFunction that actually writes the data to DLI:

import com.huawei.dli.sdk.DLIClient;
import com.huawei.dli.sdk.Queue;
import com.huawei.dli.sdk.UploadJob;
import com.huawei.dli.sdk.Writer;
import com.huawei.dli.sdk.authentication.AuthenticationMode;
import com.huawei.dli.sdk.common.DLIInfo;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.util.ExecutorThreadFactory;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.table.shaded.org.joda.time.DateTime;
import org.apache.flink.types.Row;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

/**
 * RichSinkFunction that buffers rows and uploads them to DLI in batches.
 *
 * @author chunlong_liu
 * @date 2021/9/29
 */
public class DLISink extends RichSinkFunction<Row> {

    private Logger logger = LoggerFactory.getLogger(DLISink.class);

    // DLI account and target table information
    private String accessId;
    private String accessKey;
    private String dliRegionName;
    private String dliProjectId;
    private String dliQueueName;
    private String dliDatabase;
    private String dliTable;
    // flush thresholds: max buffered records and max flush interval (ms)
    private Long flushMaxSize;
    private Long flushMaxTime;
    // scheduler for time-based flushes
    private ScheduledExecutorService scheduler;

    private DLIClient client;

    private Queue queue;

    // buffered rows, uploaded in batches
    private List<String> tmpDataList = new ArrayList<>();

    public DLISink() {
    }

    public DLISink(String accessId, String accessKey, String dliRegionName, String dliProjectId, String dliQueueName, String dliDatabase, String dliTable, Long flushMaxSize, Long flushMaxTime) {
        this.accessId = accessId;
        this.accessKey = accessKey;
        this.dliRegionName = dliRegionName;
        this.dliProjectId = dliProjectId;
        this.dliQueueName = dliQueueName;
        this.dliDatabase = dliDatabase;
        this.dliTable = dliTable;
        this.flushMaxSize = flushMaxSize;
        this.flushMaxTime = flushMaxTime;
    }

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        DLIInfo dliInfo = new DLIInfo(this.dliRegionName, this.accessId, this.accessKey, this.dliProjectId);
        this.client = new DLIClient(AuthenticationMode.AKSK, dliInfo);
        this.queue = client.getQueue(this.dliQueueName);

        if (flushMaxTime != 0) {
            this.scheduler = Executors.newScheduledThreadPool(
                    1, new ExecutorThreadFactory("dli-sink-flush"));
            this.scheduler.scheduleWithFixedDelay(() -> {
                synchronized (DLISink.this) {
                    flush();
                }
            }, flushMaxTime, flushMaxTime, TimeUnit.MILLISECONDS);
        }
    }

    @Override
    public void invoke(Row value, Context context) throws Exception {
        // synchronize with the scheduled flush thread before touching the buffer
        synchronized (DLISink.this) {
            tmpDataList.add(value.getField(0).toString());
            if (tmpDataList.size() >= flushMaxSize) {
                flush();
            }
        }
    }

    private void flush() {
        if (tmpDataList.isEmpty()) {
            return;
        }
        String dt = DateTime.now().toString("yyyyMMdd");
        int dataBlockSize = 67_108_864; // 64 MB
        int flushRecords = 5000;
        try {
            // write into the day partition, e.g. pt='20210929'
            String partitionSpec = "pt='" + dt + "'";
            UploadJob uploadJob = new UploadJob(this.queue, this.dliDatabase, this.dliTable, partitionSpec, false);
            uploadJob.setWriterDataBlockSize(dataBlockSize);
            Writer writer = uploadJob.createWriter();
            com.huawei.dli.sdk.common.Row row = uploadJob.newRow();
            int count = 0;
            Iterator<String> iterator = tmpDataList.iterator();
            while (iterator.hasNext()) {
                count++;
                row.setString(0, iterator.next());
                writer.write(row);
                iterator.remove();
                if (count % flushRecords == 0) {
                    writer.flush();
                }
            }
            writer.flush();
            writer.close();
            uploadJob.beginCommit();
        } catch (Exception e) {
            throw new RuntimeException("failed to write to DLI", e);
        }
    }

    @Override
    public void close() throws Exception {
        super.close();
        if (this.scheduler != null) {
            this.scheduler.shutdown();
        }
        // drain whatever is still buffered so the final batch is not lost
        synchronized (DLISink.this) {
            flush();
        }
    }
}
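
Before wiring everything through SQL, the sink can be smoke-tested directly on a DataStream. A minimal sketch, assuming a single-VARCHAR-column DLI table with a pt day partition; all credentials are placeholders:

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.types.Row;

public class DLISinkSmokeTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.fromElements(Row.of("hello dli"), Row.of("world"))
                .returns(Types.ROW(Types.STRING))
                // flushMaxSize = 1 flushes on every record; flushMaxTime = 0 disables the timer
                .addSink(new DLISink("<access-id>", "<access-key>", "<region>",
                        "<project-id>", "<queue>", "<database>", "<table>", 1L, 0L));
        env.execute("dli-sink-smoke-test");
    }
}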

One last thing: don't forget the SPI registration. Under the resources directory, create the file
META-INF/services/org.apache.flink.table.factories.TableFactory
and declare the factory class in it:

com.xxx.sql.factory.DLISinkFactory
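
With the service file in place, the connector can be used from SQL. A minimal end-to-end sketch, assuming Flink 1.11+ where the legacy connector.* property keys in the WITH clause still route through TableFactory discovery, and a pre-registered source_table to read from:

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

public class DLISqlDemo {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);

        // connector.type = 'dli' routes this table to DLISinkFactory via SPI
        tEnv.executeSql("CREATE TABLE dli_sink (" +
                "  message VARCHAR" +
                ") WITH (" +
                "  'connector.type' = 'dli'," +
                "  'connector.dli.accessid' = '<access-id>'," +
                "  'connector.dli.accesskey' = '<access-key>'," +
                "  'connector.dli.region.name' = '<region>'," +
                "  'connector.dli.projectid' = '<project-id>'," +
                "  'connector.dli.queue.name' = '<queue>'," +
                "  'connector.dli.database' = '<database>'," +
                "  'connector.dli.table' = '<table>'," +
                "  'connector.dli.flush.max.size' = '1000'," +
                "  'connector.dli.flush.max.time' = '5000'" +
                ")");

        // submits an asynchronous INSERT job into the custom sink
        tEnv.executeSql("INSERT INTO dli_sink SELECT message FROM source_table");
    }
}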