参考官方mysql自定义一个mysql sink connector

最新推荐文章于 2024-08-01 04:27:59 发布

weixin_34380296

最新推荐文章于 2024-08-01 04:27:59 发布

阅读量523

点赞数

文章标签：数据库 java 大数据

原文链接：https://my.oschina.net/qiangzigege/blog/2872867

版权

2019独角兽企业重金招聘Python工程师标准>>>

写4个类，比如我的是下面4个类

备注：因为我司用的是内部的 zebra 框架，所以请自行把代码中的连接池替换为 Druid 等其他连接池。

相关参数也请自行替换。

1）MyJDBCAppendTableSink.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.api.java.io.jdbc;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.table.sinks.AppendStreamTableSink;
import org.apache.flink.table.sinks.BatchTableSink;
import org.apache.flink.table.sinks.TableSink;
import org.apache.flink.table.util.TableConnectorUtil;
import org.apache.flink.types.Row;
import org.apache.flink.util.InstantiationUtil;
import org.apache.flink.util.Preconditions;

import java.io.IOException;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;

/**
 * Table sink that appends {@link Row}s to a database through a {@link MyJDBCOutputFormat}.
 *
 * <p>Delivery is at-least-once (provided checkpointing is enabled). Pairing the sink with an
 * idempotent statement such as <code>REPLACE</code> or <code>INSERT OVERWRITE</code> is the
 * usual way to obtain exactly-once results in the target table.</p>
 *
 * <p>The sink can be attached to a {@link DataStream} as well as to a {@link DataSet}.</p>
 */
@SuppressWarnings("rawtypes")
public class MyJDBCAppendTableSink implements AppendStreamTableSink<Row>, BatchTableSink<Row> {

    /** Format that performs the actual writes. */
    private final MyJDBCOutputFormat outputFormat;

    /** Column names; only set on the copy returned by {@link #configure}. */
    private String[] fieldNames;
    /** Column types; only set on the copy returned by {@link #configure}. */
    private TypeInformation[] fieldTypes;

    /**
     * Creates a sink around the given output format.
     *
     * @param outputFormat format used to write the rows
     */
    MyJDBCAppendTableSink(MyJDBCOutputFormat outputFormat) {
        this.outputFormat = outputFormat;
    }

    /**
     * Entry point for building a sink.
     *
     * @return a fresh builder
     */
    public static MyJDBCAppendTableSinkBuilder builder() {
        return new MyJDBCAppendTableSinkBuilder();
    }

    /**
     * Attaches a {@link MyJDBCSinkFunction} wrapping the output format to the stream and names
     * the resulting operator after this class and its field names.
     */
    @Override
    public void emitDataStream(DataStream<Row> dataStream) {
        dataStream.addSink(new MyJDBCSinkFunction(outputFormat)).name(
            TableConnectorUtil.generateRuntimeName(this.getClass(), fieldNames));
    }

    /** Writes the data set with the output format directly. */
    @Override
    public void emitDataSet(DataSet<Row> dataSet) {
        dataSet.output(outputFormat);
    }

    /** @return row type composed of the configured field types and names */
    @Override
    public TypeInformation<Row> getOutputType() {
        return new RowTypeInfo(fieldTypes, fieldNames);
    }

    /** @return the configured field names */
    @Override
    public String[] getFieldNames() {
        return fieldNames;
    }

    /** @return the configured field types */
    @Override
    public TypeInformation<?>[] getFieldTypes() {
        return fieldTypes;
    }

    /**
     * Checks the table schema against the SQL types of the output format and returns a copy of
     * this sink (with a cloned output format) that carries the given field names and types.
     *
     * @param fieldNames names of the table columns
     * @param fieldTypes types of the table columns
     * @return the configured copy of this sink
     * @throws IllegalArgumentException if the column count or any column type does not match
     *                                  the SQL types of the output format
     */
    @Override
    public TableSink<Row> configure(String[] fieldNames, TypeInformation<?>[] fieldTypes) {
        final int[] sinkSqlTypes = outputFormat.getTypesArray();

        // Both schema descriptions are only used for the error message below.
        final String sinkSchema = IntStream.of(sinkSqlTypes)
            .mapToObj(JDBCTypeUtil::getTypeName)
            .collect(Collectors.joining(", "));
        final String tableSchema = Stream.of(fieldTypes)
            .map(JDBCTypeUtil::getTypeName)
            .collect(Collectors.joining(", "));
        final String mismatch = String.format(
            "Schema of output table is incompatible with JDBCAppendTableSink schema. "
                + "Table schema: [%s], sink schema: [%s]",
            tableSchema, sinkSchema);

        Preconditions.checkArgument(fieldTypes.length == sinkSqlTypes.length, mismatch);
        for (int column = 0; column < sinkSqlTypes.length; column++) {
            final int tableSqlType = JDBCTypeUtil.typeInformationToSqlType(fieldTypes[column]);
            Preconditions.checkArgument(tableSqlType == sinkSqlTypes[column], mismatch);
        }

        // The returned sink gets its own copy of the output format.
        final MyJDBCOutputFormat clonedFormat;
        try {
            clonedFormat = InstantiationUtil.clone(outputFormat);
        } catch (IOException | ClassNotFoundException e) {
            throw new RuntimeException(e);
        }

        final MyJDBCAppendTableSink configured = new MyJDBCAppendTableSink(clonedFormat);
        configured.fieldNames = fieldNames;
        configured.fieldTypes = fieldTypes;
        return configured;
    }

    /** @return the wrapped output format */
    @VisibleForTesting
    MyJDBCOutputFormat getOutputFormat() {
        return outputFormat;
    }
}

2)MyJDBCAppendTableSinkBuilder.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.api.java.io.jdbc;

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.util.Preconditions;

/**
 * A builder to configure and build a {@link MyJDBCAppendTableSink}.
 *
 * <p>The query, the parameter types and the jdbcRef all have to be set before
 * {@link #build()} is called.
 */
public class MyJDBCAppendTableSinkBuilder {
    private String query;
    private int[]  parameterTypes;
    private String jdbcRef;

    /**
     * Specify the jdbcRef that is passed on to the output format.
     *
     * @param ref the jdbcRef value
     * @return this builder
     */
    public MyJDBCAppendTableSinkBuilder setJdbcRef(String ref) {
        this.jdbcRef = ref;
        return this;
    }

    /**
     * Specify the query that the sink will execute. Usually user can specify
     * INSERT, REPLACE or UPDATE to push the data to the database.
     *
     * @param query The query to be executed by the sink.
     * @return this builder
     * @see MyJDBCOutputFormat.MyJDBCOutputFormatBuilder#setQuery(String)
     */
    public MyJDBCAppendTableSinkBuilder setQuery(String query) {
        this.query = query;
        return this;
    }

    /**
     * Specify the type of the rows that the sink will be accepting.
     *
     * @param types the type of each field
     * @return this builder
     */
    public MyJDBCAppendTableSinkBuilder setParameterTypes(TypeInformation<?>... types) {
        int[] ty = new int[types.length];
        for (int i = 0; i < types.length; ++i) {
            ty[i] = JDBCTypeUtil.typeInformationToSqlType(types[i]);
        }
        this.parameterTypes = ty;
        return this;
    }

    /**
     * Specify the type of the rows that the sink will be accepting.
     *
     * @param types the type of each field defined by {@link java.sql.Types}.
     * @return this builder
     */
    public MyJDBCAppendTableSinkBuilder setParameterTypes(int... types) {
        // Copy the array so that later changes made by the caller cannot alter the sink schema.
        this.parameterTypes = types == null ? null : types.clone();
        return this;
    }

    /**
     * Finalizes the configuration and checks validity.
     *
     * @return Configured MyJDBCAppendTableSink
     * @throws NullPointerException     if the parameter types or the jdbcRef were not specified
     * @throws IllegalArgumentException if no query was specified
     */
    public MyJDBCAppendTableSink build() {
        Preconditions.checkNotNull(
            parameterTypes, "Types of the query parameters are not specified."
                            + " Please specify types using the setParameterTypes() method.");
        // Reject a missing jdbcRef here instead of passing null on to the output format.
        Preconditions.checkNotNull(
            jdbcRef, "The jdbcRef is not specified."
                     + " Please specify it using the setJdbcRef() method.");

        MyJDBCOutputFormat format = MyJDBCOutputFormat.buildJDBCOutputFormat().setQuery(query)
            .setSqlTypes(parameterTypes).setJdbcRef(jdbcRef).finish();

        return new MyJDBCAppendTableSink(format);
    }
}

3)MyJDBCOutputFormat.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.api.java.io.jdbc;

import org.apache.flink.api.common.io.RichOutputFormat;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.types.Row;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.dianping.zebra.group.jdbc.GroupDataSource;

import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;

/**
 * OutputFormat that writes {@link Row}s into a database through a {@link GroupDataSource}.
 *
 * <p>Instances are created with the builder returned by {@link #buildJDBCOutputFormat()}.
 * The data source is created in {@link #open(int, int)} and closed in {@link #close()}.
 * Every call to {@link #writeRecord(Row)} obtains a connection from the data source,
 * executes the configured statement once and closes the connection again.
 *
 * @see Row
 */
public class MyJDBCOutputFormat extends RichOutputFormat<Row> {

    private static final long   serialVersionUID = -6949239510340172802L;

    private static final Logger LOG              = LoggerFactory.getLogger(MyJDBCOutputFormat.class);

    /** SQL type (see {@link java.sql.Types}) of each statement parameter; may be null. */
    private int[]               typesArray;
    /** Statement executed for every record. */
    private String              sql;
    /** Reference handed to the {@link GroupDataSource} in {@link #open(int, int)}. */
    private String              jdbcRef;
    /** Created in {@link #open(int, int)}; runtime-only state, hence not serialized. */
    private transient GroupDataSource groupDataSource;

    public MyJDBCOutputFormat() {
    }

    /**
     * Only logs the invocation; this format takes its settings from the builder.
     *
     * @param parameters ignored
     */
    @Override
    public void configure(Configuration parameters) {
        LOG.info("configure function invoked , thread -[{}]", Thread.currentThread().getName());
    }

    /**
     * Creates and initializes the data source used by {@link #writeRecord(Row)}.
     *
     * @param taskNumber The number of the parallel instance.
     * @param numTasks The number of parallel instances.
     * @throws IOException Declared by the overridden method.
     */
    @Override
    public void open(int taskNumber, int numTasks) throws IOException {
        GroupDataSource dataSource = new GroupDataSource();
        dataSource.setJdbcRef(jdbcRef);
        dataSource.init();
        this.groupDataSource = dataSource;
        LOG.info(
            "open function invoked , taskNumber-[{}], numTasks-[{}], jdbcRef-[{}] ,thread -[{}]",
            taskNumber, numTasks, jdbcRef, Thread.currentThread().getName());
    }

    /**
     * Binds the fields of the row to the configured statement and executes it.
     *
     * <p>When this method is called, the output format is guaranteed to be opened.
     *
     * <p>WARNING: this may fail when no column types specified (because a best effort approach is attempted in order to
     * insert a null value but it's not guaranteed that the JDBC driver handles PreparedStatement.setObject(pos, null))
     *
     * <p>If column types are specified, the row must not have more fields than there are types;
     * a different arity is only reported with a warning before the fields are bound.
     *
     * @param row The record to write.
     * @see PreparedStatement
     * @throws IOException Declared by the overridden method.
     * @throws RuntimeException If preparing or executing the statement fails with a
     *                          {@link SQLException}, which is attached as the cause.
     */
    @Override
    public void writeRecord(Row row) throws IOException {
        Connection dbConn = null;
        PreparedStatement preparedStatement = null;
        try {
            if (typesArray != null && typesArray.length > 0
                && typesArray.length != row.getArity()) {
                LOG.warn(
                    "Column SQL types array doesn't match arity of passed Row! Check the passed array...");
            }
            dbConn = groupDataSource.getConnection();
            preparedStatement = dbConn.prepareStatement(sql);
            if (typesArray == null) {
                // no types provided
                for (int index = 0; index < row.getArity(); index++) {
                    Object field = row.getField(index);
                    LOG.warn(
                        "Unknown column type for column {}. Best effort approach to set its value: {}.",
                        index + 1, field);
                    preparedStatement.setObject(index + 1, field);
                }
            } else {
                // types provided
                for (int index = 0; index < row.getArity(); index++) {
                    Object field = row.getField(index);
                    if (field == null) {
                        preparedStatement.setNull(index + 1, typesArray[index]);
                    } else {
                        setTypedField(preparedStatement, index + 1, typesArray[index], field);
                    }
                }
            }
            // The boolean returned by execute() only says whether the first result is a
            // ResultSet; it is false for statements that produce an update count, so it is
            // not a failure indicator. Failures are reported through SQLException.
            preparedStatement.execute();
        } catch (SQLException e) {
            throw new RuntimeException("Preparation/Execution of JDBC statement failed.", e);
        } finally {
            // Release the statement first, then the connection. Failures while closing do not
            // fail the record, but they are logged instead of being dropped silently.
            if (null != preparedStatement) {
                try {
                    preparedStatement.close();
                } catch (SQLException e) {
                    LOG.warn("Failed to close the PreparedStatement.", e);
                }
            }
            if (null != dbConn) {
                try {
                    // According to the original author, zebra returns the connection to its pool here.
                    dbConn.close();
                } catch (SQLException e) {
                    LOG.warn("Failed to close the JDBC connection.", e);
                }
            }
        }
    }

    /**
     * Binds one non-null value to the statement with the setter that matches its SQL type.
     * Types without a dedicated setter are bound with {@code setObject} and a warning is logged.
     *
     * @param statement statement to bind the value to
     * @param position 1-based parameter position
     * @param sqlType type constant from {@link java.sql.Types}
     * @param value the non-null value to bind
     * @throws SQLException if the driver rejects the value
     * @throws ClassCastException if the value does not have the Java type expected for the SQL type
     */
    private static void setTypedField(PreparedStatement statement, int position, int sqlType,
                                      Object value) throws SQLException {
        // casting values as suggested by http://docs.oracle.com/javase/1.5.0/docs/guide/jdbc/getstart/mapping.html
        switch (sqlType) {
            case java.sql.Types.NULL:
                statement.setNull(position, sqlType);
                break;
            case java.sql.Types.BOOLEAN:
            case java.sql.Types.BIT:
                statement.setBoolean(position, (boolean) value);
                break;
            case java.sql.Types.CHAR:
            case java.sql.Types.NCHAR:
            case java.sql.Types.VARCHAR:
            case java.sql.Types.LONGVARCHAR:
            case java.sql.Types.LONGNVARCHAR:
                statement.setString(position, (String) value);
                break;
            case java.sql.Types.TINYINT:
                statement.setByte(position, (byte) value);
                break;
            case java.sql.Types.SMALLINT:
                statement.setShort(position, (short) value);
                break;
            case java.sql.Types.INTEGER:
                statement.setInt(position, (int) value);
                break;
            case java.sql.Types.BIGINT:
                statement.setLong(position, (long) value);
                break;
            case java.sql.Types.REAL:
                statement.setFloat(position, (float) value);
                break;
            case java.sql.Types.FLOAT:
            case java.sql.Types.DOUBLE:
                statement.setDouble(position, (double) value);
                break;
            case java.sql.Types.DECIMAL:
            case java.sql.Types.NUMERIC:
                statement.setBigDecimal(position, (java.math.BigDecimal) value);
                break;
            case java.sql.Types.DATE:
                statement.setDate(position, (java.sql.Date) value);
                break;
            case java.sql.Types.TIME:
                statement.setTime(position, (java.sql.Time) value);
                break;
            case java.sql.Types.TIMESTAMP:
                statement.setTimestamp(position, (java.sql.Timestamp) value);
                break;
            case java.sql.Types.BINARY:
            case java.sql.Types.VARBINARY:
            case java.sql.Types.LONGVARBINARY:
                statement.setBytes(position, (byte[]) value);
                break;
            default:
                // Reached for SQLXML, ARRAY, JAVA_OBJECT, BLOB, CLOB, NCLOB, DATALINK,
                // DISTINCT, OTHER, REF, ROWID, STRUCT and any other type not listed above.
                statement.setObject(position, value);
                LOG.warn(
                    "Unmanaged sql type ({}) for column {}. Best effort approach to set its value: {}.",
                    sqlType, position, value);
        }
    }

    /**
     * Does nothing: {@link #writeRecord(Row)} executes every record immediately, so there is
     * nothing buffered that would have to be flushed.
     */
    void flush() {
    }

    /**
     * @return the SQL types of the statement parameters (the internal array, not a copy)
     */
    public int[] getTypesArray() {
        return typesArray;
    }

    /**
     * Closes the data source created in {@link #open(int, int)}, if there is one.
     * A {@link SQLException} raised while closing is logged and not rethrown.
     *
     * @throws IOException Declared by the overridden method.
     */
    @Override
    public void close() throws IOException {
        if (null != groupDataSource) {
            try {
                groupDataSource.close();
            } catch (SQLException e) {
                LOG.error("Failed to close the GroupDataSource.", e);
            } finally {
                // Drop the reference so that a repeated close() is a no-op.
                groupDataSource = null;
            }
        }
    }

    /**
     * @return a new builder for a {@link MyJDBCOutputFormat}
     */
    public static MyJDBCOutputFormatBuilder buildJDBCOutputFormat() {
        return new MyJDBCOutputFormatBuilder();
    }

    /**
     * Builder for a {@link MyJDBCOutputFormat}.
     */
    public static class MyJDBCOutputFormatBuilder {
        private final MyJDBCOutputFormat format;

        protected MyJDBCOutputFormatBuilder() {
            this.format = new MyJDBCOutputFormat();
        }

        /**
         * @param query statement to execute for every record
         * @return this builder
         */
        public MyJDBCOutputFormatBuilder setQuery(String query) {
            format.sql = query;
            return this;
        }

        /**
         * @param typesArray SQL types (see {@link java.sql.Types}) of the statement parameters
         * @return this builder
         */
        public MyJDBCOutputFormatBuilder setSqlTypes(int[] typesArray) {
            format.typesArray = typesArray;
            return this;
        }

        /**
         * @param ref reference handed to the data source when the format is opened
         * @return this builder
         */
        public MyJDBCOutputFormatBuilder setJdbcRef(String ref) {
            format.jdbcRef = ref;
            return this;
        }

        /**
         * Finalizes the configuration and checks validity.
         *
         * @return Configured MyJDBCOutputFormat
         * @throws IllegalArgumentException if no statement was supplied
         */
        public MyJDBCOutputFormat finish() {
            if (format.sql == null) {
                throw new IllegalArgumentException("No sql statement supplied.");
            }
            return format;
        }
    }

}

4)MyJDBCSinkFunction.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.api.java.io.jdbc;

import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.state.FunctionInitializationContext;
import org.apache.flink.runtime.state.FunctionSnapshotContext;
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.types.Row;

/**
 * Sink function that hands every incoming {@link Row} to a {@link MyJDBCOutputFormat}.
 *
 * <p>The wrapped format is opened and closed together with this function, and its
 * {@code flush()} method is called each time a state snapshot is taken.
 */
class MyJDBCSinkFunction extends RichSinkFunction<Row> implements CheckpointedFunction {

    private static final long serialVersionUID = 2120156461628723467L;

    /** Format that performs the actual writes. */
    final MyJDBCOutputFormat outputFormat;

    /**
     * @param outputFormat format that every record is delegated to
     */
    MyJDBCSinkFunction(MyJDBCOutputFormat outputFormat) {
        this.outputFormat = outputFormat;
    }

    /** Passes the runtime context to the format and opens it for this subtask. */
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        final RuntimeContext runtimeContext = getRuntimeContext();
        outputFormat.setRuntimeContext(runtimeContext);
        final int subtaskIndex = runtimeContext.getIndexOfThisSubtask();
        final int parallelism = runtimeContext.getNumberOfParallelSubtasks();
        outputFormat.open(subtaskIndex, parallelism);
    }

    /** Writes a single record through the wrapped format. */
    @Override
    public void invoke(Row value) throws Exception {
        outputFormat.writeRecord(value);
    }

    /** Flushes the wrapped format when a snapshot is taken. */
    @Override
    public void snapshotState(FunctionSnapshotContext context) throws Exception {
        outputFormat.flush();
    }

    /** Nothing to initialize: this function keeps no state of its own. */
    @Override
    public void initializeState(FunctionInitializationContext context) throws Exception {
    }

    /** Closes the wrapped format first, then the function itself. */
    @Override
    public void close() throws Exception {
        outputFormat.close();
        super.close();
    }
}

转载于:https://my.oschina.net/qiangzigege/blog/2872867