This source code is from the Flink 1.14 connector, but it needs to run on Flink 1.12. The connector name has not been changed yet, so take care that the SPI registration does not conflict with the built-in HBase connector.
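The factory class and its SPI registration are not included below. To avoid the name clash mentioned above, a copied factory would need its own identifier and an entry in META-INF/services/org.apache.flink.table.factories.Factory pointing at the repackaged class. A minimal sketch, assuming a hypothetical HBase14DynamicTableFactory and identifier string (both names are placeholders, not part of the copied code):

package connector.hbase.flink14;

import java.util.Collections;
import java.util.Set;

import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.table.connector.sink.DynamicTableSink;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.factories.DynamicTableSinkFactory;
import org.apache.flink.table.factories.DynamicTableSourceFactory;

/**
 * Skeleton only: the point is the identifier. Register this class in
 * META-INF/services/org.apache.flink.table.factories.Factory so the SPI picks it
 * up under a name that the bundled "hbase-2.2"/"hbase-1.4" factories do not claim.
 */
public class HBase14DynamicTableFactory
        implements DynamicTableSourceFactory, DynamicTableSinkFactory {

    // Assumed custom name; any string unused by the bundled connectors works.
    public static final String IDENTIFIER = "hbase-ported";

    @Override
    public String factoryIdentifier() {
        return IDENTIFIER;
    }

    @Override
    public DynamicTableSource createDynamicTableSource(Context context) {
        // The real wiring builds HBaseDynamicTableSource from the catalog options,
        // as in the copied 1.14 factory; omitted from this sketch.
        throw new UnsupportedOperationException("see the copied factory code");
    }

    @Override
    public DynamicTableSink createDynamicTableSink(Context context) {
        // Likewise builds HBaseDynamicTableSink in the copied factory code.
        throw new UnsupportedOperationException("see the copied factory code");
    }

    @Override
    public Set<ConfigOption<?>> requiredOptions() {
        return Collections.emptySet();
    }

    @Override
    public Set<ConfigOption<?>> optionalOptions() {
        return Collections.emptySet();
    }
}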
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package connector.hbase.flink14.sink;

import connector.hbase.flink14base.options.HBaseWriteOptions;
import connector.hbase.flink14base.sink.HBaseSinkFunction;
import connector.hbase.flink14base.sink.RowDataToMutationConverter;
import connector.hbase.flink14base.util.HBaseTableSchema;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.sink.DynamicTableSink;
import org.apache.flink.table.connector.sink.SinkFunctionProvider;
import org.apache.flink.table.data.RowData;
import org.apache.flink.types.RowKind;

import org.apache.hadoop.conf.Configuration;

/** HBase table sink implementation. */
public class HBaseDynamicTableSink implements DynamicTableSink {

    private final String tableName;
    private final HBaseTableSchema hbaseTableSchema;
    private final Configuration hbaseConf;
    private final HBaseWriteOptions writeOptions;
    private final String nullStringLiteral;

    public HBaseDynamicTableSink(
            String tableName,
            HBaseTableSchema hbaseTableSchema,
            Configuration hbaseConf,
            HBaseWriteOptions writeOptions,
            String nullStringLiteral) {
        this.tableName = tableName;
        this.hbaseTableSchema = hbaseTableSchema;
        this.hbaseConf = hbaseConf;
        this.writeOptions = writeOptions;
        this.nullStringLiteral = nullStringLiteral;
    }

    @Override
    public SinkRuntimeProvider getSinkRuntimeProvider(Context context) {
        HBaseSinkFunction<RowData> sinkFunction =
                new HBaseSinkFunction<>(
                        tableName,
                        hbaseConf,
                        new RowDataToMutationConverter(hbaseTableSchema, nullStringLiteral),
                        writeOptions.getBufferFlushMaxSizeInBytes(),
                        writeOptions.getBufferFlushMaxRows(),
                        writeOptions.getBufferFlushIntervalMillis());
        return SinkFunctionProvider.of(sinkFunction, writeOptions.getParallelism());
    }

    @Override
    public ChangelogMode getChangelogMode(ChangelogMode requestedMode) {
        // UPSERT mode
        ChangelogMode.Builder builder = ChangelogMode.newBuilder();
        for (RowKind kind : requestedMode.getContainedKinds()) {
            if (kind != RowKind.UPDATE_BEFORE) {
                builder.addContainedKind(kind);
            }
        }
        return builder.build();
    }

    @Override
    public DynamicTableSink copy() {
        return new HBaseDynamicTableSink(
                tableName, hbaseTableSchema, hbaseConf, writeOptions, nullStringLiteral);
    }

    @Override
    public String asSummaryString() {
        return "HBase";
    }

    // -------------------------------------------------------------------------------------------

    @VisibleForTesting
    public HBaseTableSchema getHBaseTableSchema() {
        return this.hbaseTableSchema;
    }

    @VisibleForTesting
    public HBaseWriteOptions getWriteOptions() {
        return writeOptions;
    }

    @VisibleForTesting
    public Configuration getConfiguration() {
        return this.hbaseConf;
    }

    @VisibleForTesting
    public String getTableName() {
        return this.tableName;
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package connector.hbase.flink14.source;

import connector.hbase.flink14base.source.TableInputSplit;
import connector.hbase.flink14base.util.HBaseConfigurationUtil;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.io.InputFormat;
import org.apache.flink.api.common.io.LocatableInputSplitAssigner;
import org.apache.flink.api.common.io.RichInputFormat;
import org.apache.flink.api.common.io.statistics.BaseStatistics;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.io.InputSplitAssigner;
import org.apache.flink.util.IOUtils;

import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/** Abstract {@link InputFormat} to read data from HBase tables. */
public abstract class AbstractTableInputFormat<T> extends RichInputFormat<T, TableInputSplit> {

    // Use the repackaged class for the logger instead of the original
    // org.apache.flink.connector.hbase2 class, which is not on the classpath here.
    protected static final Logger LOG = LoggerFactory.getLogger(AbstractTableInputFormat.class);

    private static final long serialVersionUID = 1L;

    // helper variable to decide whether the input is exhausted or not
    protected boolean endReached = false;

    protected transient Connection connection = null;
    protected transient Table table = null;
    protected transient RegionLocator regionLocator = null;
    protected transient Scan scan = null;

    /** HBase iterator wrapper. */
    protected ResultScanner resultScanner = null;

    protected byte[] currentRow;
    protected long scannedRows;

    // Configuration is not serializable
    protected byte[] serializedConfig;

    public AbstractTableInputFormat(org.apache.hadoop.conf.Configuration hConf) {
        serializedConfig = HBaseConfigurationUtil.serializeConfiguration(hConf);
    }

    /**
     * Creates a {@link Scan} object and opens the {@link HTable} connection to initialize the
     * HBase table.
     *
     * @throws IOException Thrown, if the connection could not be opened due to an I/O problem.
     */
    protected abstract void initTable() throws IOException;

    /**
     * Returns an instance of Scan that retrieves the required subset of records from the HBase
     * table.
     *
     * @return The appropriate instance of Scan for this use case.
     */
    protected abstract Scan getScanner();

    /**
     * What table is to be read.
     *
     * <p>Per instance of a TableInputFormat derivative only a single table name is possible.
     *
     * @return The name of the table
     */
    protected abstract String getTableName();

    /**
     * HBase returns an instance of {@link Result}.
     *
     * <p>This method maps the returned {@link Result} instance into the output type {@link T}.
     *
     * @param r The Result instance from HBase that needs to be converted
     * @return The appropriate instance of {@link T} that contains the data of Result.
     */
    protected abstract T mapResultToOutType(Result r);

    @Override
    public void configure(Configuration parameters) {}

    protected org.apache.hadoop.conf.Configuration getHadoopConfiguration() {
        return HBaseConfigurationUtil.deserializeConfiguration(
                serializedConfig, HBaseConfigurationUtil.getHBaseConfiguration());
    }

    /**
     * Creates a {@link Scan} object and opens the {@link HTable} connection. The connection is
     * opened in this method and closed in {@link #close()}.
     *
     * @param split The split to be opened.
     * @throws IOException Thrown, if the split could not be opened due to an I/O problem.
     */
    @Override
    public void open(TableInputSplit split) throws IOException {
        initTable();

        if (split == null) {
            throw new IOException("Input split is null!");
        }

        logSplitInfo("opening", split);

        // set scan range
        currentRow = split.getStartRow();
        scan.setStartRow(currentRow);
        scan.setStopRow(split.getEndRow());

        resultScanner = table.getScanner(scan);
        endReached = false;
        scannedRows = 0;
    }

    @Override
    public T nextRecord(T reuse) throws IOException {
        if (resultScanner == null) {
            throw new IOException("No table result scanner provided!");
        }
        Result res;
        try {
            res = resultScanner.next();
        } catch (Exception e) {
            resultScanner.close();
            // workaround for timeout on scan
            LOG.warn(
                    "Error after scan of " + scannedRows + " rows. Retry with a new scanner...",
                    e);
            scan.withStartRow(currentRow, false);
            resultScanner = table.getScanner(scan);
            res = resultScanner.next();
        }

        if (res != null) {
            scannedRows++;
            currentRow = res.getRow();
            return mapResultToOutType(res);
        }

        endReached = true;
        return null;
    }

    private void logSplitInfo(String action, TableInputSplit split) {
        int splitId = split.getSplitNumber();
        String splitStart = Bytes.toString(split.getStartRow());
        String splitEnd = Bytes.toString(split.getEndRow());
        String splitStartKey = splitStart.isEmpty() ? "-" : splitStart;
        String splitStopKey = splitEnd.isEmpty() ? "-" : splitEnd;
        String[] hostnames = split.getHostnames();
        LOG.info(
                "{} split (this={})[{}|{}|{}|{}]",
                action,
                this,
                splitId,
                hostnames,
                splitStartKey,
                splitStopKey);
    }

    @Override
    public boolean reachedEnd() throws IOException {
        return endReached;
    }

    @Override
    public void close() throws IOException {
        LOG.info("Closing split (scanned {} rows)", scannedRows);
        currentRow = null;
        IOUtils.closeQuietly(resultScanner);
        resultScanner = null;
        closeTable();
    }

    public void closeTable() {
        if (table != null) {
            try {
                table.close();
            } catch (IOException e) {
                LOG.warn("Exception occurs while closing HBase Table.", e);
            }
            table = null;
        }
        if (connection != null) {
            try {
                connection.close();
            } catch (IOException e) {
                LOG.warn("Exception occurs while closing HBase Connection.", e);
            }
            connection = null;
        }
    }

    @Override
    public TableInputSplit[] createInputSplits(final int minNumSplits) throws IOException {
        try {
            initTable();

            // Get the starting and ending row keys for every region in the currently open table
            final Pair<byte[][], byte[][]> keys = regionLocator.getStartEndKeys();
            if (keys == null || keys.getFirst() == null || keys.getFirst().length == 0) {
                LOG.warn(
                        "Unexpected region keys: {} appeared in HBase table: {}, all region information are: {}.",
                        keys,
                        table,
                        regionLocator.getAllRegionLocations());
                throw new IOException(
                        "HBase Table expects at least one region in scan,"
                                + " please check the HBase table status in HBase cluster");
            }
            final byte[] startRow = scan.getStartRow();
            final byte[] stopRow = scan.getStopRow();
            final boolean scanWithNoLowerBound = startRow.length == 0;
            final boolean scanWithNoUpperBound = stopRow.length == 0;

            final List<TableInputSplit> splits = new ArrayList<>(minNumSplits);
            for (int i = 0; i < keys.getFirst().length; i++) {
                final byte[] startKey = keys.getFirst()[i];
                final byte[] endKey = keys.getSecond()[i];
                final String regionLocation =
                        regionLocator.getRegionLocation(startKey, false).getHostnamePort();
                // Test if the given region is to be included in the InputSplit while splitting the
                // regions of a table
                if (!includeRegionInScan(startKey, endKey)) {
                    continue;
                }
                // Find the region on which the given row is being served
                final String[] hosts = new String[] {regionLocation};

                // Determine if regions contains keys used by the scan
                boolean isLastRegion = endKey.length == 0;
                if ((scanWithNoLowerBound || isLastRegion || Bytes.compareTo(startRow, endKey) < 0)
                        && (scanWithNoUpperBound || Bytes.compareTo(stopRow, startKey) > 0)) {

                    final byte[] splitStart =
                            scanWithNoLowerBound || Bytes.compareTo(startKey, startRow) >= 0
                                    ? startKey
                                    : startRow;
                    final byte[] splitStop =
                            (scanWithNoUpperBound || Bytes.compareTo(endKey, stopRow) <= 0)
                                            && !isLastRegion
                                    ? endKey
                                    : stopRow;
                    int id = splits.size();
                    final TableInputSplit split =
                            new TableInputSplit(
                                    id, hosts, table.getName().getName(), splitStart, splitStop);
                    splits.add(split);
                }
            }
            LOG.info("Created " + splits.size() + " splits");
            for (TableInputSplit split : splits) {
                logSplitInfo("created", split);
            }
            return splits.toArray(new TableInputSplit[splits.size()]);
        } finally {
            closeTable();
        }
    }

    /**
     * Test if the given region is to be included in the scan while splitting the regions of a
     * table.
     *
     * @param startKey Start key of the region
     * @param endKey End key of the region
     * @return true, if this region needs to be included as part of the input (default).
     */
    protected boolean includeRegionInScan(final byte[] startKey, final byte[] endKey) {
        return true;
    }

    @Override
    public InputSplitAssigner getInputSplitAssigner(TableInputSplit[] inputSplits) {
        return new LocatableInputSplitAssigner(inputSplits);
    }

    @Override
    public BaseStatistics getStatistics(BaseStatistics cachedStatistics) {
        return null;
    }

    @VisibleForTesting
    public Connection getConnection() {
        return connection;
    }
}
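AbstractTableInputFormat leaves initTable(), getScanner(), getTableName(), and mapResultToOutType() to subclasses; in the copied connector these are implemented by HBaseRowDataInputFormat using HBaseSerde and HBaseTableSchema. The sketch below is a hypothetical, simplified subclass that only emits row keys, included to show which protected fields initTable() is expected to populate:

package connector.hbase.flink14.source;

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

/**
 * Hypothetical subclass that emits the row key of every scanned row as a String.
 * It is not part of the connector; the real HBaseRowDataInputFormat maps Results
 * to RowData instead.
 */
public class RowKeyInputFormat extends AbstractTableInputFormat<String> {

    private final String tableName;

    public RowKeyInputFormat(org.apache.hadoop.conf.Configuration hConf, String tableName) {
        super(hConf);
        this.tableName = tableName;
    }

    @Override
    protected void initTable() throws IOException {
        if (table == null) {
            // The protected connection/table/regionLocator/scan fields must be set here;
            // open() and createInputSplits() rely on them.
            connection = ConnectionFactory.createConnection(getHadoopConfiguration());
            table = connection.getTable(TableName.valueOf(tableName));
            regionLocator = connection.getRegionLocator(TableName.valueOf(tableName));
            scan = getScanner();
        }
    }

    @Override
    protected Scan getScanner() {
        // Full-table scan; the real format restricts families/qualifiers from the schema.
        return new Scan();
    }

    @Override
    protected String getTableName() {
        return tableName;
    }

    @Override
    protected String mapResultToOutType(Result r) {
        return Bytes.toString(r.getRow());
    }
}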
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package connector.hbase.flink14.source;

import connector.hbase.flink14base.options.HBaseLookupOptions;
import connector.hbase.flink14base.source.AbstractHBaseDynamicTableSource;
import connector.hbase.flink14base.source.HBaseRowDataLookupFunction;
import connector.hbase.flink14base.util.HBaseTableSchema;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.io.InputFormat;
import org.apache.flink.table.connector.source.AsyncTableFunctionProvider;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.connector.source.TableFunctionProvider;
import org.apache.flink.table.data.RowData;

import org.apache.hadoop.conf.Configuration;

import static org.apache.flink.util.Preconditions.checkArgument;

/** HBase table source implementation. */
public class HBaseDynamicTableSource extends AbstractHBaseDynamicTableSource {

    public HBaseDynamicTableSource(
            Configuration conf,
            String tableName,
            HBaseTableSchema hbaseSchema,
            String nullStringLiteral,
            HBaseLookupOptions lookupOptions) {
        super(conf, tableName, hbaseSchema, nullStringLiteral, lookupOptions);
    }

    @Override
    public LookupRuntimeProvider getLookupRuntimeProvider(LookupContext context) {
        checkArgument(
                context.getKeys().length == 1 && context.getKeys()[0].length == 1,
                "Currently, HBase table can only be lookup by single rowkey.");
        checkArgument(
                hbaseSchema.getRowKeyName().isPresent(),
                "HBase schema must have a row key when used in lookup mode.");
        checkArgument(
                hbaseSchema
                        .convertsToTableSchema()
                        .getTableColumn(context.getKeys()[0][0])
                        .filter(f -> f.getName().equals(hbaseSchema.getRowKeyName().get()))
                        .isPresent(),
                "Currently, HBase table only supports lookup by rowkey field.");
        if (lookupOptions.getLookupAsync()) {
            return AsyncTableFunctionProvider.of(
                    new HBaseRowDataAsyncLookupFunction(
                            conf, tableName, hbaseSchema, nullStringLiteral, lookupOptions));
        } else {
            return TableFunctionProvider.of(
                    new HBaseRowDataLookupFunction(
                            conf, tableName, hbaseSchema, nullStringLiteral, lookupOptions));
        }
    }

    @Override
    public DynamicTableSource copy() {
        return new HBaseDynamicTableSource(
                conf, tableName, hbaseSchema, nullStringLiteral, lookupOptions);
    }

    @Override
    protected InputFormat<RowData, ?> getInputFormat() {
        return new HBaseRowDataInputFormat(conf, tableName, hbaseSchema, nullStringLiteral);
    }

    @VisibleForTesting
    public HBaseLookupOptions getLookupOptions() {
        return this.lookupOptions;
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package connector.hbase.flink14.source;

import connector.hbase.flink14base.options.HBaseLookupOptions;
import connector.hbase.flink14base.util.HBaseConfigurationUtil;
import connector.hbase.flink14base.util.HBaseSerde;
import connector.hbase.flink14base.util.HBaseTableSchema;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.metrics.Gauge;
import org.apache.flink.runtime.util.ExecutorThreadFactory;
import org.apache.flink.shaded.guava18.com.google.common.cache.Cache;
import org.apache.flink.shaded.guava18.com.google.common.cache.CacheBuilder;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.functions.AsyncTableFunction;
import org.apache.flink.table.functions.FunctionContext;
import org.apache.flink.util.StringUtils;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Threads;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.concurrent.*;

/**
 * The HBaseRowDataAsyncLookupFunction is an implementation to lookup HBase data by rowkey in async
 * fashion. It looks up the result as {@link RowData}.
 */
public class HBaseRowDataAsyncLookupFunction extends AsyncTableFunction<RowData> {

    private static final Logger LOG =
            LoggerFactory.getLogger(HBaseRowDataAsyncLookupFunction.class);
    private static final long serialVersionUID = 1L;

    private final String hTableName;
    private final byte[] serializedConfig;
    private final HBaseTableSchema hbaseTableSchema;
    private final String nullStringLiteral;

    private transient AsyncConnection asyncConnection;
    private transient AsyncTable<ScanResultConsumer> table;
    private transient HBaseSerde serde;

    private final long cacheMaxSize;
    private final long cacheExpireMs;
    private final int maxRetryTimes;
    private transient Cache<Object, RowData> cache;

    /** The size for thread pool. */
    private static final int THREAD_POOL_SIZE = 16;

    public HBaseRowDataAsyncLookupFunction(
            Configuration configuration,
            String hTableName,
            HBaseTableSchema hbaseTableSchema,
            String nullStringLiteral,
            HBaseLookupOptions lookupOptions) {
        this.serializedConfig = HBaseConfigurationUtil.serializeConfiguration(configuration);
        this.hTableName = hTableName;
        this.hbaseTableSchema = hbaseTableSchema;
        this.nullStringLiteral = nullStringLiteral;
        this.cacheMaxSize = lookupOptions.getCacheMaxSize();
        this.cacheExpireMs = lookupOptions.getCacheExpireMs();
        this.maxRetryTimes = lookupOptions.getMaxRetryTimes();
    }

    @Override
    public void open(FunctionContext context) {
        LOG.info("start open ...");
        final ExecutorService threadPool =
                Executors.newFixedThreadPool(
                        THREAD_POOL_SIZE,
                        new ExecutorThreadFactory(
                                "hbase-async-lookup-worker", Threads.LOGGING_EXCEPTION_HANDLER));
        Configuration config = prepareRuntimeConfiguration();
        CompletableFuture<AsyncConnection> asyncConnectionFuture =
                ConnectionFactory.createAsyncConnection(config);
        try {
            asyncConnection = asyncConnectionFuture.get();
            table = asyncConnection.getTable(TableName.valueOf(hTableName), threadPool);

            this.cache =
                    cacheMaxSize <= 0 || cacheExpireMs <= 0
                            ? null
                            : CacheBuilder.newBuilder()
                                    .recordStats()
                                    .expireAfterWrite(cacheExpireMs, TimeUnit.MILLISECONDS)
                                    .maximumSize(cacheMaxSize)
                                    .build();
            if (cache != null && context != null) {
                context.getMetricGroup()
                        .gauge(
                                "lookupCacheHitRate",
                                (Gauge<Double>) () -> cache.stats().hitRate());
            }
        } catch (InterruptedException | ExecutionException e) {
            LOG.error("Exception while creating connection to HBase.", e);
            throw new RuntimeException("Cannot create connection to HBase.", e);
        }
        this.serde = new HBaseSerde(hbaseTableSchema, nullStringLiteral);
        LOG.info("end open.");
    }

    /**
     * The invoke entry point of lookup function.
     *
     * @param future The result or exception is returned.
     * @param rowKey the lookup key. Currently only support single rowkey.
     */
    public void eval(CompletableFuture<Collection<RowData>> future, Object rowKey) {
        int currentRetry = 0;
        if (cache != null) {
            RowData cacheRowData = cache.getIfPresent(rowKey);
            if (cacheRowData != null) {
                if (cacheRowData.getArity() == 0) {
                    future.complete(Collections.emptyList());
                } else {
                    future.complete(Collections.singletonList(cacheRowData));
                }
                return;
            }
        }
        // fetch result
        fetchResult(future, currentRetry, rowKey);
    }

    /**
     * Execute async fetch result.
     *
     * @param resultFuture The result or exception is returned.
     * @param currentRetry Current number of retries.
     * @param rowKey the lookup key.
     */
    private void fetchResult(
            CompletableFuture<Collection<RowData>> resultFuture, int currentRetry, Object rowKey) {
        Get get = serde.createGet(rowKey);
        CompletableFuture<Result> responseFuture = table.get(get);
        responseFuture.whenCompleteAsync(
                (result, throwable) -> {
                    if (throwable != null) {
                        if (throwable instanceof TableNotFoundException) {
                            LOG.error("Table '{}' not found ", hTableName, throwable);
                            resultFuture.completeExceptionally(
                                    new RuntimeException(
                                            "HBase table '" + hTableName + "' not found.",
                                            throwable));
                        } else {
                            LOG.error(
                                    String.format(
                                            "HBase asyncLookup error, retry times = %d",
                                            currentRetry),
                                    throwable);
                            if (currentRetry >= maxRetryTimes) {
                                resultFuture.completeExceptionally(throwable);
                            } else {
                                try {
                                    Thread.sleep(1000 * currentRetry);
                                } catch (InterruptedException e1) {
                                    resultFuture.completeExceptionally(e1);
                                }
                                fetchResult(resultFuture, currentRetry + 1, rowKey);
                            }
                        }
                    } else {
                        if (result.isEmpty()) {
                            resultFuture.complete(Collections.emptyList());
                            if (cache != null) {
                                cache.put(rowKey, new GenericRowData(0));
                            }
                        } else {
                            if (cache != null) {
                                RowData rowData = serde.convertToNewRow(result);
                                resultFuture.complete(Collections.singletonList(rowData));
                                cache.put(rowKey, rowData);
                            } else {
                                resultFuture.complete(
                                        Collections.singletonList(serde.convertToNewRow(result)));
                            }
                        }
                    }
                });
    }

    private Configuration prepareRuntimeConfiguration() {
        // create default configuration from current runtime env (`hbase-site.xml` in classpath)
        // first,
        // and overwrite configuration using serialized configuration from client-side env
        // (`hbase-site.xml` in classpath).
        // user params from client-side have the highest priority
        Configuration runtimeConfig =
                HBaseConfigurationUtil.deserializeConfiguration(
                        serializedConfig, HBaseConfigurationUtil.getHBaseConfiguration());

        // do