This source is also the 1.14 version, but it has to run on 1.12; the connector name has not been changed yet, so make sure the SPI does not conflict.
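(The SPI mentioned above is Flink's table factory discovery: every factory listed in META-INF/services/org.apache.flink.table.factories.Factory must report a unique factoryIdentifier(), so a port that keeps the bundled identifier such as "hbase-2.2" or "hbase-1.4" would clash with the official connector jar on the same classpath. Below is a minimal sketch of a sink factory with its own identifier; the class name and identifier string are hypothetical placeholders, not code from this repository.)

package connector.hbase.flink14;

import java.util.Collections;
import java.util.Set;

import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.table.connector.sink.DynamicTableSink;
import org.apache.flink.table.factories.DynamicTableSinkFactory;

/** Sketch only: a factory whose identifier does not clash with the bundled HBase connectors. */
public class HBase14DynamicTableFactory implements DynamicTableSinkFactory {

    // Hypothetical identifier; any value other than "hbase-1.4" / "hbase-2.2" avoids the clash.
    public static final String IDENTIFIER = "hbase-1.14-port";

    @Override
    public String factoryIdentifier() {
        return IDENTIFIER;
    }

    @Override
    public Set<ConfigOption<?>> requiredOptions() {
        // A real factory would declare options such as table-name and zookeeper.quorum here.
        return Collections.emptySet();
    }

    @Override
    public Set<ConfigOption<?>> optionalOptions() {
        return Collections.emptySet();
    }

    @Override
    public DynamicTableSink createDynamicTableSink(Context context) {
        // A real factory would read the catalog options and build the HBaseDynamicTableSink below.
        throw new UnsupportedOperationException("sketch only");
    }
}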
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package connector.hbase.flink14.sink;
import connector.hbase.flink14base.options.HBaseWriteOptions;
import connector.hbase.flink14base.sink.HBaseSinkFunction;
import connector.hbase.flink14base.sink.RowDataToMutationConverter;
import connector.hbase.flink14base.util.HBaseTableSchema;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.sink.DynamicTableSink;
import org.apache.flink.table.connector.sink.SinkFunctionProvider;
import org.apache.flink.table.data.RowData;
import org.apache.flink.types.RowKind;
import org.apache.hadoop.conf.Configuration;
/**
* HBase table sink implementation.
*/
public class HBaseDynamicTableSink implements DynamicTableSink {
private final String tableName;
private final HBaseTableSchema hbaseTableSchema;
private final Configuration hbaseConf;
private final HBaseWriteOptions writeOptions;
private final String nullStringLiteral;
public HBaseDynamicTableSink(
String tableName,
HBaseTableSchema hbaseTableSchema,
Configuration hbaseConf,
HBaseWriteOptions writeOptions,
String nullStringLiteral) {
this.tableName = tableName;
this.hbaseTableSchema = hbaseTableSchema;
this.hbaseConf = hbaseConf;
this.writeOptions = writeOptions;
this.nullStringLiteral = nullStringLiteral;
}
@Override
public SinkRuntimeProvider getSinkRuntimeProvider(Context context) {
HBaseSinkFunction<RowData> sinkFunction =
new HBaseSinkFunction<>(
tableName,
hbaseConf,
new RowDataToMutationConverter(hbaseTableSchema, nullStringLiteral),
writeOptions.getBufferFlushMaxSizeInBytes(),
writeOptions.getBufferFlushMaxRows(),
writeOptions.getBufferFlushIntervalMillis());
return SinkFunctionProvider.of(sinkFunction, writeOptions.getParallelism());
}
@Override
public ChangelogMode getChangelogMode(ChangelogMode requestedMode) {
// UPSERT mode
ChangelogMode.Builder builder = ChangelogMode.newBuilder();
for (RowKind kind : requestedMode.getContainedKinds()) {
if (kind != RowKind.UPDATE_BEFORE) {
builder.addContainedKind(kind);
}
}
return builder.build();
}
@Override
public DynamicTableSink copy() {
return new HBaseDynamicTableSink(
tableName, hbaseTableSchema, hbaseConf, writeOptions, nullStringLiteral);
}
@Override
public String asSummaryString() {
return "HBase";
}
// -------------------------------------------------------------------------------------------
@VisibleForTesting
public HBaseTableSchema getHBaseTableSchema() {
return this.hbaseTableSchema;
}
@VisibleForTesting
public HBaseWriteOptions getWriteOptions() {
return writeOptions;
}
@VisibleForTesting
public Configuration getConfiguration() {
return this.hbaseConf;
}
@VisibleForTesting
public String getTableName() {
return this.tableName;
}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package connector.hbase.flink14.source;
import connector.hbase.flink14base.source.TableInputSplit;
import connector.hbase.flink14base.util.HBaseConfigurationUtil;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.io.InputFormat;
import org.apache.flink.api.common.io.LocatableInputSplitAssigner;
import org.apache.flink.api.common.io.RichInputFormat;
import org.apache.flink.api.common.io.statistics.BaseStatistics;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.io.InputSplitAssigner;
import org.apache.flink.util.IOUtils;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/** Abstract {@link InputFormat} to read data from HBase tables. */
public abstract class AbstractTableInputFormat<T> extends RichInputFormat<T, TableInputSplit> {
protected static final Logger LOG = LoggerFactory.getLogger(AbstractTableInputFormat.class);
private static final long serialVersionUID = 1L;
// helper variable to decide whether the input is exhausted or not
protected boolean endReached = false;
protected transient Connection connection = null;
protected transient Table table = null;
protected transient RegionLocator regionLocator = null;
protected transient Scan scan = null;
/** HBase iterator wrapper. */
protected ResultScanner resultScanner = null;
protected byte[] currentRow;
protected long scannedRows;
// Configuration is not serializable
protected byte[] serializedConfig;
public AbstractTableInputFormat(org.apache.hadoop.conf.Configuration hConf) {
serializedConfig = HBaseConfigurationUtil.serializeConfiguration(hConf);
}
/**
* Creates a {@link Scan} object and opens the {@link HTable} connection to initialize the HBase
* table.
*
* @throws IOException Thrown, if the connection could not be opened due to an I/O problem.
*/
protected abstract void initTable() throws IOException;
/**
* Returns an instance of Scan that retrieves the required subset of records from the HBase
* table.
*
* @return The appropriate instance of Scan for this use case.
*/
protected abstract Scan getScanner();
/**
* What table is to be read.
*
* <p>Per instance of a TableInputFormat derivative only a single table name is possible.
*
* @return The name of the table
*/
protected abstract String getTableName();
/**
* HBase returns an instance of {@link Result}.
*
* <p>This method maps the returned {@link Result} instance into the output type {@link T}.
*
* @param r The Result instance from HBase that needs to be converted
* @return The appropriate instance of {@link T} that contains the data of Result.
*/
protected abstract T mapResultToOutType(Result r);
@Override
public void configure(Configuration parameters) {}
protected org.apache.hadoop.conf.Configuration getHadoopConfiguration() {
return HBaseConfigurationUtil.deserializeConfiguration(
serializedConfig, HBaseConfigurationUtil.getHBaseConfiguration());
}
/**
* Creates a {@link Scan} object and opens the {@link HTable} connection. The connection is
* opened in this method and closed in {@link #close()}.
*
* @param split The split to be opened.
* @throws IOException Thrown, if the split could not be opened due to an I/O problem.
*/
@Override
public void open(TableInputSplit split) throws IOException {
initTable();
if (split == null) {
throw new IOException("Input split is null!");
}
logSplitInfo("opening", split);
// set scan range
currentRow = split.getStartRow();
scan.setStartRow(currentRow);
scan.setStopRow(split.getEndRow());
resultScanner = table.getScanner(scan);
endReached = false;
scannedRows = 0;
}
@Override
public T nextRecord(T reuse) throws IOException {
if (resultScanner == null) {
throw new IOException("No table result scanner provided!");
}
Result res;
try {
res = resultScanner.next();
} catch (Exception e) {
resultScanner.close();
// workaround for timeout on scan
LOG.warn(
"Error after scan of " + scannedRows + " rows. Retry with a new scanner...", e);
scan.withStartRow(currentRow, false);
resultScanner = table.getScanner(scan);
res = resultScanner.next();
}
if (res != null) {
scannedRows++;
currentRow = res.getRow();
return mapResultToOutType(res);
}
endReached = true;
return null;
}
private void logSplitInfo(String action, TableInputSplit split) {
int splitId = split.getSplitNumber();
String splitStart = Bytes.toString(split.getStartRow());
String splitEnd = Bytes.toString(split.getEndRow());
String splitStartKey = splitStart.isEmpty() ? "-" : splitStart;
String splitStopKey = splitEnd.isEmpty() ? "-" : splitEnd;
String[] hostnames = split.getHostnames();
LOG.info(
"{} split (this={})[{}|{}|{}|{}]",
action,
this,
splitId,
hostnames,
splitStartKey,
splitStopKey);
}
@Override
public boolean reachedEnd() throws IOException {
return endReached;
}
@Override
public void close() throws IOException {
LOG.info("Closing split (scanned {} rows)", scannedRows);
currentRow = null;
IOUtils.closeQuietly(resultScanner);
resultScanner = null;
closeTable();
}
public void closeTable() {
if (table != null) {
try {
table.close();
} catch (IOException e) {
LOG.warn("Exception occurs while closing HBase Table.", e);
}
table = null;
}
if (connection != null) {
try {
connection.close();
} catch (IOException e) {
LOG.warn("Exception occurs while closing HBase Connection.", e);
}
connection = null;
}
}
@Override
public TableInputSplit[] createInputSplits(final int minNumSplits) throws IOException {
try {
initTable();
// Get the starting and ending row keys for every region in the currently open table
final Pair<byte[][], byte[][]> keys = regionLocator.getStartEndKeys();
if (keys == null || keys.getFirst() == null || keys.getFirst().length == 0) {
LOG.warn(
"Unexpected region keys: {} appeared in HBase table: {}, all region information are: {}.",
keys,
table,
regionLocator.getAllRegionLocations());
throw new IOException(
"HBase Table expects at least one region in scan,"
+ " please check the HBase table status in HBase cluster");
}
final byte[] startRow = scan.getStartRow();
final byte[] stopRow = scan.getStopRow();
final boolean scanWithNoLowerBound = startRow.length == 0;
final boolean scanWithNoUpperBound = stopRow.length == 0;
final List<TableInputSplit> splits = new ArrayList<>(minNumSplits);
for (int i = 0; i < keys.getFirst().length; i++) {
final byte[] startKey = keys.getFirst()[i];
final byte[] endKey = keys.getSecond()[i];
final String regionLocation =
regionLocator.getRegionLocation(startKey, false).getHostnamePort();
// Test if the given region is to be included in the InputSplit while splitting the
// regions of a table
if (!includeRegionInScan(startKey, endKey)) {
continue;
}
// Find the region on which the given row is being served
final String[] hosts = new String[] {regionLocation};
// Determine if regions contains keys used by the scan
boolean isLastRegion = endKey.length == 0;
if ((scanWithNoLowerBound || isLastRegion || Bytes.compareTo(startRow, endKey) < 0)
&& (scanWithNoUpperBound || Bytes.compareTo(stopRow, startKey) > 0)) {
final byte[] splitStart =
scanWithNoLowerBound || Bytes.compareTo(startKey, startRow) >= 0
? startKey
: startRow;
final byte[] splitStop =
(scanWithNoUpperBound || Bytes.compareTo(endKey, stopRow) <= 0)
&& !isLastRegion
? endKey
: stopRow;
int id = splits.size();
final TableInputSplit split =
new TableInputSplit(
id, hosts, table.getName().getName(), splitStart, splitStop);
splits.add(split);
}
}
LOG.info("Created " + splits.size() + " splits");
for (TableInputSplit split : splits) {
logSplitInfo("created", split);
}
return splits.toArray(new TableInputSplit[splits.size()]);
} finally {
closeTable();
}
}
/**
* Test if the given region is to be included in the scan while splitting the regions of a
* table.
*
* @param startKey Start key of the region
* @param endKey End key of the region
* @return true, if this region needs to be included as part of the input (default).
*/
protected boolean includeRegionInScan(final byte[] startKey, final byte[] endKey) {
return true;
}
@Override
public InputSplitAssigner getInputSplitAssigner(TableInputSplit[] inputSplits) {
return new LocatableInputSplitAssigner(inputSplits);
}
@Override
public BaseStatistics getStatistics(BaseStatistics cachedStatistics) {
return null;
}
@VisibleForTesting
public Connection getConnection() {
return connection;
}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package connector.hbase.flink14.source;
import connector.hbase.flink14base.options.HBaseLookupOptions;
import connector.hbase.flink14base.source.AbstractHBaseDynamicTableSource;
import connector.hbase.flink14base.source.HBaseRowDataLookupFunction;
import connector.hbase.flink14base.util.HBaseTableSchema;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.io.InputFormat;
import org.apache.flink.table.connector.source.AsyncTableFunctionProvider;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.connector.source.TableFunctionProvider;
import org.apache.flink.table.data.RowData;
import org.apache.hadoop.conf.Configuration;
import static org.apache.flink.util.Preconditions.checkArgument;
/** HBase table source implementation. */
public class HBaseDynamicTableSource extends AbstractHBaseDynamicTableSource {
public HBaseDynamicTableSource(
Configuration conf,
String tableName,
HBaseTableSchema hbaseSchema,
String nullStringLiteral,
HBaseLookupOptions lookupOptions) {
super(conf, tableName, hbaseSchema, nullStringLiteral, lookupOptions);
}
@Override
public LookupRuntimeProvider getLookupRuntimeProvider(LookupContext context) {
checkArgument(
context.getKeys().length == 1 && context.getKeys()[0].length == 1,
"Currently, HBase table can only be lookup by single rowkey.");
checkArgument(
hbaseSchema.getRowKeyName().isPresent(),
"HBase schema must have a row key when used in lookup mode.");
checkArgument(
hbaseSchema
.convertsToTableSchema()
.getTableColumn(context.getKeys()[0][0])
.filter(f -> f.getName().equals(hbaseSchema.getRowKeyName().get()))
.isPresent(),
"Currently, HBase table only supports lookup by rowkey field.");
if (lookupOptions.getLookupAsync()) {
return AsyncTableFunctionProvider.of(
new HBaseRowDataAsyncLookupFunction(
conf, tableName, hbaseSchema, nullStringLiteral, lookupOptions));
} else {
return TableFunctionProvider.of(
new HBaseRowDataLookupFunction(
conf, tableName, hbaseSchema, nullStringLiteral, lookupOptions));
}
}
@Override
public DynamicTableSource copy() {
return new HBaseDynamicTableSource(
conf, tableName, hbaseSchema, nullStringLiteral, lookupOptions);
}
@Override
protected InputFormat<RowData, ?> getInputFormat() {
return new HBaseRowDataInputFormat(conf, tableName, hbaseSchema, nullStringLiteral);
}
@VisibleForTesting
public HBaseLookupOptions getLookupOptions() {
return this.lookupOptions;
}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package connector.hbase.flink14.source;
import connector.hbase.flink14base.options.HBaseLookupOptions;
import connector.hbase.flink14base.util.HBaseConfigurationUtil;
import connector.hbase.flink14base.util.HBaseSerde;
import connector.hbase.flink14base.util.HBaseTableSchema;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.metrics.Gauge;
import org.apache.flink.runtime.util.ExecutorThreadFactory;
import org.apache.flink.shaded.guava18.com.google.common.cache.Cache;
import org.apache.flink.shaded.guava18.com.google.common.cache.CacheBuilder;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.functions.AsyncTableFunction;
import org.apache.flink.table.functions.FunctionContext;
import org.apache.flink.util.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Threads;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.concurrent.*;
/**
* The HBaseRowDataAsyncLookupFunction is an implementation that looks up HBase data by rowkey in an
* async fashion. It returns the result as {@link RowData}.
*/
public class HBaseRowDataAsyncLookupFunction extends AsyncTableFunction<RowData> {
private static final Logger LOG =
LoggerFactory.getLogger(HBaseRowDataAsyncLookupFunction.class);
private static final long serialVersionUID = 1L;
private final String hTableName;
private final byte[] serializedConfig;
private final HBaseTableSchema hbaseTableSchema;
private final String nullStringLiteral;
private transient AsyncConnection asyncConnection;
private transient AsyncTable<ScanResultConsumer> table;
private transient HBaseSerde serde;
private final long cacheMaxSize;
private final long cacheExpireMs;
private final int maxRetryTimes;
private transient Cache<Object, RowData> cache;
/** The size for thread pool. */
private static final int THREAD_POOL_SIZE = 16;
public HBaseRowDataAsyncLookupFunction(
Configuration configuration,
String hTableName,
HBaseTableSchema hbaseTableSchema,
String nullStringLiteral,
HBaseLookupOptions lookupOptions) {
this.serializedConfig = HBaseConfigurationUtil.serializeConfiguration(configuration);
this.hTableName = hTableName;
this.hbaseTableSchema = hbaseTableSchema;
this.nullStringLiteral = nullStringLiteral;
this.cacheMaxSize = lookupOptions.getCacheMaxSize();
this.cacheExpireMs = lookupOptions.getCacheExpireMs();
this.maxRetryTimes = lookupOptions.getMaxRetryTimes();
}
@Override
public void open(FunctionContext context) {
LOG.info("start open ...");
final ExecutorService threadPool =
Executors.newFixedThreadPool(
THREAD_POOL_SIZE,
new ExecutorThreadFactory(
"hbase-async-lookup-worker", Threads.LOGGING_EXCEPTION_HANDLER));
Configuration config = prepareRuntimeConfiguration();
CompletableFuture<AsyncConnection> asyncConnectionFuture =
ConnectionFactory.createAsyncConnection(config);
try {
asyncConnection = asyncConnectionFuture.get();
table = asyncConnection.getTable(TableName.valueOf(hTableName), threadPool);
this.cache =
cacheMaxSize <= 0 || cacheExpireMs <= 0
? null
: CacheBuilder.newBuilder()
.recordStats()
.expireAfterWrite(cacheExpireMs, TimeUnit.MILLISECONDS)
.maximumSize(cacheMaxSize)
.build();
if (cache != null && context != null) {
context.getMetricGroup()
.gauge("lookupCacheHitRate", (Gauge<Double>) () -> cache.stats().hitRate());
}
} catch (InterruptedException | ExecutionException e) {
LOG.error("Exception while creating connection to HBase.", e);
throw new RuntimeException("Cannot create connection to HBase.", e);
}
this.serde = new HBaseSerde(hbaseTableSchema, nullStringLiteral);
LOG.info("end open.");
}
/**
* The invoke entry point of the lookup function.
*
* @param future The result or exception is returned.
* @param rowKey the lookup key. Currently only a single rowkey is supported.
*/
public void eval(CompletableFuture<Collection<RowData>> future, Object rowKey) {
int currentRetry = 0;
if (cache != null) {
RowData cacheRowData = cache.getIfPresent(rowKey);
if (cacheRowData != null) {
if (cacheRowData.getArity() == 0) {
future.complete(Collections.emptyList());
} else {
future.complete(Collections.singletonList(cacheRowData));
}
return;
}
}
// fetch result
fetchResult(future, currentRetry, rowKey);
}
/**
* Executes an async fetch of the result.
*
* @param resultFuture The result or exception is returned.
* @param currentRetry Current number of retries.
* @param rowKey the lookup key.
*/
private void fetchResult(
CompletableFuture<Collection<RowData>> resultFuture, int currentRetry, Object rowKey) {
Get get = serde.createGet(rowKey);
CompletableFuture<Result> responseFuture = table.get(get);
responseFuture.whenCompleteAsync(
(result, throwable) -> {
if (throwable != null) {
if (throwable instanceof TableNotFoundException) {
LOG.error("Table '{}' not found ", hTableName, throwable);
resultFuture.completeExceptionally(
new RuntimeException(
"HBase table '" + hTableName + "' not found.",
throwable));
} else {
LOG.error(
String.format(
"HBase asyncLookup error, retry times = %d",
currentRetry),
throwable);
if (currentRetry >= maxRetryTimes) {
resultFuture.completeExceptionally(throwable);
} else {
try {
Thread.sleep(1000 * currentRetry);
} catch (InterruptedException e1) {
resultFuture.completeExceptionally(e1);
}
fetchResult(resultFuture, currentRetry + 1, rowKey);
}
}
} else {
if (result.isEmpty()) {
resultFuture.complete(Collections.emptyList());
if (cache != null) {
cache.put(rowKey, new GenericRowData(0));
}
} else {
if (cache != null) {
RowData rowData = serde.convertToNewRow(result);
resultFuture.complete(Collections.singletonList(rowData));
cache.put(rowKey, rowData);
} else {
resultFuture.complete(
Collections.singletonList(serde.convertToNewRow(result)));
}
}
}
});
}
private Configuration prepareRuntimeConfiguration() {
// Create the default configuration from the current runtime env (`hbase-site.xml` in classpath)
// first, and overwrite it with the serialized configuration from the client-side env
// (`hbase-site.xml` in classpath). User params from the client side have the highest priority.
Configuration runtimeConfig =
HBaseConfigurationUtil.deserializeConfiguration(
serializedConfig, HBaseConfigurationUtil.getHBaseConfiguration());
// do