Following the official User-defined Sources & Sinks documentation, this post implements a custom socket table source that turns data received from a socket into a Table.
Flink version: 1.12.1
The Maven dependencies are listed at the end; if something goes wrong, first check that the dependency versions are correct and consistent.

1. TableSource Implementation
Dynamic Table Factory
To use the table source, first create the table:
CREATE TABLE UserScores (name STRING, score INT)
WITH (
  'connector' = 'socket',
  'hostname' = 'localhost',
  'port' = '9999',
  'byte-delimiter' = '10',
  'format' = 'changelog-csv',
  'changelog-csv.column-delimiter' = '|'
);
The WITH clause in the DDL configures the source. The option keys are user-defined, and parsing and validating them is the job of a DynamicTableFactory implementation. Format-specific keys such as 'changelog-csv.column-delimiter' are forwarded to the format factory with the 'changelog-csv.' prefix stripped.
package test.SocketSourceSql;

import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.configuration.ConfigOptions;
import org.apache.flink.configuration.ReadableConfig;
import org.apache.flink.table.connector.format.DecodingFormat;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.factories.DeserializationFormatFactory;
import org.apache.flink.table.factories.DynamicTableSourceFactory;
import org.apache.flink.table.factories.FactoryUtil;
import org.apache.flink.table.types.DataType;

import java.util.HashSet;
import java.util.Set;

/**
 * @ClassName SocketSqlFactory
 * @Date 2021/8/27 17:41
 * @Version 1.0
 * @Description Factory for the 'socket' connector: parses and validates the WITH options.
 **/
public class SocketSqlFactory implements DynamicTableSourceFactory {

    public static final ConfigOption<String> HOSTNAME = ConfigOptions.key("hostname").stringType().noDefaultValue();
    public static final ConfigOption<Integer> PORT = ConfigOptions.key("port").intType().noDefaultValue();
    public static final ConfigOption<Integer> BYTE_DELIMITER = ConfigOptions.key("byte-delimiter")
            .intType()
            .defaultValue(10); // '\n' by default

    /**
     * Matched against 'connector' = '...'
     * @return socket
     */
    @Override
    public String factoryIdentifier() {
        return "socket";
    }

    @Override
    public Set<ConfigOption<?>> requiredOptions() {
        final Set<ConfigOption<?>> options = new HashSet<>();
        options.add(HOSTNAME);
        options.add(PORT);
        options.add(FactoryUtil.FORMAT); // use pre-defined option for format
        return options;
    }

    @Override
    public Set<ConfigOption<?>> optionalOptions() {
        final Set<ConfigOption<?>> options = new HashSet<>();
        options.add(BYTE_DELIMITER);
        return options;
    }

    @Override
    public DynamicTableSource createDynamicTableSource(Context context) {
        final FactoryUtil.TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context);
        // discover the decoding format declared under the 'format' option
        final DecodingFormat<DeserializationSchema<RowData>> decodingFormat = helper.discoverDecodingFormat(
                DeserializationFormatFactory.class, FactoryUtil.FORMAT);
        helper.validate();
        final ReadableConfig options = helper.getOptions();
        final String hostname = options.get(HOSTNAME);
        final int port = options.get(PORT);
        final byte byteDelimiter = (byte) (int) options.get(BYTE_DELIMITER);
        final DataType producedDataType = context.getCatalogTable().getSchema().toPersistedRowDataType();
        return new SocketDynamicTableSource(hostname, port, byteDelimiter, decodingFormat, producedDataType);
    }
}
factoryIdentifier() resolves the 'connector' option, requiredOptions() declares the mandatory options, and optionalOptions() the optional ones.
createDynamicTableSource() then builds a DynamicTableSource from the validated parameters.
Dynamic Table Source
A dynamic table source implements either ScanTableSource or LookupTableSource.
The core of a ScanTableSource is getScanRuntimeProvider(), which creates the runtime SourceFunction that actually reads the data.
package test.SocketSourceSql;

import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.format.DecodingFormat;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.connector.source.ScanTableSource;
import org.apache.flink.table.connector.source.SourceFunctionProvider;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.DataType;

/**
 * @ClassName SocketDynamicTableSource
 * @Date 2021/8/30 9:38
 * @Version 1.0
 * @Description
 **/
public class SocketDynamicTableSource implements ScanTableSource {

    private final String hostname;
    private final int port;
    private final byte byteDelimiter;
    private final DecodingFormat<DeserializationSchema<RowData>> decodingFormat;
    private final DataType producedDataType;

    public SocketDynamicTableSource(
            String hostname,
            int port,
            byte byteDelimiter,
            DecodingFormat<DeserializationSchema<RowData>> decodingFormat,
            DataType producedDataType) {
        this.hostname = hostname;
        this.port = port;
        this.byteDelimiter = byteDelimiter;
        this.decodingFormat = decodingFormat;
        this.producedDataType = producedDataType;
    }

    @Override
    public ChangelogMode getChangelogMode() {
        // the source produces whatever row kinds the format produces
        return decodingFormat.getChangelogMode();
    }

    @Override
    public ScanRuntimeProvider getScanRuntimeProvider(ScanContext runtimeProviderContext) {
        final DeserializationSchema<RowData> deserializer = decodingFormat.createRuntimeDecoder(
                runtimeProviderContext,
                producedDataType);
        final SourceFunction<RowData> sourceFunction = new SocketSourceFunction(
                hostname,
                port,
                byteDelimiter,
                deserializer);
        // 'false' marks the source as unbounded
        return SourceFunctionProvider.of(sourceFunction, false);
    }

    @Override
    public DynamicTableSource copy() {
        return new SocketDynamicTableSource(hostname, port, byteDelimiter, decodingFormat, producedDataType);
    }

    @Override
    public String asSummaryString() {
        return "Socket Table Source";
    }
}
Source Function
From the SourceFunction onward this works exactly like a custom DataStream source: run() reads the data and emits records through the SourceContext. With the default byte-delimiter of 10 ('\n'), every line received on the socket becomes one record.
package test.SocketSourceSql;

import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
import org.apache.flink.table.data.RowData;

import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.net.InetSocketAddress;
import java.net.Socket;

/**
 * @ClassName SocketSourceFunction
 * @Date 2021/8/30 9:56
 * @Version 1.0
 * @Description
 **/
public class SocketSourceFunction extends RichSourceFunction<RowData> implements ResultTypeQueryable<RowData> {

    private final String hostname;
    private final int port;
    private final byte byteDelimiter;
    private final DeserializationSchema<RowData> deserializer;

    private volatile boolean isRunning = true;
    private Socket currentSocket;

    public SocketSourceFunction(String hostname, int port, byte byteDelimiter, DeserializationSchema<RowData> deserializer) {
        this.hostname = hostname;
        this.port = port;
        this.byteDelimiter = byteDelimiter;
        this.deserializer = deserializer;
    }

    @Override
    public TypeInformation<RowData> getProducedType() {
        return deserializer.getProducedType();
    }

    @Override
    public void run(SourceContext<RowData> ctx) throws Exception {
        while (isRunning) {
            // open and consume from socket
            try (final Socket socket = new Socket()) {
                currentSocket = socket;
                socket.connect(new InetSocketAddress(hostname, port), 0);
                try (InputStream stream = socket.getInputStream()) {
                    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
                    int b;
                    while ((b = stream.read()) >= 0) {
                        // buffer until delimiter
                        if (b != byteDelimiter) {
                            buffer.write(b);
                        }
                        // decode and emit record
                        else {
                            ctx.collect(deserializer.deserialize(buffer.toByteArray()));
                            buffer.reset();
                        }
                    }
                }
            } catch (Throwable t) {
                t.printStackTrace(); // print and continue
            }
            // back off before reconnecting
            Thread.sleep(1000);
        }
    }

    @Override
    public void cancel() {
        isRunning = false;
        try {
            currentSocket.close();
        } catch (Throwable t) {
            // ignore
        }
    }
}
2. Data Format Handling
As with the DynamicTableSource, the format has a factory class that parses the format options from the DDL.
ChangelogCsvFormatFactory
package test.SocketSourceSql;

import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.configuration.ConfigOptions;
import org.apache.flink.configuration.ReadableConfig;
import org.apache.flink.table.connector.format.DecodingFormat;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.factories.FactoryUtil;
import org.apache.flink.table.factories.DeserializationFormatFactory;
import org.apache.flink.table.factories.DynamicTableFactory;

import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

public class ChangelogCsvFormatFactory implements DeserializationFormatFactory {

    // define all options statically
    public static final ConfigOption<String> COLUMN_DELIMITER = ConfigOptions.key("column-delimiter")
            .stringType()
            .defaultValue("|");

    @Override
    public String factoryIdentifier() {
        return "changelog-csv";
    }

    @Override
    public Set<ConfigOption<?>> requiredOptions() {
        return Collections.emptySet();
    }

    @Override
    public Set<ConfigOption<?>> optionalOptions() {
        final Set<ConfigOption<?>> options = new HashSet<>();
        options.add(COLUMN_DELIMITER);
        return options;
    }

    @Override
    public DecodingFormat<DeserializationSchema<RowData>> createDecodingFormat(
            DynamicTableFactory.Context context,
            ReadableConfig formatOptions) {
        // either implement your custom validation logic here ...
        // or use the provided helper method
        FactoryUtil.validateFactoryOptions(this, formatOptions);
        // get the validated options
        final String columnDelimiter = formatOptions.get(COLUMN_DELIMITER);
        // create and return the format
        return new ChangelogCsvFormat(columnDelimiter);
    }
}
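Note: Flink discovers both factories via Java SPI, so they must be registered in a service file on the classpath; otherwise neither the 'socket' connector nor the 'changelog-csv' format can be resolved at runtime. The file src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory lists one fully qualified class name per line (the package names here match the listings above):

test.SocketSourceSql.SocketSqlFactory
test.SocketSourceSql.ChangelogCsvFormatFactory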
ChangelogCsvFormat
package test.SocketSourceSql;

import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.format.DecodingFormat;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.connector.source.DynamicTableSource.DataStructureConverter;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.types.RowKind;

import java.util.List;

public class ChangelogCsvFormat implements DecodingFormat<DeserializationSchema<RowData>> {

    private final String columnDelimiter;

    public ChangelogCsvFormat(String columnDelimiter) {
        this.columnDelimiter = columnDelimiter;
    }

    @Override
    @SuppressWarnings("unchecked")
    public DeserializationSchema<RowData> createRuntimeDecoder(
            DynamicTableSource.Context context,
            DataType producedDataType) {
        // create type information for the DeserializationSchema
        final TypeInformation<RowData> producedTypeInfo = context.createTypeInformation(
                producedDataType);
        // most of the code in DeserializationSchema will not work on internal data structures
        // create a converter for conversion at the end
        final DataStructureConverter converter = context.createDataStructureConverter(producedDataType);
        // use logical types during runtime for parsing
        final List<LogicalType> parsingTypes = producedDataType.getLogicalType().getChildren();
        // create runtime class
        return new ChangelogCsvDeserializer(parsingTypes, converter, producedTypeInfo, columnDelimiter);
    }

    @Override
    public ChangelogMode getChangelogMode() {
        // define that this format can produce INSERT and DELETE rows
        return ChangelogMode.newBuilder()
                .addContainedKind(RowKind.INSERT)
                .addContainedKind(RowKind.DELETE)
                .build();
    }
}
ChangelogCsvDeserializer
package test.SocketSourceSql;

import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.table.connector.RuntimeConverter.Context;
import org.apache.flink.table.connector.source.DynamicTableSource.DataStructureConverter;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.table.types.logical.LogicalTypeRoot;
import org.apache.flink.types.Row;
import org.apache.flink.types.RowKind;

import java.util.List;
import java.util.regex.Pattern;

public class ChangelogCsvDeserializer implements DeserializationSchema<RowData> {

    private final List<LogicalType> parsingTypes;
    private final DataStructureConverter converter;
    private final TypeInformation<RowData> producedTypeInfo;
    private final String columnDelimiter;

    public ChangelogCsvDeserializer(
            List<LogicalType> parsingTypes,
            DataStructureConverter converter,
            TypeInformation<RowData> producedTypeInfo,
            String columnDelimiter) {
        this.parsingTypes = parsingTypes;
        this.converter = converter;
        this.producedTypeInfo = producedTypeInfo;
        this.columnDelimiter = columnDelimiter;
    }

    @Override
    public TypeInformation<RowData> getProducedType() {
        // return the type information required by Flink's core interfaces
        return producedTypeInfo;
    }

    @Override
    public void open(InitializationContext context) {
        // converters must be open
        converter.open(Context.create(ChangelogCsvDeserializer.class.getClassLoader()));
    }

    @Override
    public RowData deserialize(byte[] message) {
        // parse the columns including a changelog flag
        final String[] columns = new String(message).split(Pattern.quote(columnDelimiter));
        final RowKind kind = RowKind.valueOf(columns[0]);
        final Row row = new Row(kind, parsingTypes.size());
        for (int i = 0; i < parsingTypes.size(); i++) {
            row.setField(i, parse(parsingTypes.get(i).getTypeRoot(), columns[i + 1]));
        }
        // convert to internal data structure
        return (RowData) converter.toInternal(row);
    }

    private static Object parse(LogicalTypeRoot root, String value) {
        switch (root) {
            case INTEGER:
                return Integer.parseInt(value);
            case VARCHAR:
                return value;
            default:
                throw new IllegalArgumentException();
        }
    }

    @Override
    public boolean isEndOfStream(RowData nextElement) {
        return false;
    }
}
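To make the wire format concrete, here is a minimal standalone sketch of what deserialize() does with one message (the ParseDemo class and the sample record are illustrative only, not part of the connector):

import java.util.regex.Pattern;

// Illustrative walkthrough of the parsing step in ChangelogCsvDeserializer.
public class ParseDemo {
    public static void main(String[] args) {
        String message = "INSERT|Alice|12"; // one changelog-csv record
        // Pattern.quote escapes '|' so it is split literally, not as regex alternation
        String[] columns = message.split(Pattern.quote("|"));
        System.out.println(columns[0]);                   // INSERT -> RowKind.INSERT
        System.out.println(columns[1]);                   // Alice  -> STRING column 'name'
        System.out.println(Integer.parseInt(columns[2])); // 12     -> INT column 'score'
    }
}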
3. Main Program
package test.SocketSourceSql;

import lombok.extern.slf4j.Slf4j;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

/**
 * @Date 2021/8/30 10:52
 * @Version 1.0
 * @Description
 **/
@Slf4j
public class SocketReader {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
        // note: the connector identifier must match factoryIdentifier(), i.e. lowercase 'socket'
        String createTable = "CREATE TABLE UserScores (name STRING, score INT) \n" +
                "WITH ( \n" +
                "  'connector' = 'socket',\n" +
                "  'hostname' = '192.168.51.113',\n" +
                "  'port' = '9999',\n" +
                "  'byte-delimiter' = '10',\n" +
                "  'format' = 'changelog-csv',\n" +
                "  'changelog-csv.column-delimiter' = '|'\n" +
                ")";
        tableEnv.executeSql(createTable);
        String query = "SELECT name, SUM(score) FROM UserScores GROUP BY name";
        tableEnv.executeSql(query).print();
        // alternatively, convert the result to a retract stream:
        // Table table = tableEnv.sqlQuery(query);
        // DataStream<Tuple2<Boolean, Row>> stream = tableEnv.toRetractStream(table, Row.class);
        // stream.print();
        // env.execute();
    }
}
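A quick way to test, assuming netcat is available on the host configured in the DDL (192.168.51.113): start a listener before launching the job, then type records in the changelog-csv wire format, with the RowKind flag first and columns separated by '|':

nc -lk 9999
INSERT|Alice|12
INSERT|Bob|5
DELETE|Alice|12
INSERT|Alice|18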
Result: the query prints a continuously updating changelog of per-name score sums to stdout.

Maven dependencies:
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-java</artifactId>
    <version>${flink.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
    <version>${flink.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
    <version>${flink.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-avro</artifactId>
    <version>${flink.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
    <version>${flink.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_${scala.binary.version}</artifactId>
    <version>${flink.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-core</artifactId>
    <version>${flink.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-common</artifactId>
    <version>${flink.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.ibm.icu/icu4j -->
<dependency>
    <groupId>com.ibm.icu</groupId>
    <artifactId>icu4j</artifactId>
    <version>4.6.1</version>
</dependency>
<dependency>
    <groupId>org.projectlombok</groupId>
    <artifactId>lombok</artifactId>
    <version>1.18.20</version>
</dependency>
<dependency>
    <groupId>org.slf4j</groupId>
    <artifactId>slf4j-log4j12</artifactId>
    <version>1.7.7</version>
    <scope>runtime</scope>
</dependency>
<dependency>
    <groupId>log4j</groupId>
    <artifactId>log4j</artifactId>
    <version>1.2.17</version>
    <scope>runtime</scope>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-api-java</artifactId>
    <version>${flink.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-uber-blink_${scala.binary.version}</artifactId>
    <version>${flink.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.google.guava/guava -->
<dependency>
    <groupId>com.google.guava</groupId>
    <artifactId>guava</artifactId>
    <version>30.1.1-jre</version>
</dependency>
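The Flink artifacts above reference two Maven properties; with the versions used in this post they would be defined as follows (a sketch, assuming a standard pom layout):

<properties>
    <flink.version>1.12.1</flink.version>
    <scala.binary.version>2.11</scala.binary.version>
</properties>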