前言:
DataType 和 LogicalType 的主要区别是:DataType 内部持有 LogicalType,并额外增加了对 conversionClass 的支持。
对于LogicalType官方给出了正反序列化的方式
(1)序列化 : asSummaryString()
(2)反序列化:org.apache.flink.table.types.logical.utils.LogicalTypeParser#parse(java.lang.String) 方法即可。
DataType的toString() 方法也是调用其内部持有的logicalType的asSummaryString()
所以重点就是怎么能在序列化和反序列化中支持 conversionClass
一、序列化Logical再set conversionClass
(仅支持AtomicDataType的DataType)
自定义注解实现正反序列化:对 LogicalType 使用官方方案,再用 bridgedTo() 方法设置 conversionClass
由于获取并保存conversionClass时候仅获取并保存了当前的,并没有递归的获取如row类型各个字段的conversionClass,所以仅支持简单类型。
序列化-DataTypeSerializer类
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializerProvider;
import com.fasterxml.jackson.databind.ser.std.StdSerializer;
import org.apache.flink.table.types.DataType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
/**
 * Jackson serializer that writes a Flink {@link DataType} as one JSON string of the form
 * {@code <DataType.toString()>&<conversion class as JSON>}.
 *
 * <p>Only the conversion class of the top-level type is recorded; the conversion classes of
 * nested types (for example the fields of a ROW) are not captured, so this format is only
 * suitable for simple types.
 */
public class DataTypeSerializer extends StdSerializer<DataType> {
    private static final Logger LOG = LoggerFactory.getLogger(DataTypeSerializer.class);

    /** Delimiter between the type string and the JSON-encoded conversion class. */
    public static final String SEPARATOR = "&";

    /** Mapper used to encode the conversion class. */
    public static ObjectMapper om = new ObjectMapper();

    protected DataTypeSerializer(Class<DataType> t) {
        super(t);
    }

    public DataTypeSerializer() {
        this(null);
    }

    /**
     * Writes {@code value} as {@code typeString + SEPARATOR + conversionClassJson}, or a JSON
     * null when {@code value} is null.
     *
     * @param value    the data type to serialize, may be null
     * @param gen      generator receiving the output
     * @param provider unused
     * @throws IOException if the generator or the mapper fails to write
     */
    @Override
    public void serialize(DataType value, JsonGenerator gen, SerializerProvider provider) throws IOException {
        if (value == null) {
            gen.writeNull();
            return;
        }
        String main = value.toString();
        Class<?> conversionClass = value.getConversionClass();
        String conversionClassStr = om.writeValueAsString(conversionClass);
        // Parameterized logging: the message is only formatted when DEBUG is enabled.
        LOG.debug("serialize DataType:{}, conversion class: {}", main, conversionClass);
        gen.writeString(main + SEPARATOR + conversionClassStr);
    }
}
反序列化-DataTypeDeserializer类
import com.fasterxml.jackson.core.JacksonException;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.deser.std.StdDeserializer;
import com.h3c.it_bigdata.module.transfer.util.type.CommonDataTypeFactory;
import org.apache.commons.lang.StringUtils;
import org.apache.flink.table.types.DataType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
/**
 * Jackson deserializer for the {@code <logical type>&<conversion class as JSON>} string format:
 * the logical type part is parsed through a {@code DataTypeFactory} and the resulting
 * {@link DataType} is bridged to the recorded conversion class.
 */
public class DataTypeDeserializer extends StdDeserializer<DataType> {
    private static final Logger LOG = LoggerFactory.getLogger(DataTypeDeserializer.class);
    private static final ObjectMapper om = new ObjectMapper();

    /** Delimiter between the logical type string and the JSON-encoded conversion class. */
    public static final String SEPARATOR = "&";

    // NOTE(review): not referenced anywhere in this class; retained as in the original.
    private static final CommonDataTypeFactory factory = new CommonDataTypeFactory();

    public DataTypeDeserializer(Class<?> vc) {
        super(vc);
    }

    public DataTypeDeserializer() {
        this(null);
    }

    /**
     * Rebuilds a {@link DataType} from its string form.
     *
     * @param p   parser positioned on the string value
     * @param ctx deserialization context, used to report malformed input
     * @return the bridged data type, or {@code null} when the value is null or blank
     * @throws IOException if the value does not have the expected two-part shape or the
     *                     conversion class cannot be read
     */
    @Override
    public DataType deserialize(JsonParser p, DeserializationContext ctx) throws IOException, JacksonException {
        String str = p.getValueAsString();
        if (StringUtils.isBlank(str)) {
            return null;
        }

        // Split on the LAST separator: the trailing part is a JSON-quoted class name, which
        // cannot contain the separator, whereas the type string in front of it might.
        int separatorIdx = str.lastIndexOf(SEPARATOR);
        boolean missingTypePart = separatorIdx <= 0;
        boolean missingClassPart = separatorIdx + SEPARATOR.length() >= str.length();
        if (missingTypePart || missingClassPart) {
            throw ctx.weirdStringException(
                    str,
                    DataType.class,
                    "expected '<logical type>" + SEPARATOR + "<conversion class>'");
        }
        String lgTypeStr = str.substring(0, separatorIdx);
        String conversionClassStr = str.substring(separatorIdx + SEPARATOR.length());

        Class<?> conversionClass = om.readValue(conversionClassStr, Class.class);

        // Parse only the logical type part, and keep the instance returned by bridgedTo():
        // it returns the bridged type rather than modifying the receiver.
        DataType dataType = TypeUtils.getDataTypeFactory()
                .createDataType(lgTypeStr)
                .bridgedTo(conversionClass);

        LOG.debug("deserialize DataType:{}, conversion class:{}", dataType.getLogicalType(), conversionClass);
        return dataType;
    }
}
DataType实例化工具,本质就是调用CatalogManager 的 getDataTypeFactory() 来构造DataType。
因为 org.apache.flink.table.catalog.DataTypeFactoryImpl 的访问权限是包级私有(package-private),不是 public,无法直接实例化,需要借助 CatalogManager 来获取。
其实如果不使用DataTypeFactory 也是可以的。直接使用 switch(logicalType.getLogicalTypeRoot()){}
进行逐个枚举,再 return DataTypes. ***
也是可以的。这里偷了一个懒。
/**
 * Helper for obtaining a {@code DataTypeFactory} through a freshly built {@code CatalogManager}.
 */
public class TypeUtils {

    /**
     * Builds a {@code CatalogManager} backed by an in-memory default catalog (named after the
     * built-in catalog/database of default {@code EnvironmentSettings}) and returns its
     * data type factory. A new manager is built on every call.
     *
     * @return the data type factory of the newly built catalog manager
     */
    public static DataTypeFactory getDataTypeFactory() {
        EnvironmentSettings envSettings = EnvironmentSettings.newInstance().build();
        String builtInCatalog = envSettings.getBuiltInCatalogName();
        GenericInMemoryCatalog defaultCatalog =
                new GenericInMemoryCatalog(builtInCatalog, envSettings.getBuiltInDatabaseName());

        return CatalogManager.newBuilder()
                .classLoader(Thread.currentThread().getContextClassLoader())
                .config(new Configuration())
                .defaultCatalog(builtInCatalog, defaultCatalog)
                .build()
                .getDataTypeFactory();
    }
}
二、使用jackson反序列化 javabean再递归set conversionClass
可以配置(支持Row、Map、Array、Structured类型)
序列化 使用ObjectMapper序列化成字符串。
反序列化:先生成java bean,再在java bean 中构造getDataType()使其能够递归调用。生成DataType
序列化-DataTypeSerializer类
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializerProvider;
import com.fasterxml.jackson.databind.ser.std.StdSerializer;
import org.apache.flink.table.types.DataType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
/**
 * Jackson serializer that renders a {@link DataType} to JSON with a plain {@link ObjectMapper}
 * and emits that JSON text as a single string value.
 */
public class DataTypeSerializer extends StdSerializer<DataType> {

    /** Mapper that produces the JSON text for the data type. */
    public static ObjectMapper om = new ObjectMapper();

    public DataTypeSerializer() {
        this(null);
    }

    protected DataTypeSerializer(Class<DataType> t) {
        super(t);
    }

    /**
     * Writes a JSON null for a null {@code value}; otherwise writes the mapper's JSON rendering
     * of {@code value} as a string.
     *
     * @throws IOException if the mapper or the generator fails
     */
    @Override
    public void serialize(DataType value, JsonGenerator gen, SerializerProvider provider) throws IOException {
        if (value == null) {
            gen.writeNull();
            return;
        }
        gen.writeString(om.writeValueAsString(value));
    }
}
反序列化-DataTypeDeserializer类
import com.fasterxml.jackson.core.JacksonException;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.deser.std.StdDeserializer;
import org.apache.commons.lang.StringUtils;
import org.apache.flink.table.types.DataType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
/**
 * Jackson deserializer that reads a string value containing JSON, maps it onto a
 * {@code DataTypeProto}, and asks the proto to build the {@link DataType}.
 */
public class DataTypeDeserializer extends StdDeserializer<DataType> {

    private static ObjectMapper om = new ObjectMapper();

    public DataTypeDeserializer() {
        this(null);
    }

    public DataTypeDeserializer(Class<?> vc) {
        super(vc);
    }

    /**
     * @return the rebuilt data type, or {@code null} when the string value is null or blank
     * @throws IOException      if the embedded JSON cannot be mapped to a {@code DataTypeProto}
     * @throws RuntimeException wrapping a {@link ClassNotFoundException} raised while the proto
     *                          builds the data type
     */
    @Override
    public DataType deserialize(JsonParser p, DeserializationContext ctx) throws IOException, JacksonException {
        String json = p.getValueAsString();
        if (StringUtils.isBlank(json)) {
            return null;
        }

        DataTypeProto proto = om.readValue(json, DataTypeProto.class);
        try {
            return proto.getDataType();
        } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
    }
}
DataType原型-DataTypeProto类
其中对Structured的转换不太确定。
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.logical.LogicalTypeRoot;
import java.util.*;
/**
 * JSON-mappable mirror of a Flink {@link DataType}: the logical type description, the name of
 * the conversion class, and the child data types. {@link #getDataType()} turns the mirror back
 * into a real {@link DataType}, recursing into {@code children} so that nested types keep their
 * own conversion class.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
@JsonIgnoreProperties(ignoreUnknown = true)
public class DataTypeProto {
    /** Description of the logical type (root, nullability, length/precision/scale, names). */
    private LogicalTypeProto logicalType;

    /** Name of the conversion class, e.g. {@code java.lang.Long}. */
    private String conversionClass;

    /** Child data types: element for ARRAY/MULTISET, key and value for MAP, fields for ROW/STRUCTURED. */
    private List<DataTypeProto> children;

    /**
     * Builds the data type and applies the nullability recorded in {@code logicalType}.
     *
     * @return the reconstructed data type
     * @throws ClassNotFoundException if a conversion class name cannot be resolved
     * @throws UnsupportedOperationException if the type root is not supported
     */
    @JsonIgnore
    public DataType getDataType() throws ClassNotFoundException {
        DataType dt = getDataTypeWithoutNullable();
        return logicalType.isNullable() ? dt.nullable() : dt.notNull();
    }

    /**
     * Builds the data type for this node, bridged to {@code conversionClass}, without applying
     * the recorded nullability. Children are built through {@link #getDataType()}, so their
     * nullability is applied.
     *
     * @return the bridged data type
     * @throws ClassNotFoundException if a conversion class name cannot be resolved
     * @throws UnsupportedOperationException if the type root is not supported
     */
    @JsonIgnore
    public DataType getDataTypeWithoutNullable() throws ClassNotFoundException {
        LogicalTypeRoot logicalTypeRoot = getLogicalTypeRoot();
        switch (logicalTypeRoot) {
            case CHAR:
                return bridged(DataTypes.CHAR(logicalType.getLength()));
            case VARCHAR:
                return bridged(DataTypes.VARCHAR(logicalType.getLength()));
            case BOOLEAN:
                return bridged(DataTypes.BOOLEAN());
            case BINARY:
                return bridged(DataTypes.BINARY(logicalType.getLength()));
            case VARBINARY:
                return bridged(DataTypes.VARBINARY(logicalType.getLength()));
            case DECIMAL:
                return bridged(DataTypes.DECIMAL(logicalType.getPrecision(), logicalType.getScale()));
            case TINYINT:
                return bridged(DataTypes.TINYINT());
            case SMALLINT:
                return bridged(DataTypes.SMALLINT());
            case INTEGER:
                return bridged(DataTypes.INT());
            case BIGINT:
                return bridged(DataTypes.BIGINT());
            case FLOAT:
                return bridged(DataTypes.FLOAT());
            case DOUBLE:
                return bridged(DataTypes.DOUBLE());
            case DATE:
                return bridged(DataTypes.DATE());
            case TIME_WITHOUT_TIME_ZONE:
                return bridged(DataTypes.TIME(logicalType.getPrecision()));
            case TIMESTAMP_WITHOUT_TIME_ZONE:
                return bridged(DataTypes.TIMESTAMP(logicalType.getPrecision()));
            case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
                return bridged(DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE(logicalType.getPrecision()));
            case TIMESTAMP_WITH_TIME_ZONE:
                return bridged(DataTypes.TIMESTAMP_WITH_TIME_ZONE(logicalType.getPrecision()));
            case ARRAY:
                return bridged(DataTypes.ARRAY(children.get(0).getDataType()));
            case MULTISET:
                return bridged(DataTypes.MULTISET(children.get(0).getDataType()));
            case MAP:
                return bridged(DataTypes.MAP(children.get(0).getDataType(), children.get(1).getDataType()));
            case ROW:
                // Field i takes its type from children[i] and its name from fieldNames[i].
                List<DataTypes.Field> rowFields = new ArrayList<>();
                for (int i = 0; i < children.size(); i++) {
                    DataType rowFieldDataType = children.get(i).getDataType();
                    String rowFieldName = logicalType.getFieldNames().get(i);
                    rowFields.add(DataTypes.FIELD(rowFieldName, rowFieldDataType));
                }
                return bridged(DataTypes.ROW(rowFields.toArray(new DataTypes.Field[0])));
            case NULL:
                return bridged(DataTypes.NULL());
            case STRUCTURED_TYPE:
                // Attribute i takes its type from children[i] and its name from attributes[i].
                List<DataTypes.Field> structFields = new ArrayList<>();
                for (int i = 0; i < children.size(); i++) {
                    DataType attributeDataType = children.get(i).getDataType();
                    String attributeName = logicalType.getAttributes().get(i).getName();
                    structFields.add(DataTypes.FIELD(attributeName, attributeDataType));
                }
                // NOTE(review): the STRUCTURED mapping was marked as unverified by the author.
                return bridged(
                        DataTypes.STRUCTURED(
                                resolveConversionClass(),
                                structFields.toArray(new DataTypes.Field[0])));
            case DISTINCT_TYPE:
            case RAW:
            case SYMBOL:
            case INTERVAL_YEAR_MONTH:
            case INTERVAL_DAY_TIME:
            default:
                throw new UnsupportedOperationException("Unsupported type: " + logicalTypeRoot);
        }
    }

    /**
     * @return the {@link LogicalTypeRoot} constant named by {@code logicalType.typeRoot}
     */
    @JsonIgnore
    public LogicalTypeRoot getLogicalTypeRoot() {
        return LogicalTypeRoot.valueOf(logicalType.getTypeRoot());
    }

    /** Bridges {@code unbridged} to the conversion class recorded on this node. */
    private DataType bridged(DataType unbridged) throws ClassNotFoundException {
        return unbridged.bridgedTo(resolveConversionClass());
    }

    /**
     * Resolves {@code conversionClass} to a {@link Class}. Primitive names are mapped explicitly
     * because {@link Class#forName(String)} cannot look up primitive types by name.
     */
    private Class<?> resolveConversionClass() throws ClassNotFoundException {
        switch (conversionClass) {
            case "boolean":
                return boolean.class;
            case "byte":
                return byte.class;
            case "short":
                return short.class;
            case "char":
                return char.class;
            case "int":
                return int.class;
            case "long":
                return long.class;
            case "float":
                return float.class;
            case "double":
                return double.class;
            default:
                return Class.forName(conversionClass);
        }
    }
}
LogicalType原型-LogicalTypeProto类
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.util.List;
/**
 * JSON-mappable subset of a logical type's properties, holding only what DataTypeProto reads
 * when rebuilding a DataType. Unknown JSON properties are ignored.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
@JsonIgnoreProperties(ignoreUnknown = true)
public class LogicalTypeProto {
// Name of the type root; DataTypeProto converts it with LogicalTypeRoot.valueOf.
private String typeRoot;
// Whether the type is nullable; decides between nullable() and notNull() in DataTypeProto.
private boolean nullable;
// Precision, read by DataTypeProto for DECIMAL, TIME and the TIMESTAMP variants.
private int precision;
// Scale, read by DataTypeProto for DECIMAL.
private int scale;
// Length, read by DataTypeProto for CHAR, VARCHAR, BINARY and VARBINARY.
private int length;
// Field names of a ROW, matched by index with DataTypeProto's children.
private List<String> fieldNames;
// Attributes of a STRUCTURED_TYPE, matched by index with DataTypeProto's children.
private List<Attribute> attributes;
/** A structured-type attribute; only its name is mapped. */
@Data
@AllArgsConstructor
@NoArgsConstructor
@JsonIgnoreProperties(ignoreUnknown = true)
public static class Attribute {
// Attribute name, used as the field name when the structured type is rebuilt.
private String name;
}
}
三、jackson序列化举例
下面是Row类型的DataType
// Nested ROW used for the "info" field; the row itself and its "birth" field are bridged to
// Flink's internal data structures.
DataType infoType = DataTypes.ROW(
        DataTypes.FIELD("telephone", DataTypes.STRING()),
        DataTypes.FIELD("birth", DataTypes.TIMESTAMP(3).bridgedTo(TimestampData.class)))
    .bridgedTo(RowData.class);

// MAP<STRING, TIME(3)> used for the "props" field.
DataType propsType = DataTypes.MAP(DataTypes.STRING(), DataTypes.TIME(3));

// Top-level ROW mixing default and explicitly bridged conversion classes.
DataType rowType = DataTypes.ROW(
    DataTypes.FIELD("id", DataTypes.BIGINT()),
    DataTypes.FIELD("name", DataTypes.STRING().bridgedTo(StringData.class)),
    DataTypes.FIELD("address", DataTypes.STRING(), "home town"),
    DataTypes.FIELD("info", infoType),
    DataTypes.FIELD("age", DataTypes.INT()),
    DataTypes.FIELD("props", propsType)
);
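与之对应的 objectMapper.writeValueAsString(rowType) 的结果如下(注意:这份输出与上面的代码并不完全一致,可能是由示例的较早版本生成的——输出中 info 的第一个字段名是 marige 而不是 telephone,props 的 value 类型是 VARCHAR 而不是 TIME(3),info 和 birth 的 conversionClass 也是默认的 Row 和 LocalDateTime):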
{
"logicalType": {
"typeRoot": "ROW",
"fields": [
{
"name": "id",
"type": {
"typeRoot": "BIGINT",
"children": [],
"defaultConversion": "java.lang.Long",
"nullable": true
},
"description": {
"present": false
}
},
{
"name": "name",
"type": {
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
"description": {
"present": false
}
},
{
"name": "address",
"type": {
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
"description": {
"present": true
}
},
{
"name": "info",
"type": {
"typeRoot": "ROW",
"fields": [
{
"name": "marige",
"type": {
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
"description": {
"present": false
}
},
{
"name": "birth",
"type": {
"typeRoot": "TIMESTAMP_WITHOUT_TIME_ZONE",
"kind": "REGULAR",
"precision": 3,
"children": [],
"defaultConversion": "java.time.LocalDateTime",
"nullable": true
},
"description": {
"present": false
}
}
],
"children": [
{
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
{
"typeRoot": "TIMESTAMP_WITHOUT_TIME_ZONE",
"kind": "REGULAR",
"precision": 3,
"children": [],
"defaultConversion": "java.time.LocalDateTime",
"nullable": true
}
],
"defaultConversion": "org.apache.flink.types.Row",
"fieldNames": [
"marige",
"birth"
],
"fieldCount": 2,
"nullable": true
},
"description": {
"present": false
}
},
{
"name": "age",
"type": {
"typeRoot": "INTEGER",
"children": [],
"defaultConversion": "java.lang.Integer",
"nullable": true
},
"description": {
"present": false
}
},
{
"name": "props",
"type": {
"typeRoot": "MAP",
"keyType": {
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
"valueType": {
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
"children": [
{
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
{
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
}
],
"defaultConversion": "java.util.Map",
"nullable": true
},
"description": {
"present": false
}
}
],
"children": [
{
"typeRoot": "BIGINT",
"children": [],
"defaultConversion": "java.lang.Long",
"nullable": true
},
{
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
{
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
{
"typeRoot": "ROW",
"fields": [
{
"name": "marige",
"type": {
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
"description": {
"present": false
}
},
{
"name": "birth",
"type": {
"typeRoot": "TIMESTAMP_WITHOUT_TIME_ZONE",
"kind": "REGULAR",
"precision": 3,
"children": [],
"defaultConversion": "java.time.LocalDateTime",
"nullable": true
},
"description": {
"present": false
}
}
],
"children": [
{
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
{
"typeRoot": "TIMESTAMP_WITHOUT_TIME_ZONE",
"kind": "REGULAR",
"precision": 3,
"children": [],
"defaultConversion": "java.time.LocalDateTime",
"nullable": true
}
],
"defaultConversion": "org.apache.flink.types.Row",
"fieldNames": [
"marige",
"birth"
],
"fieldCount": 2,
"nullable": true
},
{
"typeRoot": "INTEGER",
"children": [],
"defaultConversion": "java.lang.Integer",
"nullable": true
},
{
"typeRoot": "MAP",
"keyType": {
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
"valueType": {
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
"children": [
{
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
{
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
}
],
"defaultConversion": "java.util.Map",
"nullable": true
}
],
"defaultConversion": "org.apache.flink.types.Row",
"fieldNames": [
"id",
"name",
"address",
"info",
"age",
"props"
],
"fieldCount": 6,
"nullable": true
},
"conversionClass": "org.apache.flink.types.Row",
"children": [
{
"logicalType": {
"typeRoot": "BIGINT",
"children": [],
"defaultConversion": "java.lang.Long",
"nullable": true
},
"conversionClass": "java.lang.Long",
"children": []
},
{
"logicalType": {
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
"conversionClass": "org.apache.flink.table.data.StringData",
"children": []
},
{
"logicalType": {
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
"conversionClass": "java.lang.String",
"children": []
},
{
"logicalType": {
"typeRoot": "ROW",
"fields": [
{
"name": "marige",
"type": {
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
"description": {
"present": false
}
},
{
"name": "birth",
"type": {
"typeRoot": "TIMESTAMP_WITHOUT_TIME_ZONE",
"kind": "REGULAR",
"precision": 3,
"children": [],
"defaultConversion": "java.time.LocalDateTime",
"nullable": true
},
"description": {
"present": false
}
}
],
"children": [
{
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
{
"typeRoot": "TIMESTAMP_WITHOUT_TIME_ZONE",
"kind": "REGULAR",
"precision": 3,
"children": [],
"defaultConversion": "java.time.LocalDateTime",
"nullable": true
}
],
"defaultConversion": "org.apache.flink.types.Row",
"fieldNames": [
"marige",
"birth"
],
"fieldCount": 2,
"nullable": true
},
"conversionClass": "org.apache.flink.types.Row",
"children": [
{
"logicalType": {
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
"conversionClass": "java.lang.String",
"children": []
},
{
"logicalType": {
"typeRoot": "TIMESTAMP_WITHOUT_TIME_ZONE",
"kind": "REGULAR",
"precision": 3,
"children": [],
"defaultConversion": "java.time.LocalDateTime",
"nullable": true
},
"conversionClass": "java.time.LocalDateTime",
"children": []
}
]
},
{
"logicalType": {
"typeRoot": "INTEGER",
"children": [],
"defaultConversion": "java.lang.Integer",
"nullable": true
},
"conversionClass": "java.lang.Integer",
"children": []
},
{
"logicalType": {
"typeRoot": "MAP",
"keyType": {
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
"valueType": {
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
"children": [
{
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
{
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
}
],
"defaultConversion": "java.util.Map",
"nullable": true
},
"conversionClass": "java.util.Map",
"keyDataType": {
"logicalType": {
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
"conversionClass": "java.lang.String",
"children": []
},
"valueDataType": {
"logicalType": {
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
"conversionClass": "java.lang.String",
"children": []
},
"children": [
{
"logicalType": {
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
"conversionClass": "java.lang.String",
"children": []
},
{
"logicalType": {
"typeRoot": "VARCHAR",
"length": 2147483647,
"children": [],
"defaultConversion": "java.lang.String",
"nullable": true
},
"conversionClass": "java.lang.String",
"children": []
}
]
}
]
}