总结Flink Table & SQL 中,如何将一个数组(Array)或对象(Map)展开。
测试数据
用datagen连接器构造测试数据。DDL如下:
CREATE TABLE source_table (
userId INT,
eventTime as '2021-10-01 08:08:08',
eventType as 'click',
productId INT,
-- 数组(Array)类型
productImages as ARRAY['image1','image2'],
-- 对象(Map)类型
pageInfo as MAP['pageId','1','pageName','yyds']
) WITH (
'connector' = 'datagen',
'number-of-rows' = '2',
'fields.userId.kind' = 'random',
'fields.userId.min' = '2',
'fields.userId.max' = '2',
'fields.productId.kind' = 'sequence',
'fields.productId.start' = '1',
'fields.productId.end' = '2'
);
测试数据如下:
userId eventTime eventType productId productImages pageInfo
2 2021-10-01 08:08:08 click 1 [image1, image2] {pageId=1, pageName=yyds}
2 2021-10-01 08:08:08 click 2 [image1, image2] {pageId=1, pageName=yyds}
展开Array
1. 单列多行
将数组展开为单列多行。
基于UNNEST
SELECT userId,eventTime,eventType,productId,productImage
FROM source_table, UNNEST(productImages) as t(productImage);
基于UDTF
package com.bigdata.flink.sql.udtf;
import org.apache.flink.table.annotation.DataTypeHint;
import org.apache.flink.table.annotation.FunctionHint;
import org.apache.flink.table.functions.TableFunction;
@FunctionHint(
input = @DataTypeHint("ARRAY<String>"),
output = @DataTypeHint("String"))
public class ExpandArrayOneColumnMultRowUDTF extends TableFunction {
public void eval(String... productImages) {
for (String productImage : productImages) {
collect(productImage);
}
}
}
// SQL
SELECT userId, eventTime, eventType, productId, productImage FROM source_table
, LATERAL TABLE (ExpandArrayOneColumnMultRowUDTF(`productImages`)) AS t(productImage);
结果示例
userId eventTime eventType productId productImage
2 2021-10-01 08:08:08 click 1 image1
2 2021-10-01 08:08:08 click 1 image2
2 2021-10-01 08:08:08 click 2 image1
2 2021-10-01 08:08:08 click 2 image2
展开Map
1. 多列单行
将Map展开为多列单行。
基于UNNEST
// 默认,用这种方式即可展开Map
SELECT userId,eventTime,eventType,productId,pageInfo['pageId'] as pageId,pageInfo['pageName'] as pageName
FROM source_table;
基于UDTF
package com.bigdata.flink.sql.udtf;
import org.apache.flink.table.annotation.DataTypeHint;
import org.apache.flink.table.annotation.FunctionHint;
import org.apache.flink.table.functions.TableFunction;
import org.apache.flink.types.Row;
import java.util.Map;
@FunctionHint(
input = @DataTypeHint("MAP<STRING, STRING>"),
output = @DataTypeHint("ROW<pageId STRING, pageName STRING>"))
public class ExpandMapMultColumnOneRowUDTF extends TableFunction {
public void eval(Map<String, String> pageInfo) {
// 原来的一行,对应输出一行
collect(Row.of(pageInfo.get("pageId"), pageInfo.get("pageName")));
}
}
// SQL使用
SELECT userId,eventTime,eventType,productId,pageId,pageName FROM source_table
, LATERAL TABLE (ExpandMapMultColumnOneRowUDTF(`pageInfo`)) AS t(pageId,pageName)
结果示例
userId eventTime eventType productId pageId pageName
2 2021-10-01 08:08:08 click 1 1 yyds
2 2021-10-01 08:08:08 click 2 1 yyds
2. 多列多行
将Map展开为多列多行。
基于UNNEST
SELECT userId,eventTime,eventType,productId,mapKey,mapValue
FROM source_table, UNNEST(pageInfo) as t(mapKey,mapValue);
基于UDTF
package com.bigdata.flink.sql.udtf;
import org.apache.flink.table.annotation.DataTypeHint;
import org.apache.flink.table.annotation.FunctionHint;
import org.apache.flink.table.functions.TableFunction;
import org.apache.flink.types.Row;
import java.util.Map;
@FunctionHint(
input = @DataTypeHint("MAP<STRING, STRING>"),
output = @DataTypeHint("ROW<mapKey STRING, mapValue STRING>"))
public class ExpandMapMultColumnMultRowUDTF extends TableFunction {
public void eval(Map<String, String> pageInfo) {
for (Map.Entry<String, String> entry : pageInfo.entrySet()) {
// 原来的一行,每个Key都输出一行
collect(Row.of(entry.getKey(), entry.getValue()));
}
}
}
// SQL使用
SELECT userId,eventTime,eventType,productId,mapKey,mapValue FROM source_table
, LATERAL TABLE (ExpandMapMultColumnMultRowUDTF(`pageInfo`)) AS t(mapKey,mapValue)
结果示例
userId eventTime eventType productId mapKey mapValue
2 2021-10-01 08:08:08 click 1 pageId 1
2 2021-10-01 08:08:08 click 1 pageName yyds
2 2021-10-01 08:08:08 click 2 pageId 1
2 2021-10-01 08:08:08 click 2 pageName yyds
本文详细介绍了如何在Apache Flink SQL中通过UNNEST和用户定义表函数,分别将数组(Array)和对象(Map)转换为单列多行或多列单/多行格式。通过实例展示了如何使用UNNEST和自定义UDTF来实现数据展平操作。
780

被折叠的 条评论
为什么被折叠?



