近期遇到一个实时flinksql需求,需要根据ids数组字段解析成名称数组字段。。。
其中parent_path存放的内容是点号分割的字符串"1659077318807721985.1659120595539924993.1659121050219255810"
第一步:新建kafka source源
create TEMPORARY table if not exists test_table(
id BIGINT,
name STRING,
`parent_path` STRING,
deleted INT,
proctime AS PROCTIME (),
`partition` INT NOT NULL METADATA VIRTUAL,
`offset` BIGINT NOT NULL METADATA VIRTUAL,
`timestamp` TIMESTAMP
WITH LOCAL TIME ZONE NOT NULL METADATA VIRTUAL, PRIMARY KEY (id) NOT ENFORCED
)
WITH (
'connector' = 'upsert-kafka',
'properties.bootstrap.servers' = '${secret_values.v_kafka_ods_bootstrap_server_2}',
'topic' = 'ods_test_table',
'properties.group.id' = 'test_groupid',
'value.json.ignore-parse-errors' = 'true' ,
'key.format' = 'json',
'value.format' = 'json',
'value.fields-include' = 'EXCEPT_KEY',
'value.json.infer-schema.flatten-nested-columns.enable' = 'false',
'value.json.infer-schema.primitive-as-string' = 'false'
);
第二步:解析parent_path,并一行拆分成三行
SELECT *FROM source_kafka_test_table,
lateral table(string_split(parent_path, '.')) as T(parentpath)
第三步:关联自身表取名称,把三行聚合成一行
select a.id,a.name,a.parent_path,LISTAGG(b.name) names
from ( SELECT id,name,parent_path,parentpath
FROM source_kafka_test_table,
lateral table(string_split(parent_path, '.')) as T(parentpath)
) a left join source_kafka_test_table b on a.parentpath=b.id
group by a.id, a.name,a.parent_path