SQL 基础版
数据流通
-- ********************************************************************--
-- Author: Write your name here
-- Created Time: 2023-09-11 17:17:13
-- Description: Write your description here
-- Hints: You can use SET statements to modify the configuration
--********************************************************************--
-- 创建kafka对应的source表
DROP TEMPORARY TABLE IF EXISTS source_kafka01;
CREATE TEMPORARY TABLE source_kafka01 (
`user_id` STRING
,`uName` STRING
,`uId` BIGINT
-- kafka元信息列(阿里云flink-kafka连接)
-- https://help.aliyun.com/zh/flink/developer-reference/kafka-connector#section-5w6-fzc-ex2
,`ts` TIMESTAMP_LTZ(3) METADATA FROM 'timestamp' VIRTUAL
) WITH (
'connector' = 'kafka',
'topic' = 'sk_test_01',
'properties.bootstrap.servers' = 'vpc:port',
'scan.startup.mode' = 'earliest-offset',
'key.fields' = 'user_id',
'key.format' = 'raw'
--从指定的毫秒时间戳1655395200000开始消费
-- 'scan.startup.mode' = 'timestamp',
-- 'scan.startup.timestamp-millis' = '1655395200000',
,'value.format' = 'json'
,'value.fields-include' = 'EXCEPT_KEY'
-- ,'json.ignore-parse-errors' = 'true'
-- ,'value.fields-include' = 'ALL'
)
;
-- 创建对应的holo sink表
DROP TEMPORARY TABLE IF EXISTS sink_hologres;
CREATE TEMPORARY TABLE sink_hologres (
`user_id` STRING,
`uName` STRING,
`uId` BIGINT,
ts BIGINT
) WITH (
'connector'='hologres',
'dbname'='test_sk',
'tablename'='test02',
'username' = 'xxx',
'password' = 'xxx',
-- 使用指定vpc网络,经典网络使用失败
'endpoint'='vpc:port',
'field_delimiter'='|' --该参数可选。
);
INSERT INTO sink_hologres
SELECT
`user_id`
,`uName`
,`uId`
-- 强制转换数据(flink官网查询)
-- https://nightlies.apache.org/flink/flink-docs-release-1.17/docs/dev/table/functions/systemfunctions/#temporal-functions
,UNIX_TIMESTAMP(CAST(ts AS STRING)) AS `ts`
FROM source_kafka01
;
TopN基础版
TopN和离线版的理论上是差不多的,都是增加排序函数,分成两种形式
1、流动数据是一条条触发
2、窗口TopN是水位线触发
SELECT *
FROM (
SELECT `user_id`
,`uName`
, `uId`
, ROW_NUMBER() OVER (PARTITION BY `user_id` ORDER BY `ts` DESC) AS row_num
FROM source_kafka01)
WHERE row_num <= 5
运行结果