-- Mysql 建表语句,注意这是在Mysql执行的
CREATE TABLE `dim_behavior` (
`id` int(10) NOT NULL AUTO_INCREMENT COMMENT '自增主键',
`en_behavior` varchar(255) COLLATE utf8_bin DEFAULT NULL COMMENT '英文 行为',
`zh_behavior` varchar(255) COLLATE utf8_bin DEFAULT NULL COMMENT '中文 行为',
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=3 DEFAULT CHARSET=utf8 COLLATE=utf8_bin;
-- Mysql插入两条数据
INSERT INTO `dijie_test`.`dim_behavior`(`id`, `en_behavior`, `zh_behavior`) VALUES (1, 'buy', '购买');
INSERT INTO `dijie_test`.`dim_behavior`(`id`, `en_behavior`, `zh_behavior`) VALUES (2, 'pv', '浏览');
-- 在Flink SQL Client中建立t1
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (
user_id bigint,
item_id bigint,
category_id bigint,
behavior varchar,
ts bigint
) WITH (
'connector.type' = 'filesystem',
'connector.path' = 'hdfs://Desktop:9000/test/UserBehavior.csv',
'format.type' = 'csv',
'format.field-delimiter' = ','
)
;
-- Flink SQL定义t2,用来向Kafka写入数据
DROP TABLE IF EXISTS t2;
CREATE TABLE t2 (
user_id BIGINT,
item_id BIGINT,
category_id BIGINT,
behavior STRING,
ts BIGINT
) WITH (
'update-mode' = 'append',
'connector.type' = 'kafka',
'connector.version' = 'universal',
'connector.topic' = 'zeppelin_01_test',
'connector.properties.zookeeper.connect' = 'Desktop:2181',
'connector.properties.bootstrap.servers' = 'Desktop:9091',
'format.type'='json'
);
insert into t2 select user_id,item_id,category_id,behavior,UNIX_TIMESTAMP() as ts from t1;
DROP TABLE IF EXISTS t3;
CREATE TABLE t3(
user_id BIGINT,
item_id BIGINT,
category_id BIGINT,
behavior STRING,
ts BIGINT,
r_t AS TO_TIMESTAMP(FROM_UNIXTIME(ts,'yyyy-MM-dd HH:mm:ss'),'yyyy-MM-dd HH:mm:ss'),-- 计算列,因为ts是bigint,没法作为水印,所以用UDF转成TimeStamp
WATERMARK FOR r_t AS r_t - INTERVAL '5' SECOND -- 指定水印生成方式
)WITH (
'update-mode' = 'append',
'connector.type' = 'kafka',
'connector.version' = 'universal',
'connector.topic' = 'zeppelin_01_test',
'connector.properties.zookeeper.connect' = 'Desktop:2181',
'connector.properties.bootstrap.servers' = 'Desktop:9091',
'connector.properties.group.id' = 'zeppelin_01_test',
'connector.startup-mode' = 'earliest-offset',
'format.type'='json'
);
CREATE TABLE `dim_behavior` (
`id` int ,
`en_behavior` varchar ,
`zh_behavior` varchar
)WITH (
'connector.type' = 'jdbc',
'connector.url' = 'jdbc:mysql://Desktop:3306/dijie_test',
'connector.table' = 'dim_behavior',
'connector.driver' = 'com.mysql.jdbc.Driver',
'connector.username' = 'appleyuchi',
'connector.password' = 'appleyuchi' ,
'connector.lookup.cache.max-rows' = '5000',
'connector.lookup.cache.ttl' = '10s'
);
select
zh_behavior,
count(distinct user_id) as cnt_distin_user,
tumble_start(c.r_t,interval '10' second) as tumble_start
from
(
select b.*,a.* from (
select *,proctime() as p from t3
) a
left join dim_behavior FOR SYSTEM_TIME AS OF a.p AS b
on a.behavior = b.en_behavior
where b.zh_behavior is not null
) c group by c.zh_behavior,tumble(c.r_t,interval '10' second);
一键复制
编辑
Web IDE
原始数据
按行查看
历史