-- 开发过程临时sql,仅供参考 (temporary SQL written during development; for reference only)
-- Connector docs: https://nightlies.apache.org/flink/flink-docs-release-1.18/docs/connectors/table/upsert-kafka/
-- Retain operator state for 1 day (86400000 ms) so daily-grain aggregations
-- can be cleaned up after the day rolls over.
set table.exec.state.ttl=86400000;
-- Kafka source: exposure behavior events, Avro-encoded via Confluent Schema Registry.
-- `time` and `user` are reserved words in Flink SQL and must stay backticked
-- (the backticks were lost when this file was pasted through a renderer).
CREATE TABLE source_behavior_event_exposure_1284 (
    reportInfo row<reportVersion String>,
    `time` row<clientTime String, serverTime String>,
    `user` row<uuri String, pushId String, scrmUserUri String>,
    business String,
    proc_time AS PROCTIME()
)
with (
    'connector' = 'kafka',
    'topic' = 'behavior_event_exposure',
    'properties.bootstrap.servers' = '10.10.10.10:9092',
    'properties.group.id' = 'source_behavior_event_exposure_990',
    'format' = 'avro-confluent',
    'avro-confluent.schema-registry.url' = 'http://aaaa.bbbb.com',
    'avro-confluent.schema-registry.subject' = 'behavior_event_exposure',
    'scan.startup.mode' = 'timestamp',
    'scan.startup.timestamp-millis' = '1700040586303'
);
-- Upsert-Kafka sink: hourly shop exposure counts, keyed by (dt, shop_uri, period_hour).
CREATE TABLE sink_kafka_api_precision_exp_visit_hour_incr_1d_exp (
    dt string
    ,shop_uri string
    ,period_hour bigint
    ,exposure_cnt bigint
    ,server_time bigint
    ,primary key (dt, shop_uri, period_hour) not enforced
) WITH (
    'connector' = 'upsert-kafka',
    'topic' = 'rt_api_precision_exp_visit_hour_incr_1d_exp_1284',
    'properties.bootstrap.servers' = '10.10.10.10:9092',
    'key.format' = 'json',
    'value.format' = 'json'
);
-- Hourly shop exposure aggregation for the current day/hour.
-- NOTE(review): this statement was reconstructed from a paste that was garbled
-- by a Markdown/KaTeX renderer (the '$'-prefixed JSON paths triggered math
-- mode). Most of the predicate text was recoverable from the renderer's
-- fallback output; two fragments were destroyed and are marked TODO below.
insert into sink_kafka_api_precision_exp_visit_hour_incr_1d_exp
-- create view tmp_exposure_shop_sale as
select
    from_unixtime(cast(cast(`time`.serverTime as bigint) / 1000 as bigint), 'yyyy-MM-dd') as dt,
    shopUri as shop_uri,
    HOUR(TO_TIMESTAMP(FROM_UNIXTIME(cast(cast(`time`.serverTime as bigint) / 1000 as bigint)))) as period_hour,
    count(shopUri) as exposure_cnt,
    max(cast(cast(`time`.serverTime as bigint) / 1000 as bigint)) as server_time
    -- a.PROCTIME() AS proctime
from source_behavior_event_exposure_1284 a,
    -- TODO(review): the JSON path argument and the column alias of this
    -- LATERAL TABLE expansion were lost in the paste. `attr`, `shopUri` and
    -- `uuri` used below are expected to come from this expansion — restore
    -- the exact expression from the original job definition.
    LATERAL TABLE(array_explode(get_json_object(business, '$.TODO_lost_json_path')))
where
    -- TODO(review): the whitelist of report versions was lost in the paste.
    split_index(reportVersion, '_', 0) in ('TODO_lost_version_list')
    -- exclude flow-distribution (流量分发) traffic
    and (get_json_object(attr, '$.isFlowDiscovery') is null
         or get_json_object(attr, '$.isFlowDiscovery') <> 'true')
    -- exclude Qingzhu (青竹) marketing/ad traffic: any non-empty marketing /
    -- origin / adType attribute (top-level or nested under attr) disqualifies.
    and if((
            (get_json_object(attr, '$.marketing') is not null and get_json_object(attr, '$.marketing') <> '')
         or (get_json_object(attr, '$.attr.marketing') is not null and get_json_object(attr, '$.attr.marketing') <> '')
         or (get_json_object(attr, '$.origin') is not null and get_json_object(attr, '$.origin') <> '')
         or (get_json_object(attr, '$.attr.origin') is not null and get_json_object(attr, '$.attr.origin') <> '')
         or (get_json_object(attr, '$.adType') is not null and get_json_object(attr, '$.adType') <> '')
         or (get_json_object(attr, '$.attr.adType') is not null and get_json_object(attr, '$.attr.adType') <> '')
        ), 1, 0) = 0
    -- only today's events, only the current hour
    and from_unixtime(cast(cast(`time`.serverTime as bigint) / 1000 as bigint), 'yyyy-MM-dd') = CURRENT_DATE
    and HOUR(TO_TIMESTAMP(FROM_UNIXTIME(cast(cast(a.`time`.serverTime as bigint) / 1000 as bigint)))) = HOUR(CURRENT_TIME)
    -- filter out machine/anonymous ids (longer than 16 chars or 'M'-prefixed)
    and CHAR_LENGTH(cast(uuri as string)) <= 16
    and cast(uuri as string) not like 'M%'
group by
    shopUri,
    HOUR(TO_TIMESTAMP(FROM_UNIXTIME(cast(cast(`time`.serverTime as bigint) / 1000 as bigint)))),
    from_unixtime(cast(cast(`time`.serverTime as bigint) / 1000 as bigint), 'yyyy-MM-dd')
    -- ,a.PROCTIME()
;
-- Kafka source: per-visit records from the dwt new-user-visit topic (JSON).
-- `time` is a reserved word in Flink SQL and must stay backticked.
CREATE TABLE source_KAFKA_new_user_visit_incr_1d_1284 (
    dt String,
    `time` bigint comment '时间',
    userinfo_id string comment '用户id',
    business string,
    report_id string,
    userinfo_uri String comment '用户uri',
    sc String comment 'sc',
    r String comment 'r值',
    last_r String comment 'last_r值',
    sale_uri String comment '拍品uri',
    shop_uri String comment '店铺uri',
    -- event time derived from the epoch-millis `time` field,
    -- with 2 minutes of allowed out-of-orderness
    ts AS TO_TIMESTAMP_LTZ(`time`, 3),
    WATERMARK FOR ts AS ts - INTERVAL '2' MINUTE
)
WITH (
    'connector' = 'kafka',
    -- NOTE(review): '10.10.10.10.58:9092' has five octets — looks like a
    -- redaction/paste error; confirm the real broker address.
    'properties.bootstrap.servers' = '10.10.10.10.58:9092',
    'topic' = 'rt_dwt_new_user_visit_incr_1d',
    'properties.group.id' = 'jade_task_745',
    'format' = 'json',
    'scan.startup.mode' = 'timestamp',
    'scan.startup.timestamp-millis' = '1700040586303'
);
-- Upsert-Kafka sink: hourly shop visit (sale pv) counts, keyed by (dt, shop_uri, period_hour).
CREATE TABLE sink_kafka_api_precision_exp_visit_hour_incr_1d_visit (
    dt string
    ,shop_uri string
    ,period_hour bigint
    ,sale_pv bigint
    ,server_time bigint
    ,primary key (dt, shop_uri, period_hour) not enforced
) WITH (
    'connector' = 'upsert-kafka',
    'topic' = 'rt_api_precision_exp_visit_hour_incr_1d_visit_1284',
    'properties.bootstrap.servers' = '10.10.10.10:9092',
    'key.format' = 'json',
    'value.format' = 'json'
    -- 'connector' = 'logger',
    -- 'print-identifier' = 'test_ysj_Glog'
);
-- Hourly shop visit (sale pv) aggregation for the current day/hour.
-- NOTE(review): the COALESCE predicate below was reconstructed from the
-- renderer's fallback text (the '$' JSON paths had been garbled); all three
-- paths were recoverable.
insert into sink_kafka_api_precision_exp_visit_hour_incr_1d_visit
-- create view tmp_visit_shop_sale as
select
    dt,
    shop_uri,
    HOUR(TO_TIMESTAMP(FROM_UNIXTIME(cast(`time` / 1000 as bigint)))) as period_hour,
    count(sale_uri) as sale_pv,
    max(cast(`time` / 1000 as bigint)) as server_time
from source_KAFKA_new_user_visit_incr_1d_1284
where
    shop_uri is not null
    and sale_uri is not null
    and sale_uri <> ''
    -- and sc not like '%qz%'
    -- exclude marketing/ad traffic: any non-empty marketing / origin / adType
    -- attribute disqualifies the visit
    and if(CHAR_LENGTH(COALESCE(
            get_json_object(business, '$.attr.marketing'),
            get_json_object(business, '$.attr.origin'),
            get_json_object(business, '$.attr.adType'))) > 1, 1, 0) = 0
    -- filter out machine/anonymous ids (longer than 16 chars or 'M'-prefixed)
    and CHAR_LENGTH(userinfo_uri) <= 16
    and userinfo_uri not like 'M%'
    and dt = CURRENT_DATE
    -- only visits entered from the whitelisted referrers
    and (
        last_r in ('saleList', 'similarGoods', 'menu_home', 'menu_newHome', 'wechat')
        or last_r like 'wechatMenu%'
    )
    and r not like '%discovery_flowDistribution%'
    -- and COALESCE(split(r,'-')[3],split(r,'-')[2],split(r,'-')[1],split(r,'-')[0]) not like '%discovery_flowDistribution%'
    and HOUR(TO_TIMESTAMP(FROM_UNIXTIME(cast(`time` / 1000 as bigint)))) = HOUR(CURRENT_TIME)
group by
    dt,
    shop_uri,
    HOUR(TO_TIMESTAMP(FROM_UNIXTIME(cast(`time` / 1000 as bigint))))
;
-- HBase dimension table: maps user uri (rowkey) -> numeric userinfo id (cf.userinfoid).
CREATE TABLE mdl_HBASE_rt_dim_rt_dim_wpt_user_t_user_uri_1286 (
    rowkey varchar,
    cf ROW<userinfoid string>,
    PRIMARY KEY (rowkey) NOT ENFORCED
)
with (
    'connector' = 'hbase-1.4',
    'table-name' = 'rt_dim:rt_dim_wpt_user_t_user_uri',
    'zookeeper.quorum' = '10.10.10.10:2181,10.10.10.11:2181,10.10.10.12:2181'
);
-- Re-reads the visit upsert topic written above, with event time derived from
-- server_time (epoch seconds) and 5 seconds of allowed out-of-orderness.
CREATE TABLE source_kafka_api_precision_exp_visit_hour_incr_1d_visit (
    dt string
    ,shop_uri string
    ,period_hour bigint
    ,sale_pv bigint
    ,server_time bigint
    ,ts AS TO_TIMESTAMP_LTZ(server_time * 1000, 3)
    ,WATERMARK FOR ts AS ts - INTERVAL '5' second
) WITH (
    'connector' = 'kafka',
    'topic' = 'rt_api_precision_exp_visit_hour_incr_1d_visit_1284',
    'properties.bootstrap.servers' = '10.10.10.10:9092',
    'properties.group.id' = 'jade_task_1284',
    -- 上线 (production)
    'format' = 'json',
    'scan.startup.mode' = 'timestamp',
    'scan.startup.timestamp-millis' = '1700060360501'
);
-- Commented-out debug sink kept from development (logger connector):
-- CREATE TABLE tmp_exp_visit_hour_incr_1d_visit (
--     -- dts TIMESTAMP(3)
--     -- ,
--     dt string
--     ,shop_uri string
--     ,period_hour bigint
--     ,sale_pv bigint
-- ) WITH (
--     'connector' = 'logger',
--     'print-identifier' = 'test_ysj_Glog'
-- );
-- insert into tmp_exp_visit_hour_incr_1d_visit
-- select
--     -- dts,
--     dt,
--     shop_id,
--     period_hour,
--     sale_pv
-- from tmp_exp_visit_hour_incr_1d_view
;
-- Re-reads the exposure upsert topic written above, with event time derived
-- from server_time (epoch seconds) and 5 seconds of allowed out-of-orderness.
CREATE TABLE source_kafka_api_precision_exp_visit_hour_incr_1d_exp (
    dt string
    ,shop_uri string
    ,period_hour bigint
    ,exposure_cnt bigint
    ,server_time bigint
    -- ,ts AS TO_TIMESTAMP_LTZ(server_time, 3),
    ,ts AS TO_TIMESTAMP_LTZ(server_time * 1000, 3)
    ,WATERMARK FOR ts AS ts - INTERVAL '5' second
    -- ,PRIMARY KEY (dt,shop_uri,period_hour) NOT enforced
) WITH (
    'connector' = 'kafka',
    'topic' = 'rt_api_precision_exp_visit_hour_incr_1d_exp_1284',
    'properties.bootstrap.servers' = '10.10.10.10:9092',
    'properties.group.id' = 'jade_task_1284',
    -- 上线 (production)
    'format' = 'json',
    'scan.startup.mode' = 'timestamp',
    'scan.startup.timestamp-millis' = '1700060360501'
);
-- Commented-out debug sink kept from development (logger connector):
-- CREATE TABLE tmp_exp_visit_hour_incr_1d_exp (
--     -- dts TIMESTAMP(3)
--     -- ,
--     dt string
--     ,shop_uri string
--     ,period_hour bigint
--     ,exposure_cnt bigint
-- ) WITH (
--     'connector' = 'logger',
--     'print-identifier' = 'test_ysj_Glog'
-- );
-- Deduplicates the visit upsert stream: within each 5-minute tumbling window,
-- keep the max sale_pv per (dt, shop_uri, period_hour).
create view tmp_visit_hour_incr_1d_view as
select
    -- TUMBLE_START(ts, INTERVAL '1' MINUTE) as dts
    -- ,
    dt
    ,shop_uri
    ,period_hour
    ,max(sale_pv) as sale_pv
from source_kafka_api_precision_exp_visit_hour_incr_1d_visit
group by
    TUMBLE(ts, INTERVAL '5' MINUTE)
    ,dt
    ,shop_uri
    ,period_hour
;
-- Deduplicates the exposure upsert stream: within each 5-minute tumbling
-- window, keep the max exposure_cnt per (dt, shop_uri, period_hour).
create view tmp_exp_hour_incr_1d_view as
select
    -- TUMBLE_START(ts, INTERVAL '1' MINUTE) as dts
    -- ,
    dt
    ,shop_uri
    ,period_hour
    ,max(exposure_cnt) as exposure_cnt
from source_kafka_api_precision_exp_visit_hour_incr_1d_exp
group by
    TUMBLE(ts, INTERVAL '5' MINUTE)
    ,dt
    ,shop_uri
    ,period_hour
;
-- Commented-out debug statements kept from development (logger connector):
-- insert into tmp_exp_visit_hour_incr_1d_exp
-- select
--     -- dts,
--     dt,
--     shop_uri,
--     period_hour,
--     exposure_cnt
-- from tmp_exp_visit_hour_incr_1d_view
-- CREATE TABLE tmp_exp_visit_hour_incr_1d_exp_visit (
--     -- dts TIMESTAMP(3)
--     -- ,
--     shop_uri string
--     ,first_category_id bigint
--     ,period_hour bigint
--     ,sale_pv bigint
--     ,exposure_cnt bigint
--     ,dt string
-- ) WITH (
--     'connector' = 'logger',
--     'print-identifier' = 'test_ysj_Glog'
-- );
-- JDBC (MySQL) sink for the final joined exposure/visit hourly result.
-- NOTE(review): the name says KAFKA but the connector is jdbc — kept unchanged
-- because the insert below references it; consider renaming in a follow-up.
CREATE TABLE sink_KAFKA_sale_exposure_visit_incr_1d_5801 (
    shop_id int,
    first_category_id int,
    period_hour int,
    sale_pv bigint,
    exposure_cnt bigint,
    dt string,
    PRIMARY KEY (dt, shop_id, period_hour) NOT ENFORCED
)
with (
    'connector' = 'jdbc',
    'url' = 'jdbc:mysql://10.10.10.10:3306/realtime',
    'table-name' = 'api_precision_exp_visit_hour_incr_1d',
    'username' = 'ysj',
    -- fixed: the opening quote of the password literal was missing
    'password' = '88888888888',
    'sink.buffer-flush.max-rows' = '500',
    'sink.buffer-flush.interval' = '1s'
    -- 'connector' = 'logger',
    -- 'print-identifier' = 'test_ysj_Glog'
);
-- Final step: full-outer-join the deduplicated exposure and visit views on
-- (dt, shop_uri, period_hour), defaulting missing counts to 0, then map
-- shop_uri -> numeric shop_id via the HBase dimension table and write to MySQL.
-- NOTE(review): this is a regular join against the HBase table, not a lookup
-- join (no FOR SYSTEM_TIME AS OF); rows from HBase are read as a bounded scan
-- and later dimension updates will not be picked up — confirm this is intended.
insert into sink_KAFKA_sale_exposure_visit_incr_1d_5801
select
cast(t2.cf.userinfoid as int) as shop_id
,t1.first_category_id
,t1.period_hour
,t1.sale_pv
,t1.exposure_cnt
,t1.dt
from
(
select
-- prefer the exposure side's keys; fall back to the visit side's
coalesce(a.shop_uri,b.shop_uri) as shop_uri
-- first_category_id is not computed at this grain; hard-coded to 0
,0 as first_category_id
,cast(coalesce(a.period_hour,b.period_hour) as int) as period_hour
,coalesce(b.sale_pv,0) as sale_pv
,coalesce(a.exposure_cnt,0) as exposure_cnt
,coalesce(a.dt,b.dt) as dt
from tmp_exp_hour_incr_1d_view as a
full join
tmp_visit_hour_incr_1d_view as b
on a.dt = b.dt
and a.shop_uri = b.shop_uri
and a.period_hour = b.period_hour
) t1
-- inner join: shops missing from the dimension table are dropped
join mdl_HBASE_rt_dim_rt_dim_wpt_user_t_user_uri_1286 t2
on t1.shop_uri = t2.rowkey
;