【FLINK SQL】Examples

Temporary SQL written during development; for reference only. The job counts shop exposures and sale visits per hour, stages the hourly aggregates in upsert-kafka topics, joins the two streams, resolves shop_uri to shop_id through an HBase dimension table, and writes the result to MySQL.

https://nightlies.apache.org/flink/flink-docs-release-1.18/docs/connectors/table/upsert-kafka/

-- keep 1 day of state (86400000 ms = 24 h)
set table.exec.state.ttl=86400000;
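Newer SQL clients also accept the quoted-key form with Flink's duration syntax; an equivalent sketch, assuming a client version that parses duration units:

SET 'table.exec.state.ttl' = '1 d';
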
CREATE TABLE source_behavior_event_exposure_1284(
reportInfo row<reportVersion String>,
`time` row<clientTime String, serverTime String>,
`user` row<uuri String, pushId String, scrmUserUri String>,
business String,
proc_time AS PROCTIME()
)
with (
'properties.bootstrap.servers' = '10.10.10.10:9092',
'topic' = 'behavior_event_exposure',
'properties.group.id' = 'source_behavior_event_exposure_990',
'connector' = 'kafka',
'avro-confluent.schema-registry.url' = 'http://aaaa.bbbb.com',
'avro-confluent.schema-registry.subject' = 'behavior_event_exposure',
'format' = 'avro-confluent',
'scan.startup.mode' = 'timestamp',
'scan.startup.timestamp-millis' = '1700040586303'
);
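A quick smoke test for the Avro source in the SQL client (hypothetical ad-hoc query; the LIMIT lets the streaming query finish after a few rows):

select `user`.uuri, `time`.serverTime, business
from source_behavior_event_exposure_1284
limit 10;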
CREATE TABLE sink_kafka_api_precision_exp_visit_hour_incr_1d_exp (
dt string
,shop_uri string
,period_hour bigint
,exposure_cnt bigint
,server_time bigint
,primary key (dt,shop_uri,period_hour) not enforced
) WITH (
'connector' = 'upsert-kafka',
'topic' = 'rt_api_precision_exp_visit_hour_incr_1d_exp_1284',
'properties.bootstrap.servers' = '10.10.10.10:9092',
'key.format' = 'json',
'value.format' = 'json'
);
insert into sink_kafka_api_precision_exp_visit_hour_incr_1d_exp
-- create view tmp_exposure_shop_sale as
select
from_unixtime(cast(cast(`time`.serverTime as bigint)/1000 as bigint),'yyyy-MM-dd') as dt,
shopUri as shop_uri,
HOUR(TO_TIMESTAMP(FROM_UNIXTIME(cast(cast(`time`.serverTime as bigint)/1000 as bigint)))) as period_hour,
count(shopUri) as exposure_cnt,
max(cast(cast(`time`.serverTime as bigint)/1000 as bigint)) as server_time
-- a.PROCTIME() AS proctime
from source_behavior_event_exposure_1284 a,
LATERAL TABLE(array_explode(get_json_object(business, '…'))) as t(attr)
where split_index(reportInfo.reportVersion, '_', 0) in ( … )
and (get_json_object(attr, '$.isFlowDiscovery') is null
or get_json_object(attr, '$.isFlowDiscovery') <> 'true') -- traffic distribution
and if((
(get_json_object(attr, '$.marketing') is not null and get_json_object(attr, '$.marketing') <> '')
or (get_json_object(attr, '$.attr.marketing') is not null and get_json_object(attr, '$.attr.marketing') <> '')
or (get_json_object(attr, '$.origin') is not null and get_json_object(attr, '$.origin') <> '')
or (get_json_object(attr, '$.attr.origin') is not null and get_json_object(attr, '$.attr.origin') <> '')
or (get_json_object(attr, '$.adType') is not null and get_json_object(attr, '$.adType') <> '')
or (get_json_object(attr, '$.attr.adType') is not null and get_json_object(attr, '$.attr.adType') <> '')
), 1, 0) = 0 -- Qingzhu ads
and from_unixtime(cast(cast(`time`.serverTime as bigint)/1000 as bigint),'yyyy-MM-dd') = CURRENT_DATE
and HOUR(TO_TIMESTAMP(FROM_UNIXTIME(cast(cast(a.`time`.serverTime as bigint)/1000 as bigint)))) = HOUR(CURRENT_TIME)
and CHAR_LENGTH(cast(`user`.uuri as string)) <= 16
and cast(`user`.uuri as string) not like 'M%'
group by shopUri, HOUR(TO_TIMESTAMP(FROM_UNIXTIME(cast(cast(`time`.serverTime as bigint)/1000 as bigint)))),
from_unixtime(cast(cast(`time`.serverTime as bigint)/1000 as bigint),'yyyy-MM-dd')
-- ,a.PROCTIME()
;

CREATE TABLE source_KAFKA_new_user_visit_incr_1d_1284 (
dt String,
`time` bigint comment 'event time (ms)',
userinfo_id string comment 'user id',
business string,
report_id string,
userinfo_uri String comment 'user uri',
sc String comment 'sc',
r String comment 'r value',
last_r String comment 'last_r value',
sale_uri String comment 'sale (auction item) uri',
shop_uri String comment 'shop uri',
ts AS TO_TIMESTAMP_LTZ(`time`, 3),
WATERMARK FOR ts AS ts - INTERVAL '2' MINUTE
)
WITH (
'properties.bootstrap.servers' = '10.10.10.10:9092',
'topic' = 'rt_dwt_new_user_visit_incr_1d',
'properties.group.id' = 'jade_task_745',
'connector' = 'kafka',
'format' = 'json',
'scan.startup.mode' = 'timestamp',
'scan.startup.timestamp-millis' = '1700040586303'
);

CREATE TABLE sink_kafka_api_precision_exp_visit_hour_incr_1d_visit (
dt string
,shop_uri string
,period_hour bigint
,sale_pv bigint
,server_time bigint
,primary key (dt,shop_uri,period_hour) not enforced
) WITH (
'connector' = 'upsert-kafka',
'topic' = 'rt_api_precision_exp_visit_hour_incr_1d_visit_1284',
'properties.bootstrap.servers' = '10.10.10.10:9092',
'key.format' = 'json',
'value.format' = 'json'
-- 'connector' = 'logger',
-- 'print-identifier' = 'test_ysj_Glog'
);

insert into sink_kafka_api_precision_exp_visit_hour_incr_1d_visit
-- create view tmp_visit_shop_sale as
select
dt,
shop_uri,
HOUR(TO_TIMESTAMP(FROM_UNIXTIME(cast(`time`/1000 as bigint)))) as period_hour,
count(sale_uri) as sale_pv,
max(cast(`time`/1000 as bigint)) as server_time
from source_KAFKA_new_user_visit_incr_1d_1284
where
shop_uri is not null
and sale_uri is not null
and sale_uri <> ''
-- and sc not like '%qz%'
and if(CHAR_LENGTH(COALESCE(get_json_object(business, '$.attr.marketing'),
get_json_object(business, '$.attr.origin'),
get_json_object(business, '$.attr.adType'))) > 1, 1, 0) = 0
and CHAR_LENGTH(userinfo_uri) <= 16
and userinfo_uri not like 'M%'
and dt = CURRENT_DATE
and (
last_r in ('saleList','similarGoods','menu_home','menu_newHome','wechat')
or last_r like 'wechatMenu%'
)
and r not like '%discovery_flowDistribution%'
-- and COALESCE(split(r,'-')[3],split(r,'-')[2],split(r,'-')[1],split(r,'-')[0]) not like '%discovery_flowDistribution%'
and HOUR(TO_TIMESTAMP(FROM_UNIXTIME(cast(`time`/1000 as bigint)))) = HOUR(CURRENT_TIME)
group by dt,shop_uri,HOUR(TO_TIMESTAMP(FROM_UNIXTIME(cast(`time`/1000 as bigint))))
;

CREATE TABLE mdl_HBASE_rt_dim_rt_dim_wpt_user_t_user_uri_1286(
rowkey varchar,
cf ROW<userinfoid string>,
PRIMARY KEY (rowkey) NOT ENFORCED
)
with (
'zookeeper.quorum' = '10.10.10.10:2181,10.10.10.11:2181,10.10.10.12:2181',
'table-name' = 'rt_dim:rt_dim_wpt_user_t_user_uri',
'connector' = 'hbase-1.4'
);
CREATE TABLE source_kafka_api_precision_exp_visit_hour_incr_1d_visit (
dt string
,shop_uri string
,period_hour bigint
,sale_pv bigint
,server_time bigint
,ts AS TO_TIMESTAMP_LTZ(server_time * 1000, 3)
,WATERMARK FOR ts AS ts - INTERVAL '5' second
) WITH (
'properties.bootstrap.servers' = '10.10.10.10:9092',
'topic' = 'rt_api_precision_exp_visit_hour_incr_1d_visit_1284',
'properties.group.id' = 'jade_task_1284',
'connector' = 'kafka',
-- production
'format' = 'json',
'scan.startup.mode' = 'timestamp',
'scan.startup.timestamp-millis' = '1700060360501'

);

-- CREATE TABLE tmp_exp_visit_hour_incr_1d_visit (
-- -- dts TIMESTAMP(3)
-- -- ,
-- dt string
-- ,shop_uri string
-- ,period_hour bigint
-- ,sale_pv bigint
-- ) WITH (
-- 'connector' = 'logger',
-- 'print-identifier' = 'test_ysj_Glog'
-- );

-- insert into tmp_exp_visit_hour_incr_1d_visit
-- select
-- -- dts,
-- dt,
-- shop_id,
-- period_hour,
-- sale_pv
-- from tmp_exp_visit_hour_incr_1d_view
-- ;

CREATE TABLE source_kafka_api_precision_exp_visit_hour_incr_1d_exp (
dt string
,shop_uri string
,period_hour bigint
,exposure_cnt bigint
,server_time bigint
-- ,ts AS TO_TIMESTAMP_LTZ(server_time, 3)
,ts AS TO_TIMESTAMP_LTZ(server_time * 1000, 3)
,WATERMARK FOR ts AS ts - INTERVAL '5' second
-- ,PRIMARY KEY (dt,shop_uri,period_hour) NOT ENFORCED
) WITH (
'properties.bootstrap.servers' = '10.10.10.10:9092',
'topic' = 'rt_api_precision_exp_visit_hour_incr_1d_exp_1284',
'properties.group.id' = 'jade_task_1284',
'connector' = 'kafka',
-- production
'format' = 'json',
'scan.startup.mode' = 'timestamp',
'scan.startup.timestamp-millis' = '1700060360501'

);

-- CREATE TABLE tmp_exp_visit_hour_incr_1d_exp (
-- -- dts TIMESTAMP(3)
-- -- ,
-- dt string
-- ,shop_uri string
-- ,period_hour bigint
-- ,exposure_cnt bigint
-- ) WITH (
-- 'connector' = 'logger',
-- 'print-identifier' = 'test_ysj_Glog'
-- );

-- max() over a 5-minute tumble collapses the re-read upsert stream to one row per key per window
create view tmp_visit_hour_incr_1d_view as
select
-- TUMBLE_START(ts, INTERVAL '1' MINUTE) as dts
-- ,
dt
,shop_uri
,period_hour
,max(sale_pv) as sale_pv
from source_kafka_api_precision_exp_visit_hour_incr_1d_visit
group by
TUMBLE(ts, INTERVAL '5' MINUTE)
,dt
,shop_uri
,period_hour
;

create view tmp_exp_hour_incr_1d_view as
select
-- TUMBLE_START(ts, INTERVAL '1' MINUTE) as dts
-- ,
dt
,shop_uri
,period_hour
,max(exposure_cnt) as exposure_cnt
from source_kafka_api_precision_exp_visit_hour_incr_1d_exp
group by
TUMBLE(ts, INTERVAL '5' MINUTE)
,dt
,shop_uri
,period_hour
;

-- insert into tmp_exp_visit_hour_incr_1d_exp
-- select
-- -- dts,
-- dt,
-- shop_uri,
-- period_hour,
-- exposure_cnt
-- from tmp_exp_visit_hour_incr_1d_view

-- CREATE TABLE tmp_exp_visit_hour_incr_1d_exp_visit (
-- -- dts TIMESTAMP(3)
-- -- ,
-- shop_uri string
-- ,first_category_id bigint
-- ,period_hour bigint
-- ,sale_pv bigint
-- ,exposure_cnt bigint
-- ,dt string
-- ) WITH (
-- 'connector' = 'logger',
-- 'print-identifier' = 'test_ysj_Glog'
-- );

CREATE TABLE sink_KAFKA_sale_exposure_visit_incr_1d_5801 (
shop_id int,
first_category_id int,
period_hour int,
sale_pv bigint,
exposure_cnt bigint,
dt string,
PRIMARY KEY (dt,shop_id,period_hour) NOT ENFORCED
)
with (
'url' = 'jdbc:mysql://10.10.10.10:3306/realtime',
'table-name' = 'api_precision_exp_visit_hour_incr_1d',
'connector' = 'jdbc',
'username' = 'ysj',
'password' = '88888888888',
'sink.buffer-flush.max-rows' = '500',
'sink.buffer-flush.interval' = '1s'
-- 'connector' = 'logger',
-- 'print-identifier' = 'test_ysj_Glog'

);
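
For the JDBC upsert to take effect, the MySQL side needs a matching unique key; a hypothetical DDL for the target table, inferred from the column list above:

-- hypothetical MySQL DDL (names inferred from the Flink schema)
CREATE TABLE api_precision_exp_visit_hour_incr_1d (
shop_id int,
first_category_id int,
period_hour int,
sale_pv bigint,
exposure_cnt bigint,
dt varchar(10),
PRIMARY KEY (dt, shop_id, period_hour)
);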

insert into sink_KAFKA_sale_exposure_visit_incr_1d_5801
select
cast(t2.cf.userinfoid as int) as shop_id
,t1.first_category_id
,t1.period_hour
,t1.sale_pv
,t1.exposure_cnt
,t1.dt
from
(
select
coalesce(a.shop_uri,b.shop_uri) as shop_uri
,0 as first_category_id
,cast(coalesce(a.period_hour,b.period_hour) as int) as period_hour
,coalesce(b.sale_pv,0) as sale_pv
,coalesce(a.exposure_cnt,0) as exposure_cnt
,coalesce(a.dt,b.dt) as dt
from tmp_exp_hour_incr_1d_view as a
full join
tmp_visit_hour_incr_1d_view as b
on a.dt = b.dt
and a.shop_uri = b.shop_uri
and a.period_hour = b.period_hour
) t1
join mdl_HBASE_rt_dim_rt_dim_wpt_user_t_user_uri_1286 t2
on t1.shop_uri = t2.rowkey
;
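
The HBase join above is a plain regular join, so Flink reads the dimension table as a bounded scan. For per-record lookups the usual pattern is a temporal lookup join, which needs a processing-time attribute on the stream side; a minimal sketch under that assumption (some_stream_with_proctime and its proc_time column are hypothetical):

select d.cf.userinfoid as shop_id, s.shop_uri
from some_stream_with_proctime s
join mdl_HBASE_rt_dim_rt_dim_wpt_user_t_user_uri_1286 for system_time as of s.proc_time as d
on s.shop_uri = d.rowkey;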
