总结过去的Flink-SQL:一个基于阿里云Blink-Stream-Studio的实时指标SQL

贴一个曾经开发过的小需求,主要是按照产品与时间的细粒度对业务指标进行实时统计

几个月前的了,我使用的是阿里云的Stream Studio可以直接写Flink-SQL进行统计,近期我们将本地线上环境升级到了Flink-1.11.1社区版,Flink-SQL的语法基本与Blink一致,并且Flink-1.11.1社区增加了对Hive的交互支持。

一个小小的不满,Flink-1.11.1社区版并不能像阿里云Blink-Stream-Studio平台一样支持DDL的形式创建维度表,在1.11社区版中还是得用以前LookUpFunction的方式来做维度交互,不支持DDL创建维度表,这个我相信在未来的新版本中也会增加!毕竟要用SQL咱们就全都用SQL多好~

 

以下是我曾经基于阿里云Blink-Stream-Studio平台做的一个简单需求,我负责我司近20个业务渠道的实时计算需求与研发,很多人都会用Flink,但大家的需求场景各不相同,这里列举一个易懂的小项目:实时统计某个业务渠道的实时交易情况,退款情况,SKU实售情况,今日SKU-GMV。

--SQL
--********************************************************************--
--Author: guangyu
--CreateTime: 2020-06-16 20:01:25
--Comment: 请输入业务注释信息
--********************************************************************--
-- 数据源
-- CREATE TABLE src_youzan_data (
-- 	`data`             VARCHAR
-- ) WITH (
-- 	type = 'datahub',
-- 	endPoint = 'http://dh-cn-beijing.aliyun-inc.com',
-- 	roleArn='acs:ram::199526033333379:role/aliyunstreamdefaultrole',
-- 	project = 'DC_TEST',
-- 	topic = 'src_youzan_data'
-- );
CREATE TABLE src_youzan_data (
	`data`             VARCHAR
) WITH (
	type = 'datahub',
	endPoint = 'http://dh-cn-beijing.aliyun-inc.com',
	roleArn='acs:ram::199526033333379:role/aliyunstreamdefaultrole',
	project = 'DC_xxxx',
	topic = 'src_youzan_data'
);


-- 商品纬度表
CREATE TABLE dim_product (
	id                                     INT,
	product_id                             VARCHAR,
	product_code                           VARCHAR,
	second_code                            VARCHAR,
	`name`                                 VARCHAR,
	brand_name                             VARCHAR,
	storage_location                       VARCHAR,
	middle_code                            VARCHAR,
	origin                                 VARCHAR,
	buy_specification                      VARCHAR,
	big_category_name                      VARCHAR,
	contents                               VARCHAR,
	small_category_name                    VARCHAR,
	brand_code                             VARCHAR,
	sale_specification                     VARCHAR,
	product_status                         VARCHAR,
	wholesale_specification                VARCHAR,
	putaway_time                           VARCHAR,
	last_manager                           VARCHAR,
	ip                                     VARCHAR,
	adjust_price                           DECIMAL,
	contract_price                         DECIMAL,
	except_tax_price                       DECIMAL,
	expiration_date                        INT,
	lowest_price                           VARCHAR,
	rate                                   DOUBLE,
	wholesale_price                        DECIMAL,
	middle_name                            VARCHAR,
	`month`                                VARCHAR,
	supplier_name                          VARCHAR,
	subdivide                              VARCHAR,
	distribution_type                      VARCHAR,
	supplier                               VARCHAR,
	business_practice                      VARCHAR,
	product_status_code                    VARCHAR,
	unit                                   VARCHAR,
	`year`                                 VARCHAR,
	big_code                               VARCHAR,
	distribution_specification             VARCHAR,
	updated                                VARCHAR,
	small_code                             VARCHAR,
	product_type                           VARCHAR,
	simple_name                            VARCHAR,
	specification                          VARCHAR,
	unique index (second_code),
	PERIOD FOR SYSTEM_TIME
) WITH (
	type= 'rds',
	url = 'jdbc:mysql://xxxx:36628/dcxxxx',
	userName = 'guangyu',
	password = 'qwer#123',
	tableName = 'product'
);




-- 解析订单
CREATE VIEW src_youzan_order as
select 
-- 主订单信息:
-- 有赞订单类型
JSON_VALUE(`data`,'$.type') as `type`,
-- 重发的次数
CAST(JSON_VALUE(JSON_VALUE(`data`,'$.msg'),'$.sendCount') as int) as `sendCount`,
-- 重发的次数
CAST(JSON_VALUE(JSON_VALUE(`data`,'$.msg'),'$.test') as boolean) as `test`,
-- 最终支付价格 payment=orders.payment的总和
CAST(JSON_VALUE(JSON_VALUE(JSON_VALUE(JSON_VALUE(`data`,'$.msg'),'$.full_order_info'),'$.pay_info'),'$.payment') as DOUBLE) as payment,
-- 主订单id
JSON_VALUE(JSON_VALUE(JSON_VALUE(JSON_VALUE(`data`,'$.msg'),'$.full_order_info'),'$.order_info'),'$.tid') as tid,
-- 支付时间
REPLACE(JSON_VALUE(JSON_VALUE(JSON_VALUE(JSON_VALUE(`data`,'$.msg'),'$.full_order_info'),'$.order_info'),'$.pay_time'),'+',' ') as pay_time,
-- 订单创建时间
REPLACE(JSON_VALUE(JSON_VALUE(JSON_VALUE(JSON_VALUE(`data`,'$.msg'),'$.full_order_info'),'$.order_info'),'$.created'),'+',' ') as created,
-- 订单修改时间
REPLACE(JSON_VALUE(JSON_VALUE(JSON_VALUE(JSON_VALUE(`data`,'$.msg'),'$.full_order_info'),'$.order_info'),'$.update_time'),'+',' ') as update_time,
-- 订单状态
JSON_VALUE(JSON_VALUE(JSON_VALUE(JSON_VALUE(`data`,'$.msg'),'$.full_order_info'),'$.order_info'),'$.status') as `status`,
-- 支付类型
CAST(JSON_VALUE(JSON_VALUE(JSON_VALUE(JSON_VALUE(`data`,'$.msg'),'$.full_order_info'),'$.order_info'),'$.pay_type') as int) as pay_type,
-- 子订单信息:
-- 商品编码
JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.outer_item_id') as outer_item_id,
-- 单商品现价,减去了商品的优惠金额
CAST(JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.discount_price') as DOUBLE) as discount_price,
-- 商品数量
CAST(JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.num') as int) as num,
-- 交易明细号
JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.oid') as oid,
-- 商品名称
JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.title') as title,
-- 是否赠品
JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.is_present') as is_present,
-- 单商品原价
CAST(JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.price') as DOUBLE) as price,
-- 分销价格
CAST(JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.fenxiao_price') as DOUBLE) as fenxiao_price,
-- 商品优惠后总价
CAST(JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.total_fee') as DOUBLE) as total_fee,
-- 商品别名
JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.alias') as alias,
-- 商品最终均摊价
CAST(JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.payment') as DOUBLE) as order_payment,
-- 商家编码
JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.outer_sku_id') as outer_sku_id,
-- 商品详情链接
JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.goods_url') as goods_url,
-- 商品id
CAST(JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.item_id') as int) as item_id,
-- 海淘口岸编码
JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.customs_code') as customs_code,
-- 商品积分价(非积分商品则为0)
CAST(JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.points_price') as DOUBLE) as points_price,
-- 订单类型 0:普通类型商品; 1:拍卖商品; 5:餐饮商品; 10:分销商品; 20:会员卡商品; 21:礼品卡商品; 23:有赞会议商品; 24:周期购; 30:收银台商品; 31:知识付费商品; 35:酒店商品; 40:普通服务类商品; 182:普通虚拟商品; 183:电子卡券商品; 201:外部会员卡商品; 202:外部直接收款商品; 203:外部普通商品; 205:mock不存在商品; 206:小程序二维码
CAST(JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.item_type') as int) as item_type,
-- 是否海淘订单, 1是海淘
JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.is_cross_border') as is_cross_border,

-- 退款类数据
-- 退款金额
CAST(JSON_VALUE(JSON_VALUE(`data`,'$.msg'),'$.refunded_fee') as DOUBLE ) as `refunded_fee`,
-- 退款订单
JSON_VALUE(JSON_VALUE(`data`,'$.msg'),'$.refund_id') as `refund_id`
from src_youzan_data;
-- where REPLACE(JSON_VALUE(JSON_VALUE(JSON_VALUE(JSON_VALUE(`data`,'$.msg'),'$.full_order_info'),'$.order_info'),'$.pay_time'),'+',' ') = CAST( CURRENT_DATE AS VARCHAR );


-- 已支付订单去重
-- create view order_distinct as 
-- SELECT * from (
-- SELECT
-- 			*,
-- 			row_number() over ( PARTITION BY tid ORDER BY update_time DESC ) AS rn 
-- 		FROM
-- 			src_youzan_order 
-- 		WHERE
-- 			`type` = 'trade_TradeBuyerPay' -- 类型为用户已支付
-- 			 and SUBSTRING( pay_time, 1, 10 ) = CAST( CURRENT_DATE AS VARCHAR ) 
-- )distinct_order
-- where distinct_order.rn = 1;


-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------








-- 今日支付,今日订单数

create view gmv_print as 
SELECT
		SUBSTRING( pay_time, 1, 10 ) as `date`,
		sum( gmv_tb.payment ) AS gmv,
		CAST(count( gmv_tb.tid ) as int) AS order_count
	FROM
		(
		SELECT
			*,
			row_number() over ( PARTITION BY tid ORDER BY update_time DESC ) AS rn 
		FROM
			src_youzan_order 
		WHERE
			`type` = 'trade_TradeBuyerPay' -- 类型为用户已支付
			and SUBSTRING( pay_time, 1, 10 ) = CAST( CURRENT_DATE AS VARCHAR ) 
			--  pay_time like '2020-06-18%'
		) gmv_tb where gmv_tb.rn=1
		group by SUBSTRING( pay_time, 1, 10 ) 
		having SUBSTRING( pay_time, 1, 10 )  = CAST( CURRENT_DATE AS VARCHAR );
-------------------------------------


-- 今日退款金额,退款订单

create view refund_print as 
SELECT
		SUBSTRING( update_time, 1, 10 ) as `date`,
		sum( refund.refunded_fee ) AS refund_fee_total,
		CAST(count( refund.refund_id ) as int) AS refund_order_total
	FROM
		(
		SELECT
			*,
			row_number() over ( PARTITION BY refund_id ORDER BY update_time DESC ) AS rn 
		FROM
			src_youzan_order 
		WHERE
			`type` = 'trade_refund_RefundSuccess' -- 类型为用户退款成功
			
			 and  SUBSTRING( update_time, 1, 10 ) = CAST( CURRENT_DATE AS VARCHAR ) 
			--  pay_time like '2020-06-18%'
		) refund where refund.rn=1
		group by SUBSTRING( update_time, 1, 10 )
		having SUBSTRING( update_time, 1, 10 ) = CAST( CURRENT_DATE AS VARCHAR ) ;
-------------------------------------

-- 实时金额表
CREATE TABLE dc_xxx_channel_real (
	`date`                  VARCHAR,
	channel_id              INT,
	sum_price               DOUBLE,
	sum_cnt                 DOUBLE,
	refund_cnt              BIGINT,
	refund_price            DOUBLE,
	-- updated_time            DATE,
	PRIMARY KEY (`date`,channel_id)
) WITH (
	type= 'rds',
	url = 'jdbc:mysql://xxxx:36628/dcxxxx',
	userName = 'dcsxxx',
	password = 'xxx',
	tableName = 'dc_zzzzz_channel_real'
);


-- 写入数据库
insert into dc_xxx_channel_real
select 
gp.`date`,
13 as channel_id,
gp.gmv as sum_price,
case when CAST(gp.order_count as DOUBLE) is null then cast(0 as double) else CAST(gp.order_count as DOUBLE) end as sum_cnt,
case when CAST(rp.refund_order_total as BIGINT) is null then cast(0 as bigint) else CAST(rp.refund_order_total as BIGINT) end as refund_cnt,
case when rp.refund_fee_total is null then cast(0 as double) else rp.refund_fee_total end as refund_price
from gmv_print gp
left join refund_print rp
on gp.`date` = rp.`date`;
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

-- 实时商品结果表
CREATE TABLE dc_xxx_product_real (
	`date`                        VARCHAR,
	channel_id                    INT,
	`group`                       VARCHAR,
	product_id                    VARCHAR,
	`ip`                            VARCHAR,
	big_category_name             VARCHAR,
	middle_name                   VARCHAR,
	product_type                  VARCHAR,
	series                        VARCHAR,
	subdivide                     VARCHAR,
	product_name                  VARCHAR,
	sum_price                     DOUBLE,
	sum_cnt                       BIGINT,
	cnt_order                     BIGINT, -- 订单数
	-- id                            INT,
	PRIMARY KEY (`date`,channel_id,`group`,product_id)
) WITH (
	type= 'rds',
	url = 'jdbc:mysql://xxxx:36628/dcxxx',
	userName = 'dcxxx',
	password = 'xxxxxxx',
	tableName = 'dc_xxxproduct_real'
);


-- 统计已支付商品分类,个数,写入数据库



insert into dc_xxxproduct_real
select 
 tmp.`date`,
 13 as channel_id,
 '无' as `group`,
tmp.outer_item_id as product_id,
case when `ip` is null then '无' else `ip` end as `ip`, 
case when big_category_name is null then '无' else big_category_name end as big_category_name, 
case when middle_name is null then '无' else middle_name end as middle_name, 
case when product_type is null then '无' else product_type end as product_type, 
case when simple_name is null then '无' else simple_name end as series, 
CASE WHEN subdivide is null then '无' else subdivide end as subdivide, 
case when dim_product.`name` is null then '无' else dim_product.`name` end as `product_name`,
tmp.sum_price as sum_price,
case when CAST(tmp.sum_cnt as BIGINT) is null then CAST(0 as BIGINT) else CAST(tmp.sum_cnt as BIGINT) end as  sum_cnt,
case when CAST(tmp.cnt_order as BIGINT) is null then cast(0 as bigint) else CAST(tmp.cnt_order as BIGINT) end as cnt_order
	from(
		select 
		
		odt.outer_item_id, -- 商品ID
		sum(odt.num) as sum_cnt, -- 商品累计个数
		sum(odt.order_payment) as sum_price, -- 商品合计金额
		count(odt.tid) as cnt_order, -- 主订单数,
		SUBSTRING( pay_time, 1, 10 ) as `date`
		from (
			SELECT
			*,
			row_number() over ( PARTITION BY oid ORDER BY update_time DESC ) AS rn 
		FROM
			src_youzan_order 
		WHERE
			`type` = 'trade_TradeBuyerPay' -- 类型为用户已支付
			and SUBSTRING( pay_time, 1, 10 ) = CAST( CURRENT_DATE AS VARCHAR ) 
		) odt where odt.rn=1
		group by odt.outer_item_id,SUBSTRING( pay_time, 1, 10 )
		having SUBSTRING( pay_time, 1, 10 ) = CAST( CURRENT_DATE AS VARCHAR )  ) tmp
	left join dim_product FOR SYSTEM_TIME AS OF PROCTIME () 
 	on 
	tmp.outer_item_id = dim_product.second_code
where tmp.outer_item_id is not null and tmp.outer_item_id  <> '';



-- create table print_item(
-- 	outer_item_id varchar,
-- 	sum_cnt int,
-- 	sum_price double,
-- 	cnt_order BIGINT
-- )with(
-- 	type='print'
-- );

-- insert into print_item 
-- select odt.outer_item_id, -- 商品ID
-- sum(odt.num) as sum_cnt, -- 商品累计个数
-- sum(odt.order_payment) as sum_price, -- 商品合计金额
-- count(odt.tid) as cnt_order -- 主订单数
-- from (
-- 			SELECT
-- 			*,
-- 			row_number() over ( PARTITION BY oid ORDER BY update_time DESC ) AS rn 
-- 		FROM
-- 			src_youzan_order 
-- 		WHERE
-- 			`type` = 'trade_TradeBuyerPay' -- 类型为用户已支付
-- 			and SUBSTRING( pay_time, 1, 10 ) = CAST( CURRENT_DATE AS VARCHAR ) 
-- ) odt where odt.rn=1
-- group by odt.outer_item_id;




-- insert into dc_xxx_product_real
-- select 
--  CAST( CURRENT_DATE AS VARCHAR )  as `date`,
--  13 as channel_id,
--  '无' as `group`,
-- youzan_product.outer_item_id as product_id,
-- case when `ip` is null then '无' else `ip` end as `ip`, 
-- case when big_category_name is null then '无' else big_category_name end as big_category_name, 
-- case when middle_name is null then '无' else middle_name end as middle_name, 
-- case when product_type is null then '无' else product_type end as product_type, 
-- case when simple_name is null then '无' else simple_name end as series, 
-- CASE WHEN subdivide is null then '无' else subdivide end as subdivide, 
-- case when dim_product.`name` is null then '无' else dim_product.`name` end as `product_name`,
-- youzan_product.sum_price as sum_price,
-- CAST(youzan_product.sum_cnt as BIGINT) as  sum_cnt,
-- CAST(youzan_product.cnt_order as BIGINT) as cnt_order
-- from(
-- select order_distinct.outer_item_id, -- 商品ID
-- sum(order_distinct.num) as sum_cnt, -- 商品累计个数
-- sum(order_distinct.order_payment) as sum_price, -- 商品合计金额
-- count(tid) as cnt_order -- 主订单数
-- from order_distinct group by order_distinct.outer_item_id
-- ) youzan_product
-- left join 
--  dim_product FOR SYSTEM_TIME AS OF PROCTIME () 
--  on youzan_product.outer_item_id = dim_product.second_code
--  where youzan_product.outer_item_id is NOT NULL;



-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------



-- create table test_pro (

-- outer_sku_id VARCHAR,
-- sum_cnt BIGINT,
-- sum_price DOUBLE,
-- cnt_order BIGINT
-- )WITH(
-- 	type = 'print'
-- );


-- -- 测试
-- insert into test_pro
-- select order_distinct.outer_sku_id, -- 商品ID
-- CAST(sum(order_distinct.num) as  BIGINT) as sum_cnt, -- 商品累计个数
-- sum(order_distinct.order_payment) as sum_price, -- 商品合计金额
-- CAST(count(tid) as BIGINT) as cnt_order -- 主订单数
-- from order_distinct group by order_distinct.outer_sku_id




 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值