基于doris实时数仓指标计算
1、流程图
2、计算字符uv字典表生成流程
涉及设备uv去重只能基于bigint类型
dau = to_bitmap(device_id)
需要建立设备字典表,将字符串设备转化为字典id做去重处理
因为常用指标周期只涉及近30日,因此字典表只保留近30日设备
3、涉及设备uv生成近N日聚合明细
4、doris聚合模型代码
--建表语句
create table log_platform_channel_dau(
ds date,
platform varchar(65535),
platform_name varchar(65535),
channel varchar(65535),
channel_name varchar(65535),
dau bitmap BITMAP_UNION,
new_dau bitmap BITMAP_UNION,
exp_dau bitmap BITMAP_UNION,
active_user bitmap BITMAP_UNION
)
aggregate key(ds,platform,platform_name,channel,channel_name)
partition by range(ds)
(start('20220428') end ('20220502') every (INTERVAL 1 day))
distributed by hash(channel) buckets 8
PROPERTIES(
"dynamic_partition.enable" = "true",
"dynamic_partition.time_unit" = "DAY",
"dynamic_partition.start" = "-1",
"dynamic_partition.end" = "7",
"dynamic_partition.prefix" = "p",
"dynamic_partition.buckets" = "32"
);
--ROUTINE实时导入
CREATE ROUTINE LOAD routine_load_log_platform_channel_dau ON log_platform_channel_dau
COLUMNS TERMINATED BY ",",
COLUMNS (ds
,platform
,platform_name
,channel
,channel_name
,device_id
,user_id
,is_new_device_id
,event_type
,dau = to_bitmap(device_id)
,new_dau = to_bitmap(if(is_new_device_id = '1',device_id,null))
,exp_dau = to_bitmap(if(event_type = '1',device_id,null))
,active_user = to_bitmap(user_id)
)
PROPERTIES
(
"desired_concurrent_number"="3",
"max_error_number"="0",
"strict_mode" = "false",
"format" = "json"
)
FROM KAFKA
(
"kafka_broker_list"= "",
"kafka_topic" = "log_platform_channel_dau",
"property.group.id"="routine_load_log_platform_channel_dau"
);
--查看导入状态
SHOW ALL ROUTINE LOAD;
或通过fe查看
http://localhost:8030/system
--停止导入
STOP ROUTINE LOAD FOR routine_load_log_platform_channel_dau
;
--查询语句
select ds,
platform,
platform_name,
channel,
channel_name,
count(distinct dau),
count(distinct new_dau),
count(distinct exp_dau),
count(distinct active_user) from log_platform_channel_dau
where `ds`= '2022-04-28'
group by ds,
platform,
platform_name,
channel,
channel_name
;
--查看执行计划
explain
select ds,
platform,
platform_name,
channel,
channel_name,
count(distinct dau),
count(distinct new_dau),
count(distinct exp_dau),
count(distinct active_user) from log_platform_channel_dau
where `ds`= '2022-04-28'
group by ds,
platform,
platform_name,
channel,
channel_name
;
--查看关键字
PREAGGREGATION: ON
on代表通过预聚合查询
ps1.ROUTINE LOAD任务报错后,要先停止,再开启导入
ps2.分区要先存在,再写入数据
列字段做转换时,要将原列先列举出,再做转换逻辑