doris实现数据聚合的三种方式--多明细聚合、物化视图与rollup

23 篇文章 6 订阅
8 篇文章 0 订阅

doris实现数据聚合的三种方式

假设以
①ds、hour为维度计算pv
②ds为维度计算pv

1、三种聚合方式
(1)聚合模型+数据源多次写入

flink写入kafka代码

insert into log_exp_pv
select ds
hour,
 device_id
from
  dwd_kafka_log
;

insert into log_exp_pv
select ds
'ALL' hour,
 device_id
from
  dwd_kafka_log
;

doris建表语句

create table log_exp_pv(
ds date,
hour varchar(1000),
pv bigint sum DEFAULT '0'
)
AGGREGATE KEY(ds,hour)
partition by range(ds)
(start('20220514') end ('20220520') every (INTERVAL 1 day))
distributed by hash(platform) buckets 32
PROPERTIES(
"dynamic_partition.enable" = "true",
"dynamic_partition.time_unit" = "DAY",
"dynamic_partition.start" = "-1",
"dynamic_partition.end" = "7",
"dynamic_partition.prefix" = "p",
"dynamic_partition.buckets" = "32",
"replication_num" = "1"
)
;

routine load 从kafka导入数据到doris

CREATE ROUTINE LOAD routine_load_log_exp_pv ON log_exp_pv
COLUMNS TERMINATED BY ",",
COLUMNS (
      ds
      ,hour
      ,pv = 1
      )
PROPERTIES
(
    "desired_concurrent_number"="3",
    "max_error_number"="0",
    "strict_mode" = "false",
    "format" = "json"
)
FROM KAFKA
(
    "kafka_broker_list"= "",
    "kafka_topic" = "log_exp_pv",
    "property.group.id"="routine_load_log_exp_pv"
);

(2)明细模型 + 物化视图
flink写入kafka代码

insert into log_exp_pv
select ds
hour,
 device_id
from
  dwd_kafka_log
;

doris建表语句

create table log_exp_detail(
ds date,
hour varchar(1000),
user_id bigint
)
duplicate key(ds,hour)
partition by range(ds)
(start('20220512') end ('20220520') every (INTERVAL 1 day))
distributed by hash(channel) buckets 32
PROPERTIES(
"dynamic_partition.enable" = "true",
"dynamic_partition.time_unit" = "DAY",
"dynamic_partition.start" = "-1",
"dynamic_partition.end" = "7",
"dynamic_partition.prefix" = "p",
"dynamic_partition.buckets" = "32",
"replication_num" = "1"
);

routine load 从kafka导入数据到doris

CREATE ROUTINE LOAD routine_load_log_exp_detail ON log_exp_detail
COLUMNS TERMINATED BY ",",
COLUMNS (
      ds
      ,hour
      ,user_id
      )
PROPERTIES
(
    "desired_concurrent_number"="3",
    "max_error_number"="0",
    "strict_mode" = "false",
    "format" = "json"
)
FROM KAFKA
(
    "kafka_broker_list"= "",
    "kafka_topic" = "log_exp_detail",
    "property.group.id"="routine_load_log_exp_detail"
);

创建物化视图

##ds维度
CREATE MATERIALIZED VIEW mv_ds AS 
SELECT ds
,count(user_id) pv
FROM log_exp_detail 
GROUP BY ds;
##ds、hour维度
CREATE MATERIALIZED VIEW mv_ds_hour AS 
SELECT ds
,hour
,count(user_id) pv
FROM log_exp_detail 
GROUP BY ds
,hour;

查询计划

explain
SELECT ds
,count(user_id) pv
FROM log_job_exp_detail 
GROUP BY ds;

在这里插入图片描述
(3)聚合模型 + rollup
doris建表语句

create table log_exp_pv(
ds date,
hour varchar(1000),
pv bigint sum DEFAULT '0'
)
AGGREGATE KEY(ds,hour)
partition by range(ds)
(start('20220514') end ('20220520') every (INTERVAL 1 day))
distributed by hash(platform) buckets 32
rollup (
r1(ds)
,r2(ds,hour)
)
PROPERTIES(
"dynamic_partition.enable" = "true",
"dynamic_partition.time_unit" = "DAY",
"dynamic_partition.start" = "-1",
"dynamic_partition.end" = "7",
"dynamic_partition.prefix" = "p",
"dynamic_partition.buckets" = "32",
"replication_num" = "1"
)
;

routine load 从kafka导入数据到doris

CREATE ROUTINE LOAD routine_load_log_exp_pv ON log_exp_pv
COLUMNS TERMINATED BY ",",
COLUMNS (
      ds
      ,hour
      ,user_id
      )
PROPERTIES
(
    "desired_concurrent_number"="3",
    "max_error_number"="0",
    "strict_mode" = "false",
    "format" = "json"
)
FROM KAFKA
(
    "kafka_broker_list"= "",
    "kafka_topic" = "log_exp_pv",
    "property.group.id"="routine_load_log_exp_pv"
);

查询计划

explain
SELECT ds
,count(user_id) pv
FROM log_exp_pv
GROUP BY ds;

在这里插入图片描述
2、三种导入方式对比

(1)聚合模型+数据源多次写入
优点:
①只需关心聚合模型即可
②存储为聚合模型,降低了数据量
缺点:
①假设维度为n,则数据量会膨胀2^n倍,导致数据导入压力增大
②维度过多时,明细数据开发复杂度增加
(2)明细模型 + 物化视图
优点:
①明细数据只需一份
②明细模型保留原始数据
缺点:
①存储为明细数据,导致存储数据量较大
②多维度时,物化视图需要建立多次
③物化视图语法有严格限制
(3)聚合模型 + rollup
优点:
①存储为聚合模型,降低了数据量
②建表时即可指定rollup,降低了开发复杂性
缺点:
①明细数据无法保留

  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值