SummingMergeTree 只能实现相加,若要实现其他聚合效果则使用AggregatingMergeTree
- 使用ORDER BY排序键作为聚合数据的条件Key
- 以分区为单位进行聚合,只在分区合并时触发
- 数据不能通过普通的insert插入,而只能通过insert into select
一般使用
- 建表:AggregateFunction( ck 自带聚合函数,数据类型)
- 写入数据时,需要调用 xxState
- 读取数据时,需要调用 xxMerge
CREATE TABLE agg_table
(
id String,
code AggregateFunction(uniq, String),
v1 AggregateFunction(sum, UInt32),
time DateTime
) ENGINE = AggregatingMergeTree()
PARTITION BY toYYYYMM(time)
ORDER BY (id)
PRIMARY KEY id;
INSERT INTO TABLE agg_table SELECT '001', uniqState('C1'), sumState(toUInt32(10)), toDateTime('2021-09-21 14:00:00')
INSERT INTO TABLE agg_table SELECT '001', uniqState('C1'), sumState(toUInt32(20)), toDateTime('2021-09-22 15:00:00')
SELECT id, uniqMerge(code), sumMerge(v1) FROM agg_table group by id;
┌─id──┬─uniqMerge(code)─┬─sumMerge(v1)─┐
│ 001 │ 1 │ 30 │
└─────┴─────────────────┴──────────────┘
生产使用
生产环境通常会使用物化视图
# 创建一张基础表
CREATE TABLE agg_table_base
(
id String,
code String,
v1 UInt32,
time DateTime
) ENGINE = MergeTree()
PARTITION BY toYYYYMM(time)
ORDER BY (id)
PRIMARY KEY id;
# 在基础表的基础上创建一张物化视图
CREATE MATERIALIZED VIEW view_agg_table
ENGINE = AggregatingMergeTree()
PARTITION BY toYYYYMM(time)
ORDER BY (id)
PRIMARY KEY id
AS
SELECT id, uniqState(code) as code, sumState(v1) as v1, time
FROM agg_table_base
GROUP BY id, time;
# 向基础表插入数据的同时,会更新物化视图
INSERT INTO agg_table_base
VALUES ('001', 'C1',10, toDateTime('2021-09-21 14:00:00')),
('001', 'C1',20, toDateTime('2021-09-22 15:00:00')),
('001', 'C2',20, toDateTime('2021-09-22 17:00:00')),
('001', 'C1',10, toDateTime('2021-10-01 17:00:00')),
('002', 'C3',30, toDateTime('2021-10-03 15:00:00'));
# 查询的方式一样
SELECT id, uniqMerge(code), sumMerge(v1) FROM view_agg_table group by id;
┌─id──┬─uniqMerge(code)─┬─sumMerge(v1)─┐
│ 002 │ 1 │ 30 │
│ 001 │ 2 │ 60 │
└─────┴─────────────────┴──────────────┘