详解 ClickHouse 的语法优化规则

ClickHouse 的 SQL 优化规则是基于 RBO(Rule Based Optimization)

一、count 优化

--1. count()、count(1) 和 count(*),且没有 where 条件,则会直接使用 system.tables 的 total_rows
EXPLAIN SELECT count()FROM datasets.hits_v1;

--2. count(column),没有 Optimized trivial count 优化
EXPLAIN SELECT count(CounterID) FROM datasets.hits_v1;

二、消除子查询重复字段

--编写的 sql 中查询了重复字段
EXPLAIN SYNTAX SELECT a.UserID, b.VisitID, a.URL, b.UserID FROM hits_v1 AS a
LEFT JOIN (
 SELECT
 	UserID,
 	UserID as uid,
 	VisitID
 FROM visits_v1
) AS b USING (UserID)
limit 3;

--在 hive 中会直接查询展示两个重复字段的值

--但在 clickhouse 中会优化去除重复字段
SELECT
 UserID,
 VisitID,
 URL,
 b.UserID
FROM hits_v1 AS a
ALL LEFT JOIN
(
 SELECT
 	UserID,
 	VisitID
 FROM visits_v1
) AS b USING (UserID)
LIMIT 3;

三、谓词下推

通俗的说是指提前过滤

--1. 当 group by 有 having 子句,但是没有 with cube、with rollup 或者 with totals 修饰的时,having 过滤会下推到 where 提前过滤
EXPLAIN SYNTAX SELECT UserID FROM hits_v1 GROUP BY UserID HAVING UserID = '8585742290196126178';

--返回优化语句
SELECT UserID FROM hits_v1 WHERE UserID = '8585742290196126178' GROUP BY UserID


--2. 子查询的谓词下推 (ps:hive 中子查询不会谓词下推)
EXPLAIN SYNTAX
SELECT *
FROM
(
 SELECT 
    UserID
 FROM visits_v1
)
WHERE UserID = '8585742290196126178'

--返回优化后的语句
SELECT UserID
FROM
(
 SELECT 
    UserID
 FROM visits_v1
 WHERE UserID = '8585742290196126178'
)
WHERE UserID = '8585742290196126178'


EXPLAIN SYNTAX
SELECT * FROM 
(
 SELECT
 	*
 FROM
 (
 	SELECT
 		UserID
 	FROM visits_v1
 )
 UNION ALL
 SELECT
 	* 
 FROM
 (
 	SELECT
 		UserID
 	FROM visits_v1
 )
)
WHERE UserID = '8585742290196126178'

--返回优化后的语句
SELECT UserID
FROM
(
 SELECT UserID
 FROM
 (
 	SELECT UserID
 	FROM visits_v1
 	WHERE UserID = '8585742290196126178'
 )
 WHERE UserID = '8585742290196126178'
 UNION ALL
 SELECT UserID
 FROM
 (
 	SELECT UserID
 	FROM visits_v1
 	WHERE UserID = '8585742290196126178'
 )
 WHERE UserID = '8585742290196126178'
)
WHERE UserID = '8585742290196126178'

四、聚合计算外推

--聚合函数内的计算,会外推 (ps:hive中不会外推)
EXPLAIN SYNTAX SELECT sum(UserID * 2) FROM visits_v1;

--返回优化后的语句
SELECT sum(UserID) * 2 FROM visits_v1

五、聚合函数消除

--对于无意义的聚合函数会优化消除
EXPLAIN SYNTAX
SELECT
 sum(UserID * 2),
 max(VisitID),
 max(UserID)
FROM visits_v1
GROUP BY UserID

--返回优化后的语句
SELECT
 sum(UserID) * 2,
 max(VisitID),
 UserID
FROM visits_v1
GROUP BY UserID

六、不同语法删除重复的 key

--order by
EXPLAIN SYNTAX
SELECT *
FROM visits_v1
ORDER BY
 UserID ASC,
 UserID ASC,
 VisitID ASC,
VisitID ASC

--返回优化后的语句:
select
	*
FROM visits_v1
ORDER BY
 UserID ASC,
VisitID ASC


--limit by
EXPLAIN SYNTAX
SELECT *
FROM visits_v1
LIMIT 3 BY
 VisitID,
 VisitID
LIMIT 10

--返回优化后的语句:
select
	*
FROM visits_v1
LIMIT 3 BY VisitID
LIMIT 10


--using
EXPLAIN SYNTAX
SELECT
 a.UserID,
 a.UserID,
 b.VisitID,
 a.URL,
 b.UserID
FROM hits_v1 AS a
LEFT JOIN visits_v1 AS b USING (UserID, UserID)

--返回优化后的语句:
SELECT
 UserID,
 UserID,
 VisitID,
 URL,
 b.UserID
FROM hits_v1 AS a
ALL LEFT JOIN visits_v1 AS b USING (UserID)

七、标量替换

--如果子查询只返回一行数据,在被引用的时候用标量替换
EXPLAIN SYNTAX
WITH
(
 SELECT sum(bytes)
 FROM system.parts
 WHERE active
) AS total_disk_usage
SELECT
 (sum(bytes) / total_disk_usage) * 100 AS table_disk_usage,
 table
FROM system.parts
GROUP BY table
ORDER BY table_disk_usage DESC
LIMIT 10;

--返回优化后的语句:
WITH CAST(0, 'UInt64') AS total_disk_usage
SELECT
 (sum(bytes) / total_disk_usage) * 100 AS table_disk_usage,
 table
FROM system.parts
GROUP BY table
ORDER BY table_disk_usage DESC
LIMIT 10

八、三元运算符优化

--开启 optimize_if_chain_to_multiif 参数,会进行三元运算符优化
EXPLAIN SYNTAX
SELECT number = 1 ? 'hello' : (number = 2 ? 'world' : 'atguigu')
FROM numbers(10)
settings optimize_if_chain_to_multiif = 1;

--返回优化后的语句:
SELECT multiIf(number = 1, 'hello', number = 2, 'world', 'atguigu')
FROM numbers(10) SETTINGS optimize_if_chain_to_multiif = 1
  • 10
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值