count(distinct ),在数据量大的情况下,容易数据倾斜,因为count(distinct)是按group by 字段分组,按distinct字段排序。
1.单个distinct
Select device_name,count(distinct imei) from TableA group by device_name;
使用group by替换:
Select device_name,count(imei)
from
(
select
device_name,
imei
from TableA
Group by device_name,imei
)a
group by device_name;
2.distinct 涉及两个字段:
select
device_name,
count(distinct imei)
count(distinct app_id)
from table1
group by device_name
第一种方法:
SELECT
device_name,
SUM(imei_cnt) AS imei_cnt,
SUM(app_id_cnt) AS app_id_cnt
FROM
(
select
device_name,
count(imei) as imei_cnt,
0, as app_id_cnt
from
(
select
device_name,
imei
from table1
group by device_name,imei
)a
group by device_name
UNION ALL
select
device_name,
0 as imei_cnt,
count(app_id) as app_id_cnt
from
(
select
device_name,
app_id
from table1
group by device_name,app_id
)a
group by device_name
)T
GROUP BY device_name;
第二种方法:
SELECT
a.device_name,
imei_cnt,
app_id_cnt
FROM
(
select
device_name,
count(imei) as imei_cnt
from
(
select
device_name,
imei
from table1
group by device_name,imei
)a
group by device_name
)a
JOIN
(
select
device_name,
count(app_id) as app_id_cnt
from
(
select
device_name,
app_id
from table1
group by device_name,app_id
)a
group by device_name
)b
ON a.device_name=b.device_name
亲测正确!!!