sql语句
create table temp.tmp_c1a62d85598c47e0a23a4620348385f4xray202011181 STORED AS TEXTFILE as
select userid
from
(select userid
,count(distinct poolid) as pool_cnt
from dwd_pay_market
where poolid in (1839,1840,1841,1842,1843,1844)
and refund=1
group by userid
) e
where pool_cnt>=6
报错信息:
Error: Error while compiling statement: FAILED: SemanticException Line 0:-1 Invalid column reference 'poolid' (state=42000,code=40000)
解决过程:
- 通过报错信息发现这个语句是在编译期间报错的,但是单独拉出来select语句执行并没有报错。怀疑是hql进行了编译期间的优化导致。
- 在网上google了一把并没有发现很多类似的情况。
- 通过多次尝试后发现在内层语句中使用count distinct ,distinct的时候才会发现这个问题。(外层查询的where条件中使用了内层查询的字段,用到的字段必须在外层select出来)
解决方案:
-
set hive.cbo.enable=false 避免在编译器进行sql优化
-
尽量用group by再count来代替count(distinct)
-
如果一定要用count(distinct) 可以使用having语句,或者在外层再嵌套一层把用到的字段在外层select出来。
-- 正确解法1
drop table temp.tmp_dmp_c1a62d85598c47e0a23a4620348385f4xray20201118;
create table temp.tmp_dmp_c1a62d85598c47e0a23a4620348385f4xray20201118 STORED AS TEXTFILE as
select userid, pool_cnt
from
(select userid
,count(distinct poolid) as pool_cnt
from dwd_pay_market
where poolid in (1839,1840,1841,1842,1843,1844)
and refund=1
group by userid
) e
where pool_cnt>=6
-- 正确解法2
drop table temp.tmp_dmp_c1a62d85598c47e0a23a4620348385f4xray20201118;
create table temp.tmp_dmp_c1a62d85598c47e0a23a4620348385f4xray20201118 STORED AS TEXTFILE as
select userid, pool_cnt
from
(select userid
,count(distinct poolid) as pool_cnt
from dwd_pay_market
where poolid in (1839,1840,1841,1842,1843,1844)
and refund=1
group by userid
having count(distinct poolid) >=6
) e