CASE...WHEN几种常见用法:
1. 编码转换
SELECT
IF(biz_type=1, '买卖', IF(biz_type=2, '租赁', '未知')) AS if_name
,CASE WHEN biz_type = 1 THEN '买卖'
WHEN biz_type = 2 THEN '租赁'
ELSE '未知'
END AS case_name
,COUNT(1) AS cnt
,SUM(1)
,MAX(start_time)
,MIN(start_time)
FROM tmp.my_example
GROUP BY
CASE WHEN biz_type = 1 THEN '买卖'
WHEN biz_type = 2 THEN '租赁'
ELSE '未知'
END
,IF(biz_type=1, '买卖', IF(biz_type=2, '租赁', '未知'))
这时查询结果如下图所示:
在进行分组时,对于自己指定的列,除了像上述代码进行分组,还可以根据分组条件所在的位置进行分组,这时需要在查询前设置hive相关参数,具体代码如下:
SET hive.groupby.orderby.position.alias = true;
SELECT
IF(biz_type=1, '买卖', IF(biz_type=2, '租赁', '未知')) AS if_name
,CASE WHEN biz_type = 1 THEN '买卖'
WHEN biz_type = 2 THEN '租赁'
ELSE '未知'
END AS case_name
,COUNT(1)
,SUM(1)
,MAX(start_time)
,MIN(start_time)
FROM tmp.my_example
GROUP BY 1, 2
这时结果如下图所示,
2.连续数据离散化
SELECT
CASE WHEN build_area > 0 and build_area < 90 THEN '小户型'
WHEN build_area >= 90 and build_area < 144 THEN '中户型'
WHEN build_area >= 144 THEN '大户型'
ELSE '未知'
END AS build_type_name
FROM tmp.my_table
WHERE ...
3. 分支计算
SELECT
pt
,SUM(CASE WHEN request_url regexp '/ershoufang/' THEN 1 END) ershoufang_pv
,SUM(CASE WHEN request_url regexp '^http://bj.fang.lianjia.com/' THEN 1 END) xinfang_pv
,SUM(CASE WHEN request_url regexp '/zufang/' THEN 1 END) zufang_pv
FROM tmp.mytable
WHERE pt = '20170425000000'
GROUP BY pt