5、场景题,如下所示为某APP软件用户的上线信息
姓名 24小时上线情况(从0点到23点)
张三 000000001001100000011000
李四 000000001001000000010011
王五 000000000001000000011110
…
使用hive获得结果如下
姓名 24小时上线情况
张三 8,11,12,19,20
李四 8,11,19,22,23
王五 11,19,20,21,22
使用posexplode
--建表
create table if not exists t9185
(
name string,
online string
)
row format delimited
fields terminated by ',';
load data local inpath '/root/hivedata/t9185.txt' into table t9185;
select *
from t9185;
--pos炸裂函数的使用
SELECT
name,concat_ws(',',collect_set(p)) hh
FROM (
SELECT
name,cast(p as string) p
FROM t9185 LATERAL VIEW POSEXPLODE(split(online,"")) t as p,h
WHERE h=1
) a
GROUP BY name
;
第二种解法
--需要注意使用一次窗口函数后顺序会颠倒所以里面用了两次row_number窗口函数
SELECT name,
concat_ws(',', collect_set(cast(rn1 as string))) hh
FROM (
SELECT *,
ROW_NUMBER() over (distribute by name sort by rn desc) - 1 rn1
FROM (
SELECT *,
row_number() over () rn
FROM (
SELECT *
FROM t9185 LATERAL VIEW EXPLODE(split(online, "")) t as h
) a
) b
) c
WHERE h = 1
GROUP BY name
;