question
tb1 : user_id, app_name, age,sex,online_time. 数据粒度是user_id + app_name。
app_name只有4个值:分别是淘宝、美团、拼多多、京东
使用一个sql计算出如下数据:
a.现在要按照这样一个规则计算用户年龄、性别:
根据app_name的值来确定——淘宝>美团>拼多多>京东,即如果用户在淘宝中的年龄性别不为NULL则取淘宝中的,如果为空则取美团中的,依此类推
数据:
a,淘宝,null,null,500
a,美团,28,m,400
a,拼多多,28,f,600
需求:
a,28,m
建表, 导入数据 :
create table if not exists tb1(
use_id string ,
app_names string ,
age int ,
gender string ,
sex string ,
online_time int
)
row format delimited fields terminated by "," ;
load data local inpath "/hive/data/1.txt" into table tb1 ;
1)按照需求对数据分类 判断
select
use_id ,
age ,
gender ,
case
when app_name = "淘宝" then 1
when app_name = "美团" then 2
when app_name = "拼多多" then 3
when app_name = "京东" then 4
as app_no
from
tb1
where age is not null and gender is not null;
2)对数据进行分区排序
select
use_id ,
age ,
gender ,
row_number() over(partition by use_id order by app_no) as aa
from
(select
use_id ,
age ,
gender ,
case
when app_names = "淘宝" then 1
when app_names = "美团" then 2
when app_names = "拼多多" then 3
when app_names = "京东" then 4
end as app_no
from
tb1
where age is not null and gender is not null ) t;
3)筛选出最终需求结果
select
use_id ,
age ,
gender
from
(select
use_id ,
age ,
gender ,
row_number() over(partition by use_id order by app_no) as aa
from
(select
use_id ,
age ,
gender ,
case
when app_names = "淘宝" then 1
when app_names = "美团" then 2
when app_names = "拼多多" then 3
when app_names = "京东" then 4
end as app_no
from
tb1
where age is not null and gender is not null ) t ) t2
where aa = 1 ;