hiveQL复习

1. 统计类

pv: count(f1)
uv: count(distinct f1) | count(1) from ( select f1 group by f1)
生于理想

2. 多表join

3. 窗口函数

over(),开窗,并可自由控制窗口大小,其可以操作分组前的数据

order表

nameorderdatecost
jack2017-01-0546
jack2017-01-0855
jack2017-01-0110
jack2017-04-0642
jack2017-02-0323
mart2017-04-1394
mart2017-04-1175
mart2017-04-0968
mart2017-04-0862
neil2017-05-1012
neil2017-06-1280
tony2017-01-0429
tony2017-01-0215
tony2017-01-0750
select
sum(cost)   -- 窗口范围是整个表
from
tb_order ;

select
* ,
sum(cost)  over()  -- 开窗口范围是整个表
-- sum(cost)  over(rows between unbounded preceding and unbounded following)  -- 开窗口范围是整个表
from
tb_order ;

select
* ,
sum(cost)  over(partition by name)  -- 开窗口范围是分区
from
tb_order ;

select
* ,
sum(cost)  over(partition by name rows)  -- 开窗口范围是分区
from
tb_order ;

select
* ,
sum(cost)  over(partition by name rows between 1 preceding and current row )  -- 开窗口范围是分区内,但前行前一行,和当前行
from
tb_order ;

select
* ,
sum(cost)  over(partition by name rows between 1 preceding and 1 following )  -- 开窗口范围是分区内,但前行前一行,当前行和当前行后一行
from
tb_order ;

select
* ,
sum(cost)  over(partition by name order by cost)  -- 开窗口范围是分区,sum结果在分区内逐条累加
from
tb_order ;

3.1 求订单的总金额

3.2 求订单明细和总金额

3.3 求 最近三笔订单的总金额

3.4 求每个人的消费总额

3.5 求订单的总金额

3.6 求订单的总金额

4. 编号函数

  • RANK() 排序相同时会重复,总数不会变

如:
数据:1,3,5,5,6,7
编号:1,2,3,3,5,6

  • DENSE_RANK() 排序相同时会重复,总数会减少

如:
数据:1,3,5,5,6,7
编号:1,2,3,3,4,5

  • ROW_NUMBER() 会根据顺序计算

如:
数据:1,3,5,5,6,7
编号:1,2,3,4,5,6

5. 行列转换

表studen

stude.namestude.subjectstude.score
小叶语文97.0
小叶数学88.0
小叶英语99.0
小军数学98.0
小军英语68.0
小军语文95.0

表student

student.namestudent.mathstudent.englishstudent.chinse
小军98.068.095.0
小叶88.099.097.0

列转行 stude --> student

套路:收集列数据(巧妙设计好数据结构),拆分收集的数据,(k,v)关系的数据可用stt_to_map巧妙利用map字段

with temp as (
   select
      name,
      str_to_map(concat_ws(",", collect_list(
          concat_ws(":",subject, cast(score as string))
          ))) as info
   from stude
   group by name
)
select
     name ,
     info["数学"] math ,
     info["英语"] english,
     info["语文"] chinse
from temp;

行转列 student —> stude

套路,收集列数据(巧妙设计好数据结构) —> 侧窗炸裂(later view explode() tempTable as field)将列数据与行固定字段多次匹配

create table student as 
with temp as (
    select
       name,
       str_to_map(concat_ws(",", collect_list(
           concat_ws(":",subject, cast(score as string))
           ))) as info
    from stude
    group by name
)
with temp as (
    select
        name,
        split(concat_ws(",",concat_ws("_","math",math),concat_ws("_","english",english),concat_ws("_","chines",chines)), ",") as scores
    from
        student
)
select
       name,
       case split(subjects, "_")[0]
            when "math" then "数学"
            when "english" then "英语"
            when "chines" then "语文"
            end as subject,
       split(subjects, "_")[1]
from temp lateral view explode(scores)  tt as subjects
;

6. 下压和上压(lag,lead)

LAG(col,n):往后第n行数据 lag 参数一 字段 n
LEAD(col,n):往前第n行数据 lead

7.面试题

7.1 编写两段SQL来计算女性员工占比

position

company_idpeople_idstart_dateend_date
2a0a0700a3cf1c40a45c92839694390fcdab114a8710180dbafc1f61e4980a622019-04-30 0:00:00null
2a0a0700a3cf1c40a45c92839694390f9bdc35b14a5cbebfb4d6a3167ab5d6a22019-04-30 0:00:00null
2a0a0700a3cf1c40a45c92839694390f9da257186d8f76a21d531d0a565a4ecf2019-04-30 0:00:00null
2a0a0700a3cf1c40a45c92839694390f33a7d11fc538b5314154b99b2d09361e2019-04-30 0:00:00null
2a0a0700a3cf1c40a45c92839694390fd220601176d02766f20094ec47fb7e982019-04-30 0:00:00null
2a0a0700a3cf1c40a45c92839694390fdbfd5a2b5c7d7a184f0a7cd43a70b2db2019-04-30 0:00:00null
8a507c27a1d64dd429c919e424031a5151fb80e9410e8b10baad7f98013fd5c62017-09-25 0:00:002019-09-24 0:00:00
8a507c27a1d64dd429c919e424031a519de35bedd084cbb7cb60df1902515b082017-09-25 0:00:002019-09-24 0:00:00
8a507c27a1d64dd429c919e424031a51fd86bc39627f18ec98842c2afb9f317a2017-09-25 0:00:002019-09-24 0:00:00
8a507c27a1d64dd429c919e424031a51bd72d1d6bd9d47505bfa22ef61d96ded2017-09-25 0:00:002020-09-24 0:00:00
8a507c27a1d64dd429c919e424031a51b4cbd8418939c862e97c769e7a8fd60e2017-09-25 0:00:002020-09-24 0:00:00
8a507c27a1d64dd429c919e424031a5172cfb720b6b6fe1d0101b131a2ecf1982017-09-25 0:00:002020-09-24 0:00:00
8a507c27a1d64dd429c919e424031a51fe9e348160edbdcc4363a2f9f4de87f32017-09-25 0:00:002020-09-24 0:00:00

people表:

people_idpeople_namegender
8710180dbafc1f61eefcsadfe4980a62孙鹏MALE
9bdc35b14a5cbebfb4d6a3167ab5d6a2董艳FEMALE
9da257186d8f76a21d531d0a565a4ecf刘鸿玲FEMALE
33a7d11fc538b5314154b99b2d09361e黄智锋MALE
d220601176d02766f20094ec47fb7e98陈方汉FEMALE
dbfd5a2b5c7d7a184f0a7cd43a70b2db王辅MALE
51fb80e9410e8b10baad7f98013fd5c6孙颖FEMALE
9de35bedd084cbb7cb60df1902515b08胥雪瑾FEMALE
fd86bc39627f18ec98842c2afb9f317a沈岚FEMALE
bd72d1d6bd9d47505bfa22ef61d96ded龚佩FEMALE
b4cbd8418939c862e97c769e7a8fd60e郝妍燕FEMALE
72cfb720b6b6fe1d0101b131a2ecf198张来娣FEMALE
fe9e348160edbdcc4363a2f9f4de87f3曹建成MALE

7.1.1计算每家公司所有时间内的女性员工占比

在这里插入图片描述

with temp as (
    select
       company_id,
       gender,
       count(1) as count
    from people  join position  on people.people_id = position.people_id

    group by company_id,gender
),
 temp1 as (
    select
       company_id,
       count(1) total
    from position
    group by company_id
)
select
       gender,
       temp.company_id,
       temp.count / total
from temp left join temp1
on temp.company_id = temp1.company_id
;
-- 男和女的都算了,算女的话可以先计算出每个公司的女员工,在left join上公司总数即可得到
select
tt.company_id,
totalPeople,
ft.FamleTotalPeople,
ft.FamleTotalPeople / tt.totalPeople
from
(
select
       ps.company_id,
       gender,
       count(1) FamleTotalPeople

from
Position ps left join People pp on ps.people_id = pp.people_id
GROUP BY ps.company_id, gender having gender = " FEMALE"
) as ft

left join
(
select
ps.company_id,
count(1) totalPeople
from
position ps
GROUP BY ps.company_id
) as tt
on tt.company_id = ft.company_id

7.1.2计算每家公司每年的女性员工占比(当年是否在职条件: 任职区间是否包含该年最后一天)

可能用到一张记录了所有年份的表
|Year |Series Table

years
2012
2013
2014
2015
2016
2017
在这里插入图片描述

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值