hive(3)——高级-sql业务案例汇总

实战1 

1、连续7天登录的用户

创建表和加载数据

vim /home/hadoop/data/ulogin.dat
1 2019-07-11 1
1 2019-07-12 1
1 2019-07-13 1
1 2019-07-14 1
1 2019-07-15 1
1 2019-07-16 1
1 2019-07-17 1
1 2019-07-18 1
2 2019-07-11 1
2 2019-07-12 1
2 2019-07-13 0
2 2019-07-14 1
2 2019-07-15 1
2 2019-07-16 0
2 2019-07-17 1
2 2019-07-18 0
3 2019-07-11 1
3 2019-07-12 1
3 2019-07-13 1
3 2019-07-14 0
3 2019-07-15 1
3 2019-07-16 1
3 2019-07-17 1
3 2019-07-18 1

-- DDL: user login log, one row per (user, day).
create table ulogin(
uid int,      -- user id
dt date,      -- login date
status int    -- 1 = logged in that day, 0 = did not log in (per sample data)
)
row format delimited fields terminated by ' ';
-- Load the sample data from the local filesystem.
load data local inpath '/home/hadoop/data/ulogin.dat' into
table ulogin;

sql 

-- Users who logged in on >= 7 consecutive days.
-- Trick: for consecutive dates, dt minus the per-user row_number() is a
-- constant, so that difference (gid) identifies each unbroken login streak.
with t as (
    select uid,
           dt,
           date_sub(dt, row_number() over (partition by uid order by dt)) as gid
    from ulogin
    where status = 1   -- only successful logins extend a streak
)
select uid, count(*) as days   -- streak length (>= 7)
from t
group by uid, gid
having count(*) >= 7;

 

2、排名并求出分差

每班前三名,分数一样并列,同时求出前三名按名次排序的分差

创建表和加载数据

vim /home/hadoop/data/stu.dat

1 1901 90
2 1901 90
3 1901 83
4 1901 60
5 1902 66
6 1902 23
7 1902 99
8 1902 67
9 1902 87

-- DDL: student scores, one row per student.
create table stu(
sno int,        -- student number
class string,   -- class id (e.g. 1901)
score int       -- exam score
)row format delimited fields terminated by ' ';
-- Load the sample data from the local filesystem.
load data local inpath '/home/hadoop/data/stu.dat' into table
stu;

sql

-- Top 3 scores per class (ties share a rank via dense_rank), with the gap
-- to the next-higher score in the class (0 for the class leader).
-- Alias renamed from `rank` to `rnk`: RANK is a built-in window function
-- and a reserved identifier in newer Hive versions.
with ranked as (
    select sno, class, score,
           dense_rank() over (partition by class order by score desc) as rnk
    from stu
)
select sno,
       class,
       -- lag() is evaluated after the WHERE filter, so the gap is measured
       -- between consecutive rows of the top-3 set, as the task requires.
       nvl(score - lag(score) over (partition by class order by score desc), 0) as diff
from ranked
where rnk <= 3;

 

3、行转列

创建表和加载数据

vim /home/hadoop/data/data1.dat

1 java
1 hadoop
1 hive
1 hbase
2 java
2 hive
2 spark
2 flink
3 java
3 hadoop
3 hive
3 kafka

-- DDL: course enrollment facts, one (student id, course) pair per line.
create table row2line(
    id string,
    course string
) row format delimited fields terminated by ' ';
-- Load the sample data (original load statement was missing its terminator).
load data local inpath '/home/hadoop/data/data1.dat' into table row2line;
 

sql

-- Pivot (rows -> columns): one row per id, one 0/1-count column per course.
select id,
       sum(case course when "java"   then 1 else 0 end) as java,
       sum(case course when "hadoop" then 1 else 0 end) as hadoop,
       sum(case course when "hive"   then 1 else 0 end) as hive,
       sum(case course when "hbase"  then 1 else 0 end) as hbase,
       sum(case course when "spark"  then 1 else 0 end) as spark,
       sum(case course when "flink"  then 1 else 0 end) as flink,
       sum(case course when "kafka"  then 1 else 0 end) as kafka
from row2line
group by id;


4、列转行

创建表和加载数据

vim /home/hadoop/data/data2.dat

a b 2
a b 1
a b 3
c d 6
c d 8
c d 8

实现效果

id1 id2 flag
a b 2|1|3
c d 6|8

sql 

-- Columns -> row: collapse flag values into one "|"-joined string per (id1, id2).
-- NOTE(review): collect_set de-duplicates (6,8,8 -> "6|8", matching the expected
-- output above) but does not guarantee element order — confirm the "2|1|3"
-- ordering is acceptable, or switch to collect_list if order/duplicates matter.
select id1, id2, concat_ws("|", collect_set(cast (flag as
string))) flag
from rowline2
group by id1, id2;

逆向

-- Materialize the collapsed form as table rowline3.
create table rowline3 as
select id1, id2, concat_ws("|", collect_set(cast (flag as
string))) flag
from rowline2
group by id1, id2;

-- Reverse direction (row -> columns): split on a literal "|" — escaped
-- because split() takes a regex — and emit one output row per element.
select id1, id2, newflag
from rowline3 lateral view explode(split(flag, "\\|")) t1 as
newflag;

实战2

针对销售数据,完成统计:
1. 按年统计销售额
2. 销售金额在 10W 以上的订单
3. 每年销售额的差值
4. 年度订单金额前 10 位(年度、订单号、订单金额、排名)
5. 季度订单金额前 10 位(年度、季度、订单 id 、订单金额、排名)
6. 求所有交易日中订单金额最高的前 10 位
7. 每年度销售额最大的交易日
8. 年度最畅销的商品 ( 即每年销售金额最大的商品 )
表字段说明
日期表(dimdate)
dt date 日期
yearmonth int 年月
year smallint 年
month tinyint 月
day tinyint 日
week tinyint 周几
weeks tinyint 第几周
quat tinyint 季度
tendays tinyint 旬
halfmonth tinyint 半月

订单表(sale)
orderid string 订单号
locationid string 交易位置
dt date 交易日期

订单销售明细表(saledetail)
orderid string 订单号
rownum int 行号
itemid string 货品
num int 数量
price double 单价
amount double 金额
创建表

-- Rebuild the sale database from scratch. `if exists` keeps the script
-- re-runnable: a bare DROP DATABASE fails on a cluster where it was never created.
drop database if exists sale cascade;
create database if not exists sale;

-- Staging (plain text) copy of the date dimension; loaded from tbDate.txt below.
create table sale.dimdate_ori( 
dt date,             -- calendar date
yearmonth int,       -- year-month combined
year smallint, 
month tinyint, 
day tinyint, 
week tinyint,        -- day of week
weeks tinyint,       -- week of year
quat tinyint,        -- quarter
tendays tinyint,     -- ten-day period of the month
halfmonth tinyint )  -- first/second half of the month
row format delimited fields terminated by ","; 

-- Staging (plain text) copy of the orders table; loaded from tbSale.txt below.
create table sale.sale_ori( 
orderid string,      -- order number
locationid string,   -- transaction location
dt date )            -- transaction date
row format delimited fields terminated by ","; 

-- Staging (plain text) copy of the order line items; loaded from tbSaleDetail.txt.
-- NOTE(review): the field spec above calls this column `itemid`, but the DDL
-- uses `goods` — confirm which name downstream queries expect.
create table sale.saledetail_ori( 
orderid string,   -- order number
rownum int,       -- line number within the order
goods string,     -- item id
num int,          -- quantity
price double,     -- unit price
amount double )   -- line amount
row format delimited fields terminated by ","; 

-- Query-serving copy of the date dimension, stored as ORC for efficient scans.
create table sale.dimdate( 
dt date,
yearmonth int, 
year smallint, 
month tinyint, 
day tinyint, 
week tinyint, 
weeks tinyint, 
quat tinyint, 
tendays tinyint, 
halfmonth tinyint ) 
stored as orc; 

-- Query-serving copy of the orders table, stored as ORC.
create table sale.sale( 
orderid string, 
locationid string, 
dt date ) 
stored as orc; 

-- Query-serving copy of the order line items, stored as ORC.
create table sale.saledetail( 
orderid string, 
rownum int, 
goods string, 
num int, 
price double, 
amount double )
stored as orc;
导入数据
-- Load the raw text files into the staging tables. The tables live in the
-- `sale` database created above, so they are qualified explicitly instead of
-- relying on a prior `use sale;` (which this script never issues).
load data local inpath "/home/hadoop/data/tbDate.txt" overwrite into table sale.dimdate_ori; 

load data local inpath "/home/hadoop/data/tbSale.txt" overwrite into table sale.sale_ori; 

load data local inpath "/home/hadoop/data/tbSaleDetail.txt" overwrite into table sale.saledetail_ori;
1. 按年统计销售额
 
2. 销售金额在 10W 以上的订单
 
3. 每年销售额的差值
 
4. 年度订单金额前 10 位(年度、订单号、订单金额、排名)
 
5. 季度订单金额前 10 位(年度、季度、订单 id 、订单金额、排名)
 
6. 求所有交易日中订单金额最高的前 10 位
 
7. 每年度销售额最大的交易日
 
8. 年度最畅销的商品 ( 即每年销售金额最大的商品 )
 

实战3

vim /home/hadoop/data/t1.dat

活塞,1990
公牛,1991
公牛,1992
公牛,1993
火箭,1994
火箭,1995
公牛,1996
公牛,1997
公牛,1998
马刺,1999
湖人,2000
湖人,2001
湖人,2002
马刺,2003
活塞,2004
马刺,2005
热火,2006
马刺,2007
凯尔特人,2008
湖人,2009
湖人,2010

-- NBA championship history: one row per season winner (team, year).
create table t11( 
team string, 
year int)
row format delimited fields terminated by ',';

load data local inpath "/home/hadoop/data/t1.dat" into table t11;
 
 
sh66688,9:35,29.48
sh66688,9:40,28.72
sh66688,9:45,27.74
sh66688,9:50,26.75
sh66688,9:55,27.13
sh66688,10:00,26.30
sh66688,10:05,27.09
sh66688,10:10,26.46
sh66688,10:15,26.11
sh66688,10:20,26.88
sh66688,10:25,27.49
sh66688,10:30,26.70
sh66688,10:35,27.57
sh66688,10:40,28.26
sh66688,10:45,28.03
sh66688,10:50,27.36
sh66688,10:55,26.48
sh66688,11:00,27.41
sh66688,11:05,26.70
sh66688,11:10,27.35
sh66688,11:15,27.35
sh66688,11:20,26.63
sh66688,11:25,26.35
sh66688,11:30,26.81
sh66688,13:00,29.45
sh66688,13:05,29.41
sh66688,13:10,29.10
sh66688,13:15,28.24
sh66688,13:20,28.20
sh66688,13:25,28.59
sh66688,13:30,29.49
sh66688,13:35,30.45
sh66688,13:40,30.31
sh66688,13:45,30.17
sh66688,13:50,30.55
sh66688,13:55,30.75
sh66688,14:00,30.03
sh66688,14:05,29.61
sh66688,14:10,29.96
sh66688,14:15,30.79
sh66688,14:20,29.82
sh66688,14:25,30.09
sh66688,14:30,29.61
sh66688,14:35,29.88
sh66688,14:40,30.36
sh66688,14:45,30.88
sh66688,14:50,30.73
sh66688,14:55,30.76
sh88888,9:35,67.23
sh88888,9:40,66.56
sh88888,9:45,66.73
sh88888,9:50,67.43
sh88888,9:55,67.49
sh88888,10:00,68.34
sh88888,10:05,68.13
sh88888,10:10,67.35
sh88888,10:15,68.13
sh88888,10:20,69.05
sh88888,10:25,69.82
sh88888,10:30,70.62
sh88888,10:35,70.59
sh88888,10:40,70.40
sh88888,10:45,70.29
sh88888,10:50,70.53
sh88888,10:55,70.92
sh88888,11:00,71.13
sh88888,11:05,70.24
sh88888,11:10,70.37
sh88888,11:15,69.79
sh88888,11:20,69.73
sh88888,11:25,70.52
sh88888,11:30,71.23
sh88888,13:00,72.85
sh88888,13:05,73.76
sh88888,13:10,74.72
sh88888,13:15,75.48
sh88888,13:20,75.80
sh88888,13:25,76.74
sh88888,13:30,77.22
sh88888,13:35,77.12
sh88888,13:40,76.90
sh88888,13:45,77.80
sh88888,13:50,78.75
sh88888,13:55,78.30
sh88888,14:00,78.68
sh88888,14:05,78.99
sh88888,14:10,78.35
sh88888,14:15,78.37
sh88888,14:20,78.07
sh88888,14:25,78.80
sh88888,14:30,79.78
sh88888,14:35,79.72
sh88888,14:40,80.71
sh88888,14:45,79.92
sh88888,14:50,80.49
sh88888,14:55,80.44
-- Intraday 5-minute stock quotes: (stock id, time of day, price).
create table t12( 
id string, 
time string, 
price double )
row format delimited fields terminated by ','; 
-- NOTE(review): this path ("/root/data/") differs from the "/home/hadoop/data/"
-- prefix used everywhere else in this document — confirm where t2.dat lives.
load data local inpath "/root/data/t2.dat" into table t12;


-- For each stock id, attach the lowest (firstpv) and highest (lastpv) traded
-- price to every quote row. The explicit full-window frame matters: with the
-- default frame (RANGE ... CURRENT ROW), last_value would only see rows up to
-- the current one and return the current price instead of the maximum.
select id, time,
first_value(price) over (partition by id order by price
rows between unbounded preceding and unbounded following) as
firstpv,
last_value(price) over (partition by id order by price
rows between unbounded preceding and unbounded following) as
lastpv
from t12;

 

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值