实战1
1、连续7天登录的用户
创建表和加载数据
vim /home/hadoop/data/ulogin.dat
1 2019-07-11 1
1 2019-07-12 1
1 2019-07-13 1
1 2019-07-14 1
1 2019-07-15 1
1 2019-07-16 1
1 2019-07-17 1
1 2019-07-18 1
2 2019-07-11 1
2 2019-07-12 1
2 2019-07-13 0
2 2019-07-14 1
2 2019-07-15 1
2 2019-07-16 0
2 2019-07-17 1
2 2019-07-18 0
3 2019-07-11 1
3 2019-07-12 1
3 2019-07-13 1
3 2019-07-14 0
3 2019-07-15 1
3 2019-07-16 1
3 2019-07-17 1
3 2019-07-18 1
-- 建表语句
-- Login log: one row per user per day.
create table ulogin(
uid int,    -- user id
dt date,    -- login date
status int  -- 1 = logged in that day, 0 = not logged in (query below filters status = 1)
)
row format delimited fields terminated by ' ';
-- Load data
load data local inpath '/home/hadoop/data/ulogin.dat' into
table ulogin;
sql
-- Users logged in for at least 7 consecutive days.
-- Trick: within a run of consecutive login dates, dt minus the user's
-- row_number() (ordered by dt) is a constant, so it works as a group id.
with t as (
    select uid,
           dt,
           date_sub(dt, row_number() over (partition by uid order by dt)) as gid
    from ulogin
    where status = 1  -- only days the user actually logged in
)
select uid, count(*) as days
from t
group by uid, gid
having count(*) >= 7;  -- original statement was missing its terminator
2、排名并求出分差
每班前三名,分数一样并列,同时求出前三名按名次排序的分差
创建表和加载数据
vim /home/hadoop/data/stu.dat
1 1901 90
2 1901 90
3 1901 83
4 1901 60
5 1902 66
6 1902 23
7 1902 99
8 1902 67
9 1902 87
create table stu(
sno int,      -- student number
class string, -- class id (e.g. 1901)
score int     -- exam score
)row format delimited fields terminated by ' ';
-- Load data
load data local inpath '/home/hadoop/data/stu.dat' into table
stu;
sql
-- Top 3 scores per class (ties share a rank via dense_rank), plus the gap
-- to the next-higher score. `rank` is a reserved word in newer Hive
-- versions, so the alias is renamed to rnk.
with t as (
    select sno,
           class,
           score,
           dense_rank() over (partition by class order by score desc) as rnk
    from stu
)
-- lag() here runs after the WHERE filter, so the gap is computed only
-- among the top-3 rows; nvl(..., 0) covers the first (highest) row.
-- NOTE(review): score - lag(score) is <= 0 (current minus the higher
-- previous score); flip the operands if a positive gap is wanted.
select t.sno,
       t.class,
       nvl(score - lag(score) over (partition by class order by score desc), 0) as diff
from t
where t.rnk <= 3;
3、行转列
创建表和加载数据
vim /home/hadoop/data/data1.dat
1 java
1 hadoop
1 hive
1 hbase
2 java
2 hive
2 spark
2 flink
3 java
3 hadoop
3 hive
3 kafka
create table row2line(
id string,     -- student id
course string  -- one course per row
)row format delimited fields terminated by ' ';
-- Load data (the original load statement was missing its terminating semicolon).
load data local inpath '/home/hadoop/data/data1.dat' into table
row2line;
sql
-- Row-to-column pivot: one row per id, one column per course holding the
-- number of matching rows (0 or 1 in this data set).
-- count() ignores NULLs, so CASE without ELSE counts only matching rows —
-- equivalent to sum(case ... then 1 else 0 end).
select id,
       count(case when course="java" then 1 end) as java,
       count(case when course="hadoop" then 1 end) as hadoop,
       count(case when course="hive" then 1 end) as hive,
       count(case when course="hbase" then 1 end) as hbase,
       count(case when course="spark" then 1 end) as spark,
       count(case when course="flink" then 1 end) as flink,
       count(case when course="kafka" then 1 end) as kafka
from row2line
group by id;
4、列转行
创建表和加载数据
vim /home/hadoop/data/data2.dat
a b 2
a b 1
a b 3
c d 6
c d 8
c d 8
实现效果
id1 id2 flag
a b 2|1|3
c d 6|8
sql
-- Column-to-string: join each group's flag values into "a|b|c".
-- NOTE(review): collect_set de-duplicates (matches the expected "6|8" above)
-- but does NOT guarantee element order; if duplicates and arrival order
-- matter, collect_list should be used instead — confirm intent.
select id1, id2, concat_ws("|", collect_set(cast (flag as
string))) flag
from rowline2
group by id1, id2;
逆向
-- Create table rowline3: materialize the pipe-joined flags so they can be
-- exploded back into rows below.
create table rowline3 as
select id1, id2, concat_ws("|", collect_set(cast (flag as
string))) flag
from rowline2
group by id1, id2;
-- Reverse direction: split "a|b|c" and emit one row per element.
-- split() takes a regex, so '|' must be escaped as "\\|" (bare '|' means alternation).
select id1, id2, newflag
from rowline3 lateral view explode(split(flag, "\\|")) t1 as
newflag;
实战2
针对销售数据,完成统计:
1. 按年统计销售额
2. 销售金额在 10W 以上的订单
3. 每年销售额的差值
4. 年度订单金额前 10 位(年度、订单号、订单金额、排名)
5. 季度订单金额前 10 位(年度、季度、订单 id、订单金额、排名)
6. 求所有交易日中订单金额最高的前 10 位
7. 每年度销售额最大的交易日
8. 年度最畅销的商品(即每年销售金额最大的商品)
日期表(dimdate)
dt date 日期
yearmonth int 年月
year smallint 年
month tinyint 月
day tinyint 日
week tinyint 周几
weeks tinyint 第几周
quat tinyint 季度
tendays tinyint 旬
halfmonth tinyint 半月
订单表(sale)
orderid string 订单号
locationid string 交易位置
dt date 交易日期
订单销售明细表(saledetail)
orderid string 订单号
rownum int 行号
itemid string 货品
num int 数量
price double 单价
amount double 金额
-- Rebuild the sale database from scratch. `if exists` keeps the drop from
-- failing on a fresh cluster; `cascade` drops any tables still inside it.
drop database if exists sale cascade;
create database if not exists sale;
-- Text-backed staging copy of the date dimension (loaded from tbDate.txt).
create table sale.dimdate_ori(
dt date,           -- calendar date
yearmonth int,     -- year-month, e.g. 201907
year smallint,
month tinyint,
day tinyint,
week tinyint,      -- day of week
weeks tinyint,     -- week of year
quat tinyint,      -- quarter
tendays tinyint,   -- ten-day period of the month
halfmonth tinyint )-- half of the month
row format delimited fields terminated by ",";
-- Text-backed staging copy of the order header table (loaded from tbSale.txt).
create table sale.sale_ori(
orderid string,    -- order number
locationid string, -- sales location
dt date )          -- transaction date
row format delimited fields terminated by ",";
-- Text-backed staging copy of the order line-item table (loaded from tbSaleDetail.txt).
create table sale.saledetail_ori(
orderid string, -- order number
rownum int,     -- line number within the order
goods string,   -- item id; NOTE(review): the spec above names this column `itemid`
num int,        -- quantity
price double,   -- unit price
amount double ) -- line amount
row format delimited fields terminated by ",";
-- Final (ORC-backed) date dimension; same schema as dimdate_ori.
create table sale.dimdate(
dt date,
yearmonth int,
year smallint,
month tinyint,
day tinyint,
week tinyint,
weeks tinyint,
quat tinyint,
tendays tinyint,
halfmonth tinyint )
stored as orc;
-- Final (ORC-backed) order header table; same schema as sale_ori.
create table sale.sale(
orderid string,
locationid string,
dt date )
stored as orc;
-- Final (ORC-backed) order line-item table; same schema as saledetail_ori.
create table sale.saledetail(
orderid string,
rownum int,
goods string,
num int,
price double,
amount double )
stored as orc;
-- Load the raw text files into the staging tables. The tables live in the
-- `sale` database, so the names must be qualified — the original unqualified
-- statements would fail unless `use sale;` had been run first.
load data local inpath "/home/hadoop/data/tbDate.txt" overwrite into table sale.dimdate_ori;
load data local inpath "/home/hadoop/data/tbSale.txt" overwrite into table sale.sale_ori;
load data local inpath "/home/hadoop/data/tbSaleDetail.txt" overwrite into table sale.saledetail_ori;
实战3
vim /home/hadoop/data/t1.dat
活塞,1990
公牛,1991
公牛,1992
公牛,1993
火箭,1994
火箭,1995
公牛,1996
公牛,1997
公牛,1998
马刺,1999
湖人,2000
湖人,2001
湖人,2002
马刺,2003
活塞,2004
马刺,2005
热火,2006
马刺,2007
凯尔特人,2008
湖人,2009
湖人,2010
-- NBA champions: one row per championship year.
create table t11(
team string, -- champion team
year int)    -- championship year
row format delimited fields terminated by ',';
load data local inpath "/home/hadoop/data/t1.dat" into table t11;
sh66688,9:35,29.48
sh66688,9:40,28.72
sh66688,9:45,27.74
sh66688,9:50,26.75
sh66688,9:55,27.13
sh66688,10:00,26.30
sh66688,10:05,27.09
sh66688,10:10,26.46
sh66688,10:15,26.11
sh66688,10:20,26.88
sh66688,10:25,27.49
sh66688,10:30,26.70
sh66688,10:35,27.57
sh66688,10:40,28.26
sh66688,10:45,28.03
sh66688,10:50,27.36
sh66688,10:55,26.48
sh66688,11:00,27.41
sh66688,11:05,26.70
sh66688,11:10,27.35
sh66688,11:15,27.35
sh66688,11:20,26.63
sh66688,11:25,26.35
sh66688,11:30,26.81
sh66688,13:00,29.45
sh66688,13:05,29.41
sh66688,13:10,29.10
sh66688,13:15,28.24
sh66688,13:20,28.20
sh66688,13:25,28.59
sh66688,13:30,29.49
sh66688,13:35,30.45
sh66688,13:40,30.31
sh66688,13:45,30.17
sh66688,13:50,30.55
sh66688,13:55,30.75
sh66688,14:00,30.03
sh66688,14:05,29.61
sh66688,14:10,29.96
sh66688,14:15,30.79
sh66688,14:20,29.82
sh66688,14:25,30.09
sh66688,14:30,29.61
sh66688,14:35,29.88
sh66688,14:40,30.36
sh66688,14:45,30.88
sh66688,14:50,30.73
sh66688,14:55,30.76
sh88888,9:35,67.23
sh88888,9:40,66.56
sh88888,9:45,66.73
sh88888,9:50,67.43
sh88888,9:55,67.49
sh88888,10:00,68.34
sh88888,10:05,68.13
sh88888,10:10,67.35
sh88888,10:15,68.13
sh88888,10:20,69.05
sh88888,10:25,69.82
sh88888,10:30,70.62
sh88888,10:35,70.59
sh88888,10:40,70.40
sh88888,10:45,70.29
sh88888,10:50,70.53
sh88888,10:55,70.92
sh88888,11:00,71.13
sh88888,11:05,70.24
sh88888,11:10,70.37
sh88888,11:15,69.79
sh88888,11:20,69.73
sh88888,11:25,70.52
sh88888,11:30,71.23
sh88888,13:00,72.85
sh88888,13:05,73.76
sh88888,13:10,74.72
sh88888,13:15,75.48
sh88888,13:20,75.80
sh88888,13:25,76.74
sh88888,13:30,77.22
sh88888,13:35,77.12
sh88888,13:40,76.90
sh88888,13:45,77.80
sh88888,13:50,78.75
sh88888,13:55,78.30
sh88888,14:00,78.68
sh88888,14:05,78.99
sh88888,14:10,78.35
sh88888,14:15,78.37
sh88888,14:20,78.07
sh88888,14:25,78.80
sh88888,14:30,79.78
sh88888,14:35,79.72
sh88888,14:40,80.71
sh88888,14:45,79.92
sh88888,14:50,80.49
sh88888,14:55,80.44
-- 5-minute stock quotes: (stock code, time of day, price).
create table t12(
id string,      -- stock code, e.g. sh66688
time string,    -- quote time "H:MM"/"HH:MM"; a string, so it sorts lexicographically
price double )  -- quote price
row format delimited fields terminated by ',';
-- NOTE(review): path '/root/data/t2.dat' differs from the '/home/hadoop/data/'
-- directory used by every other load in this file — confirm the file location.
load data local inpath "/root/data/t2.dat" into table t12;
-- For every quote row, attach the stock's lowest and highest price:
-- with the frame spanning the whole partition and the rows ordered by
-- price, first_value is the minimum price and last_value the maximum.
-- NOTE(review): if the intent is the *opening/closing* price instead, the
-- window must be ordered by trade time — and since `time` is a string,
-- "9:35" sorts after "10:00"; confirm the intended semantics.
select id, time,
first_value(price) over (partition by id order by price
rows between unbounded preceding and unbounded following) as
firstpv,
last_value(price) over (partition by id order by price
rows between unbounded preceding and unbounded following) as
lastpv
from t12;  -- original statement was missing its terminator