表数据:https://pan.baidu.com/s/1giOajxNUaU_Lc4jzTL2UnA
提取码:cba4
hive练习题
- 建表:
- 1、找出顾客最常用的信用卡
- 2、找出客户资料中排名前五的的职位名称
- 3、在美国女性最常用的信用卡
- 4、按性别和国家进行客户统计/国家/工作/邮箱/语言/信用卡
- 5、计算每月总收入
- 6、计算每个季度的总收入
- 7、按年计算总收入
- 8、按工作日计算总收入
- 9、按时间段计算总收入
- 10、按时间段计算平均消费
- 11、按工作日计算平均消费
- 12、计算年、月、日的交易总数
- 13、找出交易量最大的10个客户
- 14、找出消费最多的前10位顾客
- 15、统计该期间交易数量最少的用户
- 16、计算每个季度的独立客户总数
- 17、计算每周的独立客户总数
- 18、计算整个活动客户平均花费的最大值
- 19、统计每月花费最多的客户
- 20、统计每月访问次数最多的客户
- 21、按总价找出最受欢迎的5种产品
- 22、根据购买频率找出最畅销的5种产品
- 23、根据客户数量找出最受欢迎的5种产品
- 24、按客流量找出最受欢迎的商店
- 25、根据顾客消费价格找出最受欢迎的商店
- 26、根据顾客交易情况找出最受欢迎的商店
- 27、根据商店和唯一的顾客id获取最受欢迎的产品
- 28、获取每个商店的员工与顾客比
- 29、按年和月计算每家店的收入
- 30、按店铺制作总收益饼图
- 31、找出每个商店最繁忙的时间段
- 32、找出每家店的忠实顾客
- 33、根据每位员工的最高收入找出明星商店
- 34、在ext_store_review中找出存在冲突的交易映射关系
- 35、了解客户评价的覆盖率
- 36、根据评分了解客户的分布情况
- 37、根据交易了解客户的分布情况
- 38、客户给出的最佳评价是否总是同一家门店
建表:
store_details表
create external table store_details(
store_id INT,
store_name STRING,
employee_number INT
)
row format delimited
fields terminated by ','
tblproperties("skip.header.line.count"="1");
load data local inpath '/root/kb08/hive/store/store_details.csv' into table store_details;
store_review表
create external table store_review(
transaction_id BIGINT,
store_id INT,
review_score INT
)
row format delimited
fields terminated by ','
tblproperties("skip.header.line.count"="1");
load data local inpath '/root/kb08/hive/store/store_review.csv' into table store_review;
transaction_details表
create external table transaction_details(
transaction_id BIGINT,
customer_id BIGINT,
store_id INT,
price DOUBLE,
product STRING,
date STRING,
time STRING
)
row format delimited
fields terminated by ','
tblproperties("skip.header.line.count"="1");
load data local inpath '/root/kb08/hive/store/transaction_details.csv' into table transaction_details;
customer_datails表
create table if not exists customer_datails(
customer_id INT,
first_name STRING,
last_name STRING,
email STRING,
gender STRING,
address STRING,
country string,
language STRING,
job STRING,
credit_type STRING,
credit_type STRING,
credit_no STRING
)
row format delimited fields terminated by ','
lines terminated by '\n'
tblproperties("skip.header.line.count"="1");
load data local inpath '/root/kb08/hive/store/customer_datails.csv' into table customer_datails;
练习题:
1、找出顾客最常用的信用卡
select credit_type from (
select credit_type,dense_rank() over(order by num DESC) rnk from (
select credit_type,count(1) num from customer_details group by credit_type order by num DESC
)T
)T
where rnk=1
2、找出客户资料中排名前五的的职位名称
select job from(
select job,dense_rank() over(order by num DESC) rnk from(
select job,count(1) num from customer_datails group by job order by num DESC
)T
)T
where rnk<=5
3、在美国女性最常用的信用卡
select credit_type from(
select credit_type,dense_rank() over(order by num DESC) rnk from (
select credit_type,count(1) num from customer_datails
where country='United States' and gender='Female'
group by credit_type order by num DESC
)T
)T
where rnk=1
4、按性别和国家进行客户统计/国家/工作/邮箱/语言/信用卡
select country,gender,concat_ws(',',collect_set(dn)) hdns from(
select country,gender,dn from(
select country,gender,dn,enum,rank() over(partition by country,gender) rnk from(
select country,gender,dn,count(1) enum from(
select country,gender,regexp_extract(email,'(\\w+)@(.+)',2) dn
from customer_datails
)T
group by country,gender,dn
)T
)T
where rnk=1
)T
group by country,gender
5、计算每月总收入
select month(to_date(regexp_replace(date,'/','-'))) month,sum(price) from transaction_details group by month(to_date(regexp_replace(date,'/','-'))) order by month
6、计算每个季度的总收入
select quarter,sum(price) from
(select ceil(month(to_date(regexp_replace(date,'/','-')))/3) quarter,price from transaction_details)r1
group by quarter
7、按年计算总收入
select year(to_date(regexp_replace(date,'/','-'))) year,sum(price) from transaction_details group by year(to_date(regexp_replace(date,'/','-'))) order by year
8、按工作日计算总收入
周日:1
周一:2
周二:3
周三:4
周四:5
周五:6
周六:7
select dayofweek,sum
from
(select dayofweek(to_date(regexp_replace(date,'/','-'))) dayofweek,sum(price) sum
from transaction_details
group by dayofweek(to_date(regexp_replace(date,'/','-'))))r1
where dayofweek>=2 and dayofweek<=6
9、按时间段计算总收入
select cast(substr(time,0,instr(time,':')-1) as INT) hour,sum(price) sum_prive from transaction_details group by substr(time,0,instr(time,':')-1) order by hour
10、按时间段计算平均消费
select cast(substr(time,0,instr(time,':')-1) as INT) hour,avg(price) avg_price from transaction_details group by substr(time,0,instr(time,':')-1) order by hour
11、按工作日计算平均消费
select dayofweek,sum
from
(select dayofweek(to_date(regexp_replace(date,'/','-'))) dayofweek,avg(price) avg_price
from transaction_details
group by dayofweek(to_date(regexp_replace(date,'/','-'))))r1
where dayofweek>=2 and dayofweek<=6
12、计算年、月、日的交易总数
with
r1 as (select year(to_date(regexp_replace(date,'/','-'))) time,count(transaction_id) from transaction_details group by year(to_date(regexp_replace(date,'/','-'))) order by time DESC),
r2 as (select month(to_date(regexp_replace(date,'/','-'))) time,count(transaction_id) from transaction_details group by month(to_date(regexp_replace(date,'/','-'))) order by time DESC),
r3 as (select day(to_date(regexp_replace(date,'/','-'))) time,count(transaction_id) from transaction_details group by day(to_date(regexp_replace(date,'/','-'))) order by time DESC)
select * from r1 union all select * from r2 union all select * from r3
13、找出交易量最大的10个客户
select customer_id,num,rnk from
(select customer_id,dense_rank() over(order by num DESC) rnk from
(select customer_id,count(1) num from transaction_details group by customer_id)r1)r2
where rnk<=10
14、找出消费最多的前10位顾客
select customer_id,,sum_pricernk from
(select customer_id,dense_rank() over(order by sum_price DESC) rnk from
(select customer_id,sum(price) sum_price from transaction_details group by customer_id)r1)r2
where rnk<=10
15、统计该期间交易数量最少的用户
select customer_id,rnk from
(select customer_id,dense_rank() over(order by num) rnk from
(select customer_id,count(1) num from transaction_details group by customer_id)r1)r2
where rnk=1
16、计算每个季度的独立客户总数
select quarter,count(customer_id) num from(
select case when month<=3 then 1 when month<=6 then 2 when month<=9 then 3 else 4 end quarter,customer_id from
(
select month(to_date(regexp_replace(date,'/','-'))) month,customer_id from transaction_details
)r1 group by month,customer_id)r2 group by quarter
17、计算每周的独立客户总数
select weekofyear,count(customer_id) num from
(select weekofyear,customer_id from
(select weekofyear(to_date(regexp_replace(date,'/','-'))) weekofyear,customer_id from transaction_details
)r1 group by weekofyear,customer_id)r2 group by weekofyear
18、计算整个活动客户平均花费的最大值
select customer_id,avg_price,rnk from
(select customer_id,avg_price,dense_rank() over(order by avg_price DESC) rnk from
(select customer_id,avg(price) avg_price from transaction_details group by customer_id)r1)r2 where rnk=1
19、统计每月花费最多的客户
select month,customer_id,sum_price,rnk
from
(
select month,customer_id,sum_price,dense_rank() over(partition by month order by sum_price DESC) rnk
from
(
select month,customer_id,sum(price) sum_price
from
(select month(to_date(regexp_replace(date,'/','-'))) month,customer_id,price from transaction_details
)r1 group by month,customer_id
)r2)r3
where rnk=1
20、统计每月访问次数最多的客户
select month,customer_id,rnk from
(
select month,customer_id,dense_rank() over(partition by month order by num DESC) rnk from
(
select month,customer_id,count(transaction_id) num from
(
select
month(to_date(regexp_replace(date,'/','-'))) month,
customer_id,transaction_id from
transaction_details
)r1
group by month,customer_id
)r2)r3
where rnk=1
21、按总价找出最受欢迎的5种产品
select product,sum_price,rnk from
(
select product,sum_price,dense_rank() over(order by sum_price DESC) rnk from
(
select product,sum(price) sum_price from transaction_details group by product
)r1)r2
where rnk<=5
22、根据购买频率找出最畅销的5种产品
select product,num,rnk from
(
select product,num,dense_rank() over(order by num DESC) rnk from
(
select product,count(transaction_id) num from transaction_details group by product
)r1)r2
where rnk<=5
23、根据客户数量找出最受欢迎的5种产品
select product,num,rnk from
(
select product,num,dense_rank() over(order by num DESC) rnk from
(
select product,count(customer_id) num from transaction_details group by product
)r1)r2
where rnk<=5
24、按客流量找出最受欢迎的商店
select store_id,num,rnk from
(select store_id,num,dense_rank() over(order by num DESC) rnk
from(
select store_id,count(transaction_id) num from transaction_details group by store_id
)r1)r2 where rnk=1
25、根据顾客消费价格找出最受欢迎的商店
select store_id,sum(price) sum_price from transaction_details group by store_id order by sum_price DESC limit 1
select store_id,sum_price,rnk from
(
select store_id,sum_price,dense_rank() over(order by sum_price) rnk from
(
select store_id,sum(price) sum_price from transaction_details group by store_id
)r1)r2
where rnk=1
26、根据顾客交易情况找出最受欢迎的商店
select store_id,rnk from
(select store_id,dense_rank() over(order by num) rnk from
(select store_id,count(transaction_id) num from transaction_details group by store_id)r1)r2
where rnk =1
27、根据商店和唯一的顾客id获取最受欢迎的产品
select store_id,product,rnk from
(select store_id,product,dense_rank() over(partition by store_id order by num DESC) rnk from
(select store_id,product,count(distinct customer_id) num from transaction_details group by store_id,product)r1)r2 where rnk = 1
28、获取每个商店的员工与顾客比
select S.store_id,customer_num,employee_number,customer_num/employee_number as `%` from
(
select store_id,count(customer_id) customer_num from transaction_details group by store_id
)r1
inner join store_details S
on S.store_id=r1.store_id
29、按年和月计算每家店的收入
select store_id,year,month,sum(price) sum_price from
(
select store_id,
year(to_date(regexp_replace(date,'/','-'))) year,
month(to_date(regexp_replace(date,'/','-'))) month,price
from transaction_details
)r1
group by store_id,year,month
30、按店铺制作总收益饼图
select store_id,sum(price) sum_price from transaction_details group by store_id
31、找出每个商店最繁忙的时间段
select store_id,hours,sum,rnk
from
(select store_id,hours,sum,dense_rank() over(partition by store_id order by sum DESC) rnk
from
(select
store_id,
case when hour<=6 then 1 when hour<=12 then 2 when hour<=18 then 3 else 4 end hours,
sum(num) sum
from
(select
store_id,
cast(substr(time,0,instr(time,':')-1)as INT) hour,
count(transaction_id) num
from
transaction_details
group by store_id,cast(substr(time,0,instr(time,':')-1)as INT)
)r1
group by
store_id,case when hour<=6 then 1 when hour<=12 then 2 when hour<=18 then 3 else 4 end)r2
)r3
where rnk=1
32、找出每家店的忠实顾客
select store_id,customer_id,rnk from
(
select store_id,customer_id,dense_rank() over(partition by store_id order by num DESC) rnk from
(
select store_id,customer_id,count(*) num from transaction_details group by store_id,customer_id
)r1)r2
where rnk=1
33、根据每位员工的最高收入找出明星商店
select r3.store_id,sum_price,employee_number,wage,rnk from
(select r2.store_id,sum_price,employee_number,wage,dense_rank() over(order by wage DESC) rnk from
(select S.store_id,sum_price,employee_number,(sum_price/employee_number)as wage from
(select store_id,sum(price) sum_price from transaction_details group by store_id)r1
inner join store_details S
on r1.store_id=S.store_id)r2)r3 where rnk=1
34、在ext_store_review中找出存在冲突的交易映射关系
select transaction_id from transaction_details group by transaction_id having count(1)>1
35、了解客户评价的覆盖率
select store_id,count(1) from v_store_review group by store_id
36、根据评分了解客户的分布情况
select store_id,review_score,count(1) from v_store_review group by store_id,review_score
37、根据交易了解客户的分布情况
select store_id,customer_id,count(1) from transaction_details group by store_id,customer_id
38、客户给出的最佳评价是否总是同一家门店
select store_id,count(1) from
(select store_id,review_score from v_store_review where review_score='5')r1
group by store_id