hive练习题(商店市场调查)

表数据:https://pan.baidu.com/s/1giOajxNUaU_Lc4jzTL2UnA
提取码:cba4

hive练习题

建表:

store_details表

create external table store_details(
store_id INT,
store_name STRING,
employee_number INT
)
row format delimited
fields terminated by ','
tblproperties("skip.header.line.count"="1");

load data local inpath '/root/kb08/hive/store/store_details.csv' into table store_details;

store_review表

create external table store_review(
transaction_id BIGINT,
store_id INT,
review_score INT
)
row format delimited
fields terminated by ','
tblproperties("skip.header.line.count"="1");

load data local inpath '/root/kb08/hive/store/store_review.csv' into table store_review;

transaction_details表

create external table transaction_details(
transaction_id BIGINT,
customer_id BIGINT,
store_id INT,
price DOUBLE,
product STRING,
date STRING,
time STRING
)
row format delimited
fields terminated by ','
tblproperties("skip.header.line.count"="1");

load data local inpath '/root/kb08/hive/store/transaction_details.csv' into table transaction_details;

customer_datails表

create table if not exists customer_datails(
	customer_id INT,
	first_name STRING,
	last_name STRING,
	email STRING,
	gender STRING,
	address STRING,
	country string,
	language STRING,
	job STRING,
	credit_type STRING,
	credit_type STRING,
	credit_no STRING
)
row format delimited fields terminated by ','
lines terminated by '\n'
tblproperties("skip.header.line.count"="1");

load data local inpath '/root/kb08/hive/store/customer_datails.csv' into table customer_datails;

练习题:

1、找出顾客最常用的信用卡

select credit_type from (
		select credit_type,dense_rank() over(order by num DESC) rnk from (
			select credit_type,count(1) num from customer_details group by credit_type order by num DESC
		)T
	)T
	where rnk=1

2、找出客户资料中排名前五的的职位名称

select job from(
	select job,dense_rank() over(order by num DESC) rnk from(
		select job,count(1) num from customer_datails group by job order by num DESC
		)T
	)T
where rnk<=5

3、在美国女性最常用的信用卡

select credit_type from(
	select credit_type,dense_rank() over(order by num DESC) rnk from (
		select credit_type,count(1) num from customer_datails 
		where country='United States' and gender='Female'
		group by credit_type order by num DESC
		)T
)T
where rnk=1

4、按性别和国家进行客户统计/国家/工作/邮箱/语言/信用卡

select country,gender,concat_ws(',',collect_set(dn)) hdns from(
	select country,gender,dn from(
		select country,gender,dn,enum,rank() over(partition by country,gender) rnk from(
			select country,gender,dn,count(1) enum from(
				select country,gender,regexp_extract(email,'(\\w+)@(.+)',2) dn
				from customer_datails
								)T
						group by country,gender,dn
					)T
			)T
where rnk=1
)T
group by country,gender

5、计算每月总收入

select month(to_date(regexp_replace(date,'/','-'))) month,sum(price) from transaction_details group by month(to_date(regexp_replace(date,'/','-'))) order by month

6、计算每个季度的总收入

select quarter,sum(price) from
	(select ceil(month(to_date(regexp_replace(date,'/','-')))/3) quarter,price from transaction_details)r1
	group by quarter

7、按年计算总收入

select year(to_date(regexp_replace(date,'/','-'))) year,sum(price) from transaction_details group by year(to_date(regexp_replace(date,'/','-'))) order by year

8、按工作日计算总收入

	周日:1
	周一:2
	周二:3
	周三:4
	周四:5
	周五:6
	周六:7
	select dayofweek,sum 
	from
	(select dayofweek(to_date(regexp_replace(date,'/','-'))) dayofweek,sum(price) sum 
	from transaction_details 
	group by dayofweek(to_date(regexp_replace(date,'/','-'))))r1 
	where dayofweek>=2 and dayofweek<=6

9、按时间段计算总收入

select cast(substr(time,0,instr(time,':')-1) as INT) hour,sum(price) sum_prive from transaction_details group by substr(time,0,instr(time,':')-1) order by hour

10、按时间段计算平均消费

select cast(substr(time,0,instr(time,':')-1) as INT) hour,avg(price) avg_price from transaction_details group by substr(time,0,instr(time,':')-1) order by hour

11、按工作日计算平均消费

select dayofweek,sum 
	from
	(select dayofweek(to_date(regexp_replace(date,'/','-'))) dayofweek,avg(price) avg_price 
	from transaction_details 
	group by dayofweek(to_date(regexp_replace(date,'/','-'))))r1 
	where dayofweek>=2 and dayofweek<=6

12、计算年、月、日的交易总数

with
	r1 as (select year(to_date(regexp_replace(date,'/','-'))) time,count(transaction_id) from transaction_details group by year(to_date(regexp_replace(date,'/','-'))) order by time DESC),
	r2 as (select month(to_date(regexp_replace(date,'/','-'))) time,count(transaction_id) from transaction_details group by month(to_date(regexp_replace(date,'/','-'))) order by time DESC),
	r3 as (select day(to_date(regexp_replace(date,'/','-'))) time,count(transaction_id) from transaction_details group by day(to_date(regexp_replace(date,'/','-'))) order by time DESC)
	select * from r1 union all select * from r2 union all select * from r3

13、找出交易量最大的10个客户

select customer_id,num,rnk from
	(select customer_id,dense_rank() over(order by num DESC) rnk from
	(select customer_id,count(1) num from transaction_details group by customer_id)r1)r2
	where rnk<=10

14、找出消费最多的前10位顾客

select customer_id,,sum_pricernk from
	(select customer_id,dense_rank() over(order by sum_price DESC) rnk from
	(select customer_id,sum(price) sum_price from transaction_details group by customer_id)r1)r2
	where rnk<=10

15、统计该期间交易数量最少的用户

select customer_id,rnk from
	(select customer_id,dense_rank() over(order by num) rnk from
	(select customer_id,count(1) num from transaction_details group by customer_id)r1)r2
	where rnk=1

16、计算每个季度的独立客户总数

select quarter,count(customer_id) num from(
	select case when month<=3 then 1 when month<=6 then 2 when month<=9 then 3 else 4 end quarter,customer_id from
	(
	select month(to_date(regexp_replace(date,'/','-'))) month,customer_id from transaction_details
	)r1 group by month,customer_id)r2 group by quarter

17、计算每周的独立客户总数

select weekofyear,count(customer_id) num from
	(select weekofyear,customer_id from
	(select weekofyear(to_date(regexp_replace(date,'/','-'))) weekofyear,customer_id from transaction_details
	)r1 group by weekofyear,customer_id)r2 group by weekofyear

18、计算整个活动客户平均花费的最大值

select customer_id,avg_price,rnk from
	(select customer_id,avg_price,dense_rank() over(order by avg_price DESC) rnk from
	(select customer_id,avg(price) avg_price from transaction_details group by customer_id)r1)r2 where rnk=1

19、统计每月花费最多的客户

select month,customer_id,sum_price,rnk
	from
	(
	select month,customer_id,sum_price,dense_rank() over(partition by month order by sum_price DESC) rnk
	from
	(
	select month,customer_id,sum(price) sum_price
	from
	(select month(to_date(regexp_replace(date,'/','-'))) month,customer_id,price from transaction_details
	)r1 group by month,customer_id
	)r2)r3
	where rnk=1

20、统计每月访问次数最多的客户

select month,customer_id,rnk from
	(
	select month,customer_id,dense_rank() over(partition by month order by num DESC) rnk from
	(
	select month,customer_id,count(transaction_id) num from
	(
	select 
	month(to_date(regexp_replace(date,'/','-'))) month,
	customer_id,transaction_id from 
	transaction_details
	)r1
	group by month,customer_id
	)r2)r3
	where rnk=1

21、按总价找出最受欢迎的5种产品

select product,sum_price,rnk from
	(
	select product,sum_price,dense_rank() over(order by sum_price DESC) rnk from
	(
	select product,sum(price) sum_price from transaction_details group by product
	)r1)r2
	where rnk<=5

22、根据购买频率找出最畅销的5种产品

select product,num,rnk from
	(
	select product,num,dense_rank() over(order by num DESC) rnk from
	(
	select product,count(transaction_id) num from transaction_details group by product
	)r1)r2
	where rnk<=5

23、根据客户数量找出最受欢迎的5种产品

select product,num,rnk from
	(
	select product,num,dense_rank() over(order by num DESC) rnk from
	(
	select product,count(customer_id) num from transaction_details group by product
	)r1)r2
	where rnk<=5

24、按客流量找出最受欢迎的商店

select store_id,num,rnk from
	(select store_id,num,dense_rank() over(order by num DESC) rnk
	from(
	select store_id,count(transaction_id) num from transaction_details group by store_id
	)r1)r2 where rnk=1

25、根据顾客消费价格找出最受欢迎的商店

select store_id,sum(price) sum_price from transaction_details group by store_id order by sum_price DESC limit 1
	
	select store_id,sum_price,rnk from 
	(
	select store_id,sum_price,dense_rank() over(order by sum_price) rnk from
	(
	select store_id,sum(price) sum_price from transaction_details group by store_id
	)r1)r2
	where rnk=1

26、根据顾客交易情况找出最受欢迎的商店

select store_id,rnk from
	(select store_id,dense_rank() over(order by num) rnk from
	(select store_id,count(transaction_id) num from transaction_details group by store_id)r1)r2
	where rnk =1

27、根据商店和唯一的顾客id获取最受欢迎的产品

select store_id,product,rnk from
	(select store_id,product,dense_rank() over(partition by store_id order by num DESC) rnk from
	(select store_id,product,count(distinct customer_id) num from transaction_details group by store_id,product)r1)r2 where rnk = 1

28、获取每个商店的员工与顾客比

select S.store_id,customer_num,employee_number,customer_num/employee_number as `%` from
	(
	select store_id,count(customer_id) customer_num from transaction_details group by store_id
	)r1
	inner join store_details S
	on S.store_id=r1.store_id

29、按年和月计算每家店的收入

select store_id,year,month,sum(price) sum_price from
	(
	select store_id,
	year(to_date(regexp_replace(date,'/','-'))) year,
	month(to_date(regexp_replace(date,'/','-'))) month,price
	from transaction_details 
	)r1
	group by store_id,year,month

30、按店铺制作总收益饼图

select store_id,sum(price) sum_price from transaction_details group by store_id

31、找出每个商店最繁忙的时间段

select store_id,hours,sum,rnk 
	from
		(select store_id,hours,sum,dense_rank() over(partition by store_id order by sum DESC) rnk
	from
		(select
			store_id,
			case when hour<=6 then 1 when hour<=12 then 2 when hour<=18 then 3 else 4 end hours,
			sum(num) sum
		from
			(select 
				store_id,
				cast(substr(time,0,instr(time,':')-1)as INT) hour,
				count(transaction_id) num 
			from 
				transaction_details
			group by store_id,cast(substr(time,0,instr(time,':')-1)as INT)
			)r1
		group by 
			store_id,case when hour<=6 then 1 when hour<=12 then 2 when hour<=18 then 3 else 4 end)r2
		)r3 
	where rnk=1

32、找出每家店的忠实顾客

select store_id,customer_id,rnk from
	(
	select store_id,customer_id,dense_rank() over(partition by store_id order by num DESC) rnk from
	(
	select store_id,customer_id,count(*) num from transaction_details group by store_id,customer_id
	)r1)r2
	where rnk=1

33、根据每位员工的最高收入找出明星商店

select r3.store_id,sum_price,employee_number,wage,rnk from
	(select r2.store_id,sum_price,employee_number,wage,dense_rank() over(order by wage DESC) rnk from
	(select S.store_id,sum_price,employee_number,(sum_price/employee_number)as wage from
	(select store_id,sum(price) sum_price from transaction_details group by store_id)r1
	inner join store_details S
	on r1.store_id=S.store_id)r2)r3 where rnk=1

34、在ext_store_review中找出存在冲突的交易映射关系

select transaction_id from transaction_details group by transaction_id having count(1)>1

35、了解客户评价的覆盖率

select store_id,count(1) from v_store_review group by store_id

36、根据评分了解客户的分布情况

select store_id,review_score,count(1) from v_store_review group by store_id,review_score

37、根据交易了解客户的分布情况

select store_id,customer_id,count(1) from transaction_details group by store_id,customer_id

38、客户给出的最佳评价是否总是同一家门店

select store_id,count(1) from
	(select store_id,review_score from v_store_review where review_score='5')r1
	group by store_id
©️2020 CSDN 皮肤主题: 1024 设计师:上身试试 返回首页