hive练习题(商店市场调查)

表数据:https://pan.baidu.com/s/1giOajxNUaU_Lc4jzTL2UnA
提取码:cba4

hive练习题

建表:

store_details表

create external table store_details(
store_id INT,
store_name STRING,
employee_number INT
)
row format delimited
fields terminated by ','
tblproperties("skip.header.line.count"="1");

load data local inpath '/root/kb08/hive/store/store_details.csv' into table store_details;

store_review表

create external table store_review(
transaction_id BIGINT,
store_id INT,
review_score INT
)
row format delimited
fields terminated by ','
tblproperties("skip.header.line.count"="1");

load data local inpath '/root/kb08/hive/store/store_review.csv' into table store_review;

transaction_details表

create external table transaction_details(
transaction_id BIGINT,
customer_id BIGINT,
store_id INT,
price DOUBLE,
product STRING,
date STRING,
time STRING
)
row format delimited
fields terminated by ','
tblproperties("skip.header.line.count"="1");

load data local inpath '/root/kb08/hive/store/transaction_details.csv' into table transaction_details;

customer_datails表

create table if not exists customer_datails(
	customer_id INT,
	first_name STRING,
	last_name STRING,
	email STRING,
	gender STRING,
	address STRING,
	country string,
	language STRING,
	job STRING,
	credit_type STRING,
	credit_type STRING,
	credit_no STRING
)
row format delimited fields terminated by ','
lines terminated by '\n'
tblproperties("skip.header.line.count"="1");

load data local inpath '/root/kb08/hive/store/customer_datails.csv' into table customer_datails;

练习题:

1、找出顾客最常用的信用卡

select credit_type from (
		select credit_type,dense_rank() over(order by num DESC) rnk from (
			select credit_type,count(1) num from customer_details group by credit_type order by num DESC
		)T
	)T
	where rnk=1

2、找出客户资料中排名前五的的职位名称

select job from(
	select job,dense_rank() over(order by num DESC) rnk from(
		select job,count(1) num from customer_datails group by job order by num DESC
		)T
	)T
where rnk<=5

3、在美国女性最常用的信用卡

select credit_type from(
	select credit_type,dense_rank() over(order by num DESC) rnk from (
		select credit_type,count(1) num from customer_datails 
		where country='United States' and gender='Female'
		group by credit_type order by num DESC
		)T
)T
where rnk=1

4、按性别和国家进行客户统计/国家/工作/邮箱/语言/信用卡

select country,gender,concat_ws(',',collect_set(dn)) hdns from(
	select country,gender,dn from(
		select country,gender,dn,enum,rank() over(partition by country,gender) rnk from(
			select country,gender,dn,count(1) enum from(
				select country,gender,regexp_extract(email,'(\\w+)@(.+)',2) dn
				from customer_datails
								)T
						group by country,gender,dn
					)T
			)T
where rnk=1
)T
group by country,gender

5、计算每月总收入

select month(to_date(regexp_replace(date,'/','-'))) month,sum(price) from transaction_details group by month(to_date(regexp_replace(date,'/','-'))) order by month

6、计算每个季度的总收入

select quarter,sum(price) from
	(select ceil(month(to_date(regexp_replace(date,'/','-')))/3) quarter,price from transaction_details)r1
	group by quarter

7、按年计算总收入

select year(to_date(regexp_replace(date,'/','-'))) year,sum(price) from transaction_details group by year(to_date(regexp_replace(date,'/','-'))) order by year

8、按工作日计算总收入

	周日:1
	周一:2
	周二:3
	周三:4
	周四:5
	周五:6
	周六:7
	select dayofweek,sum 
	from
	(select dayofweek(to_date(regexp_replace(date,'/','-'))) dayofweek,sum(price) sum 
	from transaction_details 
	group by dayofweek(to_date(regexp_replace(date,'/','-'))))r1 
	where dayofweek>=2 and dayofweek<=6

9、按时间段计算总收入

select cast(substr(time,0,instr(time,':')-1) as INT) hour,sum(price) sum_prive from transaction_details group by substr(time,0,instr(time,':')-1) order by hour

10、按时间段计算平均消费

select cast(substr(time,0,instr(time,':')-1) as INT) hour,avg(price) avg_price from transaction_details group by substr(time,0,instr(time,':')-1) order by hour

11、按工作日计算平均消费

select dayofweek,sum 
	from
	(select dayofweek(to_date(regexp_replace(date,'/','-'))) dayofweek,avg(price) avg_price 
	from transaction_details 
	group by dayofweek(to_date(regexp_replace(date,'/','-'))))r1 
	where dayofweek>=2 and dayofweek<=6

12、计算年、月、日的交易总数

with
	r1 as (select year(to_date(regexp_replace(date,'/','-'))) time,count(transaction_id) from transaction_details group by year(to_date(regexp_replace(date,'/','-'))) order by time DESC),
	r2 as (select month(to_date(regexp_replace(date,'/','-'))) time,count(transaction_id) from transaction_details group by month(to_date(regexp_replace(date,'/','-'))) order by time DESC),
	r3 as (select day(to_date(regexp_replace(date,'/','-'))) time,count(transaction_id) from transaction_details group by day(to_date(regexp_replace(date,'/','-'))) order by time DESC)
	select * from r1 union all select * from r2 union all select * from r3

13、找出交易量最大的10个客户

select customer_id,num,rnk from
	(select customer_id,dense_rank() over(order by num DESC) rnk from
	(select customer_id,count(1) num from transaction_details group by customer_id)r1)r2
	where rnk<=10

14、找出消费最多的前10位顾客

select customer_id,,sum_pricernk from
	(select customer_id,dense_rank() over(order by sum_price DESC) rnk from
	(select customer_id,sum(price) sum_price from transaction_details group by customer_id)r1)r2
	where rnk<=10

15、统计该期间交易数量最少的用户

select customer_id,rnk from
	(select customer_id,dense_rank() over(order by num) rnk from
	(select customer_id,count(1) num from transaction_details group by customer_id)r1)r2
	where rnk=1

16、计算每个季度的独立客户总数

select quarter,count(customer_id) num from(
	select case when month<=3 then 1 when month<=6 then 2 when month<=9 then 3 else 4 end quarter,customer_id from
	(
	select month(to_date(regexp_replace(date,'/','-'))) month,customer_id from transaction_details
	)r1 group by month,customer_id)r2 group by quarter

17、计算每周的独立客户总数

select weekofyear,count(customer_id) num from
	(select weekofyear,customer_id from
	(select weekofyear(to_date(regexp_replace(date,'/','-'))) weekofyear,customer_id from transaction_details
	)r1 group by weekofyear,customer_id)r2 group by weekofyear

18、计算整个活动客户平均花费的最大值

select customer_id,avg_price,rnk from
	(select customer_id,avg_price,dense_rank() over(order by avg_price DESC) rnk from
	(select customer_id,avg(price) avg_price from transaction_details group by customer_id)r1)r2 where rnk=1

19、统计每月花费最多的客户

select month,customer_id,sum_price,rnk
	from
	(
	select month,customer_id,sum_price,dense_rank() over(partition by month order by sum_price DESC) rnk
	from
	(
	select month,customer_id,sum(price) sum_price
	from
	(select month(to_date(regexp_replace(date,'/','-'))) month,customer_id,price from transaction_details
	)r1 group by month,customer_id
	)r2)r3
	where rnk=1

20、统计每月访问次数最多的客户

select month,customer_id,rnk from
	(
	select month,customer_id,dense_rank() over(partition by month order by num DESC) rnk from
	(
	select month,customer_id,count(transaction_id) num from
	(
	select 
	month(to_date(regexp_replace(date,'/','-'))) month,
	customer_id,transaction_id from 
	transaction_details
	)r1
	group by month,customer_id
	)r2)r3
	where rnk=1

21、按总价找出最受欢迎的5种产品

select product,sum_price,rnk from
	(
	select product,sum_price,dense_rank() over(order by sum_price DESC) rnk from
	(
	select product,sum(price) sum_price from transaction_details group by product
	)r1)r2
	where rnk<=5

22、根据购买频率找出最畅销的5种产品

select product,num,rnk from
	(
	select product,num,dense_rank() over(order by num DESC) rnk from
	(
	select product,count(transaction_id) num from transaction_details group by product
	)r1)r2
	where rnk<=5

23、根据客户数量找出最受欢迎的5种产品

select product,num,rnk from
	(
	select product,num,dense_rank() over(order by num DESC) rnk from
	(
	select product,count(customer_id) num from transaction_details group by product
	)r1)r2
	where rnk<=5

24、按客流量找出最受欢迎的商店

select store_id,num,rnk from
	(select store_id,num,dense_rank() over(order by num DESC) rnk
	from(
	select store_id,count(transaction_id) num from transaction_details group by store_id
	)r1)r2 where rnk=1

25、根据顾客消费价格找出最受欢迎的商店

select store_id,sum(price) sum_price from transaction_details group by store_id order by sum_price DESC limit 1
	
	select store_id,sum_price,rnk from 
	(
	select store_id,sum_price,dense_rank() over(order by sum_price) rnk from
	(
	select store_id,sum(price) sum_price from transaction_details group by store_id
	)r1)r2
	where rnk=1

26、根据顾客交易情况找出最受欢迎的商店

select store_id,rnk from
	(select store_id,dense_rank() over(order by num) rnk from
	(select store_id,count(transaction_id) num from transaction_details group by store_id)r1)r2
	where rnk =1

27、根据商店和唯一的顾客id获取最受欢迎的产品

select store_id,product,rnk from
	(select store_id,product,dense_rank() over(partition by store_id order by num DESC) rnk from
	(select store_id,product,count(distinct customer_id) num from transaction_details group by store_id,product)r1)r2 where rnk = 1

28、获取每个商店的员工与顾客比

select S.store_id,customer_num,employee_number,customer_num/employee_number as `%` from
	(
	select store_id,count(customer_id) customer_num from transaction_details group by store_id
	)r1
	inner join store_details S
	on S.store_id=r1.store_id

29、按年和月计算每家店的收入

select store_id,year,month,sum(price) sum_price from
	(
	select store_id,
	year(to_date(regexp_replace(date,'/','-'))) year,
	month(to_date(regexp_replace(date,'/','-'))) month,price
	from transaction_details 
	)r1
	group by store_id,year,month

30、按店铺制作总收益饼图

select store_id,sum(price) sum_price from transaction_details group by store_id

31、找出每个商店最繁忙的时间段

select store_id,hours,sum,rnk 
	from
		(select store_id,hours,sum,dense_rank() over(partition by store_id order by sum DESC) rnk
	from
		(select
			store_id,
			case when hour<=6 then 1 when hour<=12 then 2 when hour<=18 then 3 else 4 end hours,
			sum(num) sum
		from
			(select 
				store_id,
				cast(substr(time,0,instr(time,':')-1)as INT) hour,
				count(transaction_id) num 
			from 
				transaction_details
			group by store_id,cast(substr(time,0,instr(time,':')-1)as INT)
			)r1
		group by 
			store_id,case when hour<=6 then 1 when hour<=12 then 2 when hour<=18 then 3 else 4 end)r2
		)r3 
	where rnk=1

32、找出每家店的忠实顾客

select store_id,customer_id,rnk from
	(
	select store_id,customer_id,dense_rank() over(partition by store_id order by num DESC) rnk from
	(
	select store_id,customer_id,count(*) num from transaction_details group by store_id,customer_id
	)r1)r2
	where rnk=1

33、根据每位员工的最高收入找出明星商店

select r3.store_id,sum_price,employee_number,wage,rnk from
	(select r2.store_id,sum_price,employee_number,wage,dense_rank() over(order by wage DESC) rnk from
	(select S.store_id,sum_price,employee_number,(sum_price/employee_number)as wage from
	(select store_id,sum(price) sum_price from transaction_details group by store_id)r1
	inner join store_details S
	on r1.store_id=S.store_id)r2)r3 where rnk=1

34、在ext_store_review中找出存在冲突的交易映射关系

select transaction_id from transaction_details group by transaction_id having count(1)>1

35、了解客户评价的覆盖率

select store_id,count(1) from v_store_review group by store_id

36、根据评分了解客户的分布情况

select store_id,review_score,count(1) from v_store_review group by store_id,review_score

37、根据交易了解客户的分布情况

select store_id,customer_id,count(1) from transaction_details group by store_id,customer_id

38、客户给出的最佳评价是否总是同一家门店

select store_id,count(1) from
	(select store_id,review_score from v_store_review where review_score='5')r1
	group by store_id
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
### 回答1: Hive是一个基于Hadoop数据仓库工具,提供了类似于SQL的查询语言HiveQL,可以方便地进行数据分析和查询,适合大规模的数据处理。 在Hive的案例教程,通常会包含一些习,以帮助学习者更好地理解和掌握Hive的使用。这些习可以涉及到不同的数据仓库场景和问,下面是一个示例: 假设我们有一个电商平台的销售数据,其包含了订单表和商品表。订单表包括了订单ID、用户ID、商品ID、购买数量和购买日期等信息;商品表包括了商品ID、商品名称、商品价格和商品分类等信息。 1. 统计每个用户的购买总金额,并按照金额从高到低排序。 可以通过使用Hive的聚合函数和排序功能来实现这个需求。首先,需要使用JOIN操作将订单表和商品表进行关联,根据商品ID找到对应的商品价格。然后,通过GROUP BY用户ID进行分组,使用SUM函数计算每个用户的购买总金额。最后,使用ORDER BY将结果按照金额从高到低排序。 2. 统计每个商品分类的销售数量和销售金额。 同样地,可以通过使用JOIN操作将订单表和商品表进行关联。然后,通过GROUP BY商品分类进行分组,分别使用COUNT和SUM函数计算每个商品分类的销售数量和销售金额。 3. 找出销售金额最高的5个商品。 可以通过使用ORDER BY和LIMIT子句来实现这个需求。先通过JOIN操作将订单表和商品表进行关联,然后使用SUM函数计算每个商品的销售金额。最后,使用ORDER BY对销售金额进行降序排序,并使用LIMIT 5获取5个商品。 通过完成这些习,可以深入理解Hive数据仓库功能,掌握基本的数据查询和分析。同时,也能够提升对数据仓库设计和实践的理解和能力。 ### 回答2: Hive数据仓库案例教程习是一种用于学习和实践Hive数据仓库的教育资源。它通过提供一系列的案例和习,帮助学习者深入理解Hive数据仓库的概念与应用,并通过实践加深对Hive的掌握。 该教程习通常包括以下内容: 1. 数据准备:习提供了一些数据资料,学习者需要根据具体的案例要求将数据导入到Hive。这一步通常涉及到创建、管理表格、上传数据等操作。 2. 数据分析:习会提供一些具体的问或需求,学习者需要通过Hive的查询语句进行数据分析。这一步要求学习者熟悉Hive的查询语法和常用函数,并能够编写出符合要求的查询语句。 3. 结果验证:学习者需要将自己查询得到的结果与提供的标准答案进行对比。这一步能够检验学习者对Hive查询的准确性和熟练程度。 通过完成这些案例教程习,学习者可以逐步提升对Hive数据仓库的理解和实践能力。同时,这些习也能够帮助学习者巩固理论知识,拓宽在实际应用场景的思考和解决问能力。 总之,Hive数据仓库案例教程习是一种有助于学习者掌握Hive数据仓库技术的教育资源,通过实践案例和习,提升学习者的理论实践能力和问解决能力。 ### 回答3: Hive数据仓库案例教程习是为了帮助学习者更好地理解和掌握Hive数据仓库的相关知识和技能而设计的一系列练习题。 这些习通常会根据实际的数据仓库案例进行设计,包括数据的导入和清洗、数据的转换和处理、数据的查询和分析等方面的内容。学习者可以通过完成这些习,加深对Hive数据仓库的理解,并且提高自己在实际应用能力。 在这些习,学习者需要运用到Hive的相关操作语句和函数,例如创建和管理表、加载和导入数据、定义表结构和分区等。同时,学习者还需要了解Hive的查询语言,如SQL,以及Hive的高级功能,如UDF自定义函数等。通过这些习,学习者可以从实际操作不断提高和完善自己的技能。 总的来说,Hive数据仓库案例教程习是一种亲身实践的学习方法,通过实际的案例和练习,帮助学习者更好地掌握和应用Hive数据仓库的技术,达到提升数据分析和处理能力的目的。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值