综合案例一

需求:按照区域 求最受欢迎的商品的 top3
1.建表
首先mysql上建立city_info和product_info并导入数据
(1)在mysql上建立city_info表并导入数据

CREATE TABLE `city_info` (
  `city_id` int(11) DEFAULT NULL,
  `city_name` varchar(255) DEFAULT NULL,
  `area` varchar(255) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

insert  into `city_info`(`city_id`,`city_name`,`area`) values (1,'BEIJING','NC'),(2,'SHANGHAI','EC'),(3,'NANJING','EC'),(4,'GUANGZHOU','SC'),(5,'SANYA','SC'),(6,'WUHAN','CC'),(7,'CHANGSHA','CC'),(8,'XIAN','NW'),(9,'CHENGDU','SW'),(10,'HAERBIN','NE');

(2)在mysql上建立product_info表并导入数据

CREATE TABLE `product_info` (
  `product_id` int(11) DEFAULT NULL,
  `product_name` varchar(255) DEFAULT NULL,
  `extend_info` varchar(255) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

insert  into product_info(product_id,product_name,extend_info) values (1,'product1','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (2,'product2','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (3,'product3','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (4,'product4','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (5,'product5','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (6,'product6','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (7,'product7','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (8,'product8','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (9,'product9','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (10,'product10','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (11,'product11','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (12,'product12','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (13,'product13','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (14,'product14','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (15,'product15','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (16,'product16','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (17,'product17','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (18,'product18','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (19,'product19','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (20,'product20','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (21,'product21','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (22,'product22','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (23,'product23','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (24,'product24','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (25,'product25','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (26,'product26','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (27,'product27','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (28,'product28','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (29,'product29','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (30,'product30','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (31,'product31','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (32,'product32','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (33,'product33','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (34,'product34','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (35,'product35','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (36,'product36','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (37,'product37','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (38,'product38','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (39,'product39','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (40,'product40','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (41,'product41','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (42,'product42','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (43,'product43','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (44,'product44','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (45,'product45','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (46,'product46','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (47,'product47','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (48,'product48','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (49,'product49','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (50,'product50','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (51,'product51','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (52,'product52','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (53,'product53','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (54,'product54','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (55,'product55','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (56,'product56','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (57,'product57','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (58,'product58','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (59,'product59','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (60,'product60','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (61,'product61','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (62,'product62','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (63,'product63','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (64,'product64','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (65,'product65','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (66,'product66','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (67,'product67','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (68,'product68','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (69,'product69','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (70,'product70','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (71,'product71','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (72,'product72','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (73,'product73','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (74,'product74','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (75,'product75','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (76,'product76','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (77,'product77','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (78,'product78','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (79,'product79','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (80,'product80','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (81,'product81','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (82,'product82','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (83,'product83','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (84,'product84','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (85,'product85','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (86,'product86','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (87,'product87','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (88,'product88','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (89,'product89','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (90,'product90','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (91,'product91','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (92,'product92','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (93,'product93','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (94,'product94','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (95,'product95','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (96,'product96','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (97,'product97','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (98,'product98','{"product_status":1}');
insert  into product_info(product_id,product_name,extend_info) values (99,'product99','{"product_status":0}');
insert  into product_info(product_id,product_name,extend_info) values (100,'product100','{"product_status":1}');

在hive上建立user_click并导入数据,再建立city_info和product_info表并通过sqoop从mysql上导入数据
(3)在hive上建立user_click并导入数据

create table user_click(
user_id string,
session_id string,
dt string,
city_id int,
shop_id int
)
row format delimited fields terminated by ","

load data local inpath'/home/hadoop/tmp/user_click.txt' into table user_click

(4)在hive上建立city_info表,并通过sqoop从mysql上导入数据

create table city_info (
city_id int,
city_name string,
area string
)
row format delimited fields terminated by '\t'

sqoop import \
--connect jdbc:mysql://bigdata12:3306/bigdata  \
--username root  \
--password 123456 \
--mapreduce-job-name 'city' \
--direct \
--fields-terminated-by '\t' \
-m 1 \
--columns "city_id,city_name,area" \
--table "city_info" \
--hive-import \
--hive-overwrite \
--hive-database bigdata \
--hive-table city_info

(5)在hive上建立product_info表,并通过sqoop从mysql上导入数据

create table product_info (
product_id int,
product_name string,
extend_info string
)
row format delimited fields terminated by '\t'

sqoop import \
--connect jdbc:mysql://bigdata12:3306/bigdata  \
--username root  \
--password 123456 \
--mapreduce-job-name 'product_info' \
--direct \
--fields-terminated-by '\t' \
-m 1 \
--columns "product_id,product_name,extend_info" \
--table "product_info" \
--hive-import \
--hive-overwrite \
--hive-database bigdata \
--hive-table product_info

2.数据分析
table:city_info product_info user_click()主表
维度:区域 商品
指标:商品点击次数
区域 商品 商品点击次数
user_click :
city_id left join city_info =>area
product_id left join product_info =>product_name
(1)创建一个临时表,用来暂存三表合并取所需字段

-- 需要的大表
create table dws_user_click_area_product_name as 
select
  a.*,
  area,
  product_name
from
  (
    -- 主表
    select
      *
    from
      user_click
  ) as a
  left join (
    --  city_info
    select
      city_id,
      city_name,
      area
    from
      city_info
  ) as b on a.city_id = b.city_id
  left join (
    -- product_info
    select
      product_id,
      product_name,
      extend_info
    from
      product_info
  ) as c on a.product_id = c.product_id;

(2)创建一个临时表,用来暂存 商品名 区域 和点击次数

create table rpt_area_product_name_cnt as 
select  
area,
product_name,
count(1) as cnt
from dws_user_click_area_product_name
group by 
area,
product_name

(3)区域 商品 商品点击次数 top3

create table rpt_cnt_top3 as 
select
area,
product_name,
cnt,
rk
from 
(
select 
area,
product_name,
cnt,
rank() over(partition by area order by cnt desc ) as rk
from rpt_area_product_name_cnt
) as a
where rk <=3;

3.从hive上导出到mysql上,并作数据可视化
(1)导出前提是mysql上有表,所以mysql建表:

create table `rpt_cnt_top3`(
  `area` varchar(20), 
  `product_name` varchar(50), 
  `cnt` bigint, 
  `rk` int
)ENGINE=InnoDB DEFAULT CHARSET=utf8;

(2)把求出的结果从hive上导出到mysql上

sqoop export \
--connect jdbc:mysql://bigdata12:3306/bigdata  \
--username root  \
--password 123456 \
--table rpt_cnt_top3 \
--mapreduce-job-name 'hdfs2mysql' \
--fields-terminated-by '\001' \
--export-dir /user/hive/warehouse/bigdata.db/rpt_cnt_top3/*
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值