Sqoop+Hive+MySQL用户某时间范围,区域最受欢迎的Top N的产品 初级版

5 篇文章 0 订阅
3 篇文章 0 订阅

数据展示:
1.city_info

 mysql> select * from city_info;
+---------+-----------+--------+
| city_id | city_name | area   |
+---------+-----------+--------+
|       1 | 北京      | 华北   |
|       2 | 上海      | 华东   |
|       3 | 南京      | 华东   |
|       4 | 广州      | 华南   |
|       5 | 三亚      | 华南   |
|       6 | 武汉      | 华中   |
|       7 | 长沙      | 华中   |
|       8 | 西安      | 西北   |
|       9 | 成都      | 西南   |
|      10 | 哈尔滨    | 东北   |
+---------+-----------+--------+
10 rows in set (24.52 sec)

2.user_info

mysql> select * from user_info limit 20;
+---------+--------+------+---------+------+
| user_id | name   | age  | city_id | sex  |
+---------+--------+------+---------+------+
|       1 | name1  |   25 |       5 | M    |
|       2 | name2  |    8 |       9 | M    |
|       3 | name3  |    8 |       9 | M    |
|       4 | name4  |   26 |       3 | M    |
|       5 | name5  |    8 |       6 | F    |
|       6 | name6  |   48 |       2 | M    |
|       7 | name7  |   53 |       6 | M    |
|       8 | name8  |    7 |       1 | M    |
|       9 | name9  |    5 |       1 | M    |
|      10 | name10 |   26 |       9 | M    |
|      11 | name11 |    4 |       6 | M    |
|      12 | name12 |   41 |       1 | M    |
|      13 | name13 |   56 |       7 | M    |
|      14 | name14 |    4 |       8 | F    |
|      15 | name15 |   38 |       8 | M    |
|      16 | name16 |   49 |       4 | F    |
|      17 | name17 |   32 |       6 | F    |
|      18 | name18 |   47 |       9 | F    |
|      19 | name19 |   35 |       6 | F    |
|      20 | name20 |   11 |       6 | M    |
+---------+--------+------+---------+------+
20 rows in set (0.00 sec)

3.user_click

hive> select * from user_click limit 20;
OK
95  2bf501a7637549c89cf55342331b15db    2016-05-05 21:01:56 1   72  2016.5.5
95  2bf501a7637549c89cf55342331b15db    2016-05-05 21:52:26 1   68  2016.5.5
95  2bf501a7637549c89cf55342331b15db    2016-05-05 21:17:03 1   40  2016.5.5
95  2bf501a7637549c89cf55342331b15db    2016-05-05 21:32:07 1   21  2016.5.5
95  2bf501a7637549c89cf55342331b15db    2016-05-05 21:26:06 1   63  2016.5.5
95  2bf501a7637549c89cf55342331b15db    2016-05-05 21:03:11 1   60  2016.5.5
95  2bf501a7637549c89cf55342331b15db    2016-05-05 21:43:43 1   30  2016.5.5
95  2bf501a7637549c89cf55342331b15db    2016-05-05 21:09:58 1   96  2016.5.5
95  2bf501a7637549c89cf55342331b15db    2016-05-05 21:18:45 1   71  2016.5.5
95  2bf501a7637549c89cf55342331b15db    2016-05-05 21:42:39 1   8   2016.5.5
95  2bf501a7637549c89cf55342331b15db    2016-05-05 21:24:30 1   6   2016.5.5
95  2bf501a7637549c89cf55342331b15db    2016-05-05 21:29:49 1   26  2016.5.5
95  5b8cdcb0b18645a19f4e3e34a241012e    2016-05-05 20:24:12 1   83  2016.5.5
95  5b8cdcb0b18645a19f4e3e34a241012e    2016-05-05 20:07:50 1   62  2016.5.5
95  5b8cdcb0b18645a19f4e3e34a241012e    2016-05-05 20:19:31 1   61  2016.5.5
95  5b8cdcb0b18645a19f4e3e34a241012e    2016-05-05 20:40:51 1   46  2016.5.5
95  5b8cdcb0b18645a19f4e3e34a241012e    2016-05-05 20:19:07 1   99  2016.5.5
95  5b8cdcb0b18645a19f4e3e34a241012e    2016-05-05 20:46:26 1   69  2016.5.5
95  5b8cdcb0b18645a19f4e3e34a241012e    2016-05-05 20:16:10 1   4   2016.5.5
95  dd01372f92654525a758a9625f728158    2016-05-05 15:37:02 1   67  2016.5.5
  • user_click外部 分区表建立
-- Hive: external, day-partitioned click-log table backed by /project/userclick.
-- Fix: the original statement was missing its terminating semicolon.
create external table user_click(
user_id int,        -- clicking user (joins user_info.user_id)
session_id string,  -- browser session hash
action_time string, -- click timestamp, 'yyyy-MM-dd HH:mm:ss' as text
city_id int,        -- joins city_info.city_id
product_id int      -- joins product_info.product_id
)
partitioned by (day string)  -- one partition per logical day, e.g. '2016.5.5'
row format delimited fields terminated by "," location '/project/userclick';
  • user_click导入表
-- Load the raw click file into the 2016.5.5 partition, replacing any
-- previous contents of that partition.
-- Fix: the original statement was missing its terminating semicolon.
load data local inpath "/home/hadoop/project1/user_click.txt"
overwrite into table user_click
partition(day='2016.5.5');
  • 创建产品表 product_info
-- MySQL: product dimension table (source for the sqoop import below).
-- Fixes: the original had a trailing comma after the last column
-- (syntax error) and no terminating semicolon.
create table product_info(
product_id int,
product_name varchar(20),
extend_info varchar(20)
) default charset=utf8;
  • 插入产品表
    见product_info.sql

  • 创建城市信息表

-- MySQL: city dimension table mapping each city to its region (area).
CREATE TABLE city_info (
    city_id   int(11)      DEFAULT NULL,
    city_name varchar(255) DEFAULT NULL,
    area      varchar(255) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
  • 导入信息
 insert  into `city_info`(`city_id`,`city_name`,`area`) values (1,'北京','华北'),(2,'上海','华东'),(3,'南京','华东'),(4,'广州','华南'),(5,'三亚','华南'),(6,'武汉','华中'),(7,'长沙','华中'),(8,'西安','西北'),(9,'成都','西南'),(10,'哈尔滨','东北');
  • 创建user_info表mysql

-- MySQL: user dimension table.
CREATE TABLE user_info (
    user_id int PRIMARY KEY,
    name    varchar(128),
    age     int,
    city_id int,          -- joins city_info.city_id
    sex     varchar(5)    -- 'M' / 'F' in the sample data
) CHARSET=utf8;

  • 导入信息

见sql文件

  • 创建 hive表 product_info
 create external table product_info(
product_id int,
product_name string,
extend_info string
)
row format delimited fields terminated by "\t" location '/project/productinfo';
  • sqoop product_info导入
# Import MySQL wl.product_info into Hive hivedb.product_info,
# tab-delimited, single mapper, replacing any existing data.
sqoop import \
  --connect jdbc:mysql://localhost:3306/wl \
  --username root \
  --password 123456 \
  --table product_info \
  --delete-target-dir \
  --fields-terminated-by '\t' \
  --hive-import \
  --hive-database hivedb \
  --hive-table product_info \
  --hive-overwrite \
  --num-mappers 1
  • 创建 hive表 city_info
-- Hive: external city table; sqoop writes tab-delimited files
-- into /project/cityinfo for it.
CREATE EXTERNAL TABLE city_info (
    city_id   int,
    city_name string,
    area      string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY "\t"
LOCATION "/project/cityinfo";
  • sqoop city_info导入
# Import MySQL wl.city_info into Hive hivedb.city_info,
# tab-delimited, single mapper, replacing any existing data.
sqoop import \
  --connect jdbc:mysql://localhost:3306/wl \
  --username root \
  --password 123456 \
  --table city_info \
  --delete-target-dir \
  --fields-terminated-by '\t' \
  --hive-import \
  --hive-database hivedb \
  --hive-table city_info \
  --hive-overwrite \
  --num-mappers 1
  • 创建hive表user_info
-- Hive: external user table; sqoop writes tab-delimited files
-- into /project/userinfo for it.
CREATE EXTERNAL TABLE user_info (
    user_id int,
    name    string,
    age     int,
    city_id int,
    sex     string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY "\t"
LOCATION "/project/userinfo";
  • sqoop 导入hive表user_info
# Import MySQL wl.user_info into Hive hivedb.user_info,
# tab-delimited, single mapper, replacing any existing data.
sqoop import \
  --connect jdbc:mysql://localhost:3306/wl \
  --username root \
  --password 123456 \
  --table user_info \
  --delete-target-dir \
  --fields-terminated-by '\t' \
  --hive-import \
  --hive-database hivedb \
  --hive-table user_info \
  --hive-overwrite \
  --num-mappers 1
  • 书写hql 并将查询的结果集导入到HDFS文件中
-- Write the top-3 most-clicked products per area for 2016.5.5 to HDFS.
-- Output columns (tab-delimited): area, product_name, click_count, rank, day.
-- Fix: the original wrapped this in two extra query levels that recomputed
-- ROW_NUMBER() over the identical partition and re-GROUPed an already
-- grouped result set. One aggregation + one window + one filter is
-- equivalent and produces the same rows.
INSERT OVERWRITE DIRECTORY '/project/2016.5.5'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
SELECT
    ranked.area,
    ranked.pname,
    ranked.pnum,
    ranked.rn,      -- rank of the product within its area (1 = most clicks)
    ranked.day
FROM (
    -- click count per (area, product) for the requested day, ranked per area
    SELECT
        ci.area                AS area,
        pi.product_name        AS pname,
        COUNT(pi.product_name) AS pnum,
        uc.day                 AS day,
        ROW_NUMBER() OVER (PARTITION BY ci.area
                           ORDER BY COUNT(pi.product_name) DESC) AS rn
    FROM city_info ci
    JOIN (SELECT * FROM user_click WHERE day = '2016.5.5') uc
        ON ci.city_id = uc.city_id
    JOIN product_info pi
        ON pi.product_id = uc.product_id
    GROUP BY ci.area, pi.product_name, uc.day
) ranked
WHERE ranked.rn < 4;
  • 创建hive的结果表
-- Hive: external result table holding the per-area top-3 rows; the
-- partition key `days` is distinct from the data column `day`.
-- Fix: the original statement was missing its terminating semicolon.
create external table endhive_table(
area string,   -- region, e.g. 华东
pname string,  -- product name
pnum int,      -- click count
rank int,      -- rank within the area (1..3)
day string     -- logical day carried through from user_click
)
partitioned by (days string)
row format delimited fields terminated by "\t" location "/project/endhivetable";
  • 向结果表中导入数据
-- Move the HQL output file from HDFS into the 2016.5.5 partition
-- of the result table.
LOAD DATA INPATH "/project/2016.5.5/000000_0"
INTO TABLE endhive_table
PARTITION (days = "2016.5.5");
  • 创建最终的sql表
-- MySQL: final result table that sqoop export writes into.
-- Fix: `rank` is a reserved word in MySQL 8.0.2+ (window functions),
-- so quote it with backticks; unquoted it is a syntax error there.
create table endsql_table(
area varchar(100),
pname varchar(100),
pnum int,
`rank` int,
day varchar(100)
)charset=utf8;
  • sqoop从hive表到mysql
# Export the 2016.5.5 partition of the Hive result table back to MySQL
# wl.endsql_table (UTF-8 connection so Chinese area names survive).
sqoop export \
  --connect "jdbc:mysql://localhost:3306/wl?useUnicode=true&characterEncoding=utf-8" \
  --username root \
  --password 123456 \
  --table endsql_table \
  --mapreduce-job-name sql_end \
  --export-dir /project/endhivetable/days=2016.5.5/000000_0 \
  --fields-terminated-by '\t'
  • 查询结果
mysql> select * from endsql_table 
    -> ;
+--------+-----------+------+------+----------+
| area   | pname     | pnum | rank | day      |
+--------+-----------+------+------+----------+
| 华东   | product4  |   40 |    1 | 2016.5.5 |
| 西北   | product48 |   19 |    3 | 2016.5.5 |
| 华东   | product96 |   32 |    2 | 2016.5.5 |
| 华东   | product5  |   31 |    3 | 2016.5.5 |
| 华中   | product26 |   39 |    1 | 2016.5.5 |
| 华中   | product7  |   39 |    2 | 2016.5.5 |
| 华中   | product70 |   38 |    3 | 2016.5.5 |
| 华北   | product40 |   16 |    1 | 2016.5.5 |
| 华北   | product9  |   16 |    2 | 2016.5.5 |
| 华北   | product94 |   13 |    3 | 2016.5.5 |
| 华南   | product38 |   35 |    1 | 2016.5.5 |
| 西南   | product16 |   20 |    1 | 2016.5.5 |
| 西南   | product60 |   19 |    2 | 2016.5.5 |
| 西南   | product95 |   19 |    3 | 2016.5.5 |
| 华南   | product33 |   34 |    2 | 2016.5.5 |
| 华南   | product88 |   34 |    3 | 2016.5.5 |
| 西北   | product56 |   20 |    1 | 2016.5.5 |
| 西北   | product67 |   20 |    2 | 2016.5.5 |
+--------+-----------+------+------+----------+
18 rows in set (0.00 sec)

若泽大数据交流群:671914634

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值