数据展示:
1.city_info
mysql> select * from city_info;
+---------+-----------+--------+
| city_id | city_name | area |
+---------+-----------+--------+
| 1 | 北京 | 华北 |
| 2 | 上海 | 华东 |
| 3 | 南京 | 华东 |
| 4 | 广州 | 华南 |
| 5 | 三亚 | 华南 |
| 6 | 武汉 | 华中 |
| 7 | 长沙 | 华中 |
| 8 | 西安 | 西北 |
| 9 | 成都 | 西南 |
| 10 | 哈尔滨 | 东北 |
+---------+-----------+--------+
10 rows in set (24.52 sec)
2.user_info
mysql> select * from user_info limit 20;
+---------+--------+------+---------+------+
| user_id | name | age | city_id | sex |
+---------+--------+------+---------+------+
| 1 | name1 | 25 | 5 | M |
| 2 | name2 | 8 | 9 | M |
| 3 | name3 | 8 | 9 | M |
| 4 | name4 | 26 | 3 | M |
| 5 | name5 | 8 | 6 | F |
| 6 | name6 | 48 | 2 | M |
| 7 | name7 | 53 | 6 | M |
| 8 | name8 | 7 | 1 | M |
| 9 | name9 | 5 | 1 | M |
| 10 | name10 | 26 | 9 | M |
| 11 | name11 | 4 | 6 | M |
| 12 | name12 | 41 | 1 | M |
| 13 | name13 | 56 | 7 | M |
| 14 | name14 | 4 | 8 | F |
| 15 | name15 | 38 | 8 | M |
| 16 | name16 | 49 | 4 | F |
| 17 | name17 | 32 | 6 | F |
| 18 | name18 | 47 | 9 | F |
| 19 | name19 | 35 | 6 | F |
| 20 | name20 | 11 | 6 | M |
+---------+--------+------+---------+------+
20 rows in set (0.00 sec)
3.user_click
hive> select * from user_click limit 20;
OK
95 2bf501a7637549c89cf55342331b15db 2016-05-05 21:01:56 1 72 2016.5.5
95 2bf501a7637549c89cf55342331b15db 2016-05-05 21:52:26 1 68 2016.5.5
95 2bf501a7637549c89cf55342331b15db 2016-05-05 21:17:03 1 40 2016.5.5
95 2bf501a7637549c89cf55342331b15db 2016-05-05 21:32:07 1 21 2016.5.5
95 2bf501a7637549c89cf55342331b15db 2016-05-05 21:26:06 1 63 2016.5.5
95 2bf501a7637549c89cf55342331b15db 2016-05-05 21:03:11 1 60 2016.5.5
95 2bf501a7637549c89cf55342331b15db 2016-05-05 21:43:43 1 30 2016.5.5
95 2bf501a7637549c89cf55342331b15db 2016-05-05 21:09:58 1 96 2016.5.5
95 2bf501a7637549c89cf55342331b15db 2016-05-05 21:18:45 1 71 2016.5.5
95 2bf501a7637549c89cf55342331b15db 2016-05-05 21:42:39 1 8 2016.5.5
95 2bf501a7637549c89cf55342331b15db 2016-05-05 21:24:30 1 6 2016.5.5
95 2bf501a7637549c89cf55342331b15db 2016-05-05 21:29:49 1 26 2016.5.5
95 5b8cdcb0b18645a19f4e3e34a241012e 2016-05-05 20:24:12 1 83 2016.5.5
95 5b8cdcb0b18645a19f4e3e34a241012e 2016-05-05 20:07:50 1 62 2016.5.5
95 5b8cdcb0b18645a19f4e3e34a241012e 2016-05-05 20:19:31 1 61 2016.5.5
95 5b8cdcb0b18645a19f4e3e34a241012e 2016-05-05 20:40:51 1 46 2016.5.5
95 5b8cdcb0b18645a19f4e3e34a241012e 2016-05-05 20:19:07 1 99 2016.5.5
95 5b8cdcb0b18645a19f4e3e34a241012e 2016-05-05 20:46:26 1 69 2016.5.5
95 5b8cdcb0b18645a19f4e3e34a241012e 2016-05-05 20:16:10 1 4 2016.5.5
95 dd01372f92654525a758a9625f728158 2016-05-05 15:37:02 1 67 2016.5.5
- user_click外部 分区表建立
-- External partitioned Hive table over the raw click log.
-- One partition per calendar day; data files are comma-delimited
-- and live under /project/userclick in HDFS.
-- Fix: the original statement had no terminating semicolon.
create external table user_click(
    user_id    int,
    session_id string,
    action_time string,
    city_id    int,
    product_id int
)
partitioned by (day string)
row format delimited fields terminated by ","
location '/project/userclick';
- user_click导入表
-- Load the raw click file from the local filesystem into the 2016.5.5
-- partition; OVERWRITE replaces any data already in that partition.
-- Fix: the original statement had no terminating semicolon.
load data local inpath "/home/hadoop/project1/user_click.txt"
overwrite into table user_click
partition(day='2016.5.5');
- 创建产品表 product_info
-- MySQL dimension table for products.
-- Fixes: the original had a trailing comma after the last column
-- (a syntax error), no terminating semicolon, and a bare "charset=utf8"
-- where the city_info DDL in this file spells out ENGINE and DEFAULT CHARSET.
create table product_info(
    product_id   int,
    product_name varchar(20),
    extend_info  varchar(20)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
- 插入产品表
见 product_info.sql
- 创建城市信息表
-- MySQL dimension table mapping each city to its region (area).
CREATE TABLE city_info (
    city_id   int(11)      DEFAULT NULL,
    city_name varchar(255) DEFAULT NULL,
    area      varchar(255) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
- 导入信息
-- Seed the ten city rows: (city_id, city_name, region).
INSERT INTO `city_info` (`city_id`, `city_name`, `area`) VALUES
    (1,  '北京',   '华北'),
    (2,  '上海',   '华东'),
    (3,  '南京',   '华东'),
    (4,  '广州',   '华南'),
    (5,  '三亚',   '华南'),
    (6,  '武汉',   '华中'),
    (7,  '长沙',   '华中'),
    (8,  '西安',   '西北'),
    (9,  '成都',   '西南'),
    (10, '哈尔滨', '东北');
- 创建user_info表mysql
-- MySQL user dimension table; user_id is the primary key.
-- city_id references city_info.city_id (no FK declared in the original;
-- left as-is so existing loads keep working).
-- Fix: spell out ENGINE and DEFAULT CHARSET, consistent with the
-- city_info DDL earlier in this file, instead of the bare "charset=utf8".
create table user_info(
    user_id int primary key,
    name    varchar(128),
    age     int,
    city_id int,
    sex     varchar(5)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
- 导入信息
见sql文件
- 创建 hive表 product_info
-- Hive external table over the tab-delimited product dump in HDFS
-- (populated by the sqoop import below).
CREATE EXTERNAL TABLE product_info (
    product_id   int,
    product_name string,
    extend_info  string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/project/productinfo';
- sqoop product_info导入
# Import MySQL table wl.product_info into Hive table hivedb.product_info.
# --hive-overwrite replaces any existing Hive data; --delete-target-dir
# removes the temporary HDFS staging directory left by a previous run;
# -m 1 runs a single map task. Fields are written tab-delimited to match
# the Hive DDL for product_info.
# NOTE(review): password on the command line is visible in shell history /
# process lists — consider --password-file.
sqoop import \
--connect jdbc:mysql://localhost:3306/wl \
--username root --password 123456 \
--table product_info \
--hive-import \
--hive-database hivedb \
--hive-overwrite \
--hive-table product_info \
--delete-target-dir \
--fields-terminated-by '\t' \
-m 1
- 创建 hive表 city_info
-- Hive external table over the tab-delimited city dump in HDFS
-- (populated by the sqoop import below).
CREATE EXTERNAL TABLE city_info (
    city_id   int,
    city_name string,
    area      string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/project/cityinfo';
- sqoop city_info导入
# Import MySQL table wl.city_info into Hive table hivedb.city_info.
# Same option pattern as the product_info import: overwrite the Hive table,
# clear the HDFS staging dir, single map task, tab-delimited fields to
# match the Hive DDL.
sqoop import \
--connect jdbc:mysql://localhost:3306/wl \
--username root --password 123456 \
--table city_info \
--hive-import \
--hive-database hivedb \
--hive-overwrite \
--hive-table city_info \
--delete-target-dir \
--fields-terminated-by '\t' \
-m 1
- 创建hive表user_info
-- Hive external table over the tab-delimited user dump in HDFS
-- (populated by the sqoop import below).
CREATE EXTERNAL TABLE user_info (
    user_id int,
    name    string,
    age     int,
    city_id int,
    sex     string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/project/userinfo';
- sqoop 导入hive表user_info
# Import MySQL table wl.user_info into Hive table hivedb.user_info.
# Same option pattern as the other imports: overwrite the Hive table,
# clear the HDFS staging dir, single map task, tab-delimited fields to
# match the Hive DDL.
sqoop import \
--connect jdbc:mysql://localhost:3306/wl \
--username root --password 123456 \
--table user_info \
--hive-import \
--hive-database hivedb \
--hive-overwrite \
--hive-table user_info \
--delete-target-dir \
--fields-terminated-by '\t' \
-m 1
- 书写hql 并将查询的结果集导入到HDFS文件中
-- Top-3 most-clicked products per region (area) for 2016-05-05, written
-- tab-delimited to HDFS. Output columns: area, product name, click count,
-- rank within area, day — same five columns as the original query.
-- Fix: the original wrapped this in two extra layers that re-grouped the
-- already-unique rows and recomputed row_number() over the surviving top-3
-- rows; both layers were redundant, so the rank from the inner query is
-- emitted directly.
insert overwrite directory '/project/2016.5.5'
row format delimited fields terminated by "\t"
select a.area, a.pname, a.pnum, a.nu, a.day
from (
    -- click count and per-area rank for every (area, product) pair on that day
    select ci.area                 as area,
           pi.product_name         as pname,
           count(pi.product_name)  as pnum,
           uc.day                  as day,
           row_number() over (partition by ci.area
                              order by count(pi.product_name) desc) as nu
    from city_info ci
    join (select * from user_click where day = '2016.5.5') uc
      on ci.city_id = uc.city_id
    join product_info pi
      on pi.product_id = uc.product_id
    group by ci.area, pi.product_name, uc.day
) a
where a.nu < 4;
- 创建hive的结果表
-- Hive result table for the top-3 report. Partition column is named
-- "days" to avoid clashing with the "day" data column.
-- NOTE(review): "rank" is a reserved word in recent Hive versions and may
-- need backtick quoting there; kept unquoted to match the existing data flow.
-- Fix: the original statement had no terminating semicolon.
create external table endhive_table(
    area  string,
    pname string,
    pnum  int,
    rank  int,
    day   string
)
partitioned by (days string)
row format delimited fields terminated by "\t"
location "/project/endhivetable";
- 向结果表中导入数据
-- Move the query's output file into the result table's 2016.5.5 partition.
-- Non-LOCAL LOAD DATA: the file is moved within HDFS rather than copied
-- from the local filesystem.
load data inpath '/project/2016.5.5/000000_0'
into table endhive_table
partition (days = '2016.5.5');
- 创建最终的sql表
-- Final MySQL report table, target of the sqoop export below.
-- Fix: RANK is a reserved word in MySQL 8.0.2+, so the column is
-- backtick-quoted; the column name itself is unchanged, so the sqoop
-- export and the queries in this file still work.
create table endsql_table(
    area   varchar(100),
    pname  varchar(100),
    pnum   int,
    `rank` int,
    day    varchar(100)
) charset=utf8;
- sqoop从hive表到mysql
# Export the Hive result partition back to MySQL table wl.endsql_table.
# The JDBC URL is quoted because it contains '&'; fields in the export
# file are tab-delimited, matching the Hive table's row format.
sqoop export \
  --connect "jdbc:mysql://localhost:3306/wl?useUnicode=true&characterEncoding=utf-8" \
  --username root --password 123456 \
  --table endsql_table \
  --mapreduce-job-name sql_end \
  --export-dir /project/endhivetable/days=2016.5.5/000000_0 \
  --fields-terminated-by '\t'
- 查询结果
mysql> select * from endsql_table
-> ;
+--------+-----------+------+------+----------+
| area | pname | pnum | rank | day |
+--------+-----------+------+------+----------+
| 华东 | product4 | 40 | 1 | 2016.5.5 |
| 西北 | product48 | 19 | 3 | 2016.5.5 |
| 华东 | product96 | 32 | 2 | 2016.5.5 |
| 华东 | product5 | 31 | 3 | 2016.5.5 |
| 华中 | product26 | 39 | 1 | 2016.5.5 |
| 华中 | product7 | 39 | 2 | 2016.5.5 |
| 华中 | product70 | 38 | 3 | 2016.5.5 |
| 华北 | product40 | 16 | 1 | 2016.5.5 |
| 华北 | product9 | 16 | 2 | 2016.5.5 |
| 华北 | product94 | 13 | 3 | 2016.5.5 |
| 华南 | product38 | 35 | 1 | 2016.5.5 |
| 西南 | product16 | 20 | 1 | 2016.5.5 |
| 西南 | product60 | 19 | 2 | 2016.5.5 |
| 西南 | product95 | 19 | 3 | 2016.5.5 |
| 华南 | product33 | 34 | 2 | 2016.5.5 |
| 华南 | product88 | 34 | 3 | 2016.5.5 |
| 西北 | product56 | 20 | 1 | 2016.5.5 |
| 西北 | product67 | 20 | 2 | 2016.5.5 |
+--------+-----------+------+------+----------+
18 rows in set (0.00 sec)
若泽大数据交流群:671914634