1. 创建表

--用户信息表
create table if not exists userinfo(
userid string,
username string,
address string,
sex string,
birthday string)
row format delimited fields terminated by ',';

--用户行为表
create table if not exists action(

userid string,
goodid string,
action int,
month int,
day int)
row format delimited fields terminated by ',';

--用户评论表
create table if not exists comment(
goodid string,
content string,
time string,
userid string,
username string,
userlevel string,
color string,
reviews int,
score int,
source string)
row format delimited fields terminated by ',';

--销售信息表
create table if not exists iphone(
goodid string,
goodname string,
attribute string,
shopname string,
shopid string,
url string,
price double,
keywords string,
commentcount int,
goodrepu int,
brand string,
type string,
color string,
markettime string,
os string
)
row format delimited fields terminated by ',';

2. 加载数据

--加载数据
load data local inpath 'action.csv' into table action;
load data local inpath 'iphone.csv' into table iphone;
load data local inpath 'userinfo.csv' into table userinfo;
load data local inpath 'comment.csv' into table comment;

3. 查询已加载的数据

select * limit userinfo limit 10;

4. 处理手机销售信息表数据

5. 处理用户行为表数据

要求:将用户行为源表中交易月份与交易日拼接为新字段存储到新表。

--用户行为(日期连接之后)
create table if not exists user_action_db(
userid string,
goodid string,
action int,
adate string)
row format delimited fields terminated by ',';
--日期连接存储到新表
insert into table user_action_db select userid,goodid,action,concat(month,'-',day) from action;

6. 处理用户信息表数据

要求:关联用户评论表和用户信息表将用户等级、用户年龄段划分后存
入新表。
首先创建临时表

--连接后临时表
create table if not exists user_temp(
userid string,
username string,
address string,
sex string,
age int,
userlevel string)
row format delimited fields terminated by ',';
--临时表中插入数据
insert into user_temp select userinfo.userid,userinfo.username,userinfo.address,sex,floor(datediff('2019-9-8', birthday) / 365) age,userlevel 
from userinfo join comment on userinfo.userid = comment.userid;

再根据年龄段划分

--新用户信息表
create table if not exists user_info_tb(
userid string,
username string,
address string,
sex string,
age_region int,
age_region_alias string,
userlevel string)
row format delimited fields terminated by ',';

--新用户信息表中插入数据
insert into user_info_tb select userid,username,address,sex,
case when age < 18 then 1
when 18 <= age and age <= 24 then 2
when 25 <= age and age <= 29 then 3
when 30 <= age and age <= 34 then 4
when 35 <= age and age <= 39 then 5
when 40 <= age and age <= 49 then 6
when age >= 50 then 7 end age_region,
case when age < 18 then '18岁以下'
when 18 <= age and age <= 24 then '18-24'
when 25 <= age and age <= 29 then '25-29'
when 30 <= age and age <= 34 then '30-34'
when 35 <= age and age <= 39 then '35-39'
when 40 <= age and age <= 49 then '40-49'
when age >= 50 then 7 end age_region_alias,
userlevel 
from user_temp;

查询一下

select * from user_info_db limit 10;

图片