Hadoop实现电商商单分析(2)
上一篇文章链接:
Hadoop实现电商商单分析(1)
二、SQL
show databases ;
use test;
-- 创建日志信息表
describe trackinfo;
drop table if exists trackinfo;
create table trackInfo(
id string,
url string,
sessionId string,
ip string,
`time` string
)row format delimited fields terminated by ',';
load data local inpath "/root/SX_data/trackInfo.txt" overwrite into table trackinfo;
-- 创建ip映射信息表
drop table if exists ipInfo;
create table ipInfo(
ip string,
country string,
province string,
city string
)row format delimited fields terminated by ',';
load data local inpath "/root/SX_data/ipInfo.txt" overwrite into table ipInfo;
-- 创建结果信息表
truncate table track_log;
create table track_log(
url string,
pageId string,
ip string,
country string,
province string,
city string
)row format delimited fields terminated by ',';
insert into track_log
select
url,
regexp_extract(url,'topicId=(\\d{5})') as pageId,
t.ip,
i.country,
i.province,
i.city
from trackinfo t
left join ipinfo i
on t.ip = i.ip
;
-- 查询省份浏览量
select
from track_log
group by province;
select
url,
regexp_extract(url,'topicId=(\\d{5})') as pageId
from trackinfo t
where id = '20960991758'
;
select regexp_extract("http://www.yihaodian.com/cms/view.do?topicId=18970dgf",'topicId=(\\d{5})')