Hive日志分析案例二

1、创建日志原始表
-- create table
drop table if exists hive_ycong.track_log ;
create table hive_ycong.track_log(
id                string,
url               string,
referer           string,
keyword           string,
type              string,
guid              string,
pageId            string,
moduleId          string,
linkId            string,
attachedInfo      string,
sessionId         string,
trackerU          string,
trackerType       string,
ip                string,
trackerSrc        string,
cookie            string,
orderCode         string,
trackTime         string,
endUserId         string,
firstLink         string,
sessionViewNo     string,
productId         string,
curMerchantId     string,
provinceId        string,
cityId            string,
fee               string,
edmActivity       string,
edmEmail          string,
edmJobId          string,
ieVersion         string,
platform          string,
internalKeyword   string,
resultSum         string,
currentPage       string,
linkPosition      string,
buttonPosition    string
)
partitioned by(date string, hour string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' 
STORED AS PARQUET ;
2、导入数据
--load data
set parquet.compression=SNAPPY ;
load data local inpath '/data/tracklogs/20160321/2016032119' overwrite 
into table hive_ycong.track_log partition(date = '20160321' ,hour = '19'); 
load data local inpath '/data/tracklogs/20160321/2016032120' overwrite 
into table hive_ycong.track_log partition(date = '20160321' ,hour = '20');
3、创建会话临时表session_info_temp
drop table if exists hive_ycong.session_info_temp ;

create table hive_ycong.session_info_temp as 
select 
a.sessionid session_id ,
max(a.guid) guid, 
max(a.enduserid) user_id ,
count(a.url) pv ,
(unix_timestamp(max(a.tracktime)) - unix_timestamp(min(a.tracktime))) stay_time,
min(a.tracktime) min_trackTime ,
max(a.ip) ip,
max(a.provinceid) provinceId 
from hive_ycong.track_log a where date = '20160321' 
group by sessionid;**
4、创建会话临时表session_url_temp
drop table if exists hive_ycong.session_url_temp ;

create table hive_ycong.session_url_temp as 
select 
sessionid,
tracktime,
trackeru,
url,
referer
from hive_ycong.track_log where date = '20160321';
5、创建会话表session_info,并加载数据
create table hive_ycong.session_info(
session_id string ,
guid string ,
trackerU string ,
landing_url string ,
landing_url_ref string ,
user_id string ,
pv string ,
stay_time string ,
min_trackTime string ,
ip string ,
provinceId string 
)
partitioned by (date string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' ;

insert overwrite table hive_ycong.session_info partition(date = '20160321')
select 
a.session_id session_id ,
max(a.guid) guid ,
max(b.trackeru) trackerU ,
max(b.url) landing_url ,
max(b.referer) landing_url_ref ,
max(a.user_id) user_id ,
max(a.pv) pv ,
max(a.stay_time / 1000) stay_time ,
max(a.min_trackTime) min_trackTime ,
max(a.ip) ip  ,
max(a.provinceId) provinceId 
from hive_ycong.session_info_temp a
join hive_ycong.session_url_temp b on a.session_id = b.sessionid and a.min_trackTime = b.tracktime
group by a.session_id;
6、结果分析:日期 UV PV 登陆人数 游客人数 平均访问时长 二跳率 独立IP数
drop table if exists hive_ycong.visit_daily_temp ;

create table hive_ycong.visit_daily_temp as 
select 
date,
count(distinct guid) uv,
sum(pv) pv,
count(distinct case when user_id != '' then user_id else null end) login_users,
count(distinct case when user_id = '' then guid else null end) visit_users,
avg(stay_time) avg_stay_time,
count(case when pv >= 2 then session_id else null end) / count(session_id) second_rate,
count(distinct ip) ip_number
from hive_ycong.session_info where date = '20160321'
group by date;

结果

  • 没有权限的用户
    grant all on *.* to root@'bigdata.eclipse.com' identified by '123456'
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值