hive sql

-- Build the hourly material fact partition: inner-join the per-metric staging
-- tables on (mrid, uid, bussiness_id) and attach the material name from
-- AD_MATERIAL_DIM. Values are written to AD_MATE_FT by position, so the
-- column order of this SELECT must not be changed.
--
-- Every staging subquery is pinned to the same pt literal, so no cross-table
-- pt comparison is needed. Do not add a chained "a.pt = b.pt = c.pt" test:
-- it is evaluated left to right (a boolean compared with a string) and is not
-- an "all equal" check.
INSERT OVERWRITE TABLE AD_MATE_FT PARTITION (pt='2012-03-05 10')
SELECT
    eff.mrid            AS material_id,
    dim.name            AS material_name,
    'pv'                AS AD_EFT_MATE_CAL_TYPE,
    eff.uid             AS uid,
    eff.bussiness_id    AS bussiness_id,
    'HOURLY'            AS time_type,
    '2012-03-05 10'     AS date_desc,
    eff.effect_sum      AS ad_eft_mate_cnt,
    expo.expose_sum     AS ad_show_cnt,
    clk.click_sum       AS ad_click_cnt,
    pct.ad_percent      AS ad_click_rate,
    brw.ad_browse       AS pageview_cnt,
    cbr.ad_clibro       AS pvcnt_div_adclick,
    stp.ad_step         AS ad_bounce_rate,
    con.ad_percon       AS ad_conv_rate,
    tim.ad_avgtime      AS avg_stay_time,
    -- unix_timestamp() returns epoch seconds; to_date() needs a timestamp
    -- string, so convert with from_unixtime() first.
    to_date(from_unixtime(unix_timestamp()))
FROM (
    SELECT effect_sum, mrid, uid, bussiness_id
    FROM sum0_effect_hour_temp
    WHERE pt='2012-03-05 10'
) eff
JOIN (
    SELECT expose_sum, mrid, uid, bussiness_id
    FROM sum0_expose_hour_temp
    WHERE pt='2012-03-05 10'
) expo
ON (eff.mrid = expo.mrid AND eff.uid = expo.uid AND eff.bussiness_id = expo.bussiness_id)
JOIN (
    SELECT click_sum, mrid, uid, bussiness_id
    FROM sum0_click_hour_temp
    WHERE pt='2012-03-05 10'
) clk
ON (eff.mrid = clk.mrid AND eff.uid = clk.uid AND eff.bussiness_id = clk.bussiness_id)
JOIN (
    SELECT ad_percent, mrid, uid, bussiness_id
    FROM sum0_click_percent_hour_temp
    WHERE pt='2012-03-05 10'
) pct
ON (eff.mrid = pct.mrid AND eff.uid = pct.uid AND eff.bussiness_id = pct.bussiness_id)
JOIN (
    SELECT ad_browse, mrid, uid, bussiness_id
    FROM sum0_click_browse_hour_temp
    WHERE pt='2012-03-05 10'
) brw
ON (eff.mrid = brw.mrid AND eff.uid = brw.uid AND eff.bussiness_id = brw.bussiness_id)
JOIN (
    SELECT ad_clibro, mrid, uid, bussiness_id
    FROM sum0_clibro_hour_rate_temp
    WHERE pt='2012-03-05 10'
) cbr
ON (eff.mrid = cbr.mrid AND eff.uid = cbr.uid AND eff.bussiness_id = cbr.bussiness_id)
JOIN (
    SELECT ad_percon, mrid, uid, bussiness_id
    FROM sum0_per_con_hour_temp
    WHERE pt='2012-03-05 10'
) con
ON (eff.mrid = con.mrid AND eff.uid = con.uid AND eff.bussiness_id = con.bussiness_id)
JOIN (
    SELECT ad_step, mrid, uid, bussiness_id
    FROM sum0_steprate_hour_temp
    WHERE pt='2012-03-05 10'
) stp
ON (eff.mrid = stp.mrid AND eff.uid = stp.uid AND eff.bussiness_id = stp.bussiness_id)
JOIN (
    SELECT ad_avgtime, mrid, uid, bussiness_id
    FROM avg0_time_hour_temp
    WHERE pt='2012-03-05 10'
) tim
ON (eff.mrid = tim.mrid AND eff.uid = tim.uid AND eff.bussiness_id = tim.bussiness_id)
-- The dimension table is joined on material id and owner uid only.
JOIN (
    SELECT id, uid, name
    FROM AD_MATERIAL_DIM
) dim
ON (eff.mrid = dim.id AND eff.uid = dim.uid);





按小时计算效果表的总记录
-- Staging table for the hourly effect count, keyed by
-- (mrid, uid, bussiness_id) and partitioned by the hour string pt.
CREATE TABLE IF NOT EXISTS sum0_effect_hour_temp (
    effect_sum   INT,     -- COUNT(effect_id) for the group
    mrid         STRING,
    uid          STRING,
    bussiness_id STRING
)
PARTITIONED BY (pt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    LINES TERMINATED BY '\n'
STORED AS RCFILE;


-- Hourly effect count per (mrid, uid, bussiness_id).
--
-- Values are written by position, so the SELECT list follows the table's
-- column order: effect_sum, mrid, uid, bussiness_id.
--
-- Both sides are already restricted to the same pt literal. No pt predicate
-- is placed in WHERE: filtering on a column of the right-hand side would
-- discard the NULL-extended rows and silently turn the LEFT OUTER JOIN into
-- an inner join. Groups with no matching effect therefore get effect_sum = 0.
INSERT OVERWRITE TABLE sum0_effect_hour_temp PARTITION (pt='2012-03-05 10')
SELECT
    COUNT(effects.effect_id) AS effect_sum,
    paths.mrid               AS mrid,
    paths.uid                AS uid,
    paths.bussiness_id       AS bussiness_id
FROM (
    SELECT page_id, mrid, uid, bussiness_id
    FROM ad_PATH
    WHERE pt='2012-03-05 10'
      AND bussiness_id IS NOT NULL
) paths
LEFT OUTER JOIN (
    SELECT effect_id, page_id
    FROM ad_effect
    WHERE pt='2012-03-05 10'
) effects
ON (effects.page_id = paths.page_id)
GROUP BY paths.mrid, paths.uid, paths.bussiness_id;

按小时计算点击表的曝光数

-- Staging table for the hourly exposure count, keyed by
-- (mrid, uid, bussiness_id) and partitioned by the hour string pt.
CREATE TABLE IF NOT EXISTS sum0_expose_hour_temp (
    expose_sum   INT,     -- number of matched 'expose' rows for the group
    mrid         STRING,
    uid          STRING,
    bussiness_id STRING
)
PARTITIONED BY (pt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    LINES TERMINATED BY '\n'
STORED AS RCFILE;

-- Hourly exposure count per (mrid, uid, bussiness_id): counts ad_expose_click
-- rows with action='expose' whose page_id matches an ad_PATH row.
--
-- Values are written by position, so the SELECT list follows the table's
-- column order: expose_sum, mrid, uid, bussiness_id.
--
-- Both sides are already restricted to the same pt literal. No pt predicate
-- is placed in WHERE, because filtering on the right-hand side would turn the
-- LEFT OUTER JOIN into an inner join; groups without exposures get 0.
INSERT OVERWRITE TABLE sum0_expose_hour_temp PARTITION (pt='2012-03-05 10')
SELECT
    COUNT(exposes.page_id) AS expose_sum,
    paths.mrid             AS mrid,
    paths.uid              AS uid,
    paths.bussiness_id     AS bussiness_id
FROM (
    SELECT page_id, mrid, uid, bussiness_id
    FROM ad_PATH
    WHERE pt='2012-03-05 10'
      AND bussiness_id IS NOT NULL
) paths
LEFT OUTER JOIN (
    SELECT page_id
    FROM ad_expose_click
    WHERE pt='2012-03-05 10'
      AND action='expose'
) exposes
ON (exposes.page_id = paths.page_id)
GROUP BY paths.mrid, paths.uid, paths.bussiness_id;


按小时计算点击表的点击数

-- Staging table for the hourly click count, keyed by
-- (mrid, uid, bussiness_id) and partitioned by the hour string pt.
CREATE TABLE IF NOT EXISTS sum0_click_hour_temp (
    click_sum    INT,     -- number of matched 'click' rows for the group
    mrid         STRING,
    uid          STRING,
    bussiness_id STRING
)
PARTITIONED BY (pt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    LINES TERMINATED BY '\n'
STORED AS RCFILE;

-- Hourly click count per (mrid, uid, bussiness_id): counts ad_expose_click
-- rows with action='click' whose page_id matches an ad_PATH row.
--
-- Values are written by position, so the SELECT list follows the table's
-- column order: click_sum, mrid, uid, bussiness_id.
--
-- Both sides are already restricted to the same pt literal. No pt predicate
-- is placed in WHERE, because filtering on the right-hand side would turn the
-- LEFT OUTER JOIN into an inner join; groups without clicks get 0.
INSERT OVERWRITE TABLE sum0_click_hour_temp PARTITION (pt='2012-03-05 10')
SELECT
    COUNT(clicks.page_id) AS click_sum,
    paths.mrid            AS mrid,
    paths.uid             AS uid,
    paths.bussiness_id    AS bussiness_id
FROM (
    SELECT page_id, mrid, uid, bussiness_id
    FROM ad_PATH
    WHERE pt='2012-03-05 10'
      AND bussiness_id IS NOT NULL
) paths
LEFT OUTER JOIN (
    SELECT page_id
    FROM ad_expose_click
    WHERE pt='2012-03-05 10'
      AND action='click'
) clicks
ON (clicks.page_id = paths.page_id)
GROUP BY paths.mrid, paths.uid, paths.bussiness_id;


按小时的点击率
-- Staging table for the hourly click rate (click_sum / expose_sum), keyed by
-- (mrid, uid, bussiness_id) and partitioned by the hour string pt.
CREATE TABLE IF NOT EXISTS sum0_click_percent_hour_temp (
    ad_percent   DOUBLE,
    mrid         STRING,
    uid          STRING,
    bussiness_id STRING
)
PARTITIONED BY (pt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    LINES TERMINATED BY '\n'
STORED AS RCFILE;

-- Hourly click rate: click_sum divided by expose_sum for each
-- (mrid, uid, bussiness_id) present in both staging tables.
-- Each input is pinned to the same pt literal inside its subquery, so the
-- join needs no additional pt comparison.
INSERT OVERWRITE TABLE sum0_click_percent_hour_temp PARTITION (pt='2012-03-05 10')
SELECT
    clicks.click_sum / exposes.expose_sum AS ad_percent,
    clicks.mrid                           AS mrid,
    clicks.uid                            AS uid,
    clicks.bussiness_id                   AS bussiness_id
FROM (
    SELECT click_sum, mrid, uid, bussiness_id
    FROM sum0_click_hour_temp
    WHERE pt='2012-03-05 10'
) clicks
JOIN (
    SELECT expose_sum, mrid, uid, bussiness_id
    FROM sum0_expose_hour_temp
    WHERE pt ='2012-03-05 10'
) exposes
ON (
    clicks.mrid = exposes.mrid
    AND clicks.uid = exposes.uid
    AND clicks.bussiness_id = exposes.bussiness_id
);


小时级页面浏览数
-- Staging table for the hourly page-view count, keyed by
-- (mrid, uid, bussiness_id) and partitioned by the hour string pt.
CREATE TABLE IF NOT EXISTS sum0_click_browse_hour_temp (
    ad_browse    INT,     -- COUNT(DISTINCT page_id) for the group
    mrid         STRING,
    uid          STRING,
    bussiness_id STRING
)
PARTITIONED BY (pt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    LINES TERMINATED BY '\n'
STORED AS RCFILE;

-- Hourly page-view count: number of distinct page_ids per
-- (mrid, uid, bussiness_id), restricted to ad_PATH pages that also appear in
-- ad_effect and whose session has SOURCE_TYPE = 'Direct'.
--
-- Each input is pinned to the same pt literal inside its subquery, so no
-- cross-table pt comparison is needed. Do not add a chained
-- "a.pt = b.pt = c.pt" test: it is evaluated left to right (a boolean
-- compared with a string) and is not an "all equal" check.
--
-- NOTE(review): this statement filters SOURCE_TYPE = 'Direct' while the
-- step-count statements on ad_session filter source_type = '3' -- confirm
-- which encoding the column actually uses.
INSERT OVERWRITE TABLE sum0_click_browse_hour_temp PARTITION (pt='2012-03-05 10')
SELECT
    COUNT(DISTINCT paths.page_id) AS ad_browse,
    paths.mrid                    AS mrid,
    paths.uid                     AS uid,
    paths.bussiness_id            AS bussiness_id
FROM (
    SELECT page_id
    FROM ad_effect
    WHERE pt ='2012-03-05 10'
) effects
JOIN (
    SELECT page_id, session_id, bussiness_id, mrid, uid
    FROM ad_PATH
    WHERE pt='2012-03-05 10'
      AND bussiness_id IS NOT NULL
) paths
ON (effects.page_id = paths.page_id)
JOIN (
    SELECT session_id, bussiness_id, uid
    FROM ad_SESSION
    WHERE SOURCE_TYPE ='Direct'
      AND pt = '2012-03-05 10'
) sessions
ON (
    paths.session_id = sessions.session_id
    AND paths.uid = sessions.uid
    AND paths.bussiness_id = sessions.bussiness_id
)
GROUP BY paths.mrid, paths.uid, paths.bussiness_id;

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

按小时网页浏览/广告点击

-- Staging table for the hourly page-views-per-click ratio
-- (ad_browse / click_sum), keyed by (mrid, uid, bussiness_id) and
-- partitioned by the hour string pt.
CREATE TABLE IF NOT EXISTS sum0_clibro_hour_rate_temp (
    ad_clibro    DOUBLE,
    mrid         STRING,
    uid          STRING,
    bussiness_id STRING
)
PARTITIONED BY (pt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    LINES TERMINATED BY '\n'
STORED AS RCFILE;

-- Hourly page views per ad click: ad_browse divided by click_sum for each
-- (mrid, uid, bussiness_id) present in both staging tables.
-- Each input is pinned to the same pt literal inside its subquery, so the
-- join needs no additional pt comparison.
INSERT OVERWRITE TABLE sum0_clibro_hour_rate_temp PARTITION (pt='2012-03-05 10')
SELECT
    views.ad_browse / clicks.click_sum AS ad_clibro,
    views.mrid                         AS mrid,
    views.uid                          AS uid,
    views.bussiness_id                 AS bussiness_id
FROM (
    SELECT ad_browse, mrid, uid, bussiness_id
    FROM sum0_click_browse_hour_temp
    WHERE pt = '2012-03-05 10'
) views
JOIN (
    SELECT click_sum, mrid, uid, bussiness_id
    FROM sum0_click_hour_temp
    WHERE pt='2012-03-05 10'
) clicks
ON (
    views.mrid = clicks.mrid
    AND views.uid = clicks.uid
    AND views.bussiness_id = clicks.bussiness_id
);


转化率

-- Staging table for the hourly conversion rate (effect_sum / click_sum),
-- keyed by (mrid, uid, bussiness_id) and partitioned by the hour string pt.
CREATE TABLE IF NOT EXISTS sum0_per_con_hour_temp (
    ad_percon    DOUBLE,
    mrid         STRING,
    uid          STRING,
    bussiness_id STRING
)
PARTITIONED BY (pt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    LINES TERMINATED BY '\n'
STORED AS RCFILE;

-- Hourly conversion rate: effect_sum divided by click_sum for each
-- (mrid, uid, bussiness_id) present in both staging tables.
--
-- The statement must end only after the join condition; a terminator placed
-- before a trailing WHERE leaves that WHERE as an invalid statement of its
-- own. Each input is pinned to the same pt literal inside its subquery, so no
-- pt comparison is needed at all.
INSERT OVERWRITE TABLE sum0_per_con_hour_temp PARTITION (pt='2012-03-05 10')
SELECT
    effects.effect_sum / clicks.click_sum AS ad_percon,
    effects.mrid                          AS mrid,
    effects.uid                           AS uid,
    effects.bussiness_id                  AS bussiness_id
FROM (
    SELECT effect_sum, mrid, uid, bussiness_id
    FROM sum0_effect_hour_temp
    WHERE pt ='2012-03-05 10'
) effects
JOIN (
    SELECT click_sum, mrid, uid, bussiness_id
    FROM sum0_click_hour_temp
    WHERE pt ='2012-03-05 10'
) clicks
ON (
    effects.mrid = clicks.mrid
    AND effects.uid = clicks.uid
    AND effects.bussiness_id = clicks.bussiness_id
);


跳出率:

进入1步的离开数量
-- Staging table for the hourly count of distinct sessions at
-- session_step = '1', keyed by (mrid, uid, bussiness_id) and partitioned by
-- the hour string pt. Used as the numerator of the bounce rate.
CREATE TABLE IF NOT EXISTS sum0_step1_hour_temp (
    ad_step1     INT,
    mrid         STRING,
    uid          STRING,
    bussiness_id STRING
)
PARTITIONED BY (pt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    LINES TERMINATED BY '\n'
STORED AS RCFILE;



-- Distinct sessions per (mrid, uid, bussiness_id) among AD_PATH rows with
-- session_step = '1', limited to sessions whose source_type is '3'.
-- Each input is pinned to the same pt literal inside its subquery, so the
-- join needs no additional pt comparison.
INSERT OVERWRITE TABLE sum0_step1_hour_temp PARTITION (pt='2012-03-05 10')
SELECT
    COUNT(DISTINCT first_steps.session_id) AS ad_step1,
    first_steps.mrid                       AS mrid,
    first_steps.uid                        AS uid,
    first_steps.bussiness_id               AS bussiness_id
FROM (
    SELECT session_id, mrid, uid, bussiness_id
    FROM AD_PATH
    WHERE pt ='2012-03-05 10'
      AND session_step='1'
) first_steps
JOIN (
    SELECT session_id, uid, bussiness_id
    FROM ad_session
    WHERE pt ='2012-03-05 10'
      AND source_type='3'
) sessions
ON (
    first_steps.session_id = sessions.session_id
    AND first_steps.uid = sessions.uid
    AND first_steps.bussiness_id = sessions.bussiness_id
)
GROUP BY first_steps.mrid, first_steps.uid, first_steps.bussiness_id;
进入所有步的离开数量
-- Staging table for the hourly count of distinct sessions over all steps,
-- keyed by (mrid, uid, bussiness_id) and partitioned by the hour string pt.
-- Used as the denominator of the bounce rate.
CREATE TABLE IF NOT EXISTS sum0_stepn_hour_temp (
    ad_stepn     INT,
    mrid         STRING,
    uid          STRING,
    bussiness_id STRING
)
PARTITIONED BY (pt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    LINES TERMINATED BY '\n'
STORED AS RCFILE;



-- Distinct sessions per (mrid, uid, bussiness_id) over every AD_PATH row of
-- the hour (no session_step filter), limited to sessions whose source_type
-- is '3'.
-- Each input is pinned to the same pt literal inside its subquery, so the
-- join needs no additional pt comparison.
INSERT OVERWRITE TABLE sum0_stepn_hour_temp PARTITION (pt='2012-03-05 10')
SELECT
    COUNT(DISTINCT all_steps.session_id) AS ad_stepn,
    all_steps.mrid                       AS mrid,
    all_steps.uid                        AS uid,
    all_steps.bussiness_id               AS bussiness_id
FROM (
    SELECT session_id, mrid, uid, bussiness_id
    FROM AD_PATH
    WHERE pt ='2012-03-05 10'
) all_steps
JOIN (
    SELECT session_id, uid, bussiness_id
    FROM ad_session
    WHERE pt ='2012-03-05 10'
      AND source_type='3'
) sessions
ON (
    all_steps.session_id = sessions.session_id
    AND all_steps.uid = sessions.uid
    AND all_steps.bussiness_id = sessions.bussiness_id
)
GROUP BY all_steps.mrid, all_steps.uid, all_steps.bussiness_id;


// 计算跳出率
-- Staging table for the hourly bounce rate (ad_step1 / ad_stepn), keyed by
-- (mrid, uid, bussiness_id) and partitioned by the hour string pt.
CREATE TABLE IF NOT EXISTS sum0_steprate_hour_temp (
    ad_step      DOUBLE,
    mrid         STRING,
    uid          STRING,
    bussiness_id STRING
)
PARTITIONED BY (pt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    LINES TERMINATED BY '\n'
STORED AS RCFILE;

-- Hourly bounce rate: step-1 session count divided by all-step session count
-- for each (mrid, uid, bussiness_id).
--
-- The join must compare mrid ACROSS the two inputs. Comparing a side's mrid
-- with itself is true for every non-NULL mrid and would pair each step-1 row
-- with the step-n rows of every material sharing its uid and bussiness_id.
-- Each input is pinned to the same pt literal inside its subquery, so no pt
-- comparison is needed.
INSERT OVERWRITE TABLE sum0_steprate_hour_temp PARTITION (pt='2012-03-05 10')
SELECT
    first_steps.ad_step1 / all_steps.ad_stepn AS ad_step,
    first_steps.mrid                          AS mrid,
    first_steps.uid                           AS uid,
    first_steps.bussiness_id                  AS bussiness_id
FROM (
    SELECT ad_step1, mrid, uid, bussiness_id
    FROM sum0_step1_hour_temp
    WHERE pt ='2012-03-05 10'
) first_steps
JOIN (
    SELECT ad_stepn, mrid, uid, bussiness_id
    FROM sum0_stepn_hour_temp
    WHERE pt ='2012-03-05 10'
) all_steps
ON (
    first_steps.mrid = all_steps.mrid
    AND first_steps.uid = all_steps.uid
    AND first_steps.bussiness_id = all_steps.bussiness_id
);


平均停留时间:

-- Staging table for the hourly average stay time, keyed by
-- (mrid, uid, bussiness_id) and partitioned by the hour string pt.
CREATE TABLE IF NOT EXISTS avg0_time_hour_temp (
    ad_avgtime   DOUBLE,  -- AVG(session_stay_seconds) for the group
    mrid         STRING,
    uid          STRING,
    bussiness_id STRING
)
PARTITIONED BY (pt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    LINES TERMINATED BY '\n'
STORED AS RCFILE;

-- Hourly average of session_stay_seconds per (mrid, uid, bussiness_id).
-- The average is taken over the joined rows, so a session contributes once
-- for every ad_path row it matches within the group.
-- Each input is pinned to the same pt literal inside its subquery, so the
-- join needs no additional pt comparison.
INSERT OVERWRITE TABLE avg0_time_hour_temp PARTITION (pt='2012-03-05 10')
SELECT
    AVG(sessions.session_stay_seconds) AS ad_avgtime,
    paths.mrid                         AS mrid,
    paths.uid                          AS uid,
    paths.bussiness_id                 AS bussiness_id
FROM (
    SELECT session_id, mrid, uid, bussiness_id
    FROM ad_path
    WHERE pt ='2012-03-05 10'
) paths
JOIN (
    SELECT session_id, uid, bussiness_id, session_stay_seconds
    FROM ad_session
    WHERE pt ='2012-03-05 10'
) sessions
ON (
    paths.session_id = sessions.session_id
    AND paths.uid = sessions.uid
    AND paths.bussiness_id = sessions.bussiness_id
)
GROUP BY paths.mrid, paths.uid, paths.bussiness_id;
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值