需求:
针对同一个广告位拆分出deeplink数据与非deeplink数据
错误代码:
select ${day},
case when tb3.adslot_type=1 then '横幅'when tb3.adslot_type=2 then '插屏' when tb3.adslot_type=3 then '开屏' when tb3.adslot_type=4 then '原生' end ,
tb3.adslot_id ,
tb3.ext_adslotid,
tb3.dsp_is_dplk,
tb3.request_dau ,
tb3.valid_request_cnt,
tb3.bid_cnt ,
tb3.ret_cnt ,
tb3.without_anti_imp_cnt ,
tb3.imp_cnt ,
tb3.without_anti_click_cnt,
tb3.click_cnt ,
case when tb3.valid_request_cnt != 0 then CONCAT(round((tb3.bid_cnt / tb3.valid_request_cnt),2)*100,'%') else 0 end ,
case when tb3.bid_cnt != 0 then CONCAT(round((tb3.ret_cnt / tb3.bid_cnt),2)*100,'%') else 0 end ,
case when tb3.ret_cnt != 0 then CONCAT(round(tb3.imp_cnt / tb3.ret_cnt,2)*100,'%') else 0 end ,
case when tb3.without_anti_imp_cnt != 0 then CONCAT(round(tb3.without_anti_click_cnt / tb3.without_anti_imp_cnt,2)*100,'%') else 0 end,
case when tb3.imp_cnt != 0 then CONCAT(round(tb3.click_cnt / tb3.imp_cnt,2)*100,'%') else 0 end
from
(select
tb.adslot_id,
tb.adslot_type,tb.dsp_is_dplk,tb.ext_adslotid,
sum(case when tb.bid_flag = 0 and tb.tracker_imp = 1 and tb.tracker_imp_time-tb.event_time<1800 then 1 else 0 end) as without_anti_imp_cnt,
sum(case when tb.bid_flag = 0 and tb.tracker_imp = 1 and tb.tracker_click = 1 then 1 else 0 end) as without_anti_click_cnt,
count(distinct(case when tb.flow_status = 0 then tb.bid_request_id else null end)) as valid_request_cnt,
count(distinct(case when tb.bid_flag = 0 then tb.bid_request_id else null end)) as bid_cnt,
count(distinct(case when tb.bid_flag = 0 and tb.bid_dsp_id = tb.dsp_id then tb.bid_request_id else null end)) as ret_cnt,
count(distinct (tb1.device_imei_md5)) as request_dau,
sum(case when tb1.bid_flag = 0 and tb1.tracker_imp = 1 then 1 else 0 end) as imp_cnt,
sum(case when tb1.bid_flag = 0 and tb1.tracker_imp = 1 and tb1.tracker_click = 1 then 1 else 0 end) as click_cnt
from (select bid_request_id, adslot_id,adslot_type,ext_adslotid,dsp_id,bid_flag,tracker_imp,tracker_imp_time,event_time,tracker_click,
flow_status,bid_dsp_id,dsp_is_dplk from adx.adx_bid_info_orc
where date = '${day}' and adslot_id != '' and adslot_type is not null and dsp_id is not null ) tb
left join (select bid_request_id,adslot_id,dsp_is_dplk,ext_adslotid,bid_flag,tracker_imp,tracker_click,device_imei_md5
from adx.adx_bid_orc
where date = '${day}' and adslot_id != '' and adslot_type is not null )tb1
on (tb.adslot_id=tb1.adslot_id and tb.dsp_is_dplk=tb1.dsp_is_dplk and tb.bid_request_id=tb1.bid_request_id)
group by tb.adslot_id,tb.ext_adslotid,tb.adslot_type,tb.dsp_is_dplk
)tb3 where tb3.adslot_id in('3234','3169','3190')"
结果:
问题:当广告位类型为插屏,id为3234,平台广告位空的展示率为133%,这个结果明显不对。
正确代码:
select ${day},
case when tb.adslot_type=1 then '横幅'when tb.adslot_type=2 then '插屏' when tb.adslot_type=3 then '开屏' when tb.adslot_type=4 then '原生' end ,
tb.adslot_id ,
tb.ext_adslotid,
tb.dsp_is_dplk,
tb1.request_dau ,
tb.valid_request_cnt,
tb.bid_cnt ,
tb.ret_cnt ,
tb.without_anti_imp_cnt ,
tb1.imp_cnt ,
tb.without_anti_click_cnt,
tb1.click_cnt ,
case when tb.valid_request_cnt != 0 then CONCAT(round((tb.bid_cnt / tb.valid_request_cnt),2)*100,'%') else 0 end ,
case when tb.bid_cnt != 0 then CONCAT(round((tb.ret_cnt / tb.bid_cnt),2)*100,'%') else 0 end ,
case when tb.ret_cnt != 0 then CONCAT(round(tb1.imp_cnt / tb.ret_cnt,2)*100,'%') else 0 end ,
case when tb.without_anti_imp_cnt != 0 then CONCAT(round(tb.without_anti_click_cnt / tb.without_anti_imp_cnt,2)*100,'%') else 0 end,
case when tb1.imp_cnt != 0 then CONCAT(round(tb1.click_cnt / tb1.imp_cnt,2)*100,'%') else 0 end
from
(select adslot_id,adslot_type,ext_adslotid,dsp_is_dplk,
sum(case when bid_flag = 0 and tracker_imp = 1 and tracker_imp_time-event_time<1800 then 1 else 0 end) as without_anti_imp_cnt,
sum(case when bid_flag = 0 and tracker_imp = 1 and tracker_click = 1 then 1 else 0 end) as without_anti_click_cnt,
count(distinct(case when flow_status = 0 then bid_request_id else null end)) as valid_request_cnt,
count(distinct(case when bid_flag = 0 then bid_request_id else null end)) as bid_cnt,
count(distinct(case when bid_flag = 0 and bid_dsp_id = dsp_id then bid_request_id else null end)) as ret_cnt
from adx.adx_bid_info_orc
where date = '${day}' and adslot_id in ('3234','3169','3190') and adslot_type is not null and dsp_id is not null
group by adslot_id,adslot_type,ext_adslotid,dsp_is_dplk) tb
left join
(select adslot_id,adslot_type,ext_adslotid,dsp_is_dplk,
count(distinct (device_imei_md5)) as request_dau,
sum(case when bid_flag = 0 and tracker_imp = 1 then 1 else 0 end) as imp_cnt,
sum(case when bid_flag = 0 and tracker_imp = 1 and tracker_click = 1 then 1 else 0 end) as click_cnt
from adx.adx_bid_orc
where date = '${day}' and adslot_id in('3234','3169','3190') and adslot_type is not null
group by adslot_id,adslot_type,ext_adslotid,dsp_is_dplk)tb1
on (tb.adslot_id=tb1.adslot_id and tb.adslot_type = tb1.adslot_type and tb.ext_adslotid=tb1.ext_adslotid and tb1.dsp_is_dplk=tb.dsp_is_dplk)
错误原因分析:
在错误代码中,我使用的三个连接字段并不能唯一定位出一行,最后导致在联接时出现了多对多问题。
在写连接时,如何防止多对多?
在连接之前用连接字段进行分组。