网站浏览日志分析(5)漏斗模型分析
load data inpath’/hivedata/pageview/part-r-00000’ INTO TABLE ods_click_pageviews PARTITION(datestr=‘20130918’);
load data inpath’/hivedata/click-part-r-00000’ INTO TABLE ods_click_pageviews PARTITION(datestr=‘20130920’);
SELECT *FROM ods_click_pageviews;
1.需求:查询每一个步骤的总访问人数【这里将request字段中的 item/category/order/index 作为四步计算每一步有多少人访问】
【疑问:如果步骤非常多 我们要一个一个手写 step1/2/3/4/5…吗 自己想到可以动态+1 比如时间】
create table dw_oute_numbs as
select ‘step1’ as step,count(distinct remote_addr) as numbs from ods_click_pageviews where datestr=‘20130920’ and request like ‘/item%’
union
select ‘step2’ as step,count(distinct remote_addr) as numbs from ods_click_pageviews where datestr=‘20130920’ and request like ‘/category%’
union
select ‘step3’ as step,count(distinct remote_addr) as numbs from ods_click_pageviews where datestr=‘20130920’