1. 流量分析
a. 基础指标多维度统计分析
统计 PageView 浏览次数(pv)
select count(*) from ods_weblog_detail where datestr ="20181101" and valid = "true"; 排除静态资源
View Code
统计Unique Visitor 独立访客(UV)
select count(distinct remote_addr) as uvs from ods_weblog_detail where datestr ="20181101";
View Code
统计访问次数(VV)
select count(distinct session) as vvs from ods_click_stream_visit where datestr ="20181101";
View Code
ip
select count(distinct remote_addr) as ips from ods_weblog_detail where datestr ="20181101";
View Code
结果表
create table dw_webflow_basic_info(month string,daystring,
pvbigint,uv bigint ,ip bigint, vv bigint) partitioned by(datestr string);insert into table dw_webflow_basic_info partition(datestr="20181101")select '201811','01',a.*,b.* from(select count(*) as pv,count(distinct remote_addr) as uv,count(distinct remote_addr) asipsfromods_weblog_detailwhere datestr ='20181101') a join(select count(distinct session) as vvs from ods_click_stream_visit where datestr ="20181101") b;
View Code
多维度分析--按照时间
方式一:直接在ods_weblog_detail单表上进行查询--计算该处理批次(一天)中的各小时pvs
drop tabledw_pvs_everyhour_oneday;create table dw_pvs_everyhour_oneday(month string,day string,hour string,pvs bigint) partitioned by(datestr string);insert into table dw_pvs_everyhour_oneday partition(datestr='20130918')select a.month as month,a.day as day,a.hour as hour,count(*) as pvs fromods_weblog_detail awhere a.datestr='20130918' group by a.month,a.day,a.hour;--计算每天的pvs
drop tabledw_pvs_everyday;create table dw_pvs_everyday(pvs bigint,month string,daystring);insert into tabledw_pvs_everydayselect count(*) as pvs,a.month as month,a.day as day fromods_weblog_detail agroup by a.month,a.day;
方式二:与时间维表关联查询--维度:日
drop tabledw_pvs_everyday;create table dw_pvs_everyday(pvs bigint,month string,daystring);insert into tabledw_pvs_everydayselect count(*) as pvs,a.month as month,a.day as day from (select distinct month, day fromt_dim_time) ajoinods_weblog_detail bon a.month=b.month and a.day=b.day
group by a.month,a.day;--维度:月
drop tabledw_pvs_everymonth;create table dw_pvs_everymonth (pvs bigint,monthstring);insert into tabledw_pvs_everymonthselect count(*) as pvs,a.month from (select distinct month fromt_dim_time) ajoin ods_weblog_detail b on a.month=b.month group by a.month;--另外,也可以直接利用之前的计算结果。比如从之前算好的小时结果中统计每一天的
Insert into tabledw_pvs_everydaySelect sum(pvs) as pvs,month,day from dw_pvs_everyhour_oneday group by month,day having day='18';
View Code
按照referer、时间维度