web网站访问日志(四) 根据业务需求 书写hive sql
1.ods层(事实表)建表语句
原始数据表:对应mr清洗完之后的数据,而不是原始日志数据
drop table if exists ods_weblog_origin;
create table ods_weblog_origin(
valid string,
remote_addr string,
remote_user string,
time_local string,
request string,
status string,
body_bytes_sent string,
http_referer string,
http_user_agent string)
partitioned by (datestr string)
row format delimited
fields terminated by ‘\001’;
点击流pageview表
drop table if exists ods_click_pageviews;
create table ods_click_pageviews(
session string,
remote_addr string,
remote_user string,
time_local string,
request string,
visit_step string,
page_staylong string,
http_referer string,
http_user_agent string,
body_bytes_sent string,
status string)
partitioned by (datestr string)
row format delimited
fields terminated by ‘\001’;
点击流visit表
drop table if exists ods_click_stream_visit;
create table ods_click_stream_visit(
session string,
remote_addr string,
inTime string,
outTime string,
inPage string,
outPage string,
referal string,
pageVisits int)
partitioned by (datestr string)
row format delimited
fields terminated by ‘\001’;
维度表示例:
drop table if exists t_dim_time;
create table t_dim_time(date_key int,year string,month string,day string,hour string) row format delimited fields terminated by ‘,’;
2.ods层数据导入
hdfs dfs -mkdir -p /weblog/preprocessed
hdfs dfs -mkdir -p /weblog/clickstream/pageviews
hdfs dfs -mkdir -p /weblog/clickstream/visits
hdfs dfs -mkdir -p /weblog/dim_time
hdfs dfs -put part-m-00000 /weblog/preprocessed
hdfs dfs -put part-r-00000 /weblog/clickstream/pageviews
hdfs dfs -put part-r-00000 /weblog/clickstream/visits
hdfs dfs -put dim_time_dat.txt /weblog/dim_time
set hive.exec.mode.local.auto=true;
导入清洗结果数据到贴源数据表ods_weblog_origin
load data inpath ‘/weblog/preprocessed/’ overwrite into table ods_weblog_origin partition(datestr=‘20130918’);
show partitions ods_weblog_origin;
select count(*) from ods_weblog_origin;
导入点击流模型pageviews数据到ods_click_pageviews表
load data local inpath ‘/weblog/clickstream/pageviews’ overwrite into table ods_click_pageviews partition(datestr=‘20130918’);
select count(*) from ods_click_pageviews;
导入点击流模型visit数据到ods_click_stream_visit表
load data inpath ‘/weblog/clickstream/visits’ overwrite into table ods_click_stream_visit partition(datestr=‘20130918’);