4.3.12 DWD层事件表加载数据脚本
1)在hadoop102的/home/atguigu/bin目录下创建脚本
[atguigu@hadoop102 bin]$ vim dwd_event_log.sh
在脚本中编写如下内容
#!/bin/bash
#
# dwd_event_log.sh - load one day's partition of every DWD event table.
#
# Usage: dwd_event_log.sh [yyyy-MM-dd]
#   $1  optional partition date; when omitted, yesterday's date is used.
#
# For each event type, rows of <APP>.dwd_base_event_log with the matching
# event_name and dt are copied into the event-specific table, with the
# event fields extracted from the event_json column via get_json_object.

# Variables kept at the top so they are easy to change.
APP=gmall
hive=/opt/module/hive/bin/hive

# Use the date passed as the first argument if present; otherwise default
# to the day before the current date.
if [ -n "$1" ]; then
    do_date=$1
else
    do_date=$(date -d "-1 day" +%F)
fi

# NOTE: the statement text is a double-quoted shell string, so ${APP} and
# ${do_date} are expanded by the shell, while the JSON-path dollars are
# written as \$ so that Hive receives a literal '$.kv.<field>'.
sql="
set hive.exec.dynamic.partition.mode=nonstrict;

insert overwrite table ${APP}.dwd_display_log
partition (dt='${do_date}')
select
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'\$.kv.action') as action,
    get_json_object(event_json,'\$.kv.goodsid') as goodsid,
    get_json_object(event_json,'\$.kv.place') as place,
    get_json_object(event_json,'\$.kv.extend1') as extend1,
    get_json_object(event_json,'\$.kv.category') as category,
    server_time
from ${APP}.dwd_base_event_log
where dt='${do_date}' and event_name='display';

insert overwrite table ${APP}.dwd_newsdetail_log
partition (dt='${do_date}')
select
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'\$.kv.entry') as entry,
    get_json_object(event_json,'\$.kv.action') as action,
    get_json_object(event_json,'\$.kv.goodsid') as goodsid,
    get_json_object(event_json,'\$.kv.showtype') as showtype,
    get_json_object(event_json,'\$.kv.news_staytime') as news_staytime,
    get_json_object(event_json,'\$.kv.loading_time') as loading_time,
    get_json_object(event_json,'\$.kv.type1') as type1,
    get_json_object(event_json,'\$.kv.category') as category,
    server_time
from ${APP}.dwd_base_event_log
where dt='${do_date}' and event_name='newsdetail';

insert overwrite table ${APP}.dwd_loading_log
partition (dt='${do_date}')
select
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'\$.kv.action') as action,
    get_json_object(event_json,'\$.kv.loading_time') as loading_time,
    get_json_object(event_json,'\$.kv.loading_way') as loading_way,
    get_json_object(event_json,'\$.kv.extend1') as extend1,
    get_json_object(event_json,'\$.kv.extend2') as extend2,
    get_json_object(event_json,'\$.kv.type') as type,
    get_json_object(event_json,'\$.kv.type1') as type1,
    server_time
from ${APP}.dwd_base_event_log
where dt='${do_date}' and event_name='loading';

insert overwrite table ${APP}.dwd_ad_log
partition (dt='${do_date}')
select
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'\$.kv.entry') as entry,
    get_json_object(event_json,'\$.kv.action') as action,
    get_json_object(event_json,'\$.kv.content') as content,
    get_json_object(event_json,'\$.kv.detail') as detail,
    get_json_object(event_json,'\$.kv.source') as ad_source,
    get_json_object(event_json,'\$.kv.behavior') as behavior,
    get_json_object(event_json,'\$.kv.newstype') as newstype,
    get_json_object(event_json,'\$.kv.show_style') as show_style,
    server_time
from ${APP}.dwd_base_event_log
where dt='${do_date}' and event_name='ad';

insert overwrite table ${APP}.dwd_notification_log
partition (dt='${do_date}')
select
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'\$.kv.action') as action,
    get_json_object(event_json,'\$.kv.noti_type') as noti_type,
    get_json_object(event_json,'\$.kv.ap_time') as ap_time,
    get_json_object(event_json,'\$.kv.content') as content,
    server_time
from ${APP}.dwd_base_event_log
where dt='${do_date}' and event_name='notification';

insert overwrite table ${APP}.dwd_active_foreground_log
partition (dt='${do_date}')
select
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'\$.kv.push_id') as push_id,
    get_json_object(event_json,'\$.kv.access') as access,
    server_time
from ${APP}.dwd_base_event_log
where dt='${do_date}' and event_name='active_foreground';

insert overwrite table ${APP}.dwd_active_background_log
partition (dt='${do_date}')
select
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'\$.kv.active_source') as active_source,
    server_time
from ${APP}.dwd_base_event_log
where dt='${do_date}' and event_name='active_background';

insert overwrite table ${APP}.dwd_comment_log
partition (dt='${do_date}')
select
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'\$.kv.comment_id') as comment_id,
    get_json_object(event_json,'\$.kv.userid') as userid,
    get_json_object(event_json,'\$.kv.p_comment_id') as p_comment_id,
    get_json_object(event_json,'\$.kv.content') as content,
    get_json_object(event_json,'\$.kv.addtime') as addtime,
    get_json_object(event_json,'\$.kv.other_id') as other_id,
    get_json_object(event_json,'\$.kv.praise_count') as praise_count,
    get_json_object(event_json,'\$.kv.reply_count') as reply_count,
    server_time
from ${APP}.dwd_base_event_log
where dt='${do_date}' and event_name='comment';

insert overwrite table ${APP}.dwd_favorites_log
partition (dt='${do_date}')
select
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'\$.kv.id') as id,
    get_json_object(event_json,'\$.kv.course_id') as course_id,
    get_json_object(event_json,'\$.kv.userid') as userid,
    get_json_object(event_json,'\$.kv.add_time') as add_time,
    server_time
from ${APP}.dwd_base_event_log
where dt='${do_date}' and event_name='favorites';

insert overwrite table ${APP}.dwd_praise_log
partition (dt='${do_date}')
select
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'\$.kv.id') as id,
    get_json_object(event_json,'\$.kv.userid') as userid,
    get_json_object(event_json,'\$.kv.target_id') as target_id,
    get_json_object(event_json,'\$.kv.type') as type,
    get_json_object(event_json,'\$.kv.add_time') as add_time,
    server_time
from ${APP}.dwd_base_event_log
where dt='${do_date}' and event_name='praise';

insert overwrite table ${APP}.dwd_error_log
partition (dt='${do_date}')
select
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'\$.kv.errorBrief') as errorBrief,
    get_json_object(event_json,'\$.kv.errorDetail') as errorDetail,
    server_time
from ${APP}.dwd_base_event_log
where dt='${do_date}' and event_name='error';
"

# Run all statements in a single Hive CLI invocation.
$hive -e "$sql"
2)增加脚本执行权限
[atguigu@hadoop102 bin]$ chmod 777 dwd_event_log.sh
3)脚本使用
[atguigu@hadoop102 module]$ dwd_event_log.sh 2019-02-11
4)查询导入结果
hive (gmall)>
select * from dwd_comment_log where dt='2019-02-11' limit 2;
5)脚本执行时间
企业开发中，该脚本一般在每日凌晨0点30分~1点之间执行