其中dt_playtime在shell中的变量赋值为dt_playtime=`date -d "$dt" +"%Y-%m-%d"` ##需要传递为字符串,加引号才能被识别
## 先查苏打播放器的时间 原语句为:
SELECT
did,uuid,
if( cast(duration as float) is NULL, 0.0, cast(duration as float) ) as duration
FROM
mds_suda_video_playtime_hour -- 这边日志表,是播放器的日志表,“小时级别的播放器日志表”,一小时一个日志,按小时分区存的
WHERE dt='${hiveconf:dt_playtime}'
改写为
hive -e "SELECT suda_inter.did,
suda_inter.uuid,
max(suda_inter.duration) as suda_durseconds
FROM(
SELECT
did,uuid,
if( cast(duration as float) is NULL, 0.0, cast(duration as float) ) as duration
FROM
mds_suda_video_playtime_hour
WHERE dt='2020-07-16') suda_inter
GROUP BY suda_inter.did,suda_inter.uuid
limit 100;"
## 这上面的dt必须要加引号才能有结果。其中dt_playtime在shell中的变量赋值为dt_playtime=`date -d "$dt" +"%Y-%m-%d"` ##需要传递为字符串才能被识别 #f2817e2796dbf018 ef6814f2a063378aa020cfa6fb3e66e3 11.0 #e1d82800867fca03 fcefd08cd9083c43a7822a7a2cd169e2 6.0 #3ef38f5a2d17f708 8e1112b61d3e35fe9a977071c65400e6 37.0 ## 再查apache播放器的时间 ## 原语句为:
SELECT
device_id,uuid,req_id,
collect_set(play_duration) as play_duration, -- 播放时长
FROM
mds_apache_code_v3 -- web的后台日志表,web 端的所有行为数据日志都有,
WHERE dt='${hiveconf:dt}'
GROUP BY device_id,uuid,req_id,timestamp
--上面的dt传时间格式为20200716,可加引号可不加
hive -e "SELECT
device_id,uuid,
collect_set(play_duration) as play_duration
FROM mds_apache_code_v3
WHERE dt=20200716
GROUP BY device_id,uuid,req_id,timestamp
limit 100"
#上面的dt 是没有加引号的,但是有结果 #结果: 是有非空时长的,即apache这的duration 是有播放时长的 #000168c99f29f3b5 60920638655b3ff6b5c3103741081a97 [] #000168c99f29f3b5 fb3046a7d0743b15909d9a7872e651af ["9"] #00016cfb8a33e617 62a447a637553f4c85b3b9f642b0adce []
hive -e "SELECT
device_id,uuid,
collect_set(play_duration) as play_duration
FROM mds_apache_code_v3
WHERE dt='20200716'
GROUP BY device_id,uuid,req_id,timestamp
limit 100"
#加引号也有结果