After the required metrics have been computed from the data in the database, the results are stored as key-value pairs in a single table.

Outline of the approach:
[1] First compute the required values from the source tables and keep them in temporary tables (CTEs). For example, the statement that computes daily active users (DAU):

with tbl_dau as 
(
select 'dau' as name, count(distinct deviceid) as cnt
from
(
select deviceid from src_huidu_zl.event3 where hdfs_par = '20170108'
union all
select deviceid from src_huidu_zl.page3 where hdfs_par = '20170108'
) as tmp
)


In addition, the logic for computing historical maximums is more involved. An example:
historymaxnewdev as (
select
    if(regexp_extract(json_value, '\"maxnewdev\"\:(\\d+(\\.\\d+)*)', 1) = '', '1',
       regexp_extract(json_value, '\"maxnewdev\"\:(\\d+(\\.\\d+)*)', 1)) as historymaxvalue,
    if(regexp_extract(json_value, '\"maxnewdev\":(\"\\d+(\\.\\d+)*)\\|\\|(\\d{8})\"', 3) = '', '19710101',
       regexp_extract(json_value, '\"maxnewdev\":(\"\\d+(\\.\\d+)*)\\|\\|(\\d{8})\"', 3)) as historydate
from ana_fx_goal.zl_daily_kv
where json_key = 'daily_static' and hdfs_par = '20170108'
)

This statement first reads the stored history from the zl_daily_kv table and extracts the historical maximum of new devices with a regular expression (before extraction the value has the form 677777||20161026, and the numeric part may contain a decimal point). The historical maximum is then compared with today's value, the larger of the two becomes the new maximum, and the stored maximum is updated accordingly.
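The comparison and update step itself appears in the final SELECT of the full program below; reformatted here for readability, it keeps whichever of today's count and the stored maximum is larger and re-encodes it as value||date (tbl_newdev and historymaxnewdev are CTEs defined in the program):

-- same expression as in the full program, only reflowed
select 'maxnewdev' as name,
       if(tbl_newdev.cnt > cast(historymaxnewdev.historymaxvalue as bigint),
          concat(cast(tbl_newdev.cnt as string), '||', '20170108'),
          concat(historymaxnewdev.historymaxvalue, '||', historydate)) as cnt
from tbl_newdev, historymaxnewdev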

[2] Concatenate the individual key-value pairs into a JSON string.
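In the full program this is done with group_concat. Stripped down to its core, the assembly looks roughly like the sketch below, where tbl_kv is only a placeholder for the union of all metric CTEs that produce (name, cnt) rows:

select concat('{',
              group_concat(concat('"', name, '"', ':', cast(cnt as string)), ','),
              '}') as json_output
from tbl_kv  -- placeholder name, not part of the original program

This yields a single string such as {"dau":12345,"launchrecords":67890,...}.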

[3] Store the result of step 2 under the key 'daily_static'.
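The target table itself is not shown in the post; judging from the INSERT statement below, its layout is roughly the following (a sketch only, the column types are assumptions):

-- assumed layout, reconstructed from the INSERT statement below
create table ana_fx_goal.zl_daily_kv (
  day_begin     timestamp,  -- the date the metrics describe
  generate_time timestamp,  -- when the row was produced
  json_key      string,     -- e.g. 'daily_static'
  json_value    string      -- the assembled JSON string
)
partitioned by (hdfs_par string);  -- yyyymmdd partition key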

The complete program follows:

    
insert overwrite ana_fx_goal.zl_daily_kv partition(hdfs_par='20170108')
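-- refresh the daily_static row for this date and carry over every other json_key row unchanged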
select * from
(
select cast(
concat(substr('20170108', 1, 4), '-', substr('20170108', 5, 2), '-', substr('20170108', 7, 2)) as timestamp) as day_begin,
NOW() as generate_time,
'daily_static' as json_key,
json_output as json_value
from
(
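-- step [2]: assemble all (key, value) pairs into one JSON string: {"k1":v1,"k2":v2,...}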
select concat( '{',
group_concat(
concat(wrapped_column_key, ':', wrapped_column_value),
','),
'}'
) as json_output
from
(
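-- wrap each metric name in double quotes (the JSON key) and render its value as a string;
-- the 'double' = 'string' comparison reads like a template placeholder: numeric metrics
-- stay unquoted, string-typed metrics would be wrapped in quotes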
select
concat('"', name, '"') as wrapped_column_key,
case
when cnt is null then 'null'
when 'double' = 'string' then concat('"', cast(cnt as string), '"')
else cast(cnt as string)
end as wrapped_column_value
from
(
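-- step [1]: compute each metric in its own CTE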
with tbl_dau as
(
select 'dau' as name, count(distinct deviceid) as cnt
from
(
select deviceid from src_huidu_zl.event3 where hdfs_par = '20170108'
union all
select deviceid from src_huidu_zl.page3 where hdfs_par = '20170108'
) as tmp
),
tbl_launchrecords as
(
select 'launchrecords' as name, count(*) as cnt
from
src_huidu_zl.launch2
where hdfs_par = '20170108'
),
tbl_newdev as
(
select 'newdev' as name, count(distinct deviceid) as cnt
from ana_fx_middle.devappear2
where first_appear_hdfspar = '20170108' and hdfs_par = '20170108'
),
tbl_avgusetime as
(
select 'avgusetime' as name, avg(use_time) as cnt
from src_huidu_zl.launch2
where hdfs_par = '20170108'
),
tbl_avgdevtime as
(
select 'avgdevtime' as name, sum(use_time)/count(distinct deviceid) as cnt
from src_huidu_zl.launch2
where hdfs_par = '20170108'
),
tbl_alldev as
(
select 'alldev' as name, count(distinct deviceid) as cnt
from ana_fx_middle.devappear2
where hdfs_par = '20170108'
),
tbl_mau as
(
select 'mau' as name, count(distinct deviceid) as cnt
from
(
select deviceid from src_huidu_zl.event3 where hdfs_par between '20161211' and '20170108'
union all
select deviceid from src_huidu_zl.page3 where hdfs_par between '20161211' and '20170108'
) as tmp
),
tbl_avglaunchrecords as
(
select 'avglaunchrecords' as name, count(*)/count(distinct deviceid) as cnt
from src_huidu_zl.launch2
where hdfs_par = '20170108'
),
tbl_launchdays as
(
select sum(cnt) as cnt
from
(
select hdfs_par, count(distinct deviceid) as cnt
from src_huidu_zl.launch2
where hdfs_par between '20161211' and '20170108'
group by hdfs_par
) as tmp
),
tbl_launchdevs as
(
select count(distinct deviceid) as cnt
from src_huidu_zl.launch2
where hdfs_par between '20161211' and '20170108'
),
tbl_28new7retain as
(
select '28new7retain' as name, count(distinct tbl_7day.deviceid) as cnt
from
(
select deviceid from src_huidu_zl.event3 where hdfs_par between '20170101' and '20170108'
union all
select deviceid from src_huidu_zl.page3 where hdfs_par between '20170101' and '20170108'
) as tbl_7day
left join
(
select distinct deviceid as deviceid
from ana_fx_middle.devappear2
where first_appear_hdfspar = '20161211'
) as tbl_28day
on tbl_7day.deviceid = tbl_28day.deviceid
where tbl_28day.deviceid != 'NULL'
),
tbl_28new as
(
select '28new' as name, count(distinct deviceid) as cnt
from ana_fx_middle.devappear2
where first_appear_hdfspar = '20161211'
),
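-- read back the previously stored maximums from the daily_static JSON
-- (defaults: value '1' and date '19710101' when no history exists yet)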
historymaxdauvalue as (
select
    if(regexp_extract(json_value, '\"maxdau\"\:(\\d+(\\.\\d+)*)', 1) = '', '1',
       regexp_extract(json_value, '\"maxdau\"\:(\\d+(\\.\\d+)*)', 1)) as historymaxvalue,
    if(regexp_extract(json_value, '\"maxdau\":(\"\\d+(\\.\\d+)*)\\|\\|(\\d{8})\"', 3) = '', '19710101',
       regexp_extract(json_value, '\"maxdau\":(\"\\d+(\\.\\d+)*)\\|\\|(\\d{8})\"', 3)) as historydate
from ana_fx_goal.zl_daily_kv
where json_key = 'daily_static' and hdfs_par = '20170108'
),
historymaxlaunchrecords as (
select
    if(regexp_extract(json_value, '\"maxlaunchrecords\"\:(\\d+(\\.\\d+)*)', 1) = '', '1',
       regexp_extract(json_value, '\"maxlaunchrecords\"\:(\\d+(\\.\\d+)*)', 1)) as historymaxvalue,
    if(regexp_extract(json_value, '\"maxlaunchrecords\":(\"\\d+(\\.\\d+)*)\\|\\|(\\d{8})\"', 3) = '', '19710101',
       regexp_extract(json_value, '\"maxlaunchrecords\":(\"\\d+(\\.\\d+)*)\\|\\|(\\d{8})\"', 3)) as historydate
from ana_fx_goal.zl_daily_kv
where json_key = 'daily_static' and hdfs_par = '20170108'
),
historymaxnewdev as (
select
    if(regexp_extract(json_value, '\"maxnewdev\"\:(\\d+(\\.\\d+)*)', 1) = '', '1',
       regexp_extract(json_value, '\"maxnewdev\"\:(\\d+(\\.\\d+)*)', 1)) as historymaxvalue,
    if(regexp_extract(json_value, '\"maxnewdev\":(\"\\d+(\\.\\d+)*)\\|\\|(\\d{8})\"', 3) = '', '19710101',
       regexp_extract(json_value, '\"maxnewdev\":(\"\\d+(\\.\\d+)*)\\|\\|(\\d{8})\"', 3)) as historydate
from ana_fx_goal.zl_daily_kv
where json_key = 'daily_static' and hdfs_par = '20170108'
),
historymaxavgusetime as (
select
    if(regexp_extract(json_value, '\"maxavgusetime\"\:(\\d+(\\.\\d+)*)', 1) = '', '1',
       regexp_extract(json_value, '\"maxavgusetime\"\:(\\d+(\\.\\d+)*)', 1)) as historymaxvalue,
    if(regexp_extract(json_value, '\"maxavgusetime\":(\"\\d+(\\.\\d+)*)\\|\\|(\\d{8})\"', 3) = '', '19710101',
       regexp_extract(json_value, '\"maxavgusetime\":(\"\\d+(\\.\\d+)*)\\|\\|(\\d{8})\"', 3)) as historydate
from ana_fx_goal.zl_daily_kv
where json_key = 'daily_static' and hdfs_par = '20170108'
),
historymaxavgdevtime as (
select
    if(regexp_extract(json_value, '\"maxavgdevtime\"\:(\\d+(\\.\\d+)*)', 1) = '', '1',
       regexp_extract(json_value, '\"maxavgdevtime\"\:(\\d+(\\.\\d+)*)', 1)) as historymaxvalue,
    if(regexp_extract(json_value, '\"maxavgdevtime\":(\"\\d+(\\.\\d+)*)\\|\\|(\\d{8})\"', 3) = '', '19710101',
       regexp_extract(json_value, '\"maxavgdevtime\":(\"\\d+(\\.\\d+)*)\\|\\|(\\d{8})\"', 3)) as historydate
from ana_fx_goal.zl_daily_kv
where json_key = 'daily_static' and hdfs_par = '20170108'
)
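-- emit every metric as a (name, cnt) string pair; derived ratios and updated maximums follow the plain counts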
select name,cast(cnt as string) as cnt from tbl_dau
union all
select name,cast(cnt as string) as cnt from tbl_launchrecords
union all
select name,cast(cnt as string) as cnt from tbl_newdev
union all
select name,cast(cnt as string) as cnt from tbl_avgusetime
union all
select name,cast(cnt as string) as cnt from tbl_avgdevtime
union all
select name,cast(cnt as string) as cnt from tbl_alldev
union all
select name,cast(cnt as string) as cnt from tbl_mau
union all
select name,cast(cnt as string) as cnt from tbl_avglaunchrecords
union all
select 'avglaunchdays' as name, cast((tbl_launchdays.cnt/tbl_launchdevs.cnt) as string) as cnt from tbl_launchdays, tbl_launchdevs
union all
select 'retainratio' as name, cast((tbl_28new7retain.cnt/tbl_28new.cnt) as string) as cnt from tbl_28new, tbl_28new7retain
union all
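-- for each maximum: keep today's value if it beats the stored one, otherwise keep the history (format: value||yyyymmdd)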
select 'maxdau' as name, if(tbl_dau.cnt>cast(historymaxdauvalue.historymaxvalue as bigint),concat(cast(tbl_dau.cnt as string),'||','20170108'),concat(historymaxdauvalue.historymaxvalue,'||',historydate)) as cnt from tbl_dau,historymaxdauvalue
union all
select 'maxlaunchrecords' as name, if(tbl_launchrecords.cnt>cast(historymaxlaunchrecords.historymaxvalue as bigint),concat(cast(tbl_launchrecords.cnt as string),'||','20170108'),concat(historymaxlaunchrecords.historymaxvalue,'||',historydate)) as cnt from tbl_launchrecords,historymaxlaunchrecords
union all
select 'maxnewdev' as name, if(tbl_newdev.cnt>cast(historymaxnewdev.historymaxvalue as bigint),concat(cast(tbl_newdev.cnt as string),'||','20170108'),concat(historymaxnewdev.historymaxvalue,'||',historydate)) as cnt from tbl_newdev,historymaxnewdev
union all
select 'maxavgusetime' as name, if(tbl_avgusetime.cnt>cast(historymaxavgusetime.historymaxvalue as bigint),concat(cast(tbl_avgusetime.cnt as string),'||','20170108'),concat(historymaxavgusetime.historymaxvalue,'||',historydate)) as cnt from tbl_avgusetime,historymaxavgusetime
union all
select 'maxavgdevtime' as name, if(tbl_avgdevtime.cnt>cast(historymaxavgdevtime.historymaxvalue as bigint),concat(cast(tbl_avgdevtime.cnt as string),'||','20170108'),concat(historymaxavgdevtime.historymaxvalue,'||',historydate)) as cnt from tbl_avgdevtime,historymaxavgdevtime



) as tmp1_B
) as tmp2_B




) as tmp
limit 1
) as t1
union all
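-- keep the rows with other json_key values already stored in this partition so the overwrite does not drop them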
select * from
(
select
day_begin,
generate_time,
json_key,
json_value
from ana_fx_goal.zl_daily_kv
where hdfs_par = '20170108'
and json_key != 'daily_static'
) as t2
;
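Once the job has run, the stored row can be read back with a simple lookup; the second query is a sketch that pulls a single metric out of the JSON, mirroring the regexp_extract patterns used above:

select json_value
from ana_fx_goal.zl_daily_kv
where hdfs_par = '20170108' and json_key = 'daily_static';

-- extract one metric, e.g. dau, from the stored JSON (pattern modeled on the ones above)
select regexp_extract(json_value, '\"dau\"\:(\\d+(\\.\\d+)*)', 1) as dau
from ana_fx_goal.zl_daily_kv
where hdfs_par = '20170108' and json_key = 'daily_static';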
