前言
目前数仓是基于MaxCompute实现,基于时间维表的实现每个公司方式不一样,之前公司是通过PY封装存储过程,通过函数获取相应时间维度,为了方便后续开发人员,落地一张时间维表
提示:以下是本篇文章正文内容,下面案例可供参考
一、时间维表模型设计
二、SQL实现
代码如下(示例):
insert overwrite table tb_dim_pub_date_info_yi partition (yt = '${years}')
select date_format(date_id,'yyyyMMdd') day_id,
date_id day_id_iso,
date_format(date_id,'yyyy')||'年'||date_format(date_id,'MM')||'月'||date_format(date_id,'dd')||'日' day_id_desc,
date_format(date_id,'yyyyMM') month_id,
date_format(date_id,'yyyy-MM') month_id_iso,
date_format(date_id,'yyyy')||'年'||date_format(date_id,'MM')||'月' month_id_desc,
date_format(date_id,'yyyy')||'年' year_desc,
weekofyear(TO_DATE(date_id,"yyyy-MM-dd")) week_id,
case when date_format(date_id,'MM') = '01' and weekofyear(TO_DATE(date_id,"yyyy-MM-dd")) > 50
then to_char(cast(date_format(date_id,'yyyy') as int) - 1)||'年第'||weekofyear(TO_DATE(date_id,"yyyy-MM-dd"))||'周'
else date_format(date_id,'yyyy')||'年第'||weekofyear(TO_DATE(date_id,"yyyy-MM-dd"))||'周' end week_desc, --处理跨年周显示问题
case when date_format(date_id,'dd') <= 10 then 1
when date_format(date_id,'dd') >= 21 then 3
else 2 end ten_days,
weekday(TO_DATE(date_id,"yyyy-MM-dd"))+1 dt_week,
cast(date_format(date_id,'dd') as int) dt_month,
DATEDIFF(TO_DATE(date_id,"yyyy-MM-dd"),TO_DATE(date_format(date_id,'yyyy')||'-01'||'-01',"yyyy-MM-dd"))+1 dt_year,
case when date_id = date_format(lastday((TO_DATE(date_id,"yyyy-MM-dd"))),'yyyy-MM-dd') then 1
else 0 end is_lastday,
case when weekday(TO_DATE(date_id,"yyyy-MM-dd"))+1 = 6 or weekday(TO_DATE(date_id,"yyyy-MM-dd"))+1 = 7 then 1 else 0 end is_weekend,
null is_holiday,
quarter(TO_DATE(date_id,"yyyy-MM-dd")) quarter_id,
date_format(date_id,'yyyy')||'年第'||quarter(TO_DATE(date_id,"yyyy-MM-dd"))||'季度' quarter_desc,
date_format(date_id,'yyyyMM')||'01' day_month01,
date_format(date_id,'yyyy-MM')||'-01' day_month01_iso,
date_format(lastday((TO_DATE(date_id,"yyyy-MM-dd"))),'yyyyMMdd') day_monthend,
date_format(lastday((TO_DATE(date_id,"yyyy-MM-dd"))),'yyyy-MM-dd') day_monthend_iso,
date_format(dateadd(TO_DATE(date_id,"yyyy-MM-dd"),-1,'dd'),'yyyyMMdd') day_last01,
date_format(dateadd(TO_DATE(date_id,"yyyy-MM-dd"),-1,'dd'),'yyyy-MM-dd') day_last01_iso,
date_format(dateadd(TO_DATE(date_id,"yyyy-MM-dd"),-3,'dd'),'yyyyMMdd') day_last03,
date_format(dateadd(TO_DATE(date_id,"yyyy-MM-dd"),-3,'dd'),'yyyy-MM-dd') day_last03_iso,
date_format(dateadd(TO_DATE(date_id,"yyyy-MM-dd"),-7,'dd'),'yyyyMMdd') day_last07,
date_format(dateadd(TO_DATE(date_id,"yyyy-MM-dd"),-7,'dd'),'yyyy-MM-dd') day_last07_iso,
date_format(dateadd(TO_DATE(date_id,"yyyy-MM-dd"),-30,'dd'),'yyyyMMdd') day_last30,
date_format(dateadd(TO_DATE(date_id,"yyyy-MM-dd"),-30,'dd'),'yyyy-MM-dd') day_last30_iso,
date_format(dateadd(TO_DATE(date_id,"yyyy-MM-dd"),-1,'MM'),'yyyyMMdd') day_lastmonth,
date_format(dateadd(TO_DATE(date_id,"yyyy-MM-dd"),-1,'MM'),'yyyy-MM-dd') day_lastmonth_iso,
date_format(dateadd(TO_DATE(date_id,"yyyy-MM-dd"),-2,'MM'),'yyyyMMdd') day_last2month,
date_format(dateadd(TO_DATE(date_id,"yyyy-MM-dd"),-2,'MM'),'yyyy-MM-dd') day_last2month_iso,
date_format(dateadd(TO_DATE(date_id,"yyyy-MM-dd"),-3,'MM'),'yyyyMMdd') day_last3month,
date_format(dateadd(TO_DATE(date_id,"yyyy-MM-dd"),-3,'MM'),'yyyy-MM-dd') day_last3month_iso,
date_format(dateadd(TO_DATE(date_id,"yyyy-MM-dd"),-12,'MM'),'yyyyMMdd') day_lastyear,
date_format(dateadd(TO_DATE(date_id,"yyyy-MM-dd"),-12,'MM'),'yyyy-MM-dd') day_lastyear_iso,
date_format(date_id,'yyyy')||'01'||'01' day_year01,
date_format(date_id,'yyyy')||'-01'||'-01' day_year01_iso,
date_format(date_id,'yyyy')||'12'||'31' day_yearend,
date_format(date_id,'yyyy')||'-12'||'-31' day_yearend_iso,
case when date_format(date_id,'dd') between '01' and '07' then 1
when date_format(date_id,'dd') between '08' and '14' then 2
when date_format(date_id,'dd') between '15' and '21' then 3
else 4 end month_weeks
from (select date_format(DATEADD(TO_DATE("${years}"||"-01-01","yyyy-MM-dd"),a.pos,"dd"),'yyyy-MM-dd') as date_id
from (select posexplode(split(repeat("x",DATEDIFF(TO_DATE("${years}"||"-12-31","yyyy-MM-dd"),TO_DATE("${years}"||"-01-01","yyyy-MM-dd")))||"a","x"))) a);
此处MaxCompute(下称MC)是hive兼容模式,部分函数是hive的用法,MC文档中没有提到的函数也是可以使用的,需要自己摸索,split函数默认空数组分割是空,所以拼接了一个字符
总结
各种拼接实现,很LOW很好用哟!!!!