创建日期维表
--日粒度维表
drop table dim.dim_date;
CREATE TABLE IF NOT EXISTS dim.dim_date(
`datetimestr` STRING COMMENT '当前时间(yyyy-MM-dd HH:mm:ss)'
,`yearstr` STRING COMMENT '当前年份'
,`quarterid` BIGINT COMMENT '当前季度'
,`quarter_cn` STRING COMMENT '当前季度中文'
,`monthstr` STRING COMMENT '当前月份'
,`weekofyearid` BIGINT COMMENT '当年第几周'
,`dayofyearid` BIGINT COMMENT '当年第几天'
,`dayofmonthstr` STRING COMMENT '当月日期(dd)'
,`dayofweekid` BIGINT COMMENT '当前周几(0-6)'
,`dayofweek_cn` STRING COMMENT '当前周中文'
--,`hourstr` STRING COMMENT '当前小时(HH)'
--,`minutestr` STRING COMMENT '当前分钟(mm)'
,`workday_flag` BIGINT COMMENT '是否为工作日(1,是;0,不是)'
,`create_time` STRING COMMENT '数据创建日期(yyyy-MM-dd HH:mm:ss.SSS)'
)
COMMENT '日期粒度度维表'
--PARTITIONED BY(yearstr STRING COMMENT '当前年分区(yyyy)',monthstr STRING COMMENT '当前月分区(MM)')
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
NULL DEFINED AS ''
STORED AS ORC
TBLPROPERTIES('orc.compress' = 'SNAPPY','transactional'='false');
--日分钟粒度维表
drop table dim.dim_datetime;
CREATE TABLE IF NOT EXISTS dim.dim_datetime(
`timestr` STRING COMMENT '当前时间(HH:mm:ss)'
,`hourstr` STRING COMMENT '当前小时(HH)'
,`minutestr` STRING COMMENT '当前分钟(mm)'
)
COMMENT '日分钟粒度维表'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
NULL DEFINED AS ''
STORED AS ORC
TBLPROPERTIES('orc.compress' = 'SNAPPY','transactional'='false');
生成数据
--生成日粒度维表数据
WITH datetime_t AS(
SELECT FROM_UNIXTIME(UNIX_TIMESTAMP('2000-01-01 00:00:00') + idx_t.pos*60) as datetimestr
FROM
(
SELECT
POSEXPLODE(
SPLIT(
REPEAT('o',CAST((UNIX_TIMESTAMP('2000-01-01 23:59:00') - UNIX_TIMESTAMP('2000-01-01 00:00:00'))/60 AS INT))
,'o'
)
)
) idx_t
),
date_t AS (
select date_add('2000-01-01',idx_t.pos) as datetimestr
from
(
select
posexplode(
split(
repeat('o',datediff(from_unixtime(unix_timestamp('2040-12-31 00:00:00'),'yyyy-MM-dd'), '2000-01-01'))
,'o'
)
)
) idx_t
),
tmp AS(
select
datetimestr AS datetimestr
,substr(datetimestr,1,4) AS yearstr
,CASE WHEN MONTH(datetimestr) <= 3 then 1
WHEN MONTH(datetimestr) <= 6 then 2
WHEN MONTH(datetimestr) <= 9 then 3
ELSE 4
END AS quarterid
,substr(datetimestr,6,2) AS monthstr
,WEEKOFYEAR(datetimestr) AS weekofyearid
,DATEDIFF(datetimestr,CONCAT(YEAR(datetimestr),'-01-01')) + 1 AS dayofyearid
,substr(datetimestr,9,2) AS dayofmonthstr
,PMOD(DATEDIFF(datetimestr, '2000-01-01')-1, 7) AS dayofweekid
--,substr(datetimestr,12,2) AS hourstr
--,substr(datetimestr,15,2) AS minutestr
FROM date_t
)
INSERT OVERWRITE TABLE dim.dim_date --PARTITION(yearstr,monthstr)
SELECT
datetimestr
,yearstr
,quarterid
,CASE WHEN quarterid = 1 then '第一季度'
WHEN quarterid = 2 then '第二季度'
WHEN quarterid = 3 then '第三季度'
WHEN quarterid = 4 then '第四季度'
END AS quarter_cn
,monthstr
,weekofyearid
,dayofyearid
,dayofmonthstr
,dayofweekid
,CASE WHEN dayofweekid = 0 then '星期日'
WHEN dayofweekid = 1 then '星期一'
WHEN dayofweekid = 2 then '星期二'
WHEN dayofweekid = 3 then '星期三'
WHEN dayofweekid = 4 then '星期四'
WHEN dayofweekid = 5 then '星期五'
WHEN dayofweekid = 6 then '星期六'
END AS dayofweek_cn
--,hourstr
--,minutestr
,CASE WHEN dayofweekid = 0 or dayofweekid = 6 then 0
ELSE 1
END AS workday_flag
,FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP,'GMT+8') AS create_time
--,yearstr
--,monthstr
FROM tmp;
--生成分钟粒度维表数据
WITH datetime_t AS(
SELECT FROM_UNIXTIME(UNIX_TIMESTAMP('2000-01-01 00:00:00') + idx_t.pos*60) as datetimestr
FROM
(
SELECT
POSEXPLODE(
SPLIT(
REPEAT('o',CAST((UNIX_TIMESTAMP('2000-01-01 23:59:00') - UNIX_TIMESTAMP('2000-01-01 00:00:00'))/60 AS INT))
,'o'
)
)
) idx_t
),
tmp AS(
select
substr(datetimestr,12) AS timestr
,substr(datetimestr,12,2) AS hourstr
,substr(datetimestr,15,2) AS minutestr
FROM datetime_t
)
INSERT OVERWRITE TABLE dim.dim_datetime
select
timestr
,hourstr
,minutestr
from tmp
变更节假日
变更节假日sql
INSERT INTO TABLE dim.dim_date
SELECT
datetimestr
,yearstr
,quarterid
,quarter_cn
,monthstr
,weekofyearid
,dayofyearid
,dayofmonthstr
,dayofweekid
,dayofweek_cn
,0 AS workday_flag --变更为非工作日
--,1 AS workday_flag --变更为工作日
,FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP,'GMT+8') AS create_time
FROM dim.dim_date
where yearstr='2000' and monthstr='01' and dayofmonthstr='31';
变更节假日脚本
变更节假日脚本(单个)
#/bin/bash env
#update_workday_flag.sh
USAGE="Usage: $0 [yearstr] [monthstr] [dayofmonthstr] [workday_flag]
e.g: $0 2000 01 01 0"
function main(){
if [[ $# -lt 4 ]] ; then
echo "${USAGE}";
exit 1;
fi
SQL_UPDATE="INSERT INTO TABLE cdm.dim_date
SELECT
datetimestr
,yearstr
,quarterid
,quarter_cn
,monthstr
,weekofyearid
,dayofyearid
,dayofmonthstr
,dayofweekid
,dayofweek_cn
,${4} AS workday_flag
,FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP,'GMT+8') AS create_time
FROM cdm.dim_date
where yearstr='${1}' and monthstr='${2}' and dayofmonthstr='${3}'"
SQL_QUERY="SELECT
datetimestr
,yearstr
,quarterid
,quarter_cn
,monthstr
,weekofyearid
,dayofyearid
,dayofmonthstr
,dayofweekid
,dayofweek_cn
,workday_flag
,create_time
FROM cdm.dim_date
where yearstr='${1}' and monthstr='${2}' and dayofmonthstr='${3}'"
beeline --silent=true -n hive -p 123456 -u jdbc:hive2://hadoop01:10000/cdm -e "${SQL_UPDATE};${SQL_QUERY}"
}
main "$@"
变更节假日脚本(批量)
#/bin/bash env
#update_workday_flag_batch.sh
USAGE="agrs: [yearstr] [monthstr] [dayofmonthstr] [workday_flag]
e.g: 2000 01 01 0"
function generate_sql(){
if [[ $# -lt 4 ]] ; then
echo "Invalid arguments !!!"
echo "${USAGE}";
exit 1;
fi
SQL_UPDATE="INSERT INTO TABLE cdm.dim_date
SELECT
datetimestr
,yearstr
,quarterid
,quarter_cn
,monthstr
,weekofyearid
,dayofyearid
,dayofmonthstr
,dayofweekid
,dayofweek_cn
,${4} AS workday_flag
,FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP,'GMT+8') AS create_time
FROM cdm.dim_date
where yearstr='${1}' and monthstr='${2}' and dayofmonthstr='${3}'"
SQL_QUERY="SELECT
datetimestr
,yearstr
,quarterid
,quarter_cn
,monthstr
,weekofyearid
,dayofyearid
,dayofmonthstr
,dayofweekid
,dayofweek_cn
,workday_flag
,create_time
FROM cdm.dim_date
where yearstr='${1}' and monthstr='${2}' and dayofmonthstr='${3}'"
}
for line in `cat days_*.csv`
do
OLD_IFS="${IFS}"
IFS=","
ARR=(${line})
IFS="{OLD_IFS}"
generate_sql ${ARR[@]}
SQL="${SQL};${SQL_UPDATE};${SQL_QUERY}"
done
beeline --silent=true -n hive -p 123456 -u jdbc:hive2://hadoop01:10000/cdm -e "${SQL}"
变更日期数据
tee days_2022.csv <<EOF
2022,01,03,0
2022,01,29,1
2022,01,30,1
2022,01,31,0
2022,02,01,0
2022,02,02,0
2022,02,03,0
2022,02,04,0
2022,04,02,1
2022,04,04,0
2022,04,05,0
2022,04,24,1
2022,05,02,0
2022,05,03,0
2022,05,04,0
2022,05,07,1
2022,06,03,0
2022,09,12,0
2022,10,03,0
2022,10,04,0
2022,10,05,0
2022,10,06,0
2022,10,07,0
2022,10,08,1
2022,10,09,1
EOF
使用日期维表
--使用维表(日期粒度)
WITH tmp AS(
SELECT
datetimestr
,yearstr
,quarterid
,quarter_cn
,monthstr
,weekofyearid
,dayofyearid
,dayofmonthstr
,dayofweekid
,dayofweek_cn
,workday_flag
,DENSE_RANK() OVER(PARTITION BY yearstr,monthstr,dayofmonthstr ORDER BY create_time DESC) AS num
FROM dim.dim_date
--WHERE yearstr='yyyy' and monthstr='MM'
WHERE yearstr='2000' and monthstr='01'
)
SELECT
datetimestr
,yearstr
,quarterid
,quarter_cn
,monthstr
,weekofyearid
,dayofyearid
,dayofmonthstr
,dayofweekid
,dayofweek_cn
,workday_flag
FROM tmp
WHERE num=1
--使用维表(小时粒度)
WITH tmp AS(
SELECT
datetimestr
,yearstr
,quarterid
,quarter_cn
,monthstr
,weekofyearid
,dayofyearid
,dayofmonthstr
,dayofweekid
,dayofweek_cn
,workday_flag
,DENSE_RANK() OVER(PARTITION BY yearstr,monthstr,dayofmonthstr ORDER BY create_time DESC) AS num
FROM dim.dim_date
--WHERE yearstr='yyyy' and monthstr='MM'
WHERE yearstr='2000' and monthstr='01'
),
date_t AS(
SELECT
datetimestr
,yearstr
,quarterid
,quarter_cn
,monthstr
,weekofyearid
,dayofyearid
,dayofmonthstr
,dayofweekid
,dayofweek_cn
,workday_flag
FROM tmp
WHERE num=1
),
datetime_t AS(
SELECT
hourstr
FROM dim.dim_datetime
group by hourstr
)
select
datetimestr
,yearstr
,quarterid
,quarter_cn
,monthstr
,b.weekofyearid
,b.dayofyearid
,b.dayofmonthstr
,b.dayofweekid
,b.dayofweek_cn
,a.hourstr
,b.workday_flag
from datetime_t a,date_t b
--使用维表(分钟粒度)
WITH tmp AS(
SELECT
datetimestr
,yearstr
,quarterid
,quarter_cn
,monthstr
,weekofyearid
,dayofyearid
,dayofmonthstr
,dayofweekid
,dayofweek_cn
,workday_flag
,DENSE_RANK() OVER(PARTITION BY yearstr,monthstr,dayofmonthstr ORDER BY create_time DESC) AS num
FROM dim.dim_date
--WHERE yearstr='yyyy' and monthstr='MM'
WHERE yearstr='2000' and monthstr='01'
),
date_t AS(
SELECT
datetimestr
,yearstr
,quarterid
,quarter_cn
,monthstr
,weekofyearid
,dayofyearid
,dayofmonthstr
,dayofweekid
,dayofweek_cn
,workday_flag
FROM tmp
WHERE num=1
)
select
concat(b.datetimestr," ",a.timestr) AS datetimestr
,b.yearstr
,b.quarterid
,b.quarter_cn
,b.monthstr
,b.weekofyearid
,b.dayofyearid
,b.dayofmonthstr
,b.dayofweekid
,b.dayofweek_cn
,a.hourstr
,a.minutestr
,b.workday_flag
from dim.dim_datetime a,date_t b