判断星期几:
select
cast(date_format('$[YYYY-MM-DD - 1D]', 'u') as int)
判断当前时间是几点:
select substr(from_unixtime(unix_timestamp(), 'yyyy-MM-dd HH:mm:ss'),12,2)
多个文件名同列名去重:
select
count(
distinct case
when t1.user_id is not null then t1.user_id
when t2.user_id is not null then t2.user_id
end
) as user_cnt
from
(
select
user_id
from
table_1
) t1
left join (
select
user_id
from
table_2
) t2 on t1.user_id = t2.user_id;
分位数提取:
percentile(sale_cnt,0.5) as rate5,
percentile(sale_cnt,0.999) as rate9_9_9
笔记:
正则表达式替换顺序
1. 先替换空格:regexp_replace(t14.name, '[\\s\\r\\n\\t]+', '')
2. 替换复杂的括号:regexp_replace([X], '\\((?:[^\\)]*)\\)|((?:[^)]*))|\\[(?:[^\\]]*)\\]|【(?:[^】]*)】|“(?:[^”]*)”', '')
3. 替换带单位的数字,替换时注意复杂的单位在前:regexp_replace([X], '(?:\\d+(?:\\.\\d+)?(?:小袋|支装|袋|米|卷|把|双|件|抽|层|贴|杯|只|号|套|组|本|片|条|筒|包|份|个|盒|支|盆|箱|瓶|听|罐|克|圈|颗|kg|mm|cm|ml|m|g|l))+', '')
4. 替换规格(每XX):regexp_replace(t14.name, '\d*/.+', '')
5. 替换进行运算的数字,包含半运算(15-):regexp_replace([X], '\\d+(?:\\.\\d+)?[\\+±\\-\\*x](\\d+(?:\\.\\d+)?)?', '')
6. 替换宣传语:regexp_replace([X], '秒杀!*|加粗|加厚|(?:花型|颜色)+随机|温和滋润|大号', '')
7. 替换特殊符号:regexp_replace([X], '[\\+±\\-\\*]+', '')
substr(regexp_replace(regexp_replace(regexp_replace(wkt,'[POLYGON|\\(|\\)]',''),', ',','),' ',','),2) as polygon
MD5加密:
whole_dw.data_masking(passenger_phone)
随机生成0~9:
select
uid,
round(Rand() * 9, 0) as rand_id
from
table_1
where
dt = '2021-06-17'
limit
1000;
距离函数:
HTW_DW.distance_from_lat_lng(poi_lat,poi_lng, lat, lng) as distance
分割函数:
select
split('2020,23,23', ',') [0] as c2,
split('2020,23,23', ',') [1] as c3,
split('2020,23,24', ',') [2] as c4;
size+collect_set
size(
collect_set(
case
when dt > active_date
and dt <= date_add(active_date, 7) then dt
else null
end
)
) as pre_7d_business_days
日期维表:
-- pmod
-- https://blog.csdn.net/weixin_38750084/article/details/103032383
-- pmod(int a, int b)
-- pmod(double a, double b)
-- 返回a除b的余数的绝对值。
select
`date`,
year(`date`),
month(`date`),
day(`date`),
case
when pmod(datediff(`date`, '1900-01-08'), 7) + 1 = 1 then '星期一'
when pmod(datediff(`date`, '1900-01-08'), 7) + 1 = 2 then '星期二'
when pmod(datediff(`date`, '1900-01-08'), 7) + 1 = 3 then '星期三'
when pmod(datediff(`date`, '1900-01-08'), 7) + 1 = 4 then '星期四'
when pmod(datediff(`date`, '1900-01-08'), 7) + 1 = 5 then '星期五'
when pmod(datediff(`date`, '1900-01-08'), 7) + 1 = 6 then '星期六'
when pmod(datediff(`date`, '1900-01-08'), 7) + 1 = 7 then '星期日'
else ''
end as day_of_week,
weekofyear(`date`) as week_of_year,
date_sub(`date`, pmod(datediff(`date`, '1900-01-08'), 7)) as first_date_of_week,
date_add(`date`, 7 - pmod(datediff(`date`, '1900-01-08'), 7) -1) as last_date_of_week
from
dim_day_table
星期:
-- 求当前日期的下周一,上周一等等
select next_day(current_date, 'Monday') --当前日期的下周一
select date_sub(next_day(current_date, 'Monday'), 14) --当前日期的下周一,减去7天为本周一,减去14天为上周一
lateral view:
-- #tag_values以字符串形式存储
select
id,
tags.tag
from
table_1 lateral view explode(split(tag_values, ",")) tags as tag
where
dt = date_sub(current_date(), 1);
select bin(4)
select
coalesce(a.dt, b.stat_date, c.dt) as dt;