Presto SQL常用函数
时间函数
时间类型
-- date
select date '2012-08-08' + interval '2' day -- date 2012-08-10
-- time
select time '01:00:00' - interval '3' hour -- time(3) 22:00:00
-- timestamp
select timestamp '2020-01-04 00:00:00' + interval '2' day --timestamp(3) 2020-01-06 00:00:00
时间通常也会使用 下述类型表示:
-- varchar
select '2020-01-04 00:00:00' -- varchar
-- bigint
select 1617256617000 -- bigint
date_format(format_datetime) & date_parse
Java 日期函数
在这一章节中使用的格式化字符串都是与Java的 SimpleDateFormat样式兼容的。
date_format(timestamp, format) → varchar
使用format格式化timestamp。
date_parse(string, format) → timestamp
解析时间戳字符串。
-- date_format和format_datetime:将timestamp格式化为字符串
select format_datetime(now() - interval '30' day,'yyyy-MM-dd') ; -- varchar 2021-03-09
select date_format(now() - interval '30' day,'%Y-%m-%d') ; -- varchar 2021-03-09
-- date_parse:将时间字符串转化为timestamp(3)
select date_parse('2020-01-04','%Y-%m-%d'); -- timestamp(3) 2020-01-04 00:00:00
from_unixtime & to_unixtime
-- from_unixtime
from_unixtime(unixtime) → timestamp
将UNIX时戳转换为时间戳
Select FROM_UNIXTIME(875996580) --1997-10-04 20:23:00.000
from_unixtime(unixtime, string) → timestamp with time zone
将UNIX时戳转换成时戳变量.可以带时区选项
select from_unixtime(1617256617000/1000,'Asia/Shanghai') -- timestamp
from_unixtime(unixtime, hours, minutes) → timestamp with time zone
将 UNIX 时戳转换成带时区的时戳变量。hours和minutes表示时区偏移量。
-- to_unixtime
to_unixtime(timestamp) → double
将时间戳转换成 UNIX 时间
select to_unixtime(now()) -- 1617852228.02
案例
时间转时间戳:
select cast(to_unixtime(timestamp '2016-09-01 01:00:00') as bigint);
_col0 |
----------|
1472691600|
--时间戳转格式化时间字符串:
select format_datetime(from_unixtime(cast(view_time as bigint)),'yyyyMMddHHmmss')
select format_datetime(from_unixtime(1433181293),'yyyy-MM-dd HH:mm:ss'); --2015-06-01 17:54:53
--bigint 1617256617000 转换为 日期类型
select from_unixtime(1617256617000/1000,'Asia/Shanghai');-- 2021-04-01 13:56:57
--bigint 1617256617000 转换为 日期字符串
select format_datetime(from_unixtime(1617256617000/1000,'Asia/Shanghai'),'yyyy-MM-dd');-- 字符串 2021-04-01
--时间类型 转换为 日期字符串
select format_datetime(date_parse('2020-01-04','%Y-%m-%d'),'yyyy-MM-dd'); -- 字符串 2021-04-01
时间差:
select date_diff('day',cast('2018-09-05' as date),cast('2018-09-07' as date));--2
select date_diff('day',from_unixtime(1610180252),from_unixtime(1611217052));--12
select date_diff('day', date_parse('2021-03-31','%Y-%m-%d'), now() ) as niff;
当前时间:
select now() -- 2021-04-01 16:38:10
select substring(cast(now() as varchar),1,10) as date; -- 2021-04-01
select substring(cast(now() as varchar),12,2) as hour; -- 16
-- 实现将当前日期(时间戳类型)加减N日
select date_add('day', -30, now()) --timestamp 2021-03-09 11:03:04
select now() - interval '30' day; --timestamp 2021-03-09 11:03:04
select format_datetime(now() - interval '30' day,'yyyy-MM-dd') ; -- varchar 2021-03-09
select date_format(now() - interval '30' day,'%Y-%m-%d') ; -- varchar 2021-03-09
聚合函数
-- map_agg
根据传入的参数key 和 value生成map:
map_agg(key, value)
Returns a map created from the input key / value pairs.
-- min_by
min(x) → [same as input]#
Returns the minimum value of all input values.
min(x, n) → array<[same as x]>
Returns n smallest values of all input values of x.
min_by(x, y) → [same as x]#
Returns the value of x associated with the minimum value of y over all input values.
min_by(x, y, n) → array<[same as x]>
Returns n values of x associated with the n smallest of all input values of y in ascending order of y.
-- array_agg
array_agg(x ORDER BY y DESC)
array_agg(x ORDER BY x, y, z)
SELECT array_agg(name) FILTER (WHERE name IS NOT NULL) FROM region;
开窗+排序函数
-- 排序函数
rank() OVER (ORDER BY totalfee DESC,username DESC) AS ranknum
row_number 123456
rank 111446
dense_rank 111223
cume_dist
precent_rank
-- 开窗函数用法:使用开窗函数计算 每一行值占累加总数的百分比
select level,userCount,sum(userCount) over() userSum
from
(
SELECT level , count(user_id) userCount
FROM
(
SELECT level,user_id
FROM dongxin_kudu."impala::dx_data".customer_churn_count
where 1=1
and day_now between '2021-03-01' and '2021-04-09'
--and date_type = 7
and app_id = '1372380648999882752'
group by level,user_id
) r
group by level
) r
level|userCount|userSum|
-----|---------|-------|
18| 1| 48|
55| 2| 48|
134| 5| 48|
86| 1| 48|
83| 2| 48|
...
其他常用函数
-- coalesce函数: 判空给默认值的
coalesce(worth,0)
-- cast函数
类型转化:
int 转 varchar :
select cast(1 as varchar)
varchar 转int:
select cast('1' as bigint)
-- case when then else end函数
给sex列的值起别名分类:
case
when sex is not null and sex =1 then '男'
when sex is not null and sex =0 then '女'
else '未知' end sex
行转列:
case when r.totalfee > 0 and r.totalfee <= 10 then 1 else 0 end payuser1,
case when r.totalfee > 11 and r.totalfee <= 50 then 1 else 0 end payuser2,
case when r.totalfee > 51 and r.totalfee <= 100 then 1 else 0 end payuser3,
case when r.totalfee > 101 and r.totalfee <= 1000 then 1 else 0 end payuser4,
case when r.totalfee > 1000 then 1 else 0 end payuser5
eg:
id UserName Subject Score
1 张三 语文 60
2 张三 数学 70
2 张三 英语 80
select UserName 姓名,
sum(case Subject when '语文' then Score else 0 end) 语文SCORE,
sum(case Subject when '数学' then Score else 0 end) 数学SCORE,
sum(case Subject when '英语' then Score else 0 end) 英语SCORE
from TestTable group by UserName;
姓名 语文SCORE 数学SCORE 英语SCORE
张三 60 70 80
-- union 函数
列转行:
select userName, '语文' as COURSE , 语文SCORE as SCORE from stu_grade
union
select userName, '数学' as COURSE, 数学SCORE as SCORE from stu_grade
union
select userName, '英语' as COURSE, 英语SCORE as SCORE from stu_grade
;
userName COURSE SCORE
张三 语文 60
张三 数学 70
张三 英语 80
-- FILTER
SELECT species,
count(*) FILTER (where petal_length_cm > 4) AS count
FROM iris
GROUP BY species;
-- bitwise_or
bitwise_or(x, y) → bigint#
Returns the bitwise OR of x and y in 2’s complement representation.
select bitwise_or( 0 ,10); -- 10
select bitwise_or( 1 ,10); -- 11
-- UNNEST
UNNEST can be used to expand an ARRAY or MAP into a relation. Arrays are expanded into a single column, and maps are expanded into two columns (key, value). UNNEST can also be used with multiple arguments, in which case they are expanded into multiple columns, with as many rows as the highest cardinality argument (the other columns are padded with nulls). UNNEST can optionally have a WITH ORDINALITY clause, in which case an additional ordinality column is added to the end. UNNEST is normally used with a JOIN and can reference columns from relations on the left side of the join.
SELECT student, score
FROM tests
CROSS JOIN UNNEST(scores) AS t (score);
-- eg1
SELECT numbers, animals, n, a
FROM (
VALUES
(ARRAY[2, 5], ARRAY['dog', 'cat', 'bird']),
(ARRAY[7, 8, 9], ARRAY['cow', 'pig'])
) AS x (numbers, animals)
CROSS JOIN UNNEST(numbers, animals) AS t (n, a);
numbers | animals | n | a
-----------+------------------+------+------
[2, 5] | [dog, cat, bird] | 2 | dog
[2, 5] | [dog, cat, bird] | 5 | cat
[2, 5] | [dog, cat, bird] | NULL | bird
[7, 8, 9] | [cow, pig] | 7 | cow
[7, 8, 9] | [cow, pig] | 8 | pig
[7, 8, 9] | [cow, pig] | 9 | NULL
(6 rows)
-- eg2
SELECT numbers, n, a
FROM (
VALUES
(ARRAY[2, 5]),
(ARRAY[7, 8, 9])
) AS x (numbers)
CROSS JOIN UNNEST(numbers) WITH ORDINALITY AS t (n, a);
numbers | n | a
-----------+---+---
[2, 5] | 2 | 1
[2, 5] | 5 | 2
[7, 8, 9] | 7 | 1
[7, 8, 9] | 8 | 2
[7, 8, 9] | 9 | 3
(5 rows)
https://trino.io/docs/current/sql/select.html?highlight=unnest#unnest
案例
案例一:
同时计算出如下指标:字段ACU和PCU的max以及对应的日期、avg
-- 在线分析-每日在线用户-ACU+PCU的max和avg
select max_by(date,ACU) maxACUDate , max_by(ACU,ACU) maxACU,
max_by(date,PCU) maxPCUDate , max_by(PCU,PCU) maxPCU,
cast(avg(ACU) as decimal(18,2)) avgACU,cast(avg(PCU) as decimal(18,2)) avgPCU
from
(
select day_now date,cast(avg(activeUser) as decimal(18,2)) ACU,max(activeUser) PCU
from
(
SELECT day_now,start_time,end_time,sum(online_user_count) activeUser FROM data_kudu."impala::dx_data".online_user_count
where 1=1
-- 起始时间 结束时间
and day_now
between
format_datetime(from_unixtime($stime$/1000,'Asia/Shanghai'),'yyyy-MM-dd')
and
format_datetime(from_unixtime($etime$/1000,'Asia/Shanghai'),'yyyy-MM-dd')
group by day_now,start_time,end_time
)
group by day_now
order by day_now
)
presto官网
https://prestodb.io/
trino(presto新版本)官网
https://trino.io/