Presto SQL常用函数（工作笔记）

最新推荐文章于 2024-04-23 14:30:57 发布

Lan_xuaner

最新推荐文章于 2024-04-23 14:30:57 发布

阅读量3.5k

点赞数 1

分类专栏： presto 文章标签：大数据 sql 查询引擎

本文链接：https://blog.csdn.net/weixin_45568892/article/details/115380426

版权

presto 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

Presto SQL常用函数

时间函数

时间类型

-- date
select date '2012-08-08' + interval '2' day -- date 2012-08-10
-- time
select time '01:00:00' - interval '3' hour  -- time(3) 22:00:00
-- timestamp
select timestamp '2020-01-04 00:00:00' + interval '2' day  --timestamp(3) 2020-01-06 00:00:00
时间通常也会使用 下述类型表示：
-- varchar
select '2020-01-04 00:00:00' -- varchar
-- bigint
select 1617256617000 -- bigint

date_format（format_datetime） & date_parse

Java 日期函数

在这一章节中使用的格式化字符串都是与Java的 SimpleDateFormat样式兼容的。

date_format(timestamp, format) → varchar

使用format格式化timestamp。

date_parse(string, format) → timestamp

解析时间戳字符串。

-- date_format和format_datetime：将timestamp格式化为字符串
select format_datetime(now() -  interval '30' day,'yyyy-MM-dd') ; -- varchar 2021-03-09
select date_format(now() -  interval '30' day,'%Y-%m-%d') ; -- varchar 	2021-03-09
-- date_parse:将时间字符串转化为timestamp(3)
select date_parse('2020-01-04','%Y-%m-%d');  -- timestamp(3)	2020-01-04 00:00:00

from_unixtime & to_unixtime

-- from_unixtime
from_unixtime(unixtime) → timestamp
将UNIX时戳转换为时间戳
Select FROM_UNIXTIME(875996580) --1997-10-04 20:23:00.000

from_unixtime(unixtime, string) → timestamp with time zone
将UNIX时戳转换成时戳变量.可以带时区选项
select from_unixtime(1617256617000/1000,'Asia/Shanghai') -- timestamp

from_unixtime(unixtime, hours, minutes) → timestamp with time zone
将 UNIX 时戳转换成带时区的时戳变量。hours和minutes表示时区偏移量。

-- to_unixtime
to_unixtime(timestamp) → double
将时间戳转换成 UNIX 时间

select to_unixtime(now())  -- 1617852228.02

案例

时间转时间戳： 
select cast(to_unixtime(timestamp '2016-09-01 01:00:00') as bigint);
_col0     |
----------|
1472691600|

--时间戳转格式化时间字符串：
select format_datetime(from_unixtime(cast(view_time as bigint)),'yyyyMMddHHmmss')
select format_datetime(from_unixtime(1433181293),'yyyy-MM-dd HH:mm:ss'); --2015-06-01 17:54:53
--bigint 1617256617000 转换为 日期类型
select from_unixtime(1617256617000/1000,'Asia/Shanghai');-- 2021-04-01 13:56:57
--bigint 1617256617000 转换为 日期字符串
select format_datetime(from_unixtime(1617256617000/1000,'Asia/Shanghai'),'yyyy-MM-dd');-- 字符串 2021-04-01
--时间类型 转换为 日期字符串
select format_datetime(date_parse('2020-01-04','%Y-%m-%d'),'yyyy-MM-dd'); -- 字符串 2021-04-01

时间差：
select date_diff('day',cast('2018-09-05' as date),cast('2018-09-07' as date));--2
select date_diff('day',from_unixtime(1610180252),from_unixtime(1611217052));--12
select date_diff('day', date_parse('2021-03-31','%Y-%m-%d'),  now() ) as  niff;

当前时间：
select now()  -- 2021-04-01 16:38:10
select substring(cast(now() as varchar),1,10) as date; -- 2021-04-01
select substring(cast(now() as varchar),12,2) as hour; -- 16

-- 实现将当前日期（时间戳类型）加减N日
select date_add('day', -30, now()) --timestamp  2021-03-09 11:03:04
select now() -  interval '30' day; --timestamp  2021-03-09 11:03:04
select format_datetime(now() -  interval '30' day,'yyyy-MM-dd') ; -- varchar 	2021-03-09
select date_format(now() -  interval '30' day,'%Y-%m-%d') ; -- varchar 	2021-03-09

聚合函数

-- map_agg
根据传入的参数key 和 value生成map：
map_agg(key, value)
Returns a map created from the input key / value pairs.

-- min_by
min(x) → [same as input]#
Returns the minimum value of all input values.

min(x, n) → array<[same as x]>
Returns n smallest values of all input values of x.

min_by(x, y) → [same as x]#
Returns the value of x associated with the minimum value of y over all input values.

min_by(x, y, n) → array<[same as x]>
Returns n values of x associated with the n smallest of all input values of y in ascending order of y.

-- array_agg
array_agg(x ORDER BY y DESC)
array_agg(x ORDER BY x, y, z)
SELECT array_agg(name) FILTER (WHERE name IS NOT NULL) FROM region;

开窗+排序函数

-- 排序函数
rank() OVER (ORDER BY totalfee DESC,username DESC) AS ranknum

row_number  123456
rank               111446
dense_rank     111223
cume_dist       
precent_rank 

-- 开窗函数用法：使用开窗函数计算 每一行值占累加总数的百分比
	select level,userCount,sum(userCount) over() userSum
	from 
	(
		SELECT level , count(user_id) userCount
		FROM 
		(
		SELECT level,user_id
		FROM dongxin_kudu."impala::dx_data".customer_churn_count
		where 1=1 
		and day_now between '2021-03-01' and '2021-04-09'
		--and date_type = 7
		and app_id = '1372380648999882752'
		group by level,user_id
		) r
		group by level
	) r
	
level|userCount|userSum|
-----|---------|-------|
   18|        1|     48|
   55|        2|     48|
  134|        5|     48|
   86|        1|     48|
   83|        2|     48|
   ...

其他常用函数

-- coalesce函数: 判空给默认值的
coalesce(worth,0)  

-- cast函数
类型转化：
int 转 varchar ：
select cast(1 as varchar)
varchar 转int：
select cast('1' as bigint)

-- case when then else end函数
给sex列的值起别名分类：
case 
when sex is not null and sex =1 then '男'
when sex is not null and sex =0 then '女'
else '未知' end sex

行转列：
case when r.totalfee > 0 and r.totalfee <= 10 then 1 else 0 end payuser1,
case when r.totalfee > 11 and r.totalfee <= 50 then 1 else 0 end payuser2,
case when r.totalfee > 51 and r.totalfee <= 100 then 1 else 0 end payuser3,
case when r.totalfee > 101 and r.totalfee <= 1000 then 1 else 0 end payuser4,
case when r.totalfee > 1000 then 1 else 0 end payuser5
eg:
id	UserName	Subject		Score
1	张三			语文			60
2	张三			数学			70
2	张三			英语			80
select UserName 姓名,
sum(case Subject when '语文' then Score else 0 end) 语文SCORE,
sum(case Subject when '数学' then Score else 0 end) 数学SCORE,
sum(case Subject when '英语' then Score else 0 end) 英语SCORE 
from TestTable group by UserName;
姓名	语文SCORE	数学SCORE	英语SCORE
张三	60	     70	       80

-- union 函数
列转行：
select userName, '语文' as COURSE , 语文SCORE as SCORE from stu_grade  
union 
select userName, '数学' as COURSE, 数学SCORE as SCORE from stu_grade  
union 
select userName, '英语' as COURSE, 英语SCORE as SCORE from stu_grade
;
userName	COURSE		SCORE
张三			语文			60
张三			数学			70
张三			英语			80

-- FILTER
SELECT species,
       count(*) FILTER (where petal_length_cm > 4) AS count
FROM iris
GROUP BY species;

-- bitwise_or
bitwise_or(x, y) → bigint#
Returns the bitwise OR of x and y in 2’s complement representation.
select bitwise_or( 0 ,10); -- 10
select bitwise_or( 1 ,10); -- 11

-- UNNEST
UNNEST can be used to expand an ARRAY or MAP into a relation. Arrays are expanded into a single column, and maps are expanded into two columns (key, value). UNNEST can also be used with multiple arguments, in which case they are expanded into multiple columns, with as many rows as the highest cardinality argument (the other columns are padded with nulls). UNNEST can optionally have a WITH ORDINALITY clause, in which case an additional ordinality column is added to the end. UNNEST is normally used with a JOIN and can reference columns from relations on the left side of the join.

SELECT student, score
FROM tests
CROSS JOIN UNNEST(scores) AS t (score);

-- eg1
SELECT numbers, animals, n, a
FROM (
  VALUES
    (ARRAY[2, 5], ARRAY['dog', 'cat', 'bird']),
    (ARRAY[7, 8, 9], ARRAY['cow', 'pig'])
) AS x (numbers, animals)
CROSS JOIN UNNEST(numbers, animals) AS t (n, a);

  numbers  |     animals      |  n   |  a
-----------+------------------+------+------
 [2, 5]    | [dog, cat, bird] |    2 | dog
 [2, 5]    | [dog, cat, bird] |    5 | cat
 [2, 5]    | [dog, cat, bird] | NULL | bird
 [7, 8, 9] | [cow, pig]       |    7 | cow
 [7, 8, 9] | [cow, pig]       |    8 | pig
 [7, 8, 9] | [cow, pig]       |    9 | NULL
(6 rows)

-- eg2
SELECT numbers, n, a
FROM (
  VALUES
    (ARRAY[2, 5]),
    (ARRAY[7, 8, 9])
) AS x (numbers)
CROSS JOIN UNNEST(numbers) WITH ORDINALITY AS t (n, a);

  numbers  | n | a
-----------+---+---
 [2, 5]    | 2 | 1
 [2, 5]    | 5 | 2
 [7, 8, 9] | 7 | 1
 [7, 8, 9] | 8 | 2
 [7, 8, 9] | 9 | 3
(5 rows)

https://trino.io/docs/current/sql/select.html?highlight=unnest#unnest

案例

案例一：
同时计算出如下指标：字段ACU和PCU的max以及对应的日期、avg
在这里插入图片描述

-- 在线分析-每日在线用户-ACU+PCU的max和avg
select  max_by(date,ACU) maxACUDate , max_by(ACU,ACU) maxACU,
        max_by(date,PCU) maxPCUDate , max_by(PCU,PCU) maxPCU,
        cast(avg(ACU) as decimal(18,2)) avgACU,cast(avg(PCU) as decimal(18,2)) avgPCU
from  
(
    select day_now date,cast(avg(activeUser) as decimal(18,2)) ACU,max(activeUser) PCU
    from 
    (
    	SELECT day_now,start_time,end_time,sum(online_user_count) activeUser FROM data_kudu."impala::dx_data".online_user_count
    	where 1=1 
    	-- 起始时间 结束时间
    	and day_now 
        between 
        format_datetime(from_unixtime($stime$/1000,'Asia/Shanghai'),'yyyy-MM-dd')
        and 
        format_datetime(from_unixtime($etime$/1000,'Asia/Shanghai'),'yyyy-MM-dd')
    	group by day_now,start_time,end_time
    )
    group by day_now 
    order by day_now
)

presto官网

https://prestodb.io/

trino（presto新版本）官网

https://trino.io/

Lan_xuaner

关注

1
点赞
踩
12

收藏

觉得还不错? 一键收藏
0
评论
Presto SQL常用函数（工作笔记）

Presto SQL常用函数时间函数时间转时间戳： select cast(to_unixtime(timestamp '2016-09-01 01:00:00') as bigint);_col0 |----------|1472691600|--时间戳转格式化时间字符串：select format_datetime(from_unixtime(cast(view_time as bigint)),'yyyyMMddHHmmss')select format_datetime(
复制链接

扫一扫