Several interesting functions in SQL

The group-concatenation function

GROUP_CONCAT(concat_col ORDER BY sort_col) concatenates, within each group, the values of the specified column, sorted by the given rule, into a single string.
It can also be used as a window function: GROUP_CONCAT(concat_col) OVER (PARTITION BY group_col ORDER BY sort_col). (Stock MySQL does not allow GROUP_CONCAT with OVER; the windowed form below assumes an engine that supports it, such as the AnalyticDB for MySQL used in these examples.)
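A minimal sketch of the aggregate form before the fuller examples below (the table orders(shop_id, item) and its contents are hypothetical):

SELECT 
	shop_id
	,GROUP_CONCAT(item order by item) items -- e.g. shop 1 holding rows 'b' and 'a' yields 'a,b' (default separator is ',')
FROM orders
GROUP BY shop_id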

select 
	*
	,split(pay_gmv,',')[1]
	,split(pay_gmv,',')[2]
from
(SELECT 
	data_date
	,shop_num
	,shop_name
	,power
	,GROUP_CONCAT(concat_ws('-',data_item,pay_gmv) order by data_date,shop_num,data_item) pay_gmv
FROM kscs_test.test
where data_date='2023-02-20'
group by data_date,shop_num,shop_name,power) t -- derived-table alias added; shop_name and power are assumed constant per shop, so adding them to GROUP BY does not change the grouping

select 
	a.* 
	,b.time_group 
from dws.dws_niu_kwai_data a 
left join
(select
	data_date
	,brand
	,people
	,max(time_group) time_group -- with ORDER BY in the window, each row holds a running prefix of the concatenation; max() of those prefixes is the longest string, i.e. the complete one
from
( 
	select 
    	data_date
    	,brand
    	,people
    	,time_d
    	,group_concat(time_d) over(partition by data_date,brand,people order by time_d) time_group -- within each (data_date,brand,people) partition, concatenate all time_d values in ascending order into one string
	from dws.dws_niu_kwai_data
	group by data_date,brand,people,time_d
) t 
group by data_date,brand,people) b 
on a.data_date=b.data_date and a.brand=b.brand and a.people=b.people
The CROSS JOIN function

CROSS JOIN returns the Cartesian product of two tables, so the result has as many rows as the product of the two tables' row counts. It is often used together with UNNEST, which materializes an array as a temporary table that is implicitly correlated with the row the array came from.
A CROSS JOIN example in AnalyticDB MySQL
Original structure:
A: ['a','b']
B: ['c','d','e']
Structure after processing:
A a
A b
B c
B d
B e
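A minimal sketch of that transformation (the table t(k, arr) holding the two rows above is hypothetical):

select 
	t.k
	,e.v
from t
cross join unnest(t.arr) as e(v) -- ('A', ['a','b']) expands to rows ('A','a') and ('A','b'), and so on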

insert into dim.dim__list
SELECT 
	data_date
	,agent_id 
	,advertider_id
	,now()
FROM 
(
	select 
		data_date
		,agent_id
		,split(replace(replace(json_extract(data,'$.list'),'[',''),']',''),',') as id_array -- strip the brackets from the JSON list, then split its elements on commas
	from kscs_ods.ods_list) t -- derived-table alias added
CROSS JOIN UNNEST(id_array) as temp_table(advertider_id) -- UNNEST stores the split-out array as a temporary table and takes the Cartesian product with the original row

CROSS JOIN + UNNEST can replace UNION ALL for unpivoting columns into rows (a comparison sketch follows the query below).
Example: parse the following JSON-array format:

{
	"all_traffic_distribution": [{
		"main_source": {
			"source_name": "f1",
			"watch_pv": 4
		},
		"detail_source": [{
			"source_name": "t1",
			"watch_pv": 1
		}, {
			"source_name": "t2",
			"watch_pv": 1
		}, {
			"source_name": "t3",
			"watch_pv": 1
		}, {
			"source_name": "t4",
			"watch_pv": 1
		}]
	}, {
		"main_source": {
			"source_name": "f2",
			"watch_pv": 2
		}
	}, {
		"main_source": {
			"source_name": "f3",
			"watch_pv": 3
		}
	}, {
		"main_source": {
			"source_name": "f4",
			"watch_pv": 5
		}
	}
	]
}
SELECT 
	data_date
	,brand
	,live_id
	,ifnull(if(split_part(x,'-',1) in('t1','t2','t3','t4'),'f1',split_part(x,'-',1)),'') -- add a channel column: sub-channels belonging to f1 get channel f1, everything else keeps its actual channel
	,ifnull(split_part(x,'-',1),'') -- split the k-v pair and take the 1st part as the traffic channel: this column merges channel and sub-channel into one field and can serve directly as the sub-channel column
	,ifnull(split_part(x,'-',2),'') -- split the k-v pair and take the 2nd part as the traffic value
from
(SELECT 
	data_date
	,brand
	,live_id
	,split(concat_ws(',',t1,t2,a,b,c,d),',') as flow_array -- turn all k-v pairs into an array, then CROSS JOIN to unpivot columns into rows (the ti sub-channels belong to f1, so f1's own record must be filtered out afterwards to avoid double counting; channel counts differ per live room, so empty-channel records must be filtered out as well)
from
(SELECT 
	data_date
	,brand
	,live_id
	-- concatenate each channel with its traffic into a single string, one column per channel
	,concat_ws('-',json_extract(data,'$.all_traffic_distribution[0].main_source.source_name'),json_extract(data,'$.all_traffic_distribution[0].main_source.watch_pv')) a
	,concat_ws('-',json_extract(data,'$.all_traffic_distribution[1].main_source.source_name'),json_extract(data,'$.all_traffic_distribution[1].main_source.watch_pv')) b
	,concat_ws('-',json_extract(data,'$.all_traffic_distribution[2].main_source.source_name'),json_extract(data,'$.all_traffic_distribution[2].main_source.watch_pv')) c
	,concat_ws('-',json_extract(data,'$.all_traffic_distribution[3].main_source.source_name'),json_extract(data,'$.all_traffic_distribution[3].main_source.watch_pv')) d
-- for each channel, first join every sub-channel with its traffic (one k-v per sub-channel), then concatenate the results across all channels without a separator (one concatenated result ti per sub-channel position), yielding each sub-channel's k-v; only f1 has sub-channels, but f1's position in the JSON array is not fixed, so every channel must be scanned
	,concat(
		CONCAT_WS('-',json_extract(data,'$.all_traffic_distribution[0].detail_source[0].source_name'),json_extract(data,'$.all_traffic_distribution[0].detail_source[0].watch_pv'))
		,CONCAT_WS('-',json_extract(data,'$.all_traffic_distribution[1].detail_source[0].source_name'),json_extract(data,'$.all_traffic_distribution[1].detail_source[0].watch_pv'))
		,CONCAT_WS('-',json_extract(data,'$.all_traffic_distribution[2].detail_source[0].source_name'),json_extract(data,'$.all_traffic_distribution[2].detail_source[0].watch_pv'))
		,CONCAT_WS('-',json_extract(data,'$.all_traffic_distribution[3].detail_source[0].source_name'),json_extract(data,'$.all_traffic_distribution[3].detail_source[0].watch_pv'))) t1
	,concat(
		CONCAT_WS('-',json_extract(data,'$.all_traffic_distribution[0].detail_source[1].source_name'),json_extract(data,'$.all_traffic_distribution[0].detail_source[1].watch_pv'))
		,CONCAT_WS('-',json_extract(data,'$.all_traffic_distribution[1].detail_source[1].source_name'),json_extract(data,'$.all_traffic_distribution[1].detail_source[1].watch_pv'))
		,CONCAT_WS('-',json_extract(data,'$.all_traffic_distribution[2].detail_source[1].source_name'),json_extract(data,'$.all_traffic_distribution[2].detail_source[1].watch_pv'))
		,CONCAT_WS('-',json_extract(data,'$.all_traffic_distribution[3].detail_source[1].source_name'),json_extract(data,'$.all_traffic_distribution[3].detail_source[1].watch_pv'))) t2
from table_name) s1) s2 -- derived-table aliases added
cross join unnest(flow_array) as temp_table(x)
where split_part(x,'-',1) <> '' and split_part(x,'-',1) <> 'f1'
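For comparison, the same unpivot written with UNION ALL needs one branch per k-v column; a sketch of the first two branches (source_table stands in for the inner query above):

select data_date, brand, live_id, a as x from source_table
union all
select data_date, brand, live_id, b as x from source_table
-- ...plus one branch for each of c, d, t1, t2; CROSS JOIN UNNEST reads the table once instead
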
Showing each week's date range, with weeks starting on Monday
select a.*,b.date_group 
from a
left join
(select
		week(date_add(data_date, interval -1 day)) week_date -- shift back one day so the week starts on Monday (week() treats Sunday as the first day by default)
    	,brand
    	,concat(min(data_date),'~',max(data_date)) date_group -- show the date range covered by each week
    from b
	group by week(date_add(data_date, interval -1 day)),brand
) b
on week(date_add(a.data_date, interval -1 day))=b.week_date and a.brand=b.brand -- brand must also match, since the subquery groups by brand as well
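A quick sanity check of the Monday-start shift (the dates are illustrative; 2023-02-20 is a Monday and 2023-02-26 the following Sunday):

select 
	week(date_add('2023-02-20', interval -1 day)) -- Monday, shifted back to Sunday 2023-02-19
	,week(date_add('2023-02-26', interval -1 day)) -- Sunday, shifted back to Saturday 2023-02-25
-- both fall in the week that starts Sunday 2023-02-19, so they share one week number and Mon..Sun group together
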
Joining on 1=1 is an always-true condition, so every row is matched

For example, b left-joined to a on 1=1 (written as a RIGHT JOIN below): every date row in b is joined to all records in a.

select 
	a.user_id, 
	a.`data`,
	b.data_date data_date,
	now() 
from
(	select * from a where data_date ='2021-12-10') a 
right join
(	select data_date from date_info where data_date in('2021-08-12','2021-08-16','2021-08-22')) b
on 1=1
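Since 1=1 never filters anything, the result has (rows of a) × (rows of b) records, the same Cartesian product a CROSS JOIN would produce; here every 2021-12-10 record from a is repeated for each of the three dates in b.
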
FROM_UNIXTIME(FLOOR(data_ts/300) * 300,'%Y-%m-%d %H:%i:%s') AS start_5min_time

A unix timestamp counts 1-second blocks; dividing by 300 turns it into a count of 5-minute blocks; FLOOR rounds that down to the most recent whole 5-minute block; multiplying by 300 converts back to seconds, giving the timestamp of the most recent whole 5-minute boundary.
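A worked example with an arbitrary timestamp (1690182345 is illustrative):

select 
	FLOOR(1690182345/300)*300 -- = 1690182300: the trailing 45 seconds are floored away, leaving a multiple of 300
	,FROM_UNIXTIME(FLOOR(1690182345/300)*300,'%Y-%m-%d %H:%i:%s') -- that 5-minute boundary formatted as a datetime (wall-clock value depends on the session time zone)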

-- split per-minute cumulative data into per-minute increments, refreshing the most recent hour every five minutes
select 
    data_time,
    start_5min_time,
    end_5min_time,
    start_1hour_time,
    end_1hour_time,
    data_date,
    brand,
    live_id,
    data_ts,
    add_ts,
    add_time,
    if(watch_uv<0,0,watch_uv) as watch_uv,
    if(live_show_pv<0,0,live_show_pv) as live_show_pv
from (
select
    from_unixtime(data_ts,'%Y-%m-%d %H:%i:%s') as data_time,
    start_5min_time as start_5min_time,
    end_5min_time as end_5min_time,
    start_1hour_time as start_1hour_time,
    end_1hour_time as end_1hour_time,
    cast(data_date as date) as data_date,
    brand as brand,
    live_id as live_id,
    cast(data_ts as int) as data_ts,
    cast(add_ts as timestamp) as add_ts,
    cast(add_time as timestamp) as add_time,
    cast(nvl(watch_uv-lag(watch_uv) over (partition by live_id order by data_ts),0) as int) as watch_uv,
    cast(nvl(live_show_pv-lag(live_show_pv) over (partition by live_id order by data_ts),0) as int) as live_show_pv,
    nvl(data_ts-lag(data_ts) over (partition by live_id order by data_ts),0) diff_ts
from(
    select *,
           FROM_UNIXTIME(FLOOR(data_ts / 300) * 300,'%Y-%m-%d %H:%i:%s') AS start_5min_time, -- timestamp (count of 1-second blocks) /300 → count of 5-minute blocks → FLOOR → latest whole 5-minute block → *300 → seconds timestamp of the latest whole 5-minute boundary (the original UNIX_TIMESTAMP(FROM_UNIXTIME(data_ts)) round-trip is equivalent to data_ts and has been simplified away)
           FROM_UNIXTIME((FLOOR(data_ts / 300) + 1) * 300,'%Y-%m-%d %H:%i:%s') AS end_5min_time,
           FROM_UNIXTIME(FLOOR(data_ts / 3600) * 3600,'%Y-%m-%d %H:%i:%s') AS start_1hour_time,
           FROM_UNIXTIME((FLOOR(data_ts / 3600) + 1) * 3600,'%Y-%m-%d %H:%i:%s') AS end_1hour_time,
           row_number() over (partition by live_id,data_ts order by add_ts) rn
    from test
    -- refresh the trailing window on each run (the line below takes the past hour)
    -- where data_ts between unix_timestamp('2023-07-24 14:00:00') and unix_timestamp('2023-07-24 15:00:00')
     where data_ts between unix_timestamp('$[yyyy-MM-dd HH:mm:ss-1/24]') and unix_timestamp('$[yyyy-MM-dd HH:mm:ss]') -- $[...] are scheduler time parameters: -1/24 is 1 hour ago; 1 minute ago would be -1/24/60, 1 second ago -1/24/60/60
     and data_date='$[yyyy-MM-dd]'
    )tmp1
where rn=1 )tmp2
where data_ts<>unix_timestamp('$[yyyy-MM-dd HH:mm:ss-1/24]') and diff_ts=60 # exclude the row from exactly 1 hour ago: it is not split itself and only serves as the starting point (lag base) of the split; diff_ts=60 keeps rows exactly one minute after their predecessor
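The core of the split is the lag() delta; a minimal sketch on a hypothetical cumulative table:

select 
	live_id
	,data_ts
	,watch_uv - lag(watch_uv) over (partition by live_id order by data_ts) watch_uv_delta -- running totals 10, 13, 19 on successive minutes yield NULL, 3, 6
from cumulative_per_minute -- hypothetical table holding one running total per live_id per minute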