新增distinct语法解决了金仓数据库KingbaseES 聚集函数作为窗口函数使用时不支持去重问题
关键字
函数、count、distinct
问题描述
用户在使用count函数和distinct去重时报错语法错误:
create table tab_user(usename text,id int);
insert into tab_user values ('a1',1),('a1',2),('a2',2);
--使用count函数作为窗口函数时指定distinct关键字去重
select count(distinct usename) over (partition by null) from tab_user ;
问题分析
聚集函数作为窗口函数时不支持distinct关键字语法。
解决方案
为解决该问题,需要新增支持distinct关键字的语法。注:该语句仅在KES 兼容oracle模式支持。 原用户场景:
select count(distinct usename) over (partition by null) from tab_user ;
其它聚集函数做窗口函数使用场景:
create table tab_test1 (a int, b int, c int);
insert into tab_test1 values (1, 0, 0);
insert into tab_test1 values (1, 0, 2);
insert into tab_test1 values (1, 1, 4);
insert into tab_test1 values (1, 1, 6);
insert into tab_test1 values (1, null, 8);
insert into tab_test1 values (2, 2, 1);
insert into tab_test1 values (2, 2, 1);
insert into tab_test1 values (2, 3, 5);
insert into tab_test1 values (2, 3, 7);
insert into tab_test1 values (2, null, 9);
insert into tab_test1 values (2, 2, 1);
insert into tab_test1 values (2, 3, 5);
insert into tab_test1 values (1, 0, 0);
--max,min函数取最大值,最小值
select max(distinct c) over (partition by a) from tab_test1 where c>2 order by a nulls first;
select min(distinct c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
--sum求和函数
select sum(distinct c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
--avg求平均值函数
select avg(distinct c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
-- variance求方差函数
select variance(distinct c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
-- stddev求标准差函数
select stddev(distinct c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
--当参数为非数组类型时, array_agg 将输入值(包括空值)连接到一个数组,返回参数类型的数组。 当参数为数组类型时, array_agg 将输入数组串接到一个更高维度的数组中,并返回一个与参数数据类型相同的数组。输入数组必须具有相同维度,并且不能为空或者NULL
select array_agg(distinct c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
--bit_and函数计算所有非空值按位与。如果没有非空值传入,返回空值
select bit_and(distinct c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
--bit_or函数对输入的非空值按位或运算,如果没有输入非空值,则返回空值
select bit_or(distinct c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
--every函数求逻辑与,如果所有输入值为真则结果为真,否则为假
select every(distinct c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
--json_agg函数将传入值包括空值聚集成一个JSON数组
select json_agg(distinct c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
--jsonb_agg函数将传入值包括空值聚集成一个JSON数组
select jsonb_agg(distinct c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
--json_object_agg函数将 name 和 value 对聚合成JSON对象,值可以为空,名字不能为空
select json_object_agg(distinct 1,c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
--string_agg函数非空输入连接成一个串,用delimiter分隔
select string_agg(distinct c,'') over (partition by a) from tab_test1 where c>2 order by a nulls last;
-- CORR函数返回一组数字对的相关系数
select corr(distinct a,c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
-- covar_pop函数返回一组数字对的总体协方差
select covar_pop(distinct a,c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
-- covar_samp函数返回一组数字对的样本协方差
select covar_samp(distinct a,c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
-- regr_avgx函数返回不为null的组的值计算出的平均值
select regr_avgx(distinct b,c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
-- regr_avgy函数返回不为null的组的值计算出的平均值
select regr_avgy(distinct b,c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
-- regr_count函数返回组中非null值的数目
select regr_count(distinct b,c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
--regr_r2函数返回组中非null值的决定系数
select regr_r2(distinct b,c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
--regr_sxx函数返回组中非null值的平方和
select regr_sxx(distinct b,c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
--regr_syy函数返回组中非null值的平方和
select regr_syy(distinct b,c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
--regr_slope函数返回组中线性回归线的斜率
select regr_slope(distinct b,c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
--STDDEV_POP函数计算总体标准差并返回总体方差的平方根
select stddev_pop(distinct c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
--var_pop函数在丢弃该集合中的空值后返回输入值的总体方差
select var_pop(distinct c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
--var_samp函数在丢弃该集合中的空值后返回一组数字的样本方差
select var_samp(distinct c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
--WM_CONCAT函数实现column字段合并
select wm_concat(distinct c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
-- jsonb_object_agg函数将 name 和 value 对聚合成JSON对象,值可以为空,名字不能为空
select jsonb_object_agg(distinct 1,c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
--bool_or函数求逻辑或,如果有输入值为真则结果为真,否则为假
select bool_or(distinct c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
--bool_and函数求逻辑与,如果所有输入值为真则结果为真,否则为假
select bool_and(distinct c) over (partition by a) from tab_test1 where c>2 order by a nulls last;
--regr_sxy函数返回不为null组的值乘积的和
select regr_sxy(distinct c,b) over (partition by a) from tab_test1 where c>2 order by a nulls last;
--regr_ intercept函数返回线性回归的截距项
select regr_intercept(distinct c,b) over (partition by a) from tab_test1 where c>2 order by a nulls last;
--stddev_samp函数计算累积样本标准差并返回样本方差的平方根
select stddev_samp(distinct c) over (partition by a) from tab_test1 where c>2 order by a nulls last;