数据库自定义聚合函数(求和、标准差、平均值、几何平均值、几何标准差、偏度系数、峰度系数)

/*
 * stats_agg 是 PostgreSQL 的聚合函数工作方式类似于先前存在的聚集体(min(x),max(),avg()等),但计算一
 * 次,并返回他们都多的统计数据。返回的统计数据是count,min,max,mean,variance,skewness,和 kurtosis,但其他人可以加入。
 * 
 * 我需要一个返回偏度和峰度的聚合,而不是为每个需要多次传递来计算平均值等的单独函数,我认为最好有一个聚合在一次传递中返回所有内容。
 * 
 * 感谢 John D. Cook 和他的博客文章(https://www.johndcook.com/blog/skewness_kurtosis/)解释了如何做到这一点。
 * */

CREATE TYPE _stats_agg_accum_type AS (
	n BIGINT,
	min DOUBLE PRECISION,
	max DOUBLE PRECISION,
	m1 DOUBLE PRECISION,
	m2 DOUBLE PRECISION,
	m3 DOUBLE PRECISION,
	m4 DOUBLE PRECISION
);

CREATE TYPE _stats_agg_result_type AS (
	count BIGINT,
	min DOUBLE PRECISION,
	max DOUBLE PRECISION,
	mean DOUBLE PRECISION,
	variance DOUBLE PRECISION,
	skewness DOUBLE PRECISION,
	kurtosis DOUBLE PRECISION
);

CREATE OR REPLACE FUNCTION _stats_agg_accumulator(_stats_agg_accum_type, DOUBLE PRECISION)
RETURNS _stats_agg_accum_type AS 
$example_table$
DECLARE
	a ALIAS FOR $1;
	x ALIAS FOR $2;
	n1 BIGINT;
	delta DOUBLE PRECISION;
	delta_n DOUBLE PRECISION;
	delta_n2 DOUBLE PRECISION;
	term1 DOUBLE PRECISION;
BEGIN
	IF x IS NOT NULL THEN
		n1 = a.n;
		a.n = a.n + 1;
		delta = x - a.m1;
		delta_n = delta / a.n;
		delta_n2 = delta_n * delta_n;
		term1 = delta * delta_n * n1;
		a.m1 = a.m1 + delta_n;
		a.m4 = a.m4 + term1 * delta_n2 * (a.n*a.n - 3*a.n + 3) + 6 * delta_n2 * a.m2 - 4 * delta_n * a.m3;
		a.m3 = a.m3 + term1 * delta_n * (a.n - 2) - 3 * delta_n * a.m2;
		a.m2 = a.m2 + term1;
		a.min = LEAST(a.min, x);
		a.max = GREATEST(a.max, x);
	END IF;
	
	RETURN a;
END;
$example_table$
LANGUAGE plpgsql;

CREATE OR REPLACE FUNCTION _stats_agg_finalizer(_stats_agg_accum_type)
RETURNS _stats_agg_result_type AS 
$example_table$
BEGIN
	RETURN ROW(
		$1.n, 
		$1.min,
		$1.max,
		$1.m1,
		$1.m2 / NULLIF(($1.n - 1.0), 0), 
		CASE WHEN $1.m2 = 0 THEN NULL ELSE SQRT($1.n) * $1.m3 / NULLIF(($1.m2 ^ 1.5), 0) END, 
		CASE WHEN $1.m2 = 0 THEN NULL ELSE $1.n * $1.m4 / NULLIF(($1.m2 * $1.m2) - 3.0, 0) END
	);
END;
$example_table$
LANGUAGE plpgsql;

CREATE AGGREGATE stats_agg(DOUBLE PRECISION) (
	sfunc = _stats_agg_accumulator,
	stype = _stats_agg_accum_type,
	finalfunc = _stats_agg_finalizer,
	initcond = '(0,,, 0, 0, 0, 0)'
);


/**
 * 自定义函数
 * 以下是根据具体项目需求和需求计算公式进行聚合(非标准聚合公式)
 */
/* 求和 */
CREATE OR REPLACE FUNCTION sum(arr DOUBLE PRECISION[])
RETURNS DOUBLE PRECISION
LANGUAGE plpgsql AS 
$function$
declare
    ret DOUBLE PRECISION;
    item DOUBLE PRECISION;
    a DOUBLE PRECISION = 0;
begin
    FOREACH item IN ARRAY arr
    LOOP
        a = a + item;
    END LOOP;
    ret = a;
    RETURN ret;
END;
$function$
;

/* 标准差 */
CREATE OR REPLACE FUNCTION stddev(arr DOUBLE PRECISION[])
RETURNS DOUBLE PRECISION
LANGUAGE plpgsql AS 
$function$
DECLARE
    ret DOUBLE PRECISION;
    item DOUBLE PRECISION;
    mean DOUBLE PRECISION;
    a DOUBLE PRECISION = 0;
    c INT = 0;
BEGIN
    c = ARRAY_LENGTH(arr,1);
    mean = MEAN(arr);
    FOREACH item IN ARRAY arr
    LOOP
        a = a + POW(item - mean, 2);
    END LOOP;
    ret = POW(a / (c - 1), 0.5);
    RETURN ret;
END;
$function$
;

/* 平均值 */
CREATE OR REPLACE FUNCTION mean(arr DOUBLE PRECISION[])
RETURNS DOUBLE PRECISION
LANGUAGE plpgsql AS 
$function$
DECLARE
    ret DOUBLE PRECISION;
    item DOUBLE PRECISION;
    a DOUBLE PRECISION = 0;
    c INT = 0;
BEGIN
    c = ARRAY_LENGTH(arr, 1);
    FOREACH item IN ARRAY arr
    LOOP
        a = a + item;
    END LOOP;
    ret = a / c ;
    RETURN ret;
END;
$function$
;

/*几何平均值*/
CREATE OR REPLACE FUNCTION public.geo_mean(arr DOUBLE PRECISION[])
 RETURNS DOUBLE PRECISION
 LANGUAGE plpgsql
AS $function$
DECLARE
    ret DOUBLE PRECISION;
    item DOUBLE PRECISION;
    a DOUBLE PRECISION = 0;
    c INT;
BEGIN
    FOREACH item IN ARRAY arr
    LOOP
        IF a = 0 THEN
        	a = item;
        ELSE
            a =  ROUND(CAST(a AS NUMERIC), 5) * ROUND(CAST(item AS NUMERIC), 5);
        END IF;
    END LOOP;
    c = ARRAY_LENGTH(arr, 1);
    ret = POW(a, 1.0 / c);
    RETURN ret;
   EXCEPTION WHEN OTHERS THEN
        RAISE NOTICE '异常数据:ret = % item= % a= % c= %', ret, item, a, c;
        RETURN 'NaN';
END;
$function$
;

/* 几何标准差 */
CREATE OR REPLACE FUNCTION geo_std(arr DOUBLE PRECISION[])
RETURNS DOUBLE PRECISION
LANGUAGE plpgsql AS 
$function$
declare
    ret DOUBLE PRECISION;
    item DOUBLE PRECISION;
    a DOUBLE PRECISION = 0;
    b DOUBLE PRECISION = 0;
    base NUMERIC = 10.0;
    c INT = 0;
BEGIN
    FOREACH item IN ARRAY arr
    LOOP
        a = a + POW(LOG(base, item::NUMERIC), 2.0);
        b = b + LOG(base, item::NUMERIC);
    END LOOP;
    b = POW(b, 2);
    c = ARRAY_LENGTH(arr, 1);
    ret = 10 ^ (POW(((a - b / c) / (c - 1)), 0.5));
    RETURN ret;
END;
$function$
;

/* 偏度系数 */
CREATE OR REPLACE FUNCTION skewness(arr DOUBLE PRECISION[])
RETURNS DOUBLE PRECISION
LANGUAGE plpgsql AS 
$function$
DECLARE
    ret DOUBLE PRECISION;
    item DOUBLE PRECISION;
    a DOUBLE PRECISION = 0;
    mean DOUBLE PRECISION = 0;
    std_val DOUBLE PRECISION = 0;
    c INT = 0;
BEGIN
    c = ARRAY_LENGTH(arr,1);
    mean = MEAN(arr);
    std_val = STDDEV(arr);
    FOREACH item IN ARRAY arr
    LOOP
        a = a + (POW(item - mean, 3) / POW(std_val, 3));
    END LOOP;
    ret = ((1 / (c - 1)::DOUBLE PRECISION) * a);
    RETURN ret;
END;
$function$
;

/* 峰度系数 */
CREATE OR REPLACE FUNCTION kurtosis(arr DOUBLE PRECISION[])
RETURNS DOUBLE PRECISION
LANGUAGE plpgsql AS 
$function$
DECLARE
    ret DOUBLE PRECISION;
    item DOUBLE PRECISION;
    a DOUBLE PRECISION = 0;
    mean DOUBLE PRECISION = 0;
    std_val DOUBLE PRECISION = 0;
    c INT = 0;
BEGIN
    c = ARRAY_LENGTH(arr, 1);
    mean = MEAN(arr);
    std_val = STDDEV(arr);
    FOREACH item IN ARRAY arr
    LOOP
        a = a + (POW(item - mean, 4) / POW(std_val, 4));
    END LOOP;
    ret = ((1 / (c-1)::DOUBLE PRECISION) * a) - 3;
    RETURN ret;
END;
$function$
;

自定义函数中用到的几个公式

 1、 平均值

2、标准差

3、几何平均值

 

 4、几何标准差

5、偏度系数

6、峰度系数

 

Java代码实现commons-math之常用科学计算

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值