1 The Problem
PostgreSQL has shipped with native partitioning since version 10, but partitioning has trade-offs. Consider the following fairly common requirement.
/****************************************************************************************
Sensor types
drop table if exists sentypes;
****************************************************************************************/
create table sentypes(
objectid smallint not null, --unique id
name varchar(32) not null, --sensor type name
unit varchar(16), --unit of measure for the sensor value
constraint pk_sentypes_objectid primary key (objectid)
);
/****************************************************************************************
Data collected by the sensors
drop table if exists sensors;
****************************************************************************************/
create table sensors(
objectid bigserial not null, --unique id
sentype smallint not null, --sensor type
devid bigint not null, --device id
sensn smallint not null, --sequence number within the same sensor type
val float4 not null, --sensor reading
generate timestamptz not null, --report time
constraint pk_sensors_objectid primary key (objectid,sentype),
constraint fk_sensors_sentype foreign key(sentype) references sentypes(objectid) on delete cascade
) partition by hash(sentype);
--A large number of sensor types must be supported (assume 255 in total), so the table is hash-partitioned by sensor type into child tables
do $$
declare
v_cmd text;
v_index text;
begin
for i in 1..255 loop --this is wrong: it assumes the partition key values 1-255 map one-to-one onto remainders 1-255
--select clac_uint_hash(id,256) from generate_series(1,255) as id group by 1; --with a hash-partition MODULUS of 256, the 255 key values hash to only 157 distinct remainders
v_index := to_char(i,'FM000');
v_cmd := format('
create table sensors%s partition of sensors
for values with (MODULUS 256, REMAINDER %s)',v_index,v_index);
execute v_cmd;
end loop;
end;
$$;
--The correct approach: create one child table per distinct remainder, 157 tables in total
do $$
declare
v_rec record;
v_cmd text;
v_index text;
begin
for v_rec in (select clac_uint_hash(id,256) as has from generate_series(1,255) as id group by 1 order by 1) loop
v_index := to_char(v_rec.has,'FM000');
v_cmd := format('
create table sensors%s partition of sensors
for values with (MODULUS 256, REMAINDER %s);',v_index,v_index);
execute v_cmd;
end loop;
end;
$$;
For fetching the readings of a single sensor type, the partitioned table works well.
--Assume type 32 is a voltage sensor; fetch the data collected by voltage sensor number 8
select val,generate from sensors where sentype=32 and sensn=8;
But things change when we want the latest reading and last report time for voltage (32), current (33), and charge (34):
explain (analyze,verbose,costs,buffers,timing)
select
objectid,
name,
unit,
(select row(val,generate,(not (extract(epoch from (now()-generate)) > 3600))) from sensors as t3 where sentype=t1.objectid order by generate desc limit 1)
from sentypes as t1
where objectid=any(array[32,33,34]);
Although only the latest rows of three sensor types are needed, the plan touches all 255 child tables. Each scan uses an index (Bitmap Index Scan) and is marked "never executed", yet the total still exceeds 3 seconds, almost all of it planning time. (The plan is very long; only a small fragment is shown here.)
Bitmap Heap Scan on public.sensors255 t3_254 (cost=4.21..14.42 rows=7 width=40) (never executed)
Output: ROW(t3_254.val, t3_254.generate, (date_part('epoch'::text, (now() - t3_254.generate)) <= '3600'::double precision)), t3_254.generate
Recheck Cond: (t3_254.sentype = t1.objectid)
-> Bitmap Index Scan on sensors255_xxxx (cost=0.00..4.21 rows=7 width=0) (never executed)
Index Cond: (t3_254.sentype = t1.objectid)
Planning Time: 3530.195 ms
Execution Time: 18.642 ms
2 The Solution
We know in advance that the type values of voltage, current, and charge are 32, 33, and 34. By computing the hash code of each of these values, we can query the three child tables that hold them directly. The remaining question is how to compute the hash code.
Computing the hash code directly in SQL overflows bigint for some values; execution fails with:
ERROR: bigint out of range
CONTEXT: PL/pgSQL function inline_code_block line 42 at RAISE
SQL state: 22003
This happens because PostgreSQL has no unsigned integer types. The overflow comes mainly from the hash_combine64 function: all of its arithmetic is done in uint64_t, and the values involved are large, leaving no room to work around it.
static inline uint64_t hash_combine64(uint64_t a, uint64_t b) {
/* 0x49a0f4dd15e5a8e3 is 64bit random data */
a ^= b + UINT64CONST(0x49a0f4dd15e5a8e3) + (a << 54) + (a >> 7);
return a;
}
2.1 Writing the hash-code function in C
The extension was developed with Visual Studio ("the number one IDE in the universe").
A PostgreSQL module consists of at least four files: a header file, a source file, a control file, and an SQL file.
Header file
//pg_function.h
#include <postgres.h>
#include <fmgr.h>
/* compute the hash value of an integer */
PGDLLEXPORT Datum clac_uint_hash(PG_FUNCTION_ARGS);
Source file
//pg_function.c
#include <stdint.h>
#include "postgres.h"
#include "fmgr.h"
#include "pg_function.h"
PG_FUNCTION_INFO_V1(clac_uint_hash);
#define UINT64CONST(x) (x##ULL)
#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
/* Seed for the extended hash function */
#define HASH_PARTITION_SEED UINT64CONST(0x7A5B22367996DCFD)
#define mix(a,b,c) \
{ \
a -= c; a ^= rot(c, 4); c += b; \
b -= a; b ^= rot(a, 6); a += c; \
c -= b; c ^= rot(b, 8); b += a; \
a -= c; a ^= rot(c,16); c += b; \
b -= a; b ^= rot(a,19); a += c; \
c -= b; c ^= rot(b, 4); b += a; \
}
#define final(a,b,c) \
{ \
c ^= b; c -= rot(b,14); \
a ^= c; a -= rot(c,11); \
b ^= a; b -= rot(a,25); \
c ^= b; c -= rot(b,16); \
a ^= c; a -= rot(c, 4); \
b ^= a; b -= rot(a,14); \
c ^= b; c -= rot(b,24); \
}
uint64_t hash_uint32_extended(uint32_t k, uint64_t seed) {
register uint32_t a,
b,
c;
a = b = c = 0x9e3779b9 + (uint32_t)sizeof(uint32_t) + 3923095;
if (seed != 0) {
a += (uint32_t)(seed >> 32);
b += (uint32_t)seed;
mix(a, b, c);
}
a += k;
final(a, b, c);
/* report the result */
return (((uint64_t)b << 32) | c);
}
static inline uint64_t hash_combine64(uint64_t a, uint64_t b) {
/* 0x49a0f4dd15e5a8e3 is 64bit random data */
a ^= b + UINT64CONST(0x49a0f4dd15e5a8e3) + (a << 54) + (a >> 7);
return a;
}
Datum clac_uint_hash(PG_FUNCTION_ARGS) {
int32_t code = PG_GETARG_INT32(0);
int32_t tabcount = PG_GETARG_INT32(1);
uint64_t a = 0, b = 0;
if (code < 1 || tabcount < 1)
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("function \"%s\" failed (\"%s\")!", "clac_uint_hash",
"code < 1 or tabcount < 1")
));
b = hash_uint32_extended(code, HASH_PARTITION_SEED);
a = (int32_t)(hash_combine64(a, b) % tabcount);
PG_RETURN_INT32(a);
}
Control file
Naming convention: xxxx.control, where xxxx is the name of the compiled dll or so file; the file extension must be .control.
default_version specifies the version; if you change it, the SQL file must be renamed to match.
module_pathname points to the compiled library.
comment = 'custom C-language function'
default_version = '1.0'
module_pathname = '$libdir/name of the compiled dll or so file without its extension; for xxxx.dll just use xxxx'
relocatable = true
SQL file
Naming convention: xxxx--1.0.sql, where xxxx is the compiled dll/so name, followed by two dashes and the version (the same value as default_version in xxxx.control); the file extension must be .sql.
/*
* Compute the hash code of an integer
* First argument: the integer to hash
* Second argument: the number of partition tables
*/
create function clac_uint_hash(integer,integer)
returns integer
as 'name of the compiled dll or so file without its extension; for xxxx.dll just use xxxx', 'clac_uint_hash'
language C immutable strict;
After a successful build, copy the .dll file to %pghome%\lib.
Copy xxxx.control and xxxx--1.0.sql to %pghome%\share\extension.
Log in to the target database as the postgres user and create the extension:
create extension xxxx;
Everything is now in place and clac_uint_hash can be used. First, compute which child tables hold voltage (32), current (33), and charge (34):
select clac_uint_hash(32,256),clac_uint_hash(33,256),clac_uint_hash(34,256);
clac_uint_hash | clac_uint_hash | clac_uint_hash
----------------+----------------+----------------
56 | 207 | 128
(1 row)
The computed remainders identify the child tables holding the data; only these three need to be queried to get the latest reading and last report time for voltage (32), current (33), and charge (34):
select * from sensors056; --56
select * from sensors207; --207
select * from sensors128; --128
2.2 Final implementation
create type ctSensorLastInfo as (
val integer, --sensor value
last timestamptz, --last report time
online boolean --whether the sensor is online
);
--ihash: the hash code computed by clac_uint_hash
--isentype: sensor type
--idevid: device id; adjust to fit your own schema
--isensn: sequence number within the same sensor type
create or replace function sensorsLast(ihash integer,isentype smallint,idevid bigint,isensn smallint)
returns ctSensorLastInfo
as $$
declare
v_cmd text;
v_val integer;
v_gen timestamptz;
v_online boolean;
begin
v_cmd := format('select val,generate,(not (extract(epoch from (now()-generate)) > 3600)) as online from sensors%s where sentype=$1 and devid=$2 and sensn=$3 order by generate desc limit 1',to_char($1,'FM000'));
execute v_cmd using $2,$3,$4 into v_val,v_gen,v_online;
return (v_val,v_gen,v_online)::ctSensorLastInfo;
end;
$$ language plpgsql;
Dynamic SQL must be hard-parsed on every execution; consider using prepared statements instead. For this case a prepared statement saves roughly another 5 ms, bringing execution time below 10 ms. This is left as an exercise for the reader.
Usage
round_v3 is the author's own decimal-rounding function (round half to even, keeping a given number of decimal places); it is not required for this approach.
explain (analyze,verbose,costs,buffers,timing)
with cte as(
select
(row_number() over(order by t1.devid,t1.sentype,t1.sensn)) as id,
t2.name,
t2.unit,
t1.sentype,
t1.sensn,
sensorsLast(clac_uint_hash(t1.sentype,256),t1.sentype,t1.devid,t1.sensn) as l
from sensors as t1
inner join sentypes as t2 on t2.objectid=t1.sentype
where t1.devid=9
)select id,name,sensn,
format('%s%s',round_v3((l).val/100::float8,2),unit) as val,
(l).last,
(l).online
from cte;
This query covers 27 sensor channels and takes about 15 ms:
id | name | sensn | val | last | online
----+------------------+-------+------------+-------------------------------+--------
.....
 21 | Battery voltage              |     1 | 11.35V  | 2021-01-15 08:50:03.037085+08 | t
 22 | Battery current              |     1 | -0.09A  | 2021-01-15 08:50:03.037085+08 | t
 23 | Battery charge               |     1 | 70.00%  | 2021-01-15 08:50:03.037085+08 | t
 24 | Load current                 |     1 | 0.20A   | 2021-01-15 08:50:03.037085+08 | t
 25 | Load voltage                 |     1 | 11.35V  | 2021-01-15 08:50:03.037085+08 | t
 26 | Load power                   |     1 | 2.27W   | 2021-01-15 08:50:03.037085+08 | t
 27 | Solar controller temperature |     1 | 12.89℃ | 2021-01-15 08:50:03.037085+08 | t
(27 rows)
执行计划
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
CTE Scan on cte (cost=38.01..38.28 rows=9 width=133) (actual time=0.810..13.710 rows=27 loops=1)
Output: cte.id, cte.name, cte.sensn, format('%s%s'::text, round_v3((((cte.l).val)::double precision / '100'::double precision), 2), cte.unit), (cte.l).last, (cte.l).online
Buffers: shared hit=1144
CTE cte
-> WindowAgg (cost=35.49..38.01 rows=9 width=69) (actual time=0.786..13.489 rows=27 loops=1)
Output: row_number() OVER (?), t2.name, t2.unit, t1.sentype, t1.sensn, sensorslast(clac_uint_hash((t1.sentype)::integer, 256), t1.sentype, (t1.devid)::bigint, t1.sensn), t1.devid
Buffers: shared hit=1144
-> Sort (cost=35.49..35.52 rows=9 width=29) (actual time=0.076..0.083 rows=27 loops=1)
Output: t1.sentype, t1.sensn, t1.devid, t2.name, t2.unit
Sort Key: t1.sentype, t1.sensn
Sort Method: quicksort Memory: 27kB
Buffers: shared hit=2
-> Hash Join (cost=33.24..35.35 rows=9 width=29) (actual time=0.045..0.061 rows=27 loops=1)
Output: t1.sentype, t1.sensn, t1.devid, t2.name, t2.unit
Hash Cond: (t2.objectid = t1.sentype)
Buffers: shared hit=2
-> Seq Scan on public.sentypes t2 (cost=0.00..1.78 rows=78 width=23) (actual time=0.013..0.016 rows=78 loops=1)
Output: t2.objectid, t2.name, t2.unit
Buffers: shared hit=1
-> Hash (cost=33.13..33.13 rows=9 width=8) (actual time=0.025..0.025 rows=27 loops=1)
Output: t1.sentype, t1.sensn, t1.devid
Buckets: 1024 Batches: 1 Memory Usage: 10kB
Buffers: shared hit=1
-> Seq Scan on public.sensors t1 (cost=0.00..33.13 rows=9 width=8) (actual time=0.012..0.018 rows=27 loops=1)
Output: t1.sentype, t1.sensn, t1.devid
Filter: (t1.devid = 9)
Rows Removed by Filter: 81
Buffers: shared hit=1
Planning Time: 0.213 ms
Execution Time: 13.765 ms
(30 rows)
Total time drops from over 3 seconds to about 15 ms.
Dynamic SQL is used here even though it is inefficient and not recommended for everyday use; every technique has scenarios that suit it, and optimization means applying the right technique in the right place.