目录
前言
用于哈希分布策略的哈希函数由列的数据类型的哈希运算符类定义。 由于默认的数据库使用数据类型的默认哈希运算符类,因此用于哈希连接和哈希聚合的运算符类相同,适用于大多数用例。 但是,您可以在DISTRIBUTED BY
子句中声明非默认的哈希运算符类。
一、创建自定义哈希运算符
此示例为整数数据类型创建自定义哈希运算符类,该类用于提高查询性能。 运算符类比较整数的绝对值。创建一个函数和一个等于运算符,如果两个整数的绝对值相等,则返回true。
CREATE FUNCTION abseq(int, int) RETURNS BOOL AS
$$
begin return abs($1) = abs($2); end;
$$ LANGUAGE plpgsql STRICT IMMUTABLE;
CREATE OPERATOR |=| (
PROCEDURE = abseq,
LEFTARG = int,
RIGHTARG = int,
COMMUTATOR = |=|,
hashes, merges);
创建一个使用运算符的哈希函数和运算符类:
CREATE FUNCTION abshashfunc(int) RETURNS int AS
$$
begin return hashint(abs($1)); end;
$$ LANGUAGE plpgsql STRICT IMMUTABLE;
CREATE OPERATOR CLASS abs_int_hash_ops FOR TYPE int4
USING hash AS
OPERATOR 1 |=|,
FUNCTION 1 abshashfunc(int);
并且,为它们创建小于和大于运算符和B树运算符类:
CREATE FUNCTION abslt(int, int) RETURNS BOOL AS
$$
begin return abs($1) < abs($2); end;
$$ LANGUAGE plpgsql STRICT IMMUTABLE;
CREATE OPERATOR |<| (
PROCEDURE = abslt,
LEFTARG = int,
RIGHTARG = int);
CREATE FUNCTION absgt(int, int) RETURNS BOOL AS
$$
begin return abs($1) > abs($2); end;
$$ LANGUAGE plpgsql STRICT IMMUTABLE;
CREATE OPERATOR |>| (
PROCEDURE = absgt,
LEFTARG = int,
RIGHTARG = int);
CREATE FUNCTION abscmp(int, int) RETURNS int AS
$$
begin return btint4cmp(abs($1),abs($2)); end;
$$ LANGUAGE plpgsql STRICT IMMUTABLE;
CREATE OPERATOR CLASS abs_int_btree_ops FOR TYPE int4
USING btree AS
OPERATOR 1 |<|,
OPERATOR 3 |=|,
OPERATOR 5 |>|,
FUNCTION 1 abscmp(int, int);
现在,可以在表中使用自定义哈希运算符类:
CREATE TABLE atab (a int) DISTRIBUTED BY (a abs_int_hash_ops);
CREATE TABLE btab (b int) DISTRIBUTED BY (b abs_int_hash_ops);
INSERT INTO atab VALUES (-1), (0), (1);
INSERT INTO btab VALUES (-1), (0), (1), (2);