os: centos 7.4.1708
db: postgresql 10.11
pg_trgm 需要3个关键字才能使用到该索引,如果关键字只有1-2个或者更多,那就需要用到 pg_bigm。
pg_bigm 可以理解为 pg_trgm PLUS。
版本
# cat /etc/centos-release
CentOS Linux release 7.4.1708 (Core)
#
#
# yum list installed |grep -i postgresql
postgresql10.x86_64 10.11-2PGDG.rhel7 @pgdg10
postgresql10-contrib.x86_64 10.11-2PGDG.rhel7 @pgdg10
postgresql10-debuginfo.x86_64 10.11-2PGDG.rhel7 @pgdg10
postgresql10-devel.x86_64 10.11-2PGDG.rhel7 @pgdg10
postgresql10-docs.x86_64 10.11-2PGDG.rhel7 @pgdg10
postgresql10-libs.x86_64 10.11-2PGDG.rhel7 @pgdg10
postgresql10-odbc.x86_64 12.00.0000-1PGDG.rhel7 @pgdg10
postgresql10-plperl.x86_64 10.11-2PGDG.rhel7 @pgdg10
postgresql10-plpython.x86_64 10.11-2PGDG.rhel7 @pgdg10
postgresql10-pltcl.x86_64 10.11-2PGDG.rhel7 @pgdg10
postgresql10-server.x86_64 10.11-2PGDG.rhel7 @pgdg10
postgresql10-tcl.x86_64 2.4.0-1.rhel7 @pgdg10
postgresql10-tcl-debuginfo.x86_64 2.3.1-1.rhel7 @pgdg10
postgresql10-test.x86_64 10.11-2PGDG.rhel7 @pgdg10
# su - postgres
Last login: Wed Jan 15 18:34:12 CST 2020 on pts/0
$
$
$ psql -c "select version();"
version
----------------------------------------------------------------------------------------------------------
PostgreSQL 10.11 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-39), 64-bit
(1 row)
下载安装
# su - postgres
$ git clone git://git.osdn.net/gitroot/pgbigm/pg_bigm.git
$ cd pg_bigm/
$ git branch -a
* master
remotes/origin/HEAD -> origin/master
remotes/origin/REL1_0_STABLE
remotes/origin/REL1_1_STABLE
remotes/origin/REL1_2_STABLE
remotes/origin/master
$ git checkout -b REL1_2_STABLE
$ make USE_PGXS=1 PG_CONFIG=/usr/pgsql-10/bin/pg_config
$ exit;
# cd /var/lib/pgsql/pg_bigm/
# make USE_PGXS=1 PG_CONFIG=/usr/pgsql-10/bin/pg_config install
/bin/mkdir -p '/usr/pgsql-10/lib'
/bin/mkdir -p '/usr/pgsql-10/share/extension'
/bin/mkdir -p '/usr/pgsql-10/share/extension'
/bin/install -c -m 755 pg_bigm.so '/usr/pgsql-10/lib/pg_bigm.so'
/bin/install -c -m 644 .//pg_bigm.control '/usr/pgsql-10/share/extension/'
/bin/install -c -m 644 .//pg_bigm--1.2.sql .//pg_bigm--1.1--1.2.sql .//pg_bigm--1.0--1.1.sql '/usr/pgsql-10/share/extension/'
创建 pg_bigm
# vi /var/lib/pgsql/10/data/postgresql.conf
shared_preload_libraries ='pg_bigm'
# systemctl restart postgresql-10.service
# su - postgres
$ psql
postgres=# create extension pg_bigm;
postgres=# \dx
List of installed extensions
Name | Version | Schema | Description
------------------------------+---------+------------+------------------------------------------------------------------------------------------
pg_bigm | 1.2 | public | text similarity measurement and index searching based on bigrams
plpgsql | 1.0 | pg_catalog | PL/pgSQL procedural language
(2 rows)
postgres=# select show_bigm('this is pg_bigm extension');
show_bigm
-----------------------------------------------------------------------------------------
{" e"," i"," p"," t",_b,bi,en,ex,g_,gm,hi,ig,io,is,"m ","n ",ns,on,pg,"s ",si,te,th,xt}
(1 row)
初步使用
postgres=# create table tmp_t0(
id bigint,
name varchar(100),
memo varchar(100)
);
postgres=# insert into tmp_t0
select id,
md5(id::varchar) as name,
md5(md5(id::varchar) ) as memo
from generate_series(1,1000000) as id;
postgres=# create index idx_tmp_t0_name on tmp_t0 using gin(name gin_bigm_ops);
postgres=# explain select * from tmp_t0 where name like '%uuu%';
QUERY PLAN
---------------------------------------------------------------------------------
Bitmap Heap Scan on tmp_t0 (cost=24.77..400.04 rows=100 width=74)
Recheck Cond: ((name)::text ~~ '%uuu%'::text)
-> Bitmap Index Scan on idx_tmp_t0_name (cost=0.00..24.75 rows=100 width=0)
Index Cond: ((name)::text ~~ '%uuu%'::text)
(4 rows)
postgres=# explain select * from tmp_t0 where name like '%uu%';
QUERY PLAN
---------------------------------------------------------------------------------
Bitmap Heap Scan on tmp_t0 (cost=24.77..400.04 rows=100 width=74)
Recheck Cond: ((name)::text ~~ '%uu%'::text)
-> Bitmap Index Scan on idx_tmp_t0_name (cost=0.00..24.75 rows=100 width=0)
Index Cond: ((name)::text ~~ '%uu%'::text)
(4 rows)
postgres=# explain select * from tmp_t0 where name like '%u%';
QUERY PLAN
-----------------------------------------------------------------------------------
Bitmap Heap Scan on tmp_t0 (cost=1732.78..2108.04 rows=100 width=74)
Recheck Cond: ((name)::text ~~ '%u%'::text)
-> Bitmap Index Scan on idx_tmp_t0_name (cost=0.00..1732.75 rows=100 width=0)
Index Cond: ((name)::text ~~ '%u%'::text)
(4 rows)
优秀
参考:
https://pgbigm.osdn.jp/index_en.html
https://pgbigm.osdn.jp/pg_bigm_en-1-2.html
http://git.osdn.net/view?p=pgbigm/pg_bigm.git;a=summary
git://git.osdn.net/gitroot/pgbigm/pg_bigm.git