join大表和小表对查询性能的影响和索引类型的选择

join大表和小表对查询性能的影响和索引类型的选择

示例数据库采用PostgreSQL

1.创建数据表和相关函数

drop function if exists gen_random_gps(bigint);
drop function if exists gps_save(bigint,bigint,timestamptz,geometry);
drop function if exists gen_random_plate(int,int);
drop function if exists gen_random_az(int);
drop function if exists gen_random_num(int);
drop table if exists  vehgps;
drop table if exists  vehicles;

/****************************************************************************************
    车辆基本信息表
drop table if exists  vehicles;
****************************************************************************************/
create table vehicles(
    objectid int not null,                       --唯一编号
    numplate text not null,                      --车牌号
    describe text,                               --备注
  generate timestamptz default now() not null, --注册时间
    constraint pk_vehicles_objectid primary key (objectid)
);
create index idx_vehicles_numplate on vehicles(numplate);
/****************************************************************************************
    车辆GPS轨迹表
drop table if exists  vehiclegps;
****************************************************************************************/
create table if not exists vehiclegps(
    objectid bigint not null,                           --唯一编号
    vehid int not null,                                 --车辆编号,外键
    generate timestamptz default now() not null,        --GPS时间,实际应用应按此字段分表
    constraint pk_vehiclegps_objectid primary key (objectid),
    constraint fk_vehiclegps_vehid foreign key(vehid)  references vehicles(objectid) on delete cascade
);
create index idx_vehiclegps_vehid on vehiclegps(vehid); --车辆编号
select AddGeometryColumn ('vehiclegps','geom',4326,'POINT',2); -- GPS位置,GPS采用4326 ,类型为点,二维坐标
create index idx_vehiclegps_generate on vehiclegps using brin(generate)  with (pages_per_range='1');
/****************************************************************************************
    车辆基本信息函数
drop function if exists gen_random_plate(int,int);
drop function if exists gen_random_az(int);
drop function if exists gen_random_num(int);
****************************************************************************************/
--生成指定位数的数字
create or replace function gen_random_num(int)
    returns text
as $$
	select string_agg(chr((random()*(57-48)+48 )::integer) , '')  from generate_series(1,$1);
$$ language sql;
--生成指定位数的大写A-Z
create or replace function gen_random_az(int)
    returns text
as $$
	select string_agg(chr((random()*(90-65)+65 )::integer) , '')  from generate_series(1,$1);
$$ language sql;
--随机生成车牌号
create or replace function gen_random_plate(int default 1,int default 5)
    returns text
as $$
	with cte as(
		select array['浙','粤','京','津','冀','晋','蒙','辽','黑','沪','吉','苏','皖','赣','鲁','豫','鄂','湘','桂','琼','渝','川','贵','云','藏','陕','甘','青','宁'] as provinces
	),provinces as(
		select array_to_string(array[provinces[(random()*(29-1)+1)::integer],gen_random_az($1)],'') as pro,
				gen_random_num($2) as num
			from cte
	) select array_to_string(array[pro,num],'-') from provinces
$$ language sql;
/****************************************************************************************
    随机插入车辆GPS轨迹函数,调用一次插件1千条记录,gps时间控制在2018-01-01至2018-04-16之间
drop function if exists gen_random_gps(bigint);
drop function if exists gps_save(bigint,bigint,timestamptz,geometry);
****************************************************************************************/
create or replace function gen_random_gps(bigint)
    returns table(objectid bigint,vehid bigint,generate timestamptz,geom geometry)
as $$
	with cte as(
		select start,(start + 999) as end,
			extract(epoch from '2018-01-01'::timestamptz) as sdate,
			extract(epoch from '2018-04-16'::timestamptz) as edata,
			(select array[min(objectid),max(objectid)] from vehicles) as vehid			
		from (select $1 as start) as tmp
	),points as(
		select id,
			((random()*(vehid[2]-vehid[1])+vehid[1])::bigint) as vehid, --车辆id是连续的才这么干
			to_timestamp((random()*(edata-sdate)+sdate)) as gpstime,
			ST_SetSRID(ST_Point(
				round((random()*(135.085831-73.406586)+73.406586)::numeric,6),
				round((random()*(53.880950-3.408477)+3.408477)::numeric,6)
			),4326) as geom
		from cte,generate_series(cte.start,cte.end) as id
	)select * from points
$$ language sql;
--保存GPS轨迹
create or replace function gps_save(bigint,bigint,timestamptz,geometry)
    returns bigint
as $$
	insert into vehiclegps(objectid,vehid,generate,geom) values($1,$2,$3,$4) returning objectid;
$$ language sql;

2.插入测试数据

/****************************************************************************************
    随机插入车辆基本信息测试数据(50W)
****************************************************************************************/
insert into vehicles  select id, gen_random_plate() from generate_series(1,500000) as id;
/****************************************************************************************
    随机插入车辆GPS轨迹测试数据(10000000W)
****************************************************************************************/
do $$
	declare
		v_id bigint;
	begin
		for i in 1..10000 loop
			select max(id) into v_id from (select gps_save(objectid,vehid,generate,geom) as id from gen_random_gps((i-1)*1000 + 1)) as tmp;
			raise notice  '%', v_id;
		end loop;
	end;
$$;

为了不影响执行计划数据插入完成后运行vacuum

vacuum full freeze VERBOSE vehicles;
vacuum full freeze VERBOSE vehiclegps;

3.在本案例中b树索引和brin范围索引比较

3.1 测试brin类型索引

在本案例中,因为单位时间内的gps数据非常密集,brin类型索引只有在一个非常小的范围内(大约3小时以内的数据,返回的数据约为14000行,执行时间约为360ms,以实测为准,误差基本不会太大)才有效(注意pages_per_range已经设置为1).

drop index if exists  idx_vehiclegps_generate;
create index idx_vehiclegps_generate on vehiclegps using brin(generate) with (pages_per_range='1');
--测试获取3小时25分内的数据
explain (analyze,verbose,costs,buffers,timing)
select * from vehiclegps
where generate>=('2018-01-01 12:00:00'::timestamptz) and generate<('2018-01-01 15:25:00'::timestamptz)
--测试获取一天内的数据
explain (analyze,verbose,costs,buffers,timing)
select * from vehiclegps
where generate>=('2018-01-01 12:00:00'::timestamptz) and generate<('2018-01-01 15:25:00'::timestamptz)

3.2 测试B树索引

在本案例中,B对索引表现优于brin.

drop index if exists  idx_vehiclegps_generate;
create index idx_vehiclegps_generate on vehiclegps(generate);
--测试获取3小时25分内的数据
explain (analyze,verbose,costs,buffers,timing)
select * from vehiclegps
where generate>=('2018-01-01 12:00:00'::timestamptz) and generate<('2018-01-01 15:25:00'::timestamptz)
--测试获取一天内的数据
explain (analyze,verbose,costs,buffers,timing)
select * from vehiclegps
where generate>=('2018-01-01 12:00:00'::timestamptz) and generate<('2018-01-01 15:25:00'::timestamptz)

本章小结

  • 任何事物都有适用范围,实际如何使用要根据业务需求测试后选择.
  • brin类型的索引适用于数据比较稀疏,跨度比较大的应用
  • B树索引适用于大多数应用

4.join查询

根据第3章的结论,后面的示例采用B树索引.

4.1 大表join小表

/****************************************************************************************
    大表join小表
****************************************************************************************/
explain (analyze,verbose,costs,buffers,timing)
select gps.* from vehiclegps as gps
inner join vehicles as veh on veh.objectid=gps.vehid
where gps.generate>=('2018-01-01'::timestamptz) and gps.generate<('2018-01-02'::timestamptz)

4.2 小表join大表

explain (analyze,verbose,costs,buffers,timing)
select gps.* from vehicles as veh
inner join vehiclegps as gps on gps.vehid=veh.objectid
where gps.generate>=('2018-01-01'::timestamptz) and gps.generate<('2018-01-02'::timestamptz)

本章小结

  • 基本没有区别.
阅读更多
上一篇数据表标志位对查询性能的影响
下一篇从程序里使用Postgresql复杂数据类型
想对作者说点什么? 我来说一句

没有更多推荐了,返回首页

关闭
关闭