PostgreSQL 在子查询返回多列

最新推荐文章于 2024-06-25 18:20:04 发布

kmblack1

最新推荐文章于 2024-06-25 18:20:04 发布

阅读量2.8k

点赞数

分类专栏： postgresql

本文链接：https://blog.csdn.net/kmblack1/article/details/102846754

版权

postgresql 专栏收录该内容

95 篇文章 14 订阅

订阅专栏

子查询返回多列可以使用join的方式,但有些需求join效率不如在子查询中返回多列那么好.具体使用子查询中返回多列还是join请根据需求和执行计划决定.

快速阅读请从第3节开始.

1 创建测试表

drop table if exists deppeoples;
drop table if exists departments;
drop type if exists deppeoples01;
/****************************************************************************************
	部门表
****************************************************************************************/
create table departments(
	objectid serial not null,					--唯一编号
	parentid integer not null,					--上级部门,指向本表的objectid,0表示最顶级部门
	name text not null,							--部门名称
	describe text,								--部门备注
	generate timestamptz default(now()) not null, --创建时间
	state integer default(2) not null,			--状态.0已无效,(1<<1)正常
	constraint pk_departments_objectid primary key(objectid) with (fillfactor=80)
) with (fillfactor=80);
create index idx_departments_parentid on departments(parentid) with (fillfactor=80);

/****************************************************************************************
	部门人员表
****************************************************************************************/
create table deppeoples(
	objectid bigserial not null,				--唯一编号
	depid integer not null,						--部门编号,外键(departments->objectid,级联删除)
	name text not null,							--姓名
	title integer not null,						--职务.来自字典,测试数据用1-10分别表示,数据越大职务越高
	tel text not null,							--联系电话
	sex integer not null,						--性别.来自字典,测试数据随机生成
	national integer not null,					--民族.来自字典
	mail text,									--邮箱
	describe text,								--备注
	generate timestamptz default(now()) not null, --创建时间
	state integer default(2) not null,			--状态.0已无效,(1<<1)正常
	constraint pk_deppeoples_objectid primary key(objectid) with (fillfactor=80),
	constraint fk_deppeoples_contid foreign key(depid) references departments(objectid) on delete cascade
) with (fillfactor=80);
create index idx_deppeoples_depid on deppeoples(depid,title) with (fillfactor=80);

2 测试数据

2.1 部门测试数据

创建"xxxxxx集团公司",在"xxxxxx集团公司"下创建100个部门,部门名称在数据1514736000-1546272000之间随机生成.

insert into departments(parentid,name) values(0,'xxxxxx集团公司');
insert into departments(parentid,name)
	select
		1 as parentid,
		(random()*(1546272000-1514736000)+1514736000)::bigint as name
	from generate_series(1,999);

2.2 部门人员测试数据

创建100万部门人员,随机分布在各个部门,人员名称在数据1514736000-1546272000之间随机生成.同时每个部门的最高领导只有1人.

do $$
	declare
		v_start bigint;
		v_end bigint;
	begin
		for i in 1..1000 loop
			v_start := (i-1)*1000 + 1;
			v_end := v_start + 999;

			insert into deppeoples(depid,name,title,tel,sex,national)
				select
					(random()*(100-1)+1)::integer as depid,
					(random()*(1546272000-1514736000)+1514736000)::bigint as name,
					(random()*(9-1)+1)::integer as title,
					(random()*(13999999999-13000000000)+13000000000)::bigint as tel,
					(random()*(3-1)+1)::integer as sex,
					(random()*(54-1)+1)::integer as national
				from generate_series(v_start,v_end);
				raise notice  '%,%', v_start,v_end;
		end loop;
	end;
$$;

2.3 生成每个部门的最高领导

insert into deppeoples(depid,name,title,tel,sex,national)
	select
		objectid as depid,
		(random()*(1546272000-1514736000)+1514736000)::bigint as name,
		10 as title,
		(random()*(13999999999-13000000000)+13000000000)::bigint as tel,
		(random()*(3-1)+1)::integer as sex,
		(random()*(54-1)+1)::integer as national
	from departments;

2.4 测试数据vacuum

为保证测试的准确性,生成完成后运行vacuum.

vacuum  freeze verbose analyze departments;
vacuum  freeze verbose analyze deppeoples;

3 查询各部门的最高领导

3.1 join方式

--禁用并行
set max_parallel_workers_per_gather=0;

explain (analyze,verbose,costs,buffers,timing)
select
	t1.name as department,
	t2.name,t2.title,t2.tel
from departments as t1
left join deppeoples as t2 on t2.depid=t1.objectid
where t2.title=10;

执行5次,取最后一次,可以看到left join共扫描了3012页,用时3.296 ms

                                                                     QUERY PLAN                                                                      
-----------------------------------------------------------------------------------------------------------------------------------------------------
 Nested Loop  (cost=0.42..7796.00 rows=1 width=38) (actual time=3.257..3.257 rows=0 loops=1)
   Output: t1.name, t2.name, t2.title, t2.tel
   Buffers: shared hit=3012
   ->  Seq Scan on public.departments t1  (cost=0.00..21.00 rows=1000 width=15) (actual time=0.009..0.200 rows=1000 loops=1)
         Output: t1.objectid, t1.parentid, t1.name, t1.describe, t1.generate, t1.state
         Buffers: shared hit=11
   ->  Index Scan using idx_deppeoples_depid on public.deppeoples t2  (cost=0.42..7.76 rows=1 width=31) (actual time=0.003..0.003 rows=0 loops=1000)
         Output: t2.objectid, t2.depid, t2.name, t2.title, t2.tel, t2.sex, t2."national", t2.mail, t2.describe, t2.generate, t2.state
         Index Cond: ((t2.depid = t1.objectid) AND (t2.title = 10))
         Buffers: shared hit=3001
 Planning Time: 0.314 ms
 Execution Time: 3.296 ms
(12 rows)

3.2 子查询返回多列的方式

--重点创建row类型,定义输入的列名称和类型
drop type if exists deppeoples01;
create type deppeoples01 as (name1 text,title1 integer,tel1 text);

explain (analyze,verbose,costs,buffers,timing)
with cte as(
	select
		t1.name as department,
		(select row(name,title,tel)::deppeoples01 as r from deppeoples as t2 where t2.depid=t1.objectid and title=10 order by objectid desc limit 1)
	from departments as t1
)select department,(r).name1,(r).title1,(r).tel1 from cte;

执行5次,取最后一次,可以看到子查询共扫描了3012页,用时8.827 ms

                                                                                QUERY PLAN                                                                                 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 CTE Scan on cte  (cost=8481.00..8501.00 rows=1000 width=100) (actual time=0.042..8.584 rows=1000 loops=1)
   Output: cte.department, (cte.r).name1, (cte.r).title1, (cte.r).tel1
   Buffers: shared hit=3012
   CTE cte
     ->  Seq Scan on public.departments t1  (cost=0.00..8481.00 rows=1000 width=43) (actual time=0.036..7.363 rows=1000 loops=1)
           Output: t1.name, (SubPlan 1)
           Buffers: shared hit=3012
           SubPlan 1
             ->  Limit  (cost=8.46..8.46 rows=1 width=40) (actual time=0.006..0.006 rows=0 loops=1000)
                   Output: (ROW(t2.name, t2.title, t2.tel)::deppeoples01), t2.objectid
                   Buffers: shared hit=3001
                   ->  Sort  (cost=8.46..8.46 rows=1 width=40) (actual time=0.005..0.005 rows=0 loops=1000)
                         Output: (ROW(t2.name, t2.title, t2.tel)::deppeoples01), t2.objectid
                         Sort Key: t2.objectid DESC
                         Sort Method: quicksort  Memory: 25kB
                         Buffers: shared hit=3001
                         ->  Index Scan using idx_deppeoples_depid on public.deppeoples t2  (cost=0.42..8.45 rows=1 width=40) (actual time=0.003..0.003 rows=0 loops=1000)
                               Output: ROW(t2.name, t2.title, t2.tel)::deppeoples01, t2.objectid
                               Index Cond: ((t2.depid = t1.objectid) AND (t2.title = 10))
                               Buffers: shared hit=3001
 Planning Time: 0.237 ms
 Execution Time: 8.827 ms
(22 rows)

4 小结

在本例中没有过多的优化,主要说明子查询返回多列效果,用join时如果有多个最高部门领导的话效率不如子查询;
在本列3.2节中介绍了子查询返回多列的用法.重点为定义类型,然后用row把输出列包装起来,然后转换为定义的类型,定义的类型要和row中输出列类型完全一至,类型名称可以和列名称可也相同也可以不同;
网上比较普遍的声音说是不要使用子查询,实际应该结合自己的需求和执行计划决定采用那种;
不要偏听偏信,适合自己的才是最好的.