一、创建测试表
测试表1
-- Create the first test table: a surrogate key plus two random payload columns.
CREATE TABLE test_table (
    id SERIAL PRIMARY KEY,
    random_int INTEGER,
    random_text TEXT
);
-- Populate test_table with 2,000,000 rows of random data.
-- NOTE: one set-based INSERT ... SELECT is far faster than the original
-- 2M-iteration PL/pgSQL loop of single-row INSERTs.
-- md5() returns only 32 characters, so the original
-- substr(md5(...), 1, 50 + (random()*150)::INTEGER) could never produce
-- the requested 50..200-character text; repeating the hash 7 times
-- (7 * 32 = 224 chars) makes the random length actually take effect.
INSERT INTO test_table (random_int, random_text)
SELECT
    (random() * 1000000)::INTEGER,
    substr(repeat(md5(random()::TEXT), 7), 1, 50 + (random() * 150)::INTEGER)
FROM generate_series(1, 2000000);
-- Check the table size; re-run the insert above until the table reaches the desired size.
SELECT pg_size_pretty(pg_total_relation_size('test_table')) AS table_size;
-- Refresh planner statistics so the optimizer sees the new data.
ANALYZE test_table;
测试表2
-- Create the second test table; test_table_id refers to test_table.id
-- (no FK constraint is declared, so orphan references are possible).
CREATE TABLE test_table2 (
    id SERIAL PRIMARY KEY,
    test_table_id INTEGER,
    random_value NUMERIC
);
-- Populate test_table2 with 2,000,000 rows that reference test_table.
-- NOTE: one set-based INSERT ... SELECT replaces the original slow
-- 2M-iteration PL/pgSQL loop of single-row INSERTs.
-- 1 + floor(random() * 2000000) yields ids uniformly in 1..2000000;
-- the original (random() * 2000000)::INTEGER cast ROUNDS, producing a
-- non-existent id 0 and half-weighted endpoints, i.e. rows that can
-- never match in the join.
INSERT INTO test_table2 (test_table_id, random_value)
SELECT
    1 + floor(random() * 2000000)::INTEGER,
    random() * 1000
FROM generate_series(1, 2000000);
-- Check the table size; re-run the insert above until the table reaches the desired size.
SELECT pg_size_pretty(pg_total_relation_size('test_table2')) AS table_size;
-- Refresh planner statistics so the optimizer sees the new data.
ANALYZE test_table2;
-- Run the aggregate join query and inspect its execution plan
-- (baseline, before tuning the parallel-query settings).
EXPLAIN (ANALYZE, VERBOSE)
SELECT
    t1.random_int,
    AVG(t2.random_value) AS avg_value
FROM test_table AS t1
INNER JOIN test_table2 AS t2
    ON t2.test_table_id = t1.id
GROUP BY t1.random_int
ORDER BY avg_value DESC
LIMIT 10;
并行未开启
test=# EXPLAIN (ANALYZE, VERBOSE)
SELECT t1.random_int, AVG(t2.random_value) AS avg_value
FROM test_table t1
JOIN test_table2 t2 ON t1.id = t2.test_table_id
GROUP BY t1.random_int
ORDER BY avg_value DESC
LIMIT 10;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=595617.69..595617.71 rows=10 width=36) (actual time=4625.134..4625.139 rows=10 loops=1)
Output: t1.random_int, (avg(t2.random_value))
-> Sort (cost=595617.69..597569.46 rows=780708 width=36) (actual time=4625.133..4625.136 rows=10 loops=1)
Output: t1.random_int, (avg(t2.random_value))
Sort Key: (avg(t2.random_value)) DESC
Sort Method: top-N heapsort Memory: 25kB
-> HashAggregate (cost=529925.52..578746.87 rows=780708 width=36) (actual time=3367.912..4521.460 rows=822508 loops=1)
Output: t1.random_int, avg(t2.random_value)
Group Key: t1.random_int
Planned Partitions: 32 Batches: 161 Memory Usage: 8249kB Disk Usage: 160848kB
-> Hash Join (cost=143008.51..273675.52 rows=4000000 width=15) (actual time=790.949..2592.235 rows=3999999 loops=1)
Output: t1.random_int, t2.random_value
Inner Unique: true
Hash Cond: (t2.test_table_id = t1.id)
-> Seq Scan on public.test_table2 t2 (cost=0.00..65478.00 rows=4000000 width=15) (actual time=0.011..345.404 rows=4000000 loops=1)
Output: t2.random_value, t2.test_table_id
-> Hash (cost=77383.78..77383.78 rows=3999978 width=8) (actual time=790.439..790.440 rows=4000000 loops=1)
Output: t1.random_int, t1.id
Buckets: 262144 Batches: 32 Memory Usage: 6921kB
-> Seq Scan on public.test_table t1 (cost=0.00..77383.78 rows=3999978 width=8) (actual time=0.007..367.693 rows=4000000 loops=1)
Output: t1.random_int, t1.id
Query Identifier: 1846774944933342204
Planning Time: 0.375 ms
Execution Time: 4625.744 ms
(24 rows)
test=#
二、并行查询
并行查询的适用条件
并行查询在PostgreSQL中是一项可以显著提高查询性能的功能,但其使用受到多种因素的限制。以下是一些关键的配置和条件,它们决定了并行查询是否可以被应用:
必要配置
-
max_parallel_workers_per_gather必须设置为大于零的值。这意味着至少有一个并行工作者可以被用于并行查询计划的执行。
-
系统不能处于单用户模式。在单用户模式下,整个数据库系统作为单一进程运行,因此无法启动背景工作者进程。
不适用情况
即使并行查询计划理论上可以生成,但如果出现以下情况之一,查询优化器将不会生成并行计划:
-
查询涉及数据写入或行级锁:如果查询包含数据修改操作(无论是顶级操作还是公共表表达式(CTE)内的操作),则不会为该查询生成并行计划。例外情况是创建新表并填充数据的命令,这些命令可以使用并行计划:
CREATE TABLE … AS
SELECT INTO
CREATE MATERIALIZED VIEW
REFRESH MATERIALIZED VIEW
-
查询可能在执行过程中被挂起:如果系统认为查询的执行可能会被部分地或增量地挂起,那么不会生成并行计划。例如,使用DECLARE CURSOR创建的游标永远不会使用并行计划。同样地,形式如FOR x IN query LOOP … END LOOP的PL/pgSQL循环也不会使用并行计划,因为并行查询系统无法确保循环代码在并行查询活跃时安全执行。
-
查询使用了标记为PARALLEL UNSAFE的函数:大多数系统定义的函数是PARALLEL SAFE的,但用户定义的函数默认被标记为PARALLEL UNSAFE。详情请参考第15.4节。
-
查询在另一个已经并行的查询内部运行:例如,如果一个并行查询调用的函数自身发出SQL查询,那么该查询将不会使用并行计划。这是一个当前实现的限制,而且可能不希望移除这一限制,以免单个查询使用过多的进程。
执行时的限制
即使为特定查询生成了并行查询计划,在执行时也可能因以下情况之一而无法并行执行:
-
背景工作者不足:如果由于max_worker_processes的限制,无法获取到足够的背景工作者。
-
并行工作者数量超出限制:如果由于max_parallel_workers的限制,无法获取到足够的并行工作者。
-
客户端发送带有非零获取计数的Execute消息:这通常发生在不依赖libpq的客户端中。如果这种情况频繁发生,可以考虑在可能发生串行执行的会话中将max_parallel_workers_per_gather设置为零,以避免生成在串行执行时可能次优的查询计划。
并行查询配置
以下是需要修改的地方,建议根据个人情况进行配置
# Parallel-query settings for postgresql.conf.
# NOTE: postgresql.conf lines must NOT end with semicolons — the original
# trailing ';' would make every line invalid.
# max_worker_processes requires a server restart; the other settings take
# effect after a reload (pg_ctl reload, or SELECT pg_reload_conf()).
max_parallel_workers_per_gather = 4
max_parallel_workers = 16
max_worker_processes = 16
parallel_setup_cost = 1000
parallel_tuple_cost = 0.1
min_parallel_table_scan_size = '1MB'
min_parallel_index_scan_size = '1MB'
jit = on
更改过后的执行计划
-- Re-run the same aggregate join query and inspect the plan
-- after enabling the parallel-query settings.
EXPLAIN (ANALYZE, VERBOSE)
SELECT
    t1.random_int,
    AVG(t2.random_value) AS avg_value
FROM test_table AS t1
INNER JOIN test_table2 AS t2
    ON t2.test_table_id = t1.id
GROUP BY t1.random_int
ORDER BY avg_value DESC
LIMIT 10;
test=# EXPLAIN (ANALYZE, VERBOSE)
SELECT t1.random_int, AVG(t2.random_value) AS avg_value
FROM test_table t1
JOIN test_table2 t2 ON t1.id = t2.test_table_id
GROUP BY t1.random_int
ORDER BY avg_value DESC
LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=376576.04..376576.07 rows=10 width=36) (actual time=3425.935..3454.169 rows=10 loops=1)
Output: t1.random_int, (avg(t2.random_value))
-> Sort (cost=376576.04..378527.81 rows=780708 width=36) (actual time=3425.934..3454.167 rows=10 loops=1)
Output: t1.random_int, (avg(t2.random_value))
Sort Key: (avg(t2.random_value)) DESC
Sort Method: top-N heapsort Memory: 25kB
-> Finalize GroupAggregate (cost=232414.56..359705.22 rows=780708 width=36) (actual time=1536.253..3322.661 rows=822508 loops=1)
Output: t1.random_int, avg(t2.random_value)
Group Key: t1.random_int
-> Gather Merge (cost=232414.56..326525.13 rows=3122832 width=36) (actual time=1536.246..2242.094 rows=1513177 loops=1)
Output: t1.random_int, (PARTIAL avg(t2.random_value))
Workers Planned: 4
Workers Launched: 4
-> Partial GroupAggregate (cost=232314.50..249573.30 rows=780708 width=36) (actual time=1460.530..1877.445 rows=302635 loops=5)
Output: t1.random_int, PARTIAL avg(t2.random_value)
Group Key: t1.random_int
Worker 0: actual time=1452.893..1829.912 rows=269927 loops=1
Worker 1: actual time=1447.372..1949.316 rows=345027 loops=1
Worker 2: actual time=1420.501..1809.073 rows=265385 loops=1
Worker 3: actual time=1445.882..1859.911 rows=285168 loops=1
-> Sort (cost=232314.50..234814.48 rows=999994 width=15) (actual time=1460.510..1574.450 rows=800000 loops=5)
Output: t1.random_int, t2.random_value
Sort Key: t1.random_int
Sort Method: external merge Disk: 24008kB
Worker 0: actual time=1452.868..1557.530 rows=675843 loops=1
Sort Method: external merge Disk: 16712kB
Worker 1: actual time=1447.354..1594.915 rows=929370 loops=1
Sort Method: external merge Disk: 22976kB
Worker 2: actual time=1420.478..1529.668 rows=683334 loops=1
Sort Method: external merge Disk: 16896kB
Worker 3: actual time=1445.860..1563.544 rows=740113 loops=1
Sort Method: external merge Disk: 18304kB
-> Parallel Hash Join (cost=63790.87..115566.80 rows=999994 width=15) (actual time=661.549..1104.918 rows=800000 loops=5)
Output: t1.random_int, t2.random_value
Inner Unique: true
Hash Cond: (t2.test_table_id = t1.id)
Worker 0: actual time=675.613..1100.772 rows=675843 loops=1
Worker 1: actual time=654.632..1117.668 rows=929370 loops=1
Worker 2: actual time=675.644..1097.117 rows=683334 loops=1
Worker 3: actual time=637.896..1088.461 rows=740113 loops=1
-> Parallel Seq Scan on public.test_table2 t2 (cost=0.00..35477.94 rows=999994 width=15) (actual time=0.025..135.608 rows=800000 loops=5)
Output: t2.random_value, t2.test_table_id
Worker 0: actual time=0.035..155.472 rows=828960 loops=1
Worker 1: actual time=0.030..132.829 rows=1002916 loops=1
Worker 2: actual time=0.030..118.347 rows=884021 loops=1
Worker 3: actual time=0.022..153.351 rows=538824 loops=1
-> Parallel Hash (cost=47383.94..47383.94 rows=999994 width=8) (actual time=316.469..316.470 rows=800000 loops=5)
Output: t1.random_int, t1.id
Buckets: 262144 Batches: 32 Memory Usage: 7008kB
Worker 0: actual time=317.067..317.068 rows=878596 loops=1
Worker 1: actual time=322.550..322.550 rows=688224 loops=1
Worker 2: actual time=297.070..297.071 rows=656017 loops=1
Worker 3: actual time=326.658..326.659 rows=1000450 loops=1
-> Parallel Seq Scan on public.test_table t1 (cost=0.00..47383.94 rows=999994 width=8) (actual time=0.016..120.297 rows=800000 loops=5)
Output: t1.random_int, t1.id
Worker 0: actual time=0.013..132.002 rows=878596 loops=1
Worker 1: actual time=0.019..95.735 rows=688224 loops=1
Worker 2: actual time=0.022..98.217 rows=656017 loops=1
Worker 3: actual time=0.015..138.631 rows=1000450 loops=1
Query Identifier: 1846774944933342204
Planning Time: 0.183 ms
Execution Time: 3454.233 ms
(62 rows)
如此就实现了并行查询。