LightDB分布式与集中式部分场景性能对比

最新推荐文章于 2024-07-20 00:05:47 发布

追魂曲

最新推荐文章于 2024-07-20 00:05:47 发布

阅读量68

点赞数

分类专栏： postgresql LightDB 文章标签：分布式数据库 sql

本文链接：https://blog.csdn.net/qq_22066003/article/details/128440822

版权

LightDB 同时被 2 个专栏收录

34 篇文章 1 订阅

订阅专栏

postgresql

32 篇文章 0 订阅

订阅专栏

创建分布式表

lightdb@postgres=# create table canopy_table(id int primary key,v text);
CREATE TABLE
lightdb@postgres=# select create_distributed_table('canopy_table','id');
 create_distributed_table 
--------------------------
 
(1 row)

lightdb@postgres=# create table canopy_table_detail(id int primary key,v text,branch_id varchar(100));
CREATE TABLE
lightdb@postgres=# create table canopy_table_branch(v text,branch_id varchar(100));
CREATE TABLE
lightdb@postgres=# select create_distributed_table('canopy_table_detail','id');
 create_distributed_table 
--------------------------
 
(1 row)

lightdb@postgres=# select create_reference_table('canopy_table_branch');
 create_reference_table 
------------------------
 
(1 row)

插入数据

lightdb@postgres=# insert into canopy_table select id, uuid() from generate_series(1,10000000) id;
INSERT 0 10000000
lightdb@postgres=# insert into canopy_table_detail select id, uuid(),id % 1000 from generate_series(1,1000000) id;
INSERT 0 1000000
lightdb@postgres=# insert into canopy_table_branch select uuid(),id from generate_series(1,1000) id;
INSERT 0 1000

执行一个比较复杂的查询sql

lightdb@postgres=# explain analyze select b.branch_id,max(a.id),count(1),max(a.v) from canopy_table a,canopy_table_detail b,canopy_table_branch c where a.id=b.id and b.branch_id = c.branch_id group by b.branch_id;
                                                                                                 QUERY PLAN                                                                                                  
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 HashAggregate  (cost=1000.00..1003.50 rows=200 width=262) (actual time=353.692..354.317 rows=999 loops=1)
   Group Key: remote_scan.branch_id
   Batches: 1  Memory Usage: 593kB
   ->  Custom Scan (Canopy Adaptive)  (cost=0.00..0.00 rows=100000 width=262) (actual time=323.179..331.211 rows=31968 loops=1)
         Task Count: 32
         Tuple data received from nodes: 1464 kB
         Tasks Shown: One of 32
         ->  Task
               Tuple data received from node: 46 kB
               Node: host=192.168.237.136 port=5436 dbname=postgres
               ->  HashAggregate  (cost=2935.07..2945.07 rows=1000 width=47) (actual time=258.244..258.743 rows=999 loops=1)
                     Group Key: b.branch_id
                     Batches: 1  Memory Usage: 321kB
                     ->  Hash Join  (cost=33.30..2622.81 rows=31226 width=44) (actual time=1.114..193.310 rows=31197 loops=1)
                           Hash Cond: ((b.branch_id)::text = (c.branch_id)::text)
                           ->  Merge Join  (cost=0.80..2160.95 rows=31226 width=44) (actual time=0.071..152.280 rows=31226 loops=1)
                                 Merge Cond: (a.id = b.id)
                                 ->  Index Scan using canopy_table_pkey_102053 on canopy_table_102053 a  (cost=0.42..8467.58 rows=312477 width=41) (actual time=0.035..27.941 rows=31227 loops=1)
                                 ->  Index Scan using canopy_table_detail_pkey_102085 on canopy_table_detail_102085 b  (cost=0.29..848.68 rows=31226 width=7) (actual time=0.032..50.230 rows=31226 loops=1)
                           ->  Hash  (cost=20.00..20.00 rows=1000 width=3) (actual time=1.019..1.020 rows=1000 loops=1)
                                 Buckets: 1024  Batches: 1  Memory Usage: 44kB
                                 ->  Seq Scan on canopy_table_branch_102104 c  (cost=0.00..20.00 rows=1000 width=3) (actual time=0.017..0.334 rows=1000 loops=1)
                   Planning Time: 3.273 ms
                   Execution Time: 259.489 ms
 Planning Time: 2.154 ms
 Execution Time: 356.801 ms
(26 rows)

创建对应的非分布式表，然后对比性能

lightdb@postgres=# create table canopy_table_detail_classic(id int primary key,v text,branch_id varchar(100));
CREATE TABLE
lightdb@postgres=# create table canopy_table_branch_classic(v text,branch_id varchar(100));
CREATE TABLE
lightdb@postgres=# create table canopy_table_classic(id int primary key,v text);
CREATE TABLE
lightdb@postgres=# insert into canopy_table_branch_classic select uuid(),id from generate_series(1,1000) id;
INSERT 0 1000
lightdb@postgres=# insert into canopy_table_detail_classic select id, uuid(),id % 1000 from generate_series(1,1000000) id;
INSERT 0 1000000
lightdb@postgres=# insert into canopy_table_classic select id, uuid() from generate_series(1,10000000) id;
INSERT 0 10000000

执行相同的查询语句

explain analyze select b.branch_id,max(a.id),count(1),max(a.v) from canopy_table_classic a,canopy_table_detail_classic b,canopy_table_branch_classic c where a.id=b.id and b.branch_id = c.branch_id group by b.branch_id;
                                                                                             QUERY PLAN                                                                                             
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 HashAggregate  (cost=92587.26..92597.26 rows=1000 width=47) (actual time=2950.792..2951.208 rows=999 loops=1)
   Group Key: b.branch_id
   Batches: 1  Memory Usage: 321kB
   ->  Hash Join  (cost=34.54..82587.26 rows=1000000 width=44) (actual time=0.773..2217.860 rows=999000 loops=1)
         Hash Cond: ((b.branch_id)::text = (c.branch_id)::text)
         ->  Merge Join  (cost=2.04..68804.76 rows=1000000 width=44) (actual time=0.071..1717.877 rows=1000000 loops=1)
               Merge Cond: (a.id = b.id)
               ->  Index Scan using canopy_table_classic_pkey on canopy_table_classic a  (cost=0.43..270879.42 rows=9999999 width=41) (actual time=0.017..538.585 rows=1000001 loops=1)
               ->  Index Scan using canopy_table_detail_classic_pkey on canopy_table_detail_classic b  (cost=0.42..27091.42 rows=1000000 width=7) (actual time=0.045..485.613 rows=1000000 loops=1)
         ->  Hash  (cost=20.00..20.00 rows=1000 width=3) (actual time=0.689..0.691 rows=1000 loops=1)
               Buckets: 1024  Batches: 1  Memory Usage: 44kB
               ->  Seq Scan on canopy_table_branch_classic c  (cost=0.00..20.00 rows=1000 width=3) (actual time=0.035..0.441 rows=1000 loops=1)
 Planning Time: 1.486 ms
 Execution Time: 2951.499 ms
(14 rows)