数据治理开发经常需要查询Greenplum的所有database、schema、table、partition等信息用于元数据信息分析、表结构同步等。
1.查询数据库信息
--查看数据库版本
select version();
# PostgreSQL 9.4.24 (Greenplum Database 6.18.2 build commit:1242aadf0137d3b26ee42c80e579e78bd7a805c7 Open Source) on x86_64-unknown-linux-gnu, compiled by gcc (GCC) 6.4.0, 64-bit compiled on Nov 10 2021 00:17:26
-- 查询所有的database,不显示自带的库
select datname from pg_database where datname not in ('template1','template0','postgres');
-- 查询database名为cnoocdw存储大小
select 'cnoocdw' as database_name, pg_size_pretty(pg_database_size('cnoocdw')) as databasesize;
2.查询schema信息
2.1查看schema清单
-- 查看schema清单
-- 方式1
select schema_name from information_schema.schemata where schema_name!~'pg_*' order by schema_name;
-- 方式2
select nspname as schema_name from pg_namespace where nspname!~'pg_*' order by nspname;
2.2查看schema下面的表清单
--查看schema下的表清单,过滤掉分区表的子表
select t.tablename
from pg_tables t inner join pg_class c on c.relname = t.tablename
where
c.relnamespace = ( select pn.oid from pg_namespace pn where pn.nspname = t.schemaname)
and not exists ( select 1 from pg_inherits i where i.inhrelid = c.oid)
and t.schemaname = 'dw_ods'
order by t.tablename;
2.3查询schema下的视图清单
-- 查询dw_ods的视图清单
select viewname, cast(obj_description(c.oid,'pg_class') as varchar) as comment
from pg_views t inner join pg_class c on c.relname = t.viewname
where c.relnamespace = (select pn.oid from pg_namespace pn where pn.nspname = t.schemaname) and t.schemaname = 'dw_ods';
2.4查询schema下的Function信息
-- 查询dw_ods下Function信息
select pg_proc.proname as proname,pg_type.typname as typename, pg_proc.pronargs as argscount
from pg_proc join pg_type on pg_proc.prorettype = pg_type.oid
where pg_type.typname != 'void' and pg_proc.proname like 'sp_%'
and pronamespace = (select pg_namespace.oid from pg_namespace where nspname = 'dw_ods')
order by pg_proc.proname;
3.查询表信息
3.1查询schema的表清单
查询schema的表清单以及表的基本信息,owner、表内类型、是否分区表、创建时间、表结果最后更新时间、存储大小
-- 查询dw_ods对应schema下面的表清单
select t.schemaname as schema_name, t.tablename as tblname, t.tableowner, c.relkind as tblType,
cast(obj_description(c.oid,'pg_class') as varchar) as comment, o1.statime as created, o.statime as transientlastddltime, pg_total_relation_size(c.OID) as used_space,
case when exists (select 1 from pg_inherits i where i.inhparent = c.oid ) then 'true' else 'false' end as ispartitiontable
from pg_tables t left join pg_class c on c.relname = t.tablename
inner join pg_stat_last_operation o1 on o1.objid = c.oid and o1.staactionname = 'CREATE'
left join pg_stat_last_operation o on o.objid = c.oid and o.staactionname = 'ALTER'
where c.relnamespace = (select pn.oid from pg_namespace pn where pn.nspname = t.schemaname)
and t.schemaname = 'dw_ods'
order by t.tablename;
3.2 查看表的字段的信息
查询表的字段信息、以及各个字段名、类型、comments、是否分区字段、唯一约束、是否主键
-- 查询dw_ods.t1字段信息
select a.attname as columnname, format_type(a.atttypid, a.atttypmod) as data_type,
case when p.columnname is not null then 'true' else 'false' end as ispartition,
d.description as comments,
case when a.attnum in (select unnest(conkey) from pg_constraint where conrelid = a.attrelid and contype = 'p') then 'true' else 'false' end as isprimary,
case when a.attnum in (select unnest(conkey) from pg_constraint where conrelid = a.attrelid and contype = 'u') then 'true' else 'false' end as isunique,
case when a.attnotnull then 'true' else 'false' end as isnotnull
from pg_class c
inner join pg_tables ta on c.relname = ta.tablename
inner join pg_attribute a on a.attrelid = c.oid and a.attnum > 0
left join pg_description d on d.objoid = a.attrelid and d.objsubid = a.attnum
left join pg_partition_columns p on p.schemaname = ta.schemaname and p.tablename = ta.tablename and a.attname = p.columnname
where c.relnamespace = (select pn.oid from pg_namespace pn where pn.nspname = ta.schemaname)
and ta.schemaname = 'dw_ods' and c.relname = 't1'
and a.attisdropped = false order by a.attnum;
Greenplum7 没有pg_partition_columns视图
-- 查询dw_ods.t1字段信息
select a.attname as columnname, format_type(a.atttypid, a.atttypmod) as data_type,
case when pt.partattrs[0] is not null then 'true' else 'false' end as ispartition,
d.description as comments,
case when a.attnum in (select unnest(conkey) from pg_constraint where conrelid = a.attrelid and contype = 'p') then 'true' else 'false' end as isprimary,
case when a.attnum in (select unnest(conkey) from pg_constraint where conrelid = a.attrelid and contype = 'u') then 'true' else 'false' end as isunique,
case when a.attnotnull then 'true' else 'false' end as isnotnull
from pg_class c
inner join pg_tables ta on c.relname = ta.tablename
inner join pg_attribute a on a.attrelid = c.oid and a.attnum > 0
left join pg_description d on d.objoid = a.attrelid and d.objsubid = a.attnum
left join pg_partitioned_table pt on pt.partrelid = c.oid and a.attnum = pt.partattrs[0]
where c.relnamespace = (select pn.oid from pg_namespace pn where pn.nspname = ta.schemaname)
and ta.schemaname = 'dw_ods' and c.relname = 't1'
and a.attisdropped = false order by a.attnum;
3.3查询表的分区信息
-- 查询dw_ods.t1分区信息
select p.*,
o.statime, pg_total_relation_size(c.OID) as used_space
from pg_class c inner join pg_stat_last_operation o on o.objid = c.oid and o.staactionname = 'CREATE'
inner join pg_partitions p on p.tablename = c.relname
where c.relnamespace = (select pn.oid from pg_namespace pn where pn.nspname = p.schemaname)
and p.schemaname = 'dw_ods' and p.tablename = 't1';
Greenplum7 没有pg_partitions 视图
-- 查询dw_ods.t1分区信息
select
(select relnamespace::regnamespace from pg_class where oid = pg_partition_root(c.oid)) as schemaname,
n.nspname as partitionschemaname,
c.relname as partitiontablename,
split_part(substr(c.relname, position(pc.relname in c.relname) + length(pc.relname)), '_', 4) as partitionname,
pc.relname as tablename,
split_part(substr(pc.relname, position(ppc.relname in pc.relname) + length(ppc.relname)), '_', 4) as parentpartitionname,
case when pt.partstrat = 'r' then 'range' else 'list' end as partitiontype,
(select level from pg_partition_tree(pg_partition_root(c.oid)) where relid = c.oid) as partitionlevel,
(regexp_matches(pg_get_expr(c.relpartbound, c.oid), 'FOR VALUES IN \\((.*?)\\)'))[1] as partitionlistvalues,
(regexp_matches(pg_get_expr(c.relpartbound, c.oid), 'FOR VALUES FROM \\(([0-9]+)\\) TO \\(([0-9]+)\\)'))[1] as partitionrangestart,
(regexp_matches(pg_get_expr(c.relpartbound, c.oid), 'FOR VALUES FROM \\(([0-9]+)\\) TO \\(([0-9]+)\\)'))[2] as partitionrangeend,
case when pg_get_expr(c.relpartbound, c.oid) = 'DEFAULT' then 't' else 'f' end as partitionisdefault,
(regexp_matches(pg_get_expr(c.relpartbound, c.oid), '.+'))[1] as partitionboundary,
case when pc.reltablespace = 0 then 'pg_default' else (select spcname from pg_tablespace where oid = pc.reltablespace) end as parenttablespace,
case when c.reltablespace = 0 then 'pg_default' else (select spcname from pg_tablespace where oid = c.reltablespace) end as partitiontablespace,
o.statime, pg_total_relation_size(c.oid) as used_space
from pg_class c inner join pg_stat_last_operation o on o.objid = c.oid and o.staactionname = 'CREATE'
left join pg_namespace n on n.oid = c.relnamespace left join pg_inherits inh on inh.inhrelid = c.oid
left join pg_partitioned_table pt on inh.inhparent = pt.partrelid left join pg_class pc on pc.oid = pt.partrelid
left join pg_inherits pinh on pinh.inhrelid = pc.oid left join pg_partitioned_table ppt on pinh.inhparent = ppt.partrelid
left join pg_class ppc on ppc.oid = ppt.partrelid
where c.relispartition = 't' and n.nspname = 'dw_ods' and pc.relname = 't1';
3.4查询表的倾斜率
--dw_ods.t1的倾斜率
select max(c) as maxsegrows, min(c) as minsegrows, substr((max(c)-min(c))*100.0/max(c)||'',0,8) as percentagedifferencebetween
from (select count(*) c, gp_segment_id from dw_ods.t1 group by 2) as a;