元数据
查看所有表基本信息
- 所有表信息,不包含分区表的子表
-- List every user table in schema "public", excluding partition child tables.
-- A table counts as a child when pg_inherits has a row with it as inhrelid;
-- the anti-join (LEFT JOIN ... IS NULL) keeps only tables with no such row.
SELECT
    psut.relid,
    psut.relname,
    psut.schemaname
FROM pg_statio_user_tables AS psut
LEFT JOIN pg_inherits AS inh
    ON inh.inhrelid = psut.relid
WHERE psut.schemaname = 'public'
  AND inh.inhparent IS NULL
ORDER BY psut.relname;
- 所有表信息,包含分区表的子表
-- List every user table in schema "public", including partition child tables.
-- No join to pg_inherits is needed here: without the "inhparent IS NULL"
-- filter the join contributes no columns and could only duplicate rows.
SELECT
    psut.relid,
    psut.relname,
    psut.schemaname
FROM pg_statio_user_tables AS psut
WHERE psut.schemaname = 'public'
ORDER BY psut.relname;
查看表索引信息
包含索引的名称和索引的SQL定义语句
- 查看所有表的索引信息
-- Index name, definition, access method and flags for every index in schema "public".
SELECT
    idx.schemaname,
    idx.tablename,
    idx.indexname,
    idx.tablespace,
    idx.indexdef,
    am.amname,
    pi.indexrelid,
    pi.indnatts,
    pi.indisunique,
    pi.indisprimary,
    pi.indisclustered,
    dsc.description
FROM pg_indexes AS idx
INNER JOIN pg_stat_all_indexes AS st
    ON st.schemaname = idx.schemaname
   AND st.relname = idx.tablename
   AND st.indexrelname = idx.indexname
INNER JOIN pg_index AS pi
    ON pi.indexrelid = st.indexrelid
INNER JOIN pg_class AS cls
    ON cls.oid = st.indexrelid
INNER JOIN pg_am AS am
    ON am.oid = cls.relam
-- pg_description is keyed by (objoid, classoid, objsubid); matching on objoid
-- alone can pick up comments of unrelated objects that share the same OID.
LEFT JOIN pg_description AS dsc
    ON dsc.objoid = pi.indexrelid
   AND dsc.classoid = 'pg_class'::regclass
   AND dsc.objsubid = 0
WHERE idx.schemaname = 'public';
- 查看单表的索引信息
-- Index name, definition, access method and flags for the indexes of one table
-- ("table_name" in schema "public").
SELECT
    idx.schemaname,
    idx.tablename,
    idx.indexname,
    idx.tablespace,
    idx.indexdef,
    am.amname,
    pi.indexrelid,
    pi.indnatts,
    pi.indisunique,
    pi.indisprimary,
    pi.indisclustered,
    dsc.description
FROM pg_indexes AS idx
INNER JOIN pg_stat_all_indexes AS st
    ON st.schemaname = idx.schemaname
   AND st.relname = idx.tablename
   AND st.indexrelname = idx.indexname
INNER JOIN pg_index AS pi
    ON pi.indexrelid = st.indexrelid
INNER JOIN pg_class AS cls
    ON cls.oid = st.indexrelid
INNER JOIN pg_am AS am
    ON am.oid = cls.relam
-- pg_description is keyed by (objoid, classoid, objsubid); matching on objoid
-- alone can pick up comments of unrelated objects that share the same OID.
LEFT JOIN pg_description AS dsc
    ON dsc.objoid = pi.indexrelid
   AND dsc.classoid = 'pg_class'::regclass
   AND dsc.objsubid = 0
WHERE idx.schemaname = 'public'
  AND idx.tablename = 'table_name';
查询表的所有列信息
- 查看单表的列信息
-- Full column metadata for one table in schema "public".
SELECT *
FROM information_schema.columns
WHERE table_name = 'table_name'
  AND table_schema = 'public';
- 查看单表列名称,逗号分隔
-- Comma-separated column names of one table.
-- ORDER BY inside the aggregate makes the list follow the table's column order;
-- without it the concatenation order is unspecified.
SELECT string_agg(column_name, ',' ORDER BY ordinal_position)
FROM information_schema.columns
WHERE table_schema = 'public'
  AND table_name = 'table_name';
- 查看所有表的列信息
-- Full column metadata for every table in schema "public".
SELECT *
FROM information_schema.columns
WHERE table_schema = 'public';
- 查看所有表的列名称,逗号分隔
-- Comma-separated column names per table in schema "public".
-- ORDER BY inside the aggregate makes each list follow the table's column order.
SELECT
    string_agg(column_name, ',' ORDER BY ordinal_position),
    table_name
FROM information_schema.columns
WHERE table_schema = 'public'
GROUP BY table_name;
查看哪些表是AO表哪些是HEAP表
查询某个表是否为AO表,查询出来有记录就为AO表,无记录就是HEAP表
-- Returns a row (with compression settings) when the table is registered in
-- pg_appendonly; no row means it is not an append-optimized table.
SELECT
    relid::regclass AS table_name,
    compresslevel,
    compresstype,
    columnstore
FROM pg_appendonly
WHERE relid::regclass = 'bm_acct_loan'::regclass;
查询所有AO表信息
-- Every row of pg_appendonly, prefixed with the readable table name.
SELECT
    relid::regclass AS table_name,
    *
FROM pg_appendonly;
查看AO表的分布键
- 查询所有表的分布键
-- One row per (table, distribution-key column) for tables in schema "public".
SELECT
    col.attname AS distributed_str,
    pol.localoid::regclass AS table_name
FROM gp_distribution_policy AS pol
INNER JOIN pg_class AS rel
    ON rel.oid = pol.localoid
INNER JOIN pg_namespace AS ns
    ON ns.oid = rel.relnamespace
INNER JOIN pg_attribute AS col
    ON col.attrelid = pol.localoid
   AND col.attnum = ANY (pol.distkey)
WHERE ns.nspname = 'public';
- 查询所有表的分布键,逗号分隔
-- Distribution-key columns per table in schema "public", joined with commas.
-- The aggregate has no ORDER BY, so the column order inside the list is not guaranteed.
SELECT
    string_agg(col.attname, ',') AS distributed_str,
    pol.localoid::regclass AS table_name
FROM gp_distribution_policy AS pol
INNER JOIN pg_class AS rel
    ON rel.oid = pol.localoid
INNER JOIN pg_namespace AS ns
    ON ns.oid = rel.relnamespace
INNER JOIN pg_attribute AS col
    ON col.attrelid = pol.localoid
   AND col.attnum = ANY (pol.distkey)
WHERE ns.nspname = 'public'
GROUP BY pol.localoid::regclass;
3.按照Schema查看表类型、压缩类型、压缩等级、分布键
-- Per table of one schema: storage type (AO / Heap), compression type and level,
-- and the comma-separated distribution key.
-- Tables are matched by OID through pg_class/pg_namespace. Comparing
-- 'schema.table' text against regclass::varchar is unreliable because the
-- regclass text omits the schema for schemas on the search_path and quotes
-- identifiers that need quoting, so matches are silently lost.
SELECT
    tbl.table_catalog,
    tbl.table_schema,
    tbl.table_name,
    CASE WHEN ao.relid IS NOT NULL THEN 'AO' ELSE 'Heap' END AS table_type,
    ao.compresstype,
    ao.compresslevel,
    dist.distributed_key
FROM information_schema.tables AS tbl
INNER JOIN pg_namespace AS ns
    ON ns.nspname::text = tbl.table_schema::text
INNER JOIN pg_class AS cls
    ON cls.relnamespace = ns.oid
   AND cls.relname::text = tbl.table_name::text
LEFT JOIN pg_appendonly AS ao
    ON ao.relid = cls.oid
LEFT JOIN (
    SELECT
        pol.localoid,
        string_agg(col.attname, ',') AS distributed_key
    FROM gp_distribution_policy AS pol
    INNER JOIN pg_attribute AS col
        ON col.attrelid = pol.localoid
       AND col.attnum = ANY (pol.distkey)
    GROUP BY pol.localoid
) AS dist
    ON dist.localoid = cls.oid
WHERE tbl.table_schema = 'table_schema';
查看分区表信息
- 查看某个分区表的信息
-- Partitions of one partitioned table, sorted by partitionboundary descending.
SELECT tablename, partitiontablename, partitiontype, partitionboundary
FROM pg_partitions
WHERE tablename = 'table_name'
ORDER BY partitionboundary DESC;
修改表字段类型
-- Change a column's type to numeric(10,2); USING converts the existing values.
ALTER TABLE "public".table_name
    ALTER COLUMN column_name TYPE numeric(10,2)
    USING column_name::numeric(10,2);
-- Change a column's type to INT; USING converts the existing values.
ALTER TABLE "member"
    ALTER COLUMN imgfileid TYPE int
    USING imgfileid::int;
数据运维
启动和停止Greenplum数据库
- 启动数据库
gpstart
- 重启数据库
gpstop -r
- 仅重新加载配置文件,不中断服务
gpstop -u
- 在维护模式下启动,仅启动Master节点以执行维护或管理任务
gpstart -m
- 以快速模式停止集群,未完成的事务将回滚
gpstop -M fast
greenplum总体运行状态
gpstate
查看segment节点状态
-- All rows of the segment configuration catalog.
SELECT *
FROM gp_segment_configuration;
查看segment节点故障等历史信息
-- Segment configuration history, sorted descending by the first column.
SELECT *
FROM gp_configuration_history
ORDER BY 1 DESC;
检查segment instance同步状态
该命令会输出各节点实例的同步状态,各节点的状态为“synchronizing”时,如果有数据,表示segment instance正在同步,隔几分钟再做一次,如果有实例长时间都不能同步完成,需要报给DBA做进一步监控。各节点状态为“Synchronized”时,表示主备上的数据都是最新的。
gpstate -m
查看segment节点磁盘空闲情况
-- Free disk space as reported by gp_toolkit.gp_disk_free.
SELECT *
FROM gp_toolkit.gp_disk_free;
检查standby同步状态
该命令会输出Standby Master的同步状态,Standby Master状态为“synchronizing”时为不正常
gpstate -f
检查数据库日志
日志级别: PANIC > FATAL > ERROR
# Search one day's segment logs on every host for FATAL, PANIC and ERROR entries
# inside a time window.
# The log-file glob is kept INSIDE the quoted remote command so that it is
# expanded on each segment host; left outside the quotes it would be expanded
# by the local shell before gpssh runs.
gpssh -f /app/gpadmin/gpconfigs/seghosts -e "source /usr/local/greenplum-db/greenplum_path.sh && gplogfilter -b '2020-05-30 10:00:00' -e '2020-05-30 18:30:00' -f 'FATAL' /data/primary*/gpseg*/pg_log/gpdb-2020-05-30_000000.csv"
gpssh -f /app/gpadmin/gpconfigs/seghosts -e "source /usr/local/greenplum-db/greenplum_path.sh && gplogfilter -b '2020-05-30 10:00:00' -e '2020-05-30 18:30:00' -f 'PANIC' /data/primary*/gpseg*/pg_log/gpdb-2020-05-30_000000.csv"
gpssh -f /app/gpadmin/gpconfigs/seghosts -e "source /usr/local/greenplum-db/greenplum_path.sh && gplogfilter -b '2020-05-30 10:00:00' -e '2020-05-30 18:30:00' -f 'ERROR' /data/primary*/gpseg*/pg_log/gpdb-2020-05-30_000000.csv"
显示在系统表中被标记为掉线的Segment的信息
-- Segments listed by gp_toolkit.gp_pgdatabase_invalid.
SELECT *
FROM gp_toolkit.gp_pgdatabase_invalid;
系统情况
检查OS的日志中是否有出错告警
gpssh -f /app/gpadmin/gpconfigs/seghosts -e "cat /var/log/messages"
检查/tmp目录空间使用率
gpssh -f /app/gpadmin/gpconfigs/seghosts -e "df -h |grep /tmp"
检查GP数据目录空间使用率
gpssh -f /app/gpadmin/gpconfigs/seghosts -e "df -h |grep /data"
检查Raid卡状态
gpssh -f /app/gpadmin/gpconfigs/seghosts -e "omreport storage vdisk"
检查磁盘
是否有错误报出
# Physical-disk report per RAID controller, reduced to the failure/state lines.
# grep needs a pipe in front of it, and the pattern is an alternation
# ('fail' OR 'state'); the literal text "fail/state" would never match.
# 1. controller 0
gpssh -f /app/gpadmin/gpconfigs/seghosts -e "omreport storage pdisk controller=0 | grep -i -E 'fail|state'"
# 2. controller 1
gpssh -f /app/gpadmin/gpconfigs/seghosts -e "omreport storage pdisk controller=1 | grep -i -E 'fail|state'"
OS关键目录剩余空间监控
gpssh -f /app/gpadmin/gpconfigs/seghosts -e "df -h |grep tmp"
gpssh -f /app/gpadmin/gpconfigs/seghosts -e "df -h |grep sda1"
检查RAID卡电池状态
检查是否有充电中的节点,如果有,需要请系统管理员及时跟进充电状态,如果持续10小时未能完成充电,需要请硬件原厂服务判断是否需要更换RAID卡电池
gpssh -f /app/gpadmin/gpconfigs/seghosts -e "omreport storage battery"
时钟同步情况
使集群内所有机器的时间一致,如果时间不一致,需要立即同步。
gpssh -f /app/gpadmin/gpconfigs/seghosts -e "date"
Hosts文件检查
收集各台主机的/etc/hosts文件,检查各台主机hosts文件内容是否一致
gpssh -f /app/gpadmin/gpconfigs/seghosts -e "cat /etc/hosts"
系统参数核查
系统参数核查是否正确
gpssh -f /app/gpadmin/gpconfigs/seghosts -e "more /etc/project"
数据库维护
查看正在执行的SQL进程
- 正在活动的进程
-- Sessions that are currently running a statement.
-- The literal uses plain ASCII quotes; a typographic closing quote leaves the string unterminated.
SELECT *
FROM pg_stat_activity
WHERE state = 'active';
- 空闲的进程
-- Sessions that are connected but idle.
-- The literal uses plain ASCII quotes; a typographic opening quote is not a string delimiter.
SELECT *
FROM pg_stat_activity
WHERE state = 'idle';
Kill正在执行的SQL进程
- 这个函数只能取消(cancel)SELECT查询,对UPDATE、DELETE等DML不生效
-- Cancel the statement running in the given backend; replace the placeholder with the backend pid.
SELECT
    pg_cancel_backend(进程pid);
- 可以kill 各种DML(SELECT,UPDATE,DELETE,DROP)操作
-- Terminate the given backend; replace the placeholder with the backend pid.
-- The placeholder names a process pid (same as the pg_cancel_backend example), not a thread id.
SELECT
    pg_terminate_backend(进程pid);
资源队列检查
查看greenplum资源队列状态
-- Resource queue status from gp_toolkit.
SELECT *
FROM gp_toolkit.gp_resqueue_status;
查看Greenplum资源队列锁
-- Resource-queue locks whose lorwaiting flag is 'true'.
SELECT *
FROM gp_toolkit.gp_locks_on_resqueue
WHERE lorwaiting = 'true';
查看Greenplum资源队列优先级
-- Statement priorities from gp_toolkit.gp_resq_priority_statement.
SELECT *
FROM gp_toolkit.gp_resq_priority_statement;
表数据重新分布
不会改变分布键,会清理过期空间
-- Reorganize the table's data while keeping its current distribution policy.
ALTER TABLE table_name
    SET WITH (REORGANIZE = true);
指定新的分布键,会清理过期空间
-- Reorganize the table's data under a new distribution key.
ALTER TABLE table_name
    SET WITH (REORGANIZE = true)
    DISTRIBUTED BY (column_name, column_name);
修改表分布策略为随机分布,但是不移动数据
-- Switch the distribution policy to random; REORGANIZE = false is passed so existing rows are not moved.
ALTER TABLE table_name
    SET WITH (REORGANIZE = false)
    DISTRIBUTED RANDOMLY;
查看数据库使用容量
Greenplum所有数据库大小(占用空间)
-- Human-readable size of every database.
SELECT
    datname,
    pg_size_pretty(pg_database_size(datname))
FROM pg_database;
Greenplum查看指定数据库大小(占用空间)
-- Human-readable size of one database.
SELECT
    pg_size_pretty(
        pg_database_size('postgres')
    );
查看表容量
- 查询分区表容量,优化容量大小为可读
-- Human-readable total size of all direct child partitions of one parent table.
-- The parent name is a plain ASCII-quoted literal; a typographic quote breaks the regclass cast.
SELECT pg_size_pretty(SUM(pg_relation_size(psut.relid)))
FROM pg_statio_user_tables AS psut
INNER JOIN pg_inherits AS inh
    ON inh.inhrelid = psut.relid
WHERE inh.inhparent = 'scheme.table_name'::regclass;
- 查询分区表容量
-- Total size in bytes of all direct child partitions of one parent table.
-- The parent name is a plain ASCII-quoted literal; a typographic quote breaks the regclass cast.
SELECT SUM(pg_relation_size(psut.relid)) AS raw_data_size
FROM pg_statio_user_tables AS psut
INNER JOIN pg_inherits AS inh
    ON inh.inhrelid = psut.relid
WHERE inh.inhparent = 'scheme.table_name'::regclass;
- 查询表容量,优化容量大小为可读
-- Human-readable size of one table as reported by pg_relation_size.
-- The table name is a plain ASCII-quoted literal; a typographic closing quote leaves it unterminated.
SELECT pg_size_pretty(pg_relation_size('scheme.table_name'));
- 查询表总容量(包含索引和TOAST),优化容量大小为可读
-- Human-readable size of one table as reported by pg_total_relation_size.
SELECT
    pg_size_pretty(
        pg_total_relation_size('scheme.table_name')
    );
- 记录某个时刻的表大小(快照)
-- Work table: one row per (schema, table) with its size and the unit label.
-- tab_size holds a unit label such as 'GB', so it is declared as text;
-- "unknown" is a pseudo-type and is not a valid storage type for a column.
CREATE TABLE all_tab
(
    sotdschemaname character varying,
    sotdtablename  character varying,
    pg_size_table  numeric,
    tab_size       text
)
WITH (OIDS=FALSE, APPENDONLY=TRUE, COMPRESSLEVEL=5, COMPRESSTYPE=ZLIB)
DISTRIBUTED RANDOMLY;
-- Work table: one row per child partition (relpartname) with its parent table
-- name (relname) and schema name (nspname).
CREATE TABLE pg_partition_tab (
    relname     name
  , relpartname name
  , nspname     name
)
WITH (OIDS=FALSE, APPENDONLY=TRUE, COMPRESSLEVEL=5, COMPRESSTYPE=ZLIB)
DISTRIBUTED RANDOMLY;
-- Snapshot table: per-table size figures plus the time the snapshot was taken
-- (snapshot_time is stored as a string).
CREATE TABLE gp_size_of_table_disk (
    sotdoid_x          integer
  , sotdsize           bigint
  , sotdtoastsize      bigint
  , sotdadditionalsize bigint
  , sotdschemaname     character varying
  , sotdtablename      character varying
  , snapshot_time      character varying
)
WITH (OIDS=FALSE)
DISTRIBUTED RANDOMLY;
-- Result table: per-table size plus run-wide total and table count.
-- NOTE(review): tab_size_unit is declared with the pseudo-type "unknown". Newer
-- PostgreSQL versions reject that as a column type; text looks like the intended
-- type. Confirm how tools_get_tab_size() populates this column before changing
-- it, because the function and this definition must agree on the type.
CREATE TABLE gp_table_size (
    sum_tab_tot_size  numeric
  , cnt_tab           bigint
  , sotdschemaname    character varying
  , sotdtablename     name
  , sum_pg_size_table numeric
  , tab_size_unit     unknown
  , seq_case          text
)
WITH (OIDS=FALSE, APPENDONLY=TRUE, COMPRESSLEVEL=5, COMPRESSTYPE=ZLIB)
DISTRIBUTED RANDOMLY;
CREATE OR REPLACE FUNCTION tools_get_tab_size() RETURNS integer AS
$BODY$
-- Purpose: rebuild a point-in-time snapshot of the size (in GB) of every table.
-- Usage:
--   1. Run the function:       select tools_get_tab_size();
--   2. Raw per-table snapshot: select * from gp_size_of_table_disk limit 10
--   3. Aggregated result:      select * from gp_table_size order by 5 desc
-- Returns: 0 on success; 1 when any statement fails. The failure is reported as
--          a WARNING carrying the original error text instead of being hidden.
begin
    -- Start from empty work tables so that each run is a fresh snapshot.
    truncate table all_tab;
    truncate table pg_partition_tab;
    truncate table gp_size_of_table_disk;
    truncate table gp_table_size;

    -- Map every inheritance child (partition) to its direct parent table.
    -- The names come from pg_class: pg_inherits itself only stores OIDs.
    insert into pg_partition_tab
    (
        relname
        ,relpartname
        ,nspname
    )
    select
        parent.relname                 -- parent (partitioned) table name
        ,child.relname as relpartname  -- child partition name
        ,ns.nspname                    -- schema of the child, which is what the lookup below matches on
    from pg_inherits inh
    inner join pg_class parent on parent.oid = inh.inhparent
    inner join pg_class child on child.oid = inh.inhrelid
    left join pg_namespace ns on ns.oid = child.relnamespace
    ;

    -- Copy the current gp_toolkit size view and stamp it with the snapshot time.
    insert into gp_size_of_table_disk
        (sotdoid_x, sotdsize, sotdtoastsize, sotdadditionalsize, sotdschemaname, sotdtablename, snapshot_time)
    select
        sotdoid::int
        ,sotdsize
        ,sotdtoastsize
        ,sotdadditionalsize
        ,sotdschemaname::varchar
        ,sotdtablename::varchar
        ,to_char(now(), 'YYYYMMDDhh24miss')
    from gp_toolkit.gp_size_of_table_disk;

    -- Size per table in GB, computed from the byte count directly. Parsing the
    -- text of pg_size_pretty() loses precision and yields NULL for any unit that
    -- is not explicitly handled (for example TB).
    insert into all_tab (sotdschemaname, sotdtablename, pg_size_table, tab_size)
    select
        sized.sotdschemaname
        ,sized.sotdtablename
        ,sized.total::numeric / 1024 / 1024 / 1024
        ,'GB'
    from (
        select
            sotdschemaname
            ,sotdtablename
            ,sum(sotdsize) as total
        from gp_size_of_table_disk
        group by sotdschemaname, sotdtablename
    ) sized;

    -- Roll child partitions up into their parent and keep one row per table
    -- (seq_case = '1'); child rows are tagged '2' and filtered out.
    -- sum_tab_tot_size is the grand total divided by 1024 (GB -> TB).
    insert into gp_table_size
    (
        sum_tab_tot_size
        ,cnt_tab
        ,sotdschemaname
        ,sotdtablename
        ,sum_pg_size_table
        ,tab_size_unit
        ,seq_case
    )
    select
        sum(tt.sum_pg_size_table) over () / 1024
        ,count(1) over ()
        ,tt.sotdschemaname
        ,tt.sotdtablename
        ,tt.sum_pg_size_table
        ,'GB'  -- plain literal in the outermost select list, so it adopts the target column's type
        ,tt.seq_case
    from (
        select
            t.sotdschemaname
            ,coalesce(p.relname, t.sotdtablename) as sotdtablename
            ,sum(t.pg_size_table::numeric) over (
                partition by t.sotdschemaname, coalesce(p.relname, t.sotdtablename)
            ) as sum_pg_size_table
            ,case when coalesce(p.relname, t.sotdtablename) = t.sotdtablename then '1'
                  when t.sotdtablename is null then '1'
                  else '2'
             end as seq_case
        from all_tab t
        left join pg_partition_tab p
            on p.nspname = t.sotdschemaname
           and p.relpartname = t.sotdtablename
    ) tt
    where tt.seq_case = '1'
    ;

    return 0;
exception when others then
    -- Keep the "return 1 on failure" contract, but surface the cause.
    raise warning 'tools_get_tab_size failed: % (SQLSTATE %)', sqlerrm, sqlstate;
    return 1;
end;
$BODY$ LANGUAGE plpgsql VOLATILE EXECUTE ON ANY COST 100.0;
查看表是否需要ANALYZE
此视图显示没有统计信息的表,因此可能需要在表上运行分析。
-- All rows of gp_toolkit.gp_stats_missing.
SELECT *
FROM gp_toolkit.gp_stats_missing;
-- Only the rows whose smisize flag is 'f'.
-- The literal uses plain ASCII quotes; a typographic closing quote leaves the string unterminated.
SELECT *
FROM gp_toolkit.gp_stats_missing
WHERE smisize = 'f';
收集统计分析信息 ANALYZE TABLE_NAME
-- Collect statistics for a single table.
ANALYZE table_name;
查看表膨胀情况
这个视图显示了具有膨胀的常规堆存储表(给定表统计数据,磁盘上的实际页面数超过了预期的页面数)。膨胀的表需要一个VACUUM或一个VACUUM FULL,以回收被删除或废弃的行占用的磁盘空间。
VACUUM:逻辑回收空间,仅将删除行标记为可重新写入;VACUUM FULL:物理回收空间。
-- Tables reported by gp_toolkit.gp_bloat_diag.
SELECT *
FROM gp_toolkit.gp_bloat_diag;
查看数据分布和倾斜情况
-- Row count of one table per segment, ordered by the first column.
SELECT
    gp_segment_id,
    count(*)
FROM table_name
GROUP BY gp_segment_id
ORDER BY 1;
-- Row count per segment and its deviation from the average across segments,
-- smallest deviation first.
SELECT
    seg.gp_segment_id,
    seg.count_tatol,
    round(seg.count_tatol - avg(seg.count_tatol) OVER (), 0) AS diff_from_avg
FROM (
    SELECT
        gp_segment_id,
        count(*) AS count_tatol
    FROM table_name
    GROUP BY gp_segment_id
) AS seg
ORDER BY diff_from_avg;
SQL执行时间检查
该命令会输出当前正在执行的SQL信息,检查各语句的“query_start”字段中的时间是否在6个小时之前。
-- Per-session statement text, start time and elapsed time, for spotting long runners.
-- Uses the pid / query column names, matching the pg_stat_activity layout that
-- also provides the "state" column used by the other session queries in this file
-- (older releases called these columns procpid / current_query).
SELECT
    pid,
    usename,
    replace(query, E'\x09', '') AS current_query,  -- strip tab characters from the statement text
    waiting,
    date_trunc('seconds', query_start)::timestamp AS query_start,
    to_char(now() - query_start, 'HH24:MI:SS') AS duration,
    client_addr,
    client_port,
    application_name,
    xact_start
FROM pg_stat_activity;
Statistics 状态检查
-- Rows of gp_toolkit.gp_stats_missing whose smisize flag is 'f'.
SELECT *
FROM gp_toolkit.gp_stats_missing
WHERE smisize = 'f';
数据库参数核查
max_connections
max_prepared_transactions
gp_fault_action
gp_interconnect_setup_timeout
max_fsm_relations
max_fsm_pages
gp_vmem_protect_limit
work_mem
stats_queue_level
1. 查看参数
gpconfig -s param_name
2. 设置参数
gpconfig -c gp_enable_global_deadlock_detector -v on
重新建立数据库目录的索引
提高数据库目录性能
-- Rebuild the system catalog indexes of one database; replace the placeholder with its name.
REINDEX SYSTEM <databasename>;
备份和导入导出
导出所有函数(存储过程)
1.导出对应schema下的所有对象
pg_dump -U gpadmin dev_ifrs17 -Fc -v -s -n tpd_dws -f func20220810dwd.txt
2.获得函数list
pg_restore -l func20220810dwd.txt | grep FUNCTION > funcion_list0810
3.导出所有函数
pg_restore -L funcion_list0810 func20220810dwd.txt > function_dwd_0810.sql
并发数据备份
# Back up one database into the given directory.
# Long options take two dashes: --dbname, like --backup-dir.
gpbackup --dbname dw_szsbank_est_import --backup-dir /home/gpadmin/gpbackup/
并发数据恢复
gprestore --backup-dir /home/gpadmin/gpbackup --timestamp 20200603092641 --redirect-db dw_szsbank_est_import_restore --create-db
COPY方式导出导入数据
非超级用户只能运行这些类型COPY 命令:
- COPY FROM 命令,其中源为stdin
- COPY TO 命令,其中目标为 stdout
导出数据带表头
-- Export a query result to a server-side CSV file with a header row, using the
-- control character \001 as the field delimiter.
-- The file-name literal is closed before the options, and COPY TO a file needs
-- an absolute path; replace /path/to with the real directory.
COPY (SELECT * FROM table_name) TO '/path/to/xxx.csv' DELIMITER AS E'\001' HEADER CSV;
导出数据不带表头
-- Export a query result to a server-side CSV file without a header row, using
-- the control character \001 as the field delimiter.
-- The file-name literal is closed before the options, and COPY TO a file needs
-- an absolute path; replace /path/to with the real directory.
COPY (SELECT * FROM table_name) TO '/path/to/xxx.csv' DELIMITER AS E'\001' CSV;
copy导入数据
-- Load a \001-delimited CSV file into a table, reading empty fields as NULL.
-- The file-name literal is closed before the options; replace /path/to with the real directory.
-- Variant for a file whose first line is a header row:
COPY table_name FROM '/path/to/xxx.csv' DELIMITER AS E'\001' HEADER NULL AS '' CSV;
-- Variant for a file without a header row:
COPY table_name FROM '/path/to/xxx.csv' DELIMITER AS E'\001' NULL AS '' CSV;