postgres的表和数据

最新推荐文章于 2024-08-21 15:26:42 发布

文档搬运工

最新推荐文章于 2024-08-21 15:26:42 发布

阅读量697

点赞数

分类专栏： Postgres SQL

本文链接：https://blog.csdn.net/xxzhaobb/article/details/112985489

版权

Postgres SQL 专栏收录该内容

39 篇文章 6 订阅

订阅专栏

本文档介绍了PostgreSQL数据库中如何重命名索引、处理带引号的对象名，以及大小写敏感性。此外，讲解了创建临时表、查询和比较表结构、删除重复数据、创建局部索引的方法，并提供了数据抽样、导入导出数据的示例。内容涵盖了SQL查询、索引管理、事务处理等多个方面。

摘要由CSDN通过智能技术生成

参考文档：

PostgreSQL 9 Administration Cookbook （第二版）中文版

-- 对索引进行重命名

ALTER INDEX badly_named_index RENAME TO tablename_status_idx;

-- 处理使用双引号括起来的对象名。在建表的时候，表名使用了双引号，则在查询的时候，也需要使用双引号。

CREATE TABLE "Myemp"  AS  SELECT * FROM emp;

mydb=# select count(*) from "Myemp";
 count 
-------
     1
(1 row)

mydb=# \ds Myemp
Did not find any relation named "Myemp".
mydb=# \ds "Myemp";
Did not find any relation named ""Myemp"".
mydb=# \ds "Myemp"
Did not find any relation named ""Myemp"".
mydb=# select count(*) from Myemp;
ERROR:  relation "myemp" does not exist
LINE 1: select count(*) from Myemp;
                             ^
mydb=#

-- 好像还区分大小写（使用引号了区分，没有使用引号了，不区分，真奇怪）

mydb=# \d "myemp"
Did not find any relation named ""myemp"".
mydb=# \d "Myemp"
                Table "public.Myemp"
 Column  |  Type   | Collation | Nullable | Default 
---------+---------+-----------+----------+---------
 name    | text    |           |          | 
 salary  | numeric |           |          | 
 age     | integer |           |          | 
 cubicle | point   |           |          | 

mydb=# 
mydb=# \d foo
                 Table "public.foo"
  Column  |  Type   | Collation | Nullable | Default 
----------+---------+-----------+----------+---------
 fooid    | integer |           |          | 
 foosubid | integer |           |          | 
 fooname  | text    |           |          | 

mydb=# \d FOO
                 Table "public.foo"
  Column  |  Type   | Collation | Nullable | Default 
----------+---------+-----------+----------+---------
 fooid    | integer |           |          | 
 foosubid | integer |           |          | 
 fooname  | text    |           |          | 

mydb=#

-- 引号里面的内容，是区分大小写的
将给定字符串返回成合适的引用形式，使它可以在一个SQL语句字符串中被用作一个标识符。
只有需要时才会加上引号（即，如果字符串包含非标识符字符或可能是大小写折叠的）。嵌入的引号会被正确地双写

mydb=# select quote_ident('Myemp');
 quote_ident 
-------------
 "Myemp"
(1 row)

mydb=# select quote_ident('myemp');
 quote_ident 
-------------
 myemp
(1 row)

mydb=#

-- quote_ident在使用Pl/pgSQL函数中使用变量建表时，很有用。例如
EXECUTE 'CREATE TEMP TABLE ' || quote_ident(tablename) ||'(col1 INTEGER);' -- 这个看的不是太明白

-- 确保相同的名称和相同的列定义

CREATE SCHEMA S1;
CREATE SCHEMA S2;
CREATE TABLE S1.X (col1 INTEGER,col2 TEXT);
CREATE TABLE S2.X(col1 INTEGER,col3 NUMERIC);

-- 通过查询系统编目找到在不同表中使用不同类型定义的某个列。使用一个针对信息模式information schema的查询（奇怪，没有找到X之类的表，难道是因为权限问题吗？）

SELECT
table_schema
,table_name
,column_name
,data_type
||coalesce(' ' || text(character_maximum_length), '')
||coalesce(' ' || text(numeric_precision), '')
||coalesce(',' || text(numeric_scale), '')
as data_type
FROM information_schema.columns
WHERE column_name IN
(SELECT
column_name
FROM
(SELECT
column_name
,data_type
,character_maximum_length
,numeric_precision
,numeric_scale
FROM information_schema.columns
WHERE table_schema = 'public'
GROUP BY
column_name
,data_type
,character_maximum_length
,numeric_precision
,numeric_scale
) derived
GROUP BY column_name
HAVING count(*) > 1
)
AND table_schema NOT IN ('information_schema', 'pg_catalog')
ORDER BY column_name
;

-- 查询所有名称相同，但定义不同的表（没有string_agg这个函数，版本问题吗？）

SELECT
table_schema
,table_name
,column_name
,data_type
FROM information_schema.columns
WHERE table_name IN
(SELECT
table_name
FROM
(SELECT
table_schema
,table_name
,string_agg(' '||column_name||' '||data_type)
FROM information_schema.columns
GROUP BY
table_schema
,table_name
) def
GROUP BY
table_name
HAVING
count(*) > 1
)
ORDER BY
table_name
,table_schema
,column_name;

-- 创建函数，来对比两张表的定义

CREATE OR REPLACE FUNCTION diff_table_definition
(t1_schemaname text
,t1_tablename text
,t2_schemaname text
,t2_tablename text)
RETURNS TABLE
(t1_column_name text
,t1_data_type text
,t2_column_name text
,t2_data_type text
)
LANGUAGE SQL
as
$$
SELECT
t1.column_name
,t1.data_type
,t2.column_name
,t2.data_type
FROM
(SELECT column_name, data_type
FROM information_schema.columns
WHERE table_schema = $1
AND table_name = $2
) t1
FULL OUTER JOIN
(SELECT column_name, data_type
FROM information_schema.columns
WHERE table_schema = $3
AND table_name = $4
) t2
ON (t1.column_name = t2.column_name)
WHERE t1.column_name IS NULL OR t2.column_name IS NULL
;
$$;

-- 标记和删除重复数据

-- 通过查询标记出重复的行，将重复的数据存放在另一个表中（数据量很大的时候，效率较差）

SELECT *
FROM cust
WHERE customerid IN
(SELECT customerid
FROM cust
GROUP BY customerid
HAVING count(*) > 1);

-- 通过ctid来删除重复的数据

begin； -- 开启一个事务 
lock table new_cust in share row exclusive mode ;  -- 锁表，避免dml操作 
create temporary table dups_cust as select customerid,min(ctid) as min_ctid from new_cust group by customerid having count(*) >1;  -- 定位重复记录，追踪ctid较小的值
delete from new_cust using dups_cust where new_cust.customerid = dups_cust.customerid and new_cust.ctid !=dups_cust.min_ctid ; -- 删除重复记录，保留ctid最小的值
commit ;  -- 提交事务
vacuum new_cust ;  -- 清理执行删除操作后的表 

/*
DELETE FROM new_cust
WHERE ctid NOT IN
(SELECT min(ctid)
FROM new_cust
WHERE customer_id IN (4) --specify exact duplicate ids
GROUP BY customerid);
*/
 
/*   -- 对于多个列存在重复的查询
SELECT *
FROM mytable
WHERE (col1, col2, … ,colN) IN
(SELECT col1, col2, … ,colN
FROM mytable
GROUP BY col1, col2, … ,colN
HAVING count(*) > 1);
*/

-- 创建一个局部索引

create unique index on partial_unique(customerid) where status = 'OPEN';   -- 创建局部索引，确保status=open的情况下customerid唯一 。

CREATE TABLE boxes (name text, position box);
INSERT INTO boxes VALUES ('First', box '((0,0), (1,1))');
INSERT INTO boxes VALUES ('Second', box '((2,0), (2,1))');
SELECT * FROM boxes;

ALTER TABLE boxes ADD EXCLUDE USING gist (position WITH &&); -- 会建立一个索引 

mydb=# \d boxes
               Table "public.boxes"
  Column  | Type | Collation | Nullable | Default 
----------+------+-----------+----------+---------
 name     | text |           |          | 
 position | box  |           |          | 
Indexes:
    "boxes_position_excl" EXCLUDE USING gist ("position" WITH &&)

mydb=# 

postgres=# \c mydb
You are now connected to database "mydb" as user "postgres".
mydb=# select * from foo;
 fooid | foosubid | fooname 
-------+----------+---------
     1 |        1 | Joe
     1 |        2 | Ed
     2 |        1 | Mary
(3 rows)


mydb=# alter table foo add exclude(fooid with =);
ERROR:  could not create exclusion constraint "foo_fooid_excl"
DETAIL:  Key (fooid)=(1) conflicts with key (fooid)=(1).     -- 因为有重复的值，所以不能建立索引 
                               ^
mydb=# update foo set fooid=3 where fooname='Joe';
UPDATE 1
mydb=# 

mydb=# alter table foo add exclude(fooid with =);
ALTER TABLE
mydb=# \d foo
                 Table "public.foo"
  Column  |  Type   | Collation | Nullable | Default 
----------+---------+-----------+----------+---------
 fooid    | integer |           |          | 
 foosubid | integer |           |          | 
 fooname  | text    |           |          | 
Indexes:
    "foo_fooid_excl" EXCLUDE USING btree (fooid WITH =)

mydb=#

-- 为一组数据找到唯一主键
如果大于零，表示列中可区分值的估计个数。如果小于零，是可区分值个数除以行数的负值（当ANALYZE认为可区分值的数量会随着表增长而增加时采用负值的形式，
而如果认为列具有固定数量的可选值时采用正值的形式）。例如，-1表示一个唯一列，即其中可区分值的个数等于行数。

mydb=# analyze foo;
ANALYZE
mydb=# select attname,n_distinct from pg_stats where  tablename='foo';   -- n_distinct为-1，表示该列是唯一的 。
 attname  | n_distinct 
----------+------------
 fooid    |         -1
 foosubid |  -0.666667
 fooname  |         -1
(3 rows)

mydb=# select * from foo;
 fooid | foosubid | fooname 
-------+----------+---------
     1 |        2 | Ed
     2 |        1 | Mary
     3 |        1 | Joe
(3 rows)

mydb=#

-- 生成测试数据

SELECT * FROM generate_series(1,5);
SELECT date(generate_series(now(), now() + '1 week', '1day'));
(random()*(2*10^9))::integer   -- 生成随机interger值，可以通过select来产生
(random()*(9*10^18))::bigint   -- 随机bigint 
(random()*100.)::numeric(4,2);  -- 随机numeric 
repeat('1',(random()*40)::integer)  -- 随机长度的字符串，有长度限制 
substr('abcdefghijklmnopqrstuvwxyz',1, (random()*26)::integer)   -- 随机长度的子串 
(ARRAY['one','two','three'])[1+random()*3]   -- 从字符串列表中随机选择字符串的函数 
SELECT generate_series(1,10) as key ,(random()*100.)::numeric(4,2) ,repeat('1',(random()*25)::integer);  -- 组合在一起，随机生成
SELECT generate_series(1,10) as key ,(random()*100.)::numeric(4,2) ,repeat('1',(random()*25)::integer) ORDER BY random();  -- 组合在一起，使用随机的顺序 （好像有问题）

-- 随机抽样数据

mydb=# select count(*) from t;
 count 
-------
 10000
(1 row)

mydb=# select count(*) from t where random()<0.01;
 count 
-------
   109
(1 row)

mydb=# select count(*) from t where random()<0.01;
 count 
-------
    83
(1 row)

mydb=# select count(*) from t where random()<0.01;
 count 
-------
    99
(1 row)

mydb=#

-- 导出数据

pg_dump –-exclude-table=MyBigTable > db.dmp     -- 导出数据，排除表
pg_dump –-table=MyBigTable –schema-only > mybigtable.schema   -- 只导出schema 
psql -c '\copy (SELECT * FROM MyBigTable WHERE random() < 0.01) to mybigtable.dat'  -- 随机导出采样数据

-- 导入数据

psql -f db.dmp   -- 导入数据 
psql -f mybigtable.schema   -- 导入schema 
psql -c '\copy mybigtable from mybigtable.dat'   -- 导入随机采样的数据

-- 从电子表格导入数据

mydb=# \copy abc from abc.csv CSV HEADER    -- 先要创建表，否则会报错，如果文件不在当前路径，则需要指定具体的文件路径
ERROR:  relation "abc" does not exist
mydb=# create table abc (key integer,value varchar(8));
CREATE TABLE
mydb=# \copy abc from abc.csv CSV HEADER  -- header命令，主要是为了忽略第一行 
COPY 5
mydb=# select * from abc;
 key | value 
-----+-------
   1 | a
   2 | b
   3 | c
   4 | d
   5 | e
(5 rows)

mydb=# 

psql -c '\copy abc from abc.csv CSV HEADER'

-- pgloader （未测试，有空了再补充）

END