postgres的表和数据

本文档介绍了PostgreSQL数据库中如何重命名索引、处理带引号的对象名,以及大小写敏感性。此外,讲解了创建临时表、查询和比较表结构、删除重复数据、创建局部索引的方法,并提供了数据抽样、导入导出数据的示例。内容涵盖了SQL查询、索引管理、事务处理等多个方面。
摘要由CSDN通过智能技术生成

参考文档:

PostgreSQL 9 Administration Cookbook (第二版)中文版

-- 对索引进行重命名

ALTER INDEX badly_named_index RENAME TO tablename_status_idx;

-- 处理使用双引号括起来的对象名 。在建表的时候,表名使用了双引号,则在查询的时候,也需要使用双引号。

CREATE TABLE "Myemp"  AS  SELECT * FROM emp;

mydb=# select count(*) from "Myemp";
 count 
-------
     1
(1 row)

mydb=# \ds Myemp
Did not find any relation named "Myemp".
mydb=# \ds "Myemp";
Did not find any relation named ""Myemp"".
mydb=# \ds "Myemp"
Did not find any relation named ""Myemp"".
mydb=# select count(*) from Myemp;
ERROR:  relation "myemp" does not exist
LINE 1: select count(*) from Myemp;
                             ^
mydb=# 

-- 好像还区分大小写 (使用引号了区分,没有使用引号了,不区分,真奇怪)

mydb=# \d "myemp"
Did not find any relation named ""myemp"".
mydb=# \d "Myemp"
                Table "public.Myemp"
 Column  |  Type   | Collation | Nullable | Default 
---------+---------+-----------+----------+---------
 name    | text    |           |          | 
 salary  | numeric |           |          | 
 age     | integer |           |          | 
 cubicle | point   |           |          | 

mydb=# 
mydb=# \d foo
                 Table "public.foo"
  Column  |  Type   | Collation | Nullable | Default 
----------+---------+-----------+----------+---------
 fooid    | integer |           |          | 
 foosubid | integer |           |          | 
 fooname  | text    |           |          | 

mydb=# \d FOO
                 Table "public.foo"
  Column  |  Type   | Collation | Nullable | Default 
----------+---------+-----------+----------+---------
 fooid    | integer |           |          | 
 foosubid | integer |           |          | 
 fooname  | text    |           |          | 

mydb=# 

-- 引号里面的内容,是区分大小写的
将给定字符串返回成合适的引用形式,使它可以在一个SQL语句字符串中被用作一个标识符。
只有需要时才会加上引号(即,如果字符串包含非标识符字符或可能是大小写折叠的)。嵌入的引号会被正确地双写

mydb=# select quote_ident('Myemp');
 quote_ident 
-------------
 "Myemp"
(1 row)

mydb=# select quote_ident('myemp');
 quote_ident 
-------------
 myemp
(1 row)

mydb=# 

-- quote_ident在使用Pl/pgSQL函数中使用变量建表时,很有用。例如
EXECUTE 'CREATE TEMP TABLE ' || quote_ident(tablename) ||'(col1 INTEGER);'    -- 这个看的不是太明白

-- 确保相同的名称和相同的列定义

CREATE SCHEMA S1;
CREATE SCHEMA S2;
CREATE TABLE S1.X (col1 INTEGER,col2 TEXT);
CREATE TABLE S2.X(col1 INTEGER,col3 NUMERIC);

-- 通过查询系统编目找到在不同表中使用不同类型定义的某个列。使用一个针对信息模式information schema的查询 (奇怪,没有找到X之类的表,难道是因为权限问题吗?)

SELECT
table_schema
,table_name
,column_name
,data_type
||coalesce(' ' || text(character_maximum_length), '')
||coalesce(' ' || text(numeric_precision), '')
||coalesce(',' || text(numeric_scale), '')
as data_type
FROM information_schema.columns
WHERE column_name IN
(SELECT
column_name
FROM
(SELECT
column_name
,data_type
,character_maximum_length
,numeric_precision
,numeric_scale
FROM information_schema.columns
WHERE table_schema = 'public'
GROUP BY
column_name
,data_type
,character_maximum_length
,numeric_precision
,numeric_scale
) derived
GROUP BY column_name
HAVING count(*) > 1
)
AND table_schema NOT IN ('information_schema', 'pg_catalog')
ORDER BY column_name
;

-- 查询所有名称相同,但定义不同的表 (没有string_agg这个函数,版本问题吗?)

SELECT
table_schema
,table_name
,column_name
,data_type
FROM information_schema.columns
WHERE table_name IN
(SELECT
table_name
FROM
(SELECT
table_schema
,table_name
,string_agg(' '||column_name||' '||data_type)
FROM information_schema.columns
GROUP BY
table_schema
,table_name
) def
GROUP BY
table_name
HAVING
count(*) > 1
)
ORDER BY
table_name
,table_schema
,column_name;

-- 创建函数,来对比两张表的定义

CREATE OR REPLACE FUNCTION diff_table_definition
(t1_schemaname text
,t1_tablename text
,t2_schemaname text
,t2_tablename text)
RETURNS TABLE
(t1_column_name text
,t1_data_type text
,t2_column_name text
,t2_data_type text
)
LANGUAGE SQL
as
$$
SELECT
t1.column_name
,t1.data_type
,t2.column_name
,t2.data_type
FROM
(SELECT column_name, data_type
FROM information_schema.columns
WHERE table_schema = $1
AND table_name = $2
) t1
FULL OUTER JOIN
(SELECT column_name, data_type
FROM information_schema.columns
WHERE table_schema = $3
AND table_name = $4
) t2
ON (t1.column_name = t2.column_name)
WHERE t1.column_name IS NULL OR t2.column_name IS NULL
;
$$;

-- 标记和删除重复数据

-- 通过查询 标记出重复的行,将重复的数据存放在另一个表中(数据量很大的时候,效率较差)

SELECT *
FROM cust
WHERE customerid IN
(SELECT customerid
FROM cust
GROUP BY customerid
HAVING count(*) > 1);

-- 通过ctid来删除重复的数据

begin; -- 开启一个事务 
lock table new_cust in share row exclusive mode ;  -- 锁表,避免dml操作 
create temporary table dups_cust as select customerid,min(ctid) as min_ctid from new_cust group by customerid having count(*) >1;  -- 定位重复记录,追踪ctid较小的值
delete from new_cust using dups_cust where new_cust.customerid = dups_cust.customerid and new_cust.ctid !=dups_cust.min_ctid ; -- 删除重复记录,保留ctid最小的值
commit ;  -- 提交事务
vacuum new_cust ;  -- 清理执行删除操作后的表 

/*
DELETE FROM new_cust
WHERE ctid NOT IN
(SELECT min(ctid)
FROM new_cust
WHERE customer_id IN (4) --specify exact duplicate ids
GROUP BY customerid);
*/
 
/*   -- 对于多个列存在重复的查询
SELECT *
FROM mytable
WHERE (col1, col2, … ,colN) IN
(SELECT col1, col2, … ,colN
FROM mytable
GROUP BY col1, col2, … ,colN
HAVING count(*) > 1);
*/

-- 创建一个局部索引

create unique index on partial_unique(customerid) where status = 'OPEN';   -- 创建局部索引,确保status=open的情况下customerid唯一 。

CREATE TABLE boxes (name text, position box);
INSERT INTO boxes VALUES ('First', box '((0,0), (1,1))');
INSERT INTO boxes VALUES ('Second', box '((2,0), (2,1))');
SELECT * FROM boxes;

ALTER TABLE boxes ADD EXCLUDE USING gist (position WITH &&); -- 会建立一个索引 

mydb=# \d boxes
               Table "public.boxes"
  Column  | Type | Collation | Nullable | Default 
----------+------+-----------+----------+---------
 name     | text |           |          | 
 position | box  |           |          | 
Indexes:
    "boxes_position_excl" EXCLUDE USING gist ("position" WITH &&)

mydb=# 

postgres=# \c mydb
You are now connected to database "mydb" as user "postgres".
mydb=# select * from foo;
 fooid | foosubid | fooname 
-------+----------+---------
     1 |        1 | Joe
     1 |        2 | Ed
     2 |        1 | Mary
(3 rows)


mydb=# alter table foo add exclude(fooid with =);
ERROR:  could not create exclusion constraint "foo_fooid_excl"
DETAIL:  Key (fooid)=(1) conflicts with key (fooid)=(1).     -- 因为有重复的值,所以不能建立索引 
                               ^
mydb=# update foo set fooid=3 where fooname='Joe';
UPDATE 1
mydb=# 

mydb=# alter table foo add exclude(fooid with =);
ALTER TABLE
mydb=# \d foo
                 Table "public.foo"
  Column  |  Type   | Collation | Nullable | Default 
----------+---------+-----------+----------+---------
 fooid    | integer |           |          | 
 foosubid | integer |           |          | 
 fooname  | text    |           |          | 
Indexes:
    "foo_fooid_excl" EXCLUDE USING btree (fooid WITH =)

mydb=# 

-- 为一组数据找到唯一主键
如果大于零,表示列中可区分值的估计个数。如果小于零,是可区分值个数除以行数的负值(当ANALYZE认为可区分值的数量会随着表增长而增加时采用负值的形式,
而如果认为列具有固定数量的可选值时采用正值的形式)。例如,-1表示一个唯一列,即其中可区分值的个数等于行数。

mydb=# analyze foo;
ANALYZE
mydb=# select attname,n_distinct from pg_stats where  tablename='foo';   -- n_distinct为-1,表示该列是唯一的 。
 attname  | n_distinct 
----------+------------
 fooid    |         -1
 foosubid |  -0.666667
 fooname  |         -1
(3 rows)

mydb=# select * from foo;
 fooid | foosubid | fooname 
-------+----------+---------
     1 |        2 | Ed
     2 |        1 | Mary
     3 |        1 | Joe
(3 rows)

mydb=# 

-- 生成测试数据

SELECT * FROM generate_series(1,5);
SELECT date(generate_series(now(), now() + '1 week', '1day'));
(random()*(2*10^9))::integer   -- 生成随机interger值,可以通过select来产生
(random()*(9*10^18))::bigint   -- 随机bigint 
(random()*100.)::numeric(4,2);  -- 随机numeric 
repeat('1',(random()*40)::integer)  -- 随机长度的字符串,有长度限制 
substr('abcdefghijklmnopqrstuvwxyz',1, (random()*26)::integer)   -- 随机长度的子串 
(ARRAY['one','two','three'])[1+random()*3]   -- 从字符串列表中随机选择字符串的函数 
SELECT generate_series(1,10) as key ,(random()*100.)::numeric(4,2) ,repeat('1',(random()*25)::integer);  -- 组合在一起,随机生成
SELECT generate_series(1,10) as key ,(random()*100.)::numeric(4,2) ,repeat('1',(random()*25)::integer) ORDER BY random();  -- 组合在一起,使用随机的顺序 (好像有问题)

-- 随机抽样数据

mydb=# select count(*) from t;
 count 
-------
 10000
(1 row)

mydb=# select count(*) from t where random()<0.01;
 count 
-------
   109
(1 row)

mydb=# select count(*) from t where random()<0.01;
 count 
-------
    83
(1 row)

mydb=# select count(*) from t where random()<0.01;
 count 
-------
    99
(1 row)

mydb=# 

-- 导出数据

pg_dump –-exclude-table=MyBigTable > db.dmp     -- 导出数据,排除表
pg_dump –-table=MyBigTable –schema-only > mybigtable.schema   -- 只导出schema 
psql -c '\copy (SELECT * FROM MyBigTable WHERE random() < 0.01) to mybigtable.dat'  -- 随机导出采样数据 

-- 导入数据

psql -f db.dmp   -- 导入数据 
psql -f mybigtable.schema   -- 导入schema 
psql -c '\copy mybigtable from mybigtable.dat'   -- 导入随机采样的数据 

-- 从电子表格导入数据

mydb=# \copy abc from abc.csv CSV HEADER    -- 先要创建表,否则会报错,如果文件不在当前路径,则需要指定具体的文件路径
ERROR:  relation "abc" does not exist
mydb=# create table abc (key integer,value varchar(8));
CREATE TABLE
mydb=# \copy abc from abc.csv CSV HEADER  -- header命令,主要是为了忽略第一行 
COPY 5
mydb=# select * from abc;
 key | value 
-----+-------
   1 | a
   2 | b
   3 | c
   4 | d
   5 | e
(5 rows)

mydb=# 

psql -c '\copy abc from abc.csv CSV HEADER'

-- pgloader (未测试,有空了再补充 )

END

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值