参考文档:
PostgreSQL 9 Administration Cookbook (第二版)中文版
-- 对索引进行重命名
ALTER INDEX badly_named_index RENAME TO tablename_status_idx;
-- 处理使用双引号括起来的对象名 。在建表的时候,表名使用了双引号,则在查询的时候,也需要使用双引号。
CREATE TABLE "Myemp" AS SELECT * FROM emp;
mydb=# select count(*) from "Myemp";
count
-------
1
(1 row)
mydb=# \ds Myemp
Did not find any relation named "Myemp".
mydb=# \ds "Myemp";
Did not find any relation named ""Myemp"".
mydb=# \ds "Myemp"
Did not find any relation named ""Myemp"".
mydb=# select count(*) from Myemp;
ERROR: relation "myemp" does not exist
LINE 1: select count(*) from Myemp;
^
mydb=#
-- 好像还区分大小写 (使用引号了区分,没有使用引号了,不区分,真奇怪)
mydb=# \d "myemp"
Did not find any relation named ""myemp"".
mydb=# \d "Myemp"
Table "public.Myemp"
Column | Type | Collation | Nullable | Default
---------+---------+-----------+----------+---------
name | text | | |
salary | numeric | | |
age | integer | | |
cubicle | point | | |
mydb=#
mydb=# \d foo
Table "public.foo"
Column | Type | Collation | Nullable | Default
----------+---------+-----------+----------+---------
fooid | integer | | |
foosubid | integer | | |
fooname | text | | |
mydb=# \d FOO
Table "public.foo"
Column | Type | Collation | Nullable | Default
----------+---------+-----------+----------+---------
fooid | integer | | |
foosubid | integer | | |
fooname | text | | |
mydb=#
-- 引号里面的内容,是区分大小写的
将给定字符串返回成合适的引用形式,使它可以在一个SQL语句字符串中被用作一个标识符。
只有需要时才会加上引号(即,如果字符串包含非标识符字符或可能是大小写折叠的)。嵌入的引号会被正确地双写
mydb=# select quote_ident('Myemp');
quote_ident
-------------
"Myemp"
(1 row)
mydb=# select quote_ident('myemp');
quote_ident
-------------
myemp
(1 row)
mydb=#
-- quote_ident在使用Pl/pgSQL函数中使用变量建表时,很有用。例如
EXECUTE 'CREATE TEMP TABLE ' || quote_ident(tablename) ||'(col1 INTEGER);' -- 这个看的不是太明白
-- 确保相同的名称和相同的列定义
CREATE SCHEMA S1;
CREATE SCHEMA S2;
CREATE TABLE S1.X (col1 INTEGER,col2 TEXT);
CREATE TABLE S2.X(col1 INTEGER,col3 NUMERIC);
-- 通过查询系统编目找到在不同表中使用不同类型定义的某个列。使用一个针对信息模式information schema的查询 (奇怪,没有找到X之类的表,难道是因为权限问题吗?)
SELECT
table_schema
,table_name
,column_name
,data_type
||coalesce(' ' || text(character_maximum_length), '')
||coalesce(' ' || text(numeric_precision), '')
||coalesce(',' || text(numeric_scale), '')
as data_type
FROM information_schema.columns
WHERE column_name IN
(SELECT
column_name
FROM
(SELECT
column_name
,data_type
,character_maximum_length
,numeric_precision
,numeric_scale
FROM information_schema.columns
WHERE table_schema = 'public'
GROUP BY
column_name
,data_type
,character_maximum_length
,numeric_precision
,numeric_scale
) derived
GROUP BY column_name
HAVING count(*) > 1
)
AND table_schema NOT IN ('information_schema', 'pg_catalog')
ORDER BY column_name
;
-- 查询所有名称相同,但定义不同的表 (没有string_agg这个函数,版本问题吗?)
SELECT
table_schema
,table_name
,column_name
,data_type
FROM information_schema.columns
WHERE table_name IN
(SELECT
table_name
FROM
(SELECT
table_schema
,table_name
,string_agg(' '||column_name||' '||data_type)
FROM information_schema.columns
GROUP BY
table_schema
,table_name
) def
GROUP BY
table_name
HAVING
count(*) > 1
)
ORDER BY
table_name
,table_schema
,column_name;
-- 创建函数,来对比两张表的定义
CREATE OR REPLACE FUNCTION diff_table_definition
(t1_schemaname text
,t1_tablename text
,t2_schemaname text
,t2_tablename text)
RETURNS TABLE
(t1_column_name text
,t1_data_type text
,t2_column_name text
,t2_data_type text
)
LANGUAGE SQL
as
$$
SELECT
t1.column_name
,t1.data_type
,t2.column_name
,t2.data_type
FROM
(SELECT column_name, data_type
FROM information_schema.columns
WHERE table_schema = $1
AND table_name = $2
) t1
FULL OUTER JOIN
(SELECT column_name, data_type
FROM information_schema.columns
WHERE table_schema = $3
AND table_name = $4
) t2
ON (t1.column_name = t2.column_name)
WHERE t1.column_name IS NULL OR t2.column_name IS NULL
;
$$;
-- 标记和删除重复数据
-- 通过查询 标记出重复的行,将重复的数据存放在另一个表中(数据量很大的时候,效率较差)
SELECT *
FROM cust
WHERE customerid IN
(SELECT customerid
FROM cust
GROUP BY customerid
HAVING count(*) > 1);
-- 通过ctid来删除重复的数据
begin; -- 开启一个事务
lock table new_cust in share row exclusive mode ; -- 锁表,避免dml操作
create temporary table dups_cust as select customerid,min(ctid) as min_ctid from new_cust group by customerid having count(*) >1; -- 定位重复记录,追踪ctid较小的值
delete from new_cust using dups_cust where new_cust.customerid = dups_cust.customerid and new_cust.ctid !=dups_cust.min_ctid ; -- 删除重复记录,保留ctid最小的值
commit ; -- 提交事务
vacuum new_cust ; -- 清理执行删除操作后的表
/*
DELETE FROM new_cust
WHERE ctid NOT IN
(SELECT min(ctid)
FROM new_cust
WHERE customer_id IN (4) --specify exact duplicate ids
GROUP BY customerid);
*/
/* -- 对于多个列存在重复的查询
SELECT *
FROM mytable
WHERE (col1, col2, … ,colN) IN
(SELECT col1, col2, … ,colN
FROM mytable
GROUP BY col1, col2, … ,colN
HAVING count(*) > 1);
*/
-- 创建一个局部索引
create unique index on partial_unique(customerid) where status = 'OPEN'; -- 创建局部索引,确保status=open的情况下customerid唯一 。
CREATE TABLE boxes (name text, position box);
INSERT INTO boxes VALUES ('First', box '((0,0), (1,1))');
INSERT INTO boxes VALUES ('Second', box '((2,0), (2,1))');
SELECT * FROM boxes;
ALTER TABLE boxes ADD EXCLUDE USING gist (position WITH &&); -- 会建立一个索引
mydb=# \d boxes
Table "public.boxes"
Column | Type | Collation | Nullable | Default
----------+------+-----------+----------+---------
name | text | | |
position | box | | |
Indexes:
"boxes_position_excl" EXCLUDE USING gist ("position" WITH &&)
mydb=#
postgres=# \c mydb
You are now connected to database "mydb" as user "postgres".
mydb=# select * from foo;
fooid | foosubid | fooname
-------+----------+---------
1 | 1 | Joe
1 | 2 | Ed
2 | 1 | Mary
(3 rows)
mydb=# alter table foo add exclude(fooid with =);
ERROR: could not create exclusion constraint "foo_fooid_excl"
DETAIL: Key (fooid)=(1) conflicts with key (fooid)=(1). -- 因为有重复的值,所以不能建立索引
^
mydb=# update foo set fooid=3 where fooname='Joe';
UPDATE 1
mydb=#
mydb=# alter table foo add exclude(fooid with =);
ALTER TABLE
mydb=# \d foo
Table "public.foo"
Column | Type | Collation | Nullable | Default
----------+---------+-----------+----------+---------
fooid | integer | | |
foosubid | integer | | |
fooname | text | | |
Indexes:
"foo_fooid_excl" EXCLUDE USING btree (fooid WITH =)
mydb=#
-- 为一组数据找到唯一主键
如果大于零,表示列中可区分值的估计个数。如果小于零,是可区分值个数除以行数的负值(当ANALYZE认为可区分值的数量会随着表增长而增加时采用负值的形式,
而如果认为列具有固定数量的可选值时采用正值的形式)。例如,-1表示一个唯一列,即其中可区分值的个数等于行数。
mydb=# analyze foo;
ANALYZE
mydb=# select attname,n_distinct from pg_stats where tablename='foo'; -- n_distinct为-1,表示该列是唯一的 。
attname | n_distinct
----------+------------
fooid | -1
foosubid | -0.666667
fooname | -1
(3 rows)
mydb=# select * from foo;
fooid | foosubid | fooname
-------+----------+---------
1 | 2 | Ed
2 | 1 | Mary
3 | 1 | Joe
(3 rows)
mydb=#
-- 生成测试数据
SELECT * FROM generate_series(1,5);
SELECT date(generate_series(now(), now() + '1 week', '1day'));
(random()*(2*10^9))::integer -- 生成随机interger值,可以通过select来产生
(random()*(9*10^18))::bigint -- 随机bigint
(random()*100.)::numeric(4,2); -- 随机numeric
repeat('1',(random()*40)::integer) -- 随机长度的字符串,有长度限制
substr('abcdefghijklmnopqrstuvwxyz',1, (random()*26)::integer) -- 随机长度的子串
(ARRAY['one','two','three'])[1+random()*3] -- 从字符串列表中随机选择字符串的函数
SELECT generate_series(1,10) as key ,(random()*100.)::numeric(4,2) ,repeat('1',(random()*25)::integer); -- 组合在一起,随机生成
SELECT generate_series(1,10) as key ,(random()*100.)::numeric(4,2) ,repeat('1',(random()*25)::integer) ORDER BY random(); -- 组合在一起,使用随机的顺序 (好像有问题)
-- 随机抽样数据
mydb=# select count(*) from t;
count
-------
10000
(1 row)
mydb=# select count(*) from t where random()<0.01;
count
-------
109
(1 row)
mydb=# select count(*) from t where random()<0.01;
count
-------
83
(1 row)
mydb=# select count(*) from t where random()<0.01;
count
-------
99
(1 row)
mydb=#
-- 导出数据
pg_dump –-exclude-table=MyBigTable > db.dmp -- 导出数据,排除表
pg_dump –-table=MyBigTable –schema-only > mybigtable.schema -- 只导出schema
psql -c '\copy (SELECT * FROM MyBigTable WHERE random() < 0.01) to mybigtable.dat' -- 随机导出采样数据
-- 导入数据
psql -f db.dmp -- 导入数据
psql -f mybigtable.schema -- 导入schema
psql -c '\copy mybigtable from mybigtable.dat' -- 导入随机采样的数据
-- 从电子表格导入数据
mydb=# \copy abc from abc.csv CSV HEADER -- 先要创建表,否则会报错,如果文件不在当前路径,则需要指定具体的文件路径
ERROR: relation "abc" does not exist
mydb=# create table abc (key integer,value varchar(8));
CREATE TABLE
mydb=# \copy abc from abc.csv CSV HEADER -- header命令,主要是为了忽略第一行
COPY 5
mydb=# select * from abc;
key | value
-----+-------
1 | a
2 | b
3 | c
4 | d
5 | e
(5 rows)
mydb=#
psql -c '\copy abc from abc.csv CSV HEADER'
-- pgloader (未测试,有空了再补充 )
END