1、数据库连接
conn = psycopg2.connect(database='test',user='test', password='123456', host='127.0.0.1', port='5432')
cur = conn.cursor()
pg数据库查询大批量数据时,可以使用数据库连接池
# 批量查询大小
batch_size = 10000
# 使用数据库连接池,使用普通的连接方法运行貌似也会内存飙升,因此改为了连接池
simple_conn_pool = psycopg2.pool.SimpleConnectionPool(minconn=1, maxconn=5, database="test", user="test", password="123456", host="127.0.0.1", port="5432")
# 从数据库连接池获取连接
conn = simple_conn_pool.getconn()
# 自动提交事务设为false
conn.autocommit = False
# 创建游标,这里传入name参数,会返回一个服务端游标否则返回的是客户端游标fenci(rows)
cursor = conn.cursor('cursorname')
# 首先查询全量数据
cursor.execute("SELECT * FROM test ")
count = 0
# 开始时间
start_time = datetime.now()
while True:
count = count + 1
# 每次获取时会从上次游标的位置开始移动size个位置,返回size条数据
data = cursor.fetchmany(batch_size)
# 数据为空的时候中断循环
if not data:
break
print('获取%s到%s数据成功' % ((count - 1) * batch_size, count * batch_size))
print('fetchmany获取全量数据所用时间:', (datetime.now() - start_time).seconds)
2、数据库数据预处理常用语句
查询、插入、更新、删除
SELECT DISTINCT 列名称 FROM 表名称;#返回唯一值
SELECT 列名称 FROM 表名称 ORDER BY 列名称 DESC/ASC; #降序/升序
INSERT INTO 表名称 VALUES (值1, 值2,....)
INSERT INTO table_name (列1, 列2,...) VALUES (值1, 值2,....)
UPDATE 表名称 SET 列名称 = 新值 WHERE 列名称 = 某值; #更新数据
UPDATE 表名称 SET 列名称 = REPLACE(列名称, ']', ''); #替换某字段中的某个字符
UPDATA 表名称 SET 列名称=regexp_replace(列名称,'((?<=的).*?(?=纠纷))',''); #正则替换某字段中的某个字符
UPDATA 表名称 SET 列名称=regexp_replace(列名称,'^[^\u4e00-\u9fa5a-zA-Z0-9]+','\1','g'); #去首字非中文,非数字,非字母
ALTER TABLE 表名称 RENAME 原列名称 to 新列名称; #重命名字段
ALTER TABLE 表名称 ADD 新列名称 字段类型; #增加字段
ALTER TABLE 表名称 DROP COLUMN 列名称; #删除某字段
DELETE FROM 表名称 WHERE 列名称 = 值; #删除某行
create table testa as select * from test; -此方式即有表结构又包含数据
create table testb (like test); --此方式仅创建表结构,没有数据
左连接
CREATE TABLE test AS select a.*,b.AH from test1 a LEFT JOIN test2 b ON a.TITLE =b.TITLE; 两表根据TITLE左连接
update test1 b set typeid = a.typeid from test a where a.id_= b.id_; 两表连接更新数据
去重复
DELETE from 表名称 WHERE (列名称) IN ( SELECT 列名称 FROM 表名称 GROUP BY 列名称 HAVING COUNT(列名称) > 1) AND ctid NOT IN (SELECT MIN(ctid) FROM 表名称 GROUP BY 列名称 HAVING COUNT(*) > 1);
3、数据库常用查询
--查看客户端版本
psql --version
--查看数据库
select * from pg_database;
--查看表空间
select * from pg_tablespace;
--查看语言
select * from pg_language;
--查看角色用户
select * from pg_user;
select * from pg_shadow;
select * from pg_roles;
--查看会话进程
select * from pg_stat_activity;
--查看表
SELECT * FROM pg_tables where schemaname = 'public';
--查看表字段
select * from information_schema.columns where table_schema = 'public' and table_name = 'pf_vip_org';
--查看视图
select * from pg_views where schemaname = 'public';
select * from information_schema.views where table_schema = 'public';
--查看触发器
select * from information_schema.triggers;
--删除触发器
drop trigger 触发器 on 表名称;
--查看序列
select * from information_schema.sequences where sequence_schema = 'public';
--查看约束
select * from pg_constraint where contype = 'p'
--u unique,p primary,f foreign,c check,t trigger,x exclusion
select a.relname as table_name,b.conname as constraint_name,b.contype as constraint_type from pg_class a,pg_constraint b where a.oid = b.conrelid and a.relname = 'cc';
--查看索引
select * from pg_indexes where tablename='表名称';
--删除索引
drop index 索引;
--查看表上存在哪些索引以及大小
select relname,n.amname as index_type from pg_class m,pg_am n where m.relam = n.oid and m.oid in (
select b.indexrelid from pg_class a,pg_index b where a.oid = b.indrelid and a.relname = 'cc');
SELECT c.relname,c2.relname, c2.relpages*8 as size_kb
FROM pg_class c, pg_class c2, pg_index i
WHERE c.relname = 'cc' AND
c.oid = i.indrelid AND
c2.oid = i.indexrelid
ORDER BY c2.relname;
--查看索引定义
select b.indexrelid from pg_class a,pg_index b where a.oid = b.indrelid and a.relname = 'cc';
select pg_get_indexdef(b.indexrelid);
--查看过程函数定义
select oid,* from pg_proc where proname = 'insert_platform_action_exist'; --oid = 24610
select * from pg_get_functiondef(24610);
--查看表大小(不含索引等信息)
select pg_relation_size('cc'); --368640 byte
select pg_size_pretty(pg_relation_size('cc')) --360 kB
--查看DB大小
select pg_size_pretty(pg_database_size('smiletao')); --12M
--查看服务器DB运行状态
[postgres@eyar ~]$ pg_ctl status -D $PGDATA
pg_ctl: server is running (PID: 2373)
/home/postgres/bin/postgres "-D" "/database/pgdata"
--查看每个DB的使用情况(读,写,缓存,更新,事务等)
select * from pg_stat_database
--查看索引的使用情况
select * from pg_stat_user_indexes;
--查看表所对应的数据文件路径与大小
SELECT pg_relation_filepath(oid), relpages FROM pg_class WHERE relname = 'empsalary';
--查看索引与相关字段及大小
SELECT n.nspname AS schema_name,
r.rolname as table_owner,
bc.relname AS table_name,
ic.relname AS index_name,
a.attname AS column_name,
bc.relpages*8 as index_size_kb
FROM pg_namespace n,
pg_class bc, -- base class
pg_class ic, -- index class
pg_index i,
pg_attribute a, -- att in base
pg_roles r
WHERE bc.relnamespace = n.oid
and i.indrelid = bc.oid
and i.indexrelid = ic.oid
and bc.relowner = r.oid
and i.indkey[0] = a.attnum
and i.indnatts = 1
and a.attrelid = bc.oid
and n.nspname = 'public'
and bc.relname = 'cc'
ORDER BY schema_name, table_name, index_name, attname;
--查看PG锁
select * from pg_locks;
备注:relpages*8 是实际所占磁盘大小
--查看表空间大小
select pg_tablespace_size('pg_default');
--查看序列与表的对应关系
WITH fq_objects AS (SELECT c.oid,c.relname AS fqname ,
c.relkind, c.relname AS relation
FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace ),
sequences AS (SELECT oid,fqname FROM fq_objects WHERE relkind = 'S'),
tables AS (SELECT oid, fqname FROM fq_objects WHERE relkind = 'r' )
SELECT
s.fqname AS sequence,
'->' as depends,
t.fqname AS table
FROM
pg_depend d JOIN sequences s ON s.oid = d.objid
JOIN tables t ON t.oid = d.refobjid
WHERE
d.deptype = 'a' and t.fqname = 'cc'