本文简单介绍PG删除表的执行流程和实现原理,从源码角度学习。
1 关键数据结构
DropStmt结构体记录了待删除对象类型和处理方式等信息
typedef struct DropStmt
{
NodeTag type;
List *objects; /* list of names */ // 对象名,元素通过链表连接
ObjectType removeType; /* object type */ // drop类型
DropBehavior behavior; /* RESTRICT or CASCADE behavior */ //行为方式,严格/级联
bool missing_ok; /* skip error if object is missing? */ // 对象是否发生missing
bool concurrent; /* drop index concurrently? */ // 是否并行drop index
} DropStmt;
ObjectAddress结构体记录了任意数据库对象的信息,drop会更新系统表信息
/*
* An ObjectAddress represents a database object of any type.
*/
typedef struct ObjectAddress
{
Oid classId; /* Class Id from pg_class */
Oid objectId; /* OID of the object */
int32 objectSubId; /* Subitem within object (eg column), or 0 */
} ObjectAddress;
/*
* Deletion processing requires additional state for each ObjectAddress that
* it's planning to delete. For simplicity and code-sharing we make the
* ObjectAddresses code support arrays with or without this extra state.
*/
typedef struct
{
int flags; /* bitmask, see bit definitions below */
ObjectAddress dependee; /* object whose deletion forced this one */
} ObjectAddressExtra;
2 调试跟踪
postgres=# create table class (no int primary key, class_name text);
postgres=# drop table class ;
DEBUG: StartTransaction(1) name: unnamed; blockState: DEFAULT; state: INPROGRESS, xid/subid/cid: 0/1/0
LOG: statement: drop table class ;
函数堆栈:
(gdb) bt
#0 RemoveRelations (drop=0x188bb38) at tablecmds.c:1375
#1 0x0000000000906127 in ExecDropStmt (stmt=0x188bb38, isTopLevel=true) at utility.c:1984
#2 0x0000000000905b47 in ProcessUtilitySlow (pstate=0x1999048, pstmt=0x188be58, queryString=0x188b058 "drop table class ;",
context=PROCESS_UTILITY_TOPLEVEL, params=0x0, queryEnv=0x0, dest=0x188bf38, qc=0x7ffdf4801990) at utility.c:1750
#3 0x00000000009043c1 in standard_ProcessUtility (pstmt=0x188be58, queryString=0x188b058 "drop table class ;",
readOnlyTree=false, context=PROCESS_UTILITY_TOPLEVEL, params=0x0, queryEnv=0x0, dest=0x188bf38, qc=0x7ffdf4801990)
at utility.c:978
#4 0x00000000009038e8 in ProcessUtility (pstmt=0x188be58, queryString=0x188b058 "drop table class ;", readOnlyTree=false,
context=PROCESS_UTILITY_TOPLEVEL, params=0x0, queryEnv=0x0, dest=0x188bf38, qc=0x7ffdf4801990) at utility.c:527
#5 0x000000000090283e in PortalRunUtility (portal=0x1931e98, pstmt=0x188be58, isTopLevel=true, setHoldSnapshot=false,
dest=0x188bf38, qc=0x7ffdf4801990) at pquery.c:1147
#6 0x0000000000902a39 in PortalRunMulti (portal=0x1931e98, isTopLevel=true, setHoldSnapshot=false, dest=0x188bf38,
altdest=0x188bf38, qc=0x7ffdf4801990) at pquery.c:1304
#7 0x0000000000902027 in PortalRun (portal=0x1931e98, count=9223372036854775807, isTopLevel=true, run_once=true,
dest=0x188bf38, altdest=0x188bf38, qc=0x7ffdf4801990) at pquery.c:786
#8 0x00000000008fc14f in exec_simple_query (query_string=0x188b058 "drop table class ;") at postgres.c:1214
#9 0x00000000009003f8 in PostgresMain (argc=1, argv=0x7ffdf4801c20, dbname=0x18b6c48 "postgres", username=0x18b6c28 "postgres")
at postgres.c:4486
#10 0x0000000000851ca3 in BackendRun (port=0x18ade30) at postmaster.c:4506
#11 0x0000000000851629 in BackendStartup (port=0x18ade30) at postmaster.c:4228
#12 0x000000000084dd48 in ServerLoop () at postmaster.c:1745
#13 0x000000000084d629 in PostmasterMain (argc=1, argv=0x1885ba0) at postmaster.c:1417
#14 0x000000000075ec02 in main (argc=1, argv=0x1885ba0) at main.c:209
3 源码分析
/*
* RemoveRelations
* Implements DROP TABLE, DROP INDEX, DROP SEQUENCE, DROP VIEW,
* DROP MATERIALIZED VIEW, DROP FOREIGN TABLE
*/
void
RemoveRelations(DropStmt *drop)
{
ObjectAddresses *objects;
char relkind;
ListCell *cell;
int flags = 0;
LOCKMODE lockmode = AccessExclusiveLock; // DDL 操作
/* DROP CONCURRENTLY uses a weaker lock, and has some restrictions */
// 如果采用并行drop操作,则需要持有一把弱锁,还有一些其他限制
if (drop->concurrent)
{
/*
* Note that for temporary relations this lock may get upgraded later
* on, but as no other session can access a temporary relation, this
* is actually fine.
*/
lockmode = ShareUpdateExclusiveLock; // 共享更新排他锁 4号事务锁
Assert(drop->removeType == OBJECT_INDEX); // 要求drop类型为 index
if (list_length(drop->objects) != 1) // 不支持drop多对象
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("DROP INDEX CONCURRENTLY does not support dropping multiple objects")));
if (drop->behavior == DROP_CASCADE) // 不支持级联并行删除index
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("DROP INDEX CONCURRENTLY does not support CASCADE")));
}
/*
* First we identify all the relations, then we delete them in a single
* performMultipleDeletions() call. This is to avoid unwanted DROP
* RESTRICT errors if one of the relations depends on another.
*/
// 首先确定drop类型: 表, 索引, 序列, 视图,物化视图 ,外部表
/* Determine required relkind */
switch (drop->removeType)
{
case OBJECT_TABLE:
relkind = RELKIND_RELATION;
break;
case OBJECT_INDEX:
relkind = RELKIND_INDEX;
break;
case OBJECT_SEQUENCE:
relkind = RELKIND_SEQUENCE;
break;
case OBJECT_VIEW:
relkind = RELKIND_VIEW;
break;
case OBJECT_MATVIEW:
relkind = RELKIND_MATVIEW;
break;
case OBJECT_FOREIGN_TABLE:
relkind = RELKIND_FOREIGN_TABLE;
break;
default:
elog(ERROR, "unrecognized drop object type: %d",
(int) drop->removeType);
relkind = 0; /* keep compiler quiet */
break;
}
/* Lock and validate each relation; build a list of object addresses */
objects = new_object_addresses(); // 用来构建删除对象 addres
案例调试结果:
RemoveRelations (drop=0x188bb38) at tablecmds.c:1311
1311 int flags = 0;
(gdb) n
1312 LOCKMODE lockmode = AccessExclusiveLock;
(gdb) n
1315 if (drop->concurrent)
(gdb) p *drop
$103 = {type = T_DropStmt, objects = 0x188bae8, removeType = OBJECT_TABLE, behavior = DROP_RESTRICT, missing_ok = false,
concurrent = false}
(gdb) n
1341 switch (drop->removeType)
(gdb) n
1344 relkind = RELKIND_RELATION;
(gdb) n
1345 break;
根据dropl类型进一步确定删除的对象。直接遍历 DropStmt结构体中所拥有的 objects,代码流程如下:
foreach(cell, drop->objects)
{
RangeVar *rel = makeRangeVarFromNameList((List *) lfirst(cell));
Oid relOid;
ObjectAddress obj;
struct DropRelationCallbackState state;
/*
* These next few steps are a great deal like relation_openrv, but we
* don't bother building a relcache entry since we don't need it.
*
* Check for shared-cache-inval messages before trying to access the
* relation. This is needed to cover the case where the name
* identifies a rel that has been dropped and recreated since the
* start of our transaction: if we don't flush the old syscache entry,
* then we'll latch onto that entry and suffer an error later.
*/
// 由于该表确定要删除,因此无需构建该relation的 relcache
// 同时在访问该relation前检查共享缓存是否有无效信息,已进行后续操作,可能在事务进行操作
// 前,该relation已经被删除且重建
AcceptInvalidationMessages();
/* Look up the appropriate relation using namespace search. */
state.relkind = relkind;
state.heapOid = InvalidOid;
state.partParentOid = InvalidOid;
state.concurrent = drop->concurrent;
// 根据rel 信息扫描命名空间确定该relation的oid信息
relOid = RangeVarGetRelidExtended(rel, lockmode, RVR_MISSING_OK,
RangeVarCallbackForDropRelation,
(void *) &state);
/* Not there? */
if (!OidIsValid(relOid)) // 未找到,下一个
{
DropErrorMsgNonExistent(rel, relkind, drop->missing_ok); // 发送NonExistent消息
continue;
}
/*
* Decide if concurrent mode needs to be used here or not. The
* relation persistence cannot be known without its OID.
*/
if (drop->concurrent &&
get_rel_persistence(relOid) != RELPERSISTENCE_TEMP)
{
Assert(list_length(drop->objects) == 1 &&
drop->removeType == OBJECT_INDEX);
flags |= PERFORM_DELETION_CONCURRENTLY;
}
/*
* Concurrent index drop cannot be used with partitioned indexes,
* either.
*/
// 不支持并发删除 分区索引
if ((flags & PERFORM_DELETION_CONCURRENTLY) != 0 &&
get_rel_relkind(relOid) == RELKIND_PARTITIONED_INDEX)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot drop partitioned index \"%s\" concurrently",
rel->relname)));
/* OK, we're ready to delete this one */
// 填充待删除对象ObjectAddress信息
obj.classId = RelationRelationId;
obj.objectId = relOid;
obj.objectSubId = 0;
add_exact_object_address(&obj, objects);
}
案例调试结果:
在命名空间OID= 2200 [public ]找到表明为 “class”在系统表中对应的reloid,class表对应的oid为 131114,并获取AccessExclusiveLock锁住该表,最后将其添加至objets链表中。
RelnameGetRelid (relname=0x188ba60 "class") at namespace.c:693
693 if (OidIsValid(relid))
(gdb) n
688 foreach(l, activeSearchPath) // 根据表明在 namespace 命名空间查找
(gdb) n
690 Oid namespaceId = lfirst_oid(l);
(gdb) n
692 relid = get_relname_relid(relname, namespaceId);
(gdb) s
get_relname_relid (relname=0x188ba60 "class", relnamespace=2200) at lsyscache.c:1858
1858 return GetSysCacheOid2(RELNAMENSP, Anum_pg_class_oid,
(gdb) s
GetSysCacheOid (cacheId=50, oidcol=1, key1=25737824, key2=2200, key3=0, key4=0) at syscache.c:1247
1247 tuple = SearchSysCache(cacheId, key1, key2, key3, key4);
...
1248 if (!HeapTupleIsValid(tuple))
(gdb) n
1250 result = heap_getattr(tuple, oidcol,
(gdb) p *tuple
$116 = {t_len = 172, t_self = {ip_blkid = {bi_hi = 0, bi_lo = 0}, ip_posid = 109}, t_tableOid = 1259,
t_data = 0x7f8e7f81f3e0}
(gdb) p *tuple->t_data
$117 = {t_choice = {t_heap = {t_xmin = 964, t_xmax = 0, t_field3 = {t_cid = 4, t_xvac = 4}},
t_datum = {datum_len_ = 964, datum_typmod = 0, datum_typeid = 4}}, t_ctid = {ip_blkid =
{bi_hi = 0, bi_lo = 0}, ip_posid = 109}, t_infomask2 = 32801, t_infomask = 10497, t_hoff = 32 ' ', t_bits = 0x7f8e7f81f3f7 "\377\377\377?"}
通过上述历程确定删除的对象,进入真正的删除环节,该操作定义在函数 performMultipleDeletions,是drop核心代码,其执行流程如下:
1)首先以RowExclusiveLock模式打开pg_depend表,该表记录所有relation的依赖关系;
2)遍历objects数组,在目标对象上获取deletion lock;
a: 并发drop索引,即有 PERFORM_DELETION_CONCURRENTLY标识,获取 ShareUpdateExclusiveLock;
b: 无 PERFORM_DELETION_CONCURRENTLY标识,获取AccessExclusiveLock。
3)根据pg_depend系统表,获取目标表涉及的依赖关系,并加入删除目标链表targetObjects;进入2),直至遍历完objects数组元素。
4)调用deleteObjectInList函数进行relation的真正删除,
/*
* performMultipleDeletions: Similar to performDeletion, but act on multiple
* objects at once.
*
* The main difference from issuing multiple performDeletion calls is that the
* list of objects that would be implicitly dropped, for each object to be
* dropped, is the union of the implicit-object list for all objects. This
* makes each check be more relaxed.
*/
void
performMultipleDeletions(const ObjectAddresses *objects,
DropBehavior behavior, int flags)
{
Relation depRel;
ObjectAddresses *targetObjects;
int i;
/* No work if no objects... */
if (objects->numrefs <= 0)
return;
/*
* We save some cycles by opening pg_depend just once and passing the
* Relation pointer down to all the recursive deletion steps.
*/
depRel = table_open(DependRelationId, RowExclusiveLock);
/*
* Construct a list of objects to delete (ie, the given objects plus
* everything directly or indirectly dependent on them). Note that
* because we pass the whole objects list as pendingObjects context, we
* won't get a failure from trying to delete an object that is internally
* dependent on another one in the list; we'll just skip that object and
* delete it when we reach its owner.
*/
targetObjects = new_object_addresses();
for (i = 0; i < objects->numrefs; i++)
{
const ObjectAddress *thisobj = objects->refs + i;
/*
* Acquire deletion lock on each target object. (Ideally the caller
* has done this already, but many places are sloppy about it.)
*/
AcquireDeletionLock(thisobj, flags);
findDependentObjects(thisobj,
DEPFLAG_ORIGINAL,
flags,
NULL, /* empty stack */
targetObjects,
objects,
&depRel);
}
/*
* Check if deletion is allowed, and report about cascaded deletes.
*
* If there's exactly one object being deleted, report it the same way as
* in performDeletion(), else we have to be vaguer.
*/
reportDependentObjects(targetObjects,
behavior,
flags,
(objects->numrefs == 1 ? objects->refs : NULL));
/* do the deed */
deleteObjectsInList(targetObjects, &depRel, flags);
/* And clean up */
free_object_addresses(targetObjects);
table_close(depRel, RowExclusiveLock);
}
调试结果:
(gdb) p targetObjects->refs[0]
$153 = {classId = 1259, objectId = 131119, objectSubId = 0}
(gdb) p targetObjects->refs[1]
$154 = {classId = 2606, objectId = 131120, objectSubId = 0}
(gdb) p targetObjects->refs[2]
$155 = {classId = 1259, objectId = 131118, objectSubId = 0}
(gdb) p targetObjects->refs[3]
$156 = {classId = 1259, objectId = 131117, objectSubId = 0}
(gdb) p targetObjects->refs[4]
$157 = {classId = 1247, objectId = 131115, objectSubId = 0}
(gdb) p targetObjects->refs[5]
$158 = {classId = 1247, objectId = 131116, objectSubId = 0}
(gdb) p targetObjects->refs[6]
$159 = {classId = 1259, objectId = 131114, objectSubId = 0}
(gdb) p targetObjects->extras[0]
$164 = {flags = 8, dependee = {classId = 2606, objectId = 131120, objectSubId = 0}}
(gdb) p targetObjects->extras[1]
$165 = {flags = 4, dependee = {classId = 1259, objectId = 131114, objectSubId = 0}}
(gdb) p targetObjects->extras[2]
$166 = {flags = 4, dependee = {classId = 1259, objectId = 131117, objectSubId = 0}}
(gdb) p targetObjects->extras[3]
$167 = {flags = 8, dependee = {classId = 1259, objectId = 131114, objectSubId = 0}}
(gdb) p targetObjects->extras[4]
$168 = {flags = 8, dependee = {classId = 1247, objectId = 131116, objectSubId = 0}}
(gdb) p targetObjects->extras[5]
$169 = {flags = 8, dependee = {classId = 1259, objectId = 131114, objectSubId = 0}}
postgres=# select * from pg_depend where refobjid = 131114;
DEBUG: StartTransaction(1) name: unnamed; blockState: DEFAULT; state: INPROGRESS, xid/subid/cid: 0/1/0
LOG: statement: select * from pg_depend where refobjid = 131114;
DEBUG: CommitTransaction(1) name: unnamed; blockState: STARTED; state: INPROGRESS, xid/subid/cid: 0/1/0
classid | objid | objsubid | refclassid | refobjid | refobjsubid | deptype
---------+--------+----------+------------+----------+-------------+---------
1247 | 131116 | 0 | 1259 | 131114 | 0 | i
1259 | 131117 | 0 | 1259 | 131114 | 0 | i
2606 | 131120 | 0 | 1259 | 131114 | 1 | a
1247 ---> pg_type
1259 ---> pg_class
2606 ---> pg_constraint
131118 --> pg_toast_131114_index
131119 --> class_pkey
deleteObjectsInList函数负责将上述的对象relation物理删除以及相关系统表的信息,涉及数据库和操作系统底层知识,下一小节深入学习分析。