本文将从底层角度窥探 DROP TABLE 的实现原理。上一小节的内容:根据待删除的表名确定其 Reloid 及其所有的依赖信息(如主键、约束等),可回顾《postgres Drop_table源码解析-1》。
1 源码分析
/*
 * deleteOneObject: remove one object on behalf of performDeletion.
 *
 * "object" identifies the target by classId / objectId / objectSubId and
 * "flags" is the PERFORM_DELETION_* bitmask.  *depRel is the pg_depend
 * relation, which is already open on entry.  For a concurrent drop it is
 * closed and reopened in here, so *depRel is overwritten with the new handle.
 */
static void
deleteOneObject(const ObjectAddress *object, Relation *depRel, int flags)
{
	const int	concurrently = flags & PERFORM_DELETION_CONCURRENTLY;
	ScanKeyData skey[3];
	int			nskeys = 2;
	SysScanDesc depScan;
	HeapTuple	depTup;

	/* Fire the DROP hook for the object that is about to go away. */
	InvokeObjectDropHookArg(object->classId, object->objectId,
							object->objectSubId, flags);

	/*
	 * A concurrent drop commits the current transaction inside the deletion
	 * subroutine, so pg_depend (*depRel) cannot stay open across the
	 * doDeletion() call; release it first.
	 */
	if (concurrently)
		table_close(*depRel, RowExclusiveLock);

	/*
	 * Remove the object itself; how that happens depends on the object type.
	 *
	 * This is done before the outgoing dependency links are cleaned up.  For
	 * the non-concurrent case either order would do, but the concurrent case
	 * requires this one: any transactional update made before doDeletion()
	 * would be committed immediately, which would be wrong if the deletion
	 * subsequently failed.
	 */
	doDeletion(object, flags);

	/* Get pg_depend back if it was released above. */
	if (concurrently)
		*depRel = table_open(DependRelationId, RowExclusiveLock);

	/*
	 * Next, get rid of the pg_depend rows that point from this object to
	 * other objects.  (Rows pointing at this object should already be gone.)
	 *
	 * The scan always matches on classid and objid.  objsubid is added as a
	 * third key only for a sub-object; for a whole object (subId = 0) the
	 * two-key scan also picks up the rows of all its sub-objects.
	 */
	ScanKeyInit(&skey[0],
				Anum_pg_depend_classid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(object->classId));
	ScanKeyInit(&skey[1],
				Anum_pg_depend_objid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(object->objectId));
	if (object->objectSubId != 0)
	{
		ScanKeyInit(&skey[2],
					Anum_pg_depend_objsubid,
					BTEqualStrategyNumber, F_INT4EQ,
					Int32GetDatum(object->objectSubId));
		nskeys = 3;
	}

	depScan = systable_beginscan(*depRel, DependDependerIndexId, true,
								 NULL, nskeys, skey);

	/* Delete every matching pg_depend row, i.e. the target's dependencies. */
	for (depTup = systable_getnext(depScan);
		 HeapTupleIsValid(depTup);
		 depTup = systable_getnext(depScan))
	{
		CatalogTupleDelete(*depRel, &depTup->t_self);
	}

	systable_endscan(depScan);

	/*
	 * Shared dependency references go as well; as above, subId = 0 covers the
	 * sub-objects too.
	 */
	deleteSharedDependencyRecordsFor(object->classId, object->objectId,
									 object->objectSubId);

	/*
	 * Comments, security labels and initial privileges attached to the object
	 * are dropped here, centrally, so that individual object types need not
	 * each remember to do it.
	 */
	DeleteComments(object->objectId, object->classId, object->objectSubId);
	DeleteSecurityLabel(object);
	DeleteInitPrivs(object);

	/*
	 * Bump the command counter so that everything done above is visible to
	 * the next deletion step.
	 */
	CommandCounterIncrement();
}
2 执行流程梳理
堆栈
deleteOneObject
  |-- doDeletion
        |-- index_drop 【索引drop】
        |     |-- RelationDropStorage [物理删除--调度]
        |     |-- CatalogTupleDelete
        |     |     |-- heap_delete [在pg_index系统表中标记删除,设置xmax并更新infomask等标识信息]
        |     |-- ReleaseSysCache [释放SearchSysCache1取得的系统缓存元组引用]
        |     |-- DeleteRelationTuple [将索引表所在pg_class中条目Entry删除]
        |-- heap_drop_with_catalog 【普通表】
              |-- RelationDropStorage [物理删除--调度]
              |-- RelationForgetRelation
              |-- DeleteRelationTuple [将该表所在pg_class中条目Entry删除]
|-- free_object_addresses [释放此过程中内存等资源]
|-- table_close [关闭对象]
3 案例调试结果
索引表的删除操作与普通表的删除操作逻辑大致相同,调试以删除索引表 class_pkey 为例:
(gdb) n
268 deleteOneObject(thisobj, depRel, flags);
(gdb) s
deleteOneObject (object=0x19d11f8, depRel=0x7ffdf4801190, flags=0) at dependency.c:1290
1290 InvokeObjectDropHookArg(object->classId, object->objectId,
(gdb) p *depRel
$171 = (Relation) 0x7f8e7f81f588
(gdb) p **depRel
$172 = {rd_node = {spcNode = 1663, dbNode = 13835, relNode = 2608}, rd_smgr = 0x198dce8, rd_refcnt = 1, rd_backend = -1,
rd_islocaltemp = false, rd_isnailed = false, rd_isvalid = true, rd_indexvalid = false, rd_statvalid = false,
rd_createSubid = 0, rd_newRelfilenodeSubid = 0, rd_firstRelfilenodeSubid = 0, rd_droppedSubid = 0, rd_rel = 0x7f8e7f81f798,
rd_att = 0x7f8e7f81f8a8, rd_id = 2608, rd_lockInfo = {lockRelId = {relId = 2608, dbId = 13835}}, rd_rules = 0x0,
rd_rulescxt = 0x0, trigdesc = 0x0, rd_rsdesc = 0x0, rd_fkeylist = 0x0, rd_fkeyvalid = false, rd_partkey = 0x0,
rd_partkeycxt = 0x0, rd_partdesc = 0x0, rd_pdcxt = 0x0, rd_partdesc_nodetached = 0x0, rd_pddcxt = 0x0,
rd_partdesc_nodetached_xmin = 0, rd_partcheck = 0x0, rd_partcheckvalid = false, rd_partcheckcxt = 0x0, rd_indexlist = 0x0,
rd_pkindex = 0, rd_replidindex = 0, rd_statlist = 0x0, rd_indexattr = 0x0, rd_keyattr = 0x0, rd_pkattr = 0x0,
rd_idattr = 0x0, rd_pubactions = 0x0, rd_options = 0x0, rd_amhandler = 3, rd_tableam = 0xaf8ae0 <heapam_methods>,
rd_index = 0x0, rd_indextuple = 0x0, rd_indexcxt = 0x0, rd_indam = 0x0, rd_opfamily = 0x0, rd_opcintype = 0x0,
rd_support = 0x0, rd_supportinfo = 0x0, rd_indoption = 0x0, rd_indexprs = 0x0, rd_indpred = 0x0, rd_exclops = 0x0,
rd_exclprocs = 0x0, rd_exclstrats = 0x0, rd_indcollation = 0x0, rd_opcoptions = 0x0, rd_amcache = 0x0, rd_fdwroutine = 0x0,
rd_toastoid = 0, pgstat_info = 0x1948660}
(gdb) p *object
$173 = {classId = 1259, objectId = 131119, objectSubId = 0}
// 131119 为 class_pkey 主键索引表的 OID
...
1401 index_drop(object->objectId, concurrent, concurrent_lock_mode);
(gdb) s
index_drop (indexId=131119, concurrent=false, concurrent_lock_mode=false) at index.c:2131
2131 heapId = IndexGetRelation(indexId, false);
(gdb) n
2132 lockmode = (concurrent || concurrent_lock_mode) ? ShareUpdateExclusiveLock : AccessExclusiveLock;
(gdb) s
RelationForgetRelation (rid=131119) at relcache.c:2757
2757 RelationIdCacheLookup(rid, relation); // 通过cache获取rid对应的RelationData信息
(gdb) n
2759 if (!PointerIsValid(relation))
(gdb) n
2762 if (!RelationHasReferenceCountZero(relation)) // 判断引用计数
(gdb) n
2766 if (relation->rd_createSubid != InvalidSubTransactionId ||
(gdb) n
2767 relation->rd_firstRelfilenodeSubid != InvalidSubTransactionId)
(gdb) n
2766 if (relation->rd_createSubid != InvalidSubTransactionId ||
(gdb) n
2778 RelationClearRelation(relation, false);
RelationClearRelation (relation=0x7f8e7f8237c8, rebuild=false) at relcache.c:2450
2450 RelationCloseSmgr(relation); // 关闭磁盘管理器
(gdb) s
2453 if (relation->rd_amcache)
$191 = (void *) 0x0
(gdb) n
2455 relation->rd_amcache = NULL;
(gdb) n
2461 if (relation->rd_isnailed)
(gdb) n
2468 relation->rd_isvalid = false;
(gdb) n
2471 if (relation->rd_droppedSubid != InvalidSubTransactionId)
(gdb) n
2481 if ((relation->rd_rel->relkind == RELKIND_INDEX ||
(gdb)
2483 relation->rd_refcnt > 0 &&
(gdb)
2482 relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) &&
(gdb)
2497 if (!rebuild)
(gdb) n
2500 RelationCacheDelete(relation);
RelationClearRelation (relation=0x7f8e7f8237c8, rebuild=false) at relcache.c:2503
2503 RelationDestroyRelation(relation, false);
(gdb) s
RelationDestroyRelation (relation=0x7f8e7f8237c8, remember_tupdesc=false) at relcache.c:2351
2351 RelationCloseSmgr(relation);
(gdb) n
2357 if (relation->rd_rel)
(gdb) n
2358 pfree(relation->rd_rel);
(gdb)
2361 if (--relation->rd_att->tdrefcount == 0)
(gdb)
2371 if (remember_tupdesc)
(gdb)
2374 FreeTupleDesc(relation->rd_att);
(gdb) n
2376 FreeTriggerDesc(relation->trigdesc);
(gdb) n
2377 list_free_deep(relation->rd_fkeylist);
(gdb) n
2378 list_free(relation->rd_indexlist);
(gdb) n
2379 list_free(relation->rd_statlist);
(gdb) n
2380 bms_free(relation->rd_indexattr);
(gdb) s
bms_free (a=0x0) at bitmapset.c:210
210 if (a)
(gdb) n
212 }
(gdb) n
RelationDestroyRelation (relation=0x7f8e7f8237c8, remember_tupdesc=false) at relcache.c:2381
2381 bms_free(relation->rd_keyattr);
(gdb) n
2382 bms_free(relation->rd_pkattr);
(gdb) n
2383 bms_free(relation->rd_idattr);
(gdb) n
2384 if (relation->rd_pubactions)
(gdb)
2386 if (relation->rd_options)
(gdb)
2388 if (relation->rd_indextuple)
(gdb)
2389 pfree(relation->rd_indextuple);
(gdb) p *relation->rd_indextuple
$192 = {t_len = 166, t_self = {ip_blkid = {bi_hi = 0, bi_lo = 1}, ip_posid = 105}, t_tableOid = 2610, t_data = 0x7f8e7f823c40}
(gdb) p *relation->rd_indextuple->t_data
$193 = {t_choice = {t_heap = {t_xmin = 964, t_xmax = 0, t_field3 = {t_cid = 5, t_xvac = 5}}, t_datum = {datum_len_ = 964,
datum_typmod = 0, datum_typeid = 5}}, t_ctid = {ip_blkid = {bi_hi = 0, bi_lo = 1}, ip_posid = 105}, t_infomask2 = 20,
t_infomask = 2307, t_hoff = 32 ' ', t_bits = 0x7f8e7f823c57 "\377\377\003"}
(gdb) p relation->rd_indextuple[0]
$194 = {t_len = 166, t_self = {ip_blkid = {bi_hi = 0, bi_lo = 1}, ip_posid = 105}, t_tableOid = 2610, t_data = 0x7f8e7f823c40}
(gdb) p relation->rd_indextuple[01
A syntax error in expression, near `'.
(gdb) p relation->rd_indextuple[1]
$195 = {t_len = 964, t_self = {ip_blkid = {bi_hi = 0, bi_lo = 0}, ip_posid = 5}, t_tableOid = 65536,
t_data = 0xff20090300140069}
(gdb) p relation->rd_indextuple[2]
$196 = {t_len = 1023, t_self = {ip_blkid = {bi_hi = 0, bi_lo = 0}, ip_posid = 47}, t_tableOid = 131114,
t_data = 0x100010100010001}
(gdb) p relation->rd_indextuple[3]
$197 = {t_len = 16777472, t_self = {ip_blkid = {bi_hi = 1, bi_lo = 0}, ip_posid = 104}, t_tableOid = 1, t_data = 0x1500000000}
(gdb) p relation->rd_indextuple[4]
$198 = {t_len = 1, t_self = {ip_blkid = {bi_hi = 0, bi_lo = 0}, ip_posid = 1}, t_tableOid = 112, t_data = 0x1}
(gdb) n
2390 if (relation->rd_amcache)
(gdb) n
2392 if (relation->rd_fdwroutine)
(gdb) n
2394 if (relation->rd_indexcxt)
(gdb) n
2395 MemoryContextDelete(relation->rd_indexcxt);
(gdb) p *relation->rd_indexcxt
$199 = {type = T_AllocSetContext, isReset = false, allowInCritSection = false, mem_allocated = 1024,
methods = 0xc64620 <AllocSetMethods>, parent = 0x190fd20, firstchild = 0x0, prevchild = 0x0, nextchild = 0x19d25f0,
name = 0xc4fcfe "index info", ident = 0x1971838 "class_pkey", reset_cbs = 0x0}
(gdb) n
2396 if (relation->rd_rulescxt)
(gdb) n
2398 if (relation->rd_rsdesc)
(gdb) n
2400 if (relation->rd_partkeycxt)
(gdb) n
2402 if (relation->rd_pdcxt)
(gdb) n
2404 if (relation->rd_pddcxt)
(gdb) n
2406 if (relation->rd_partcheckcxt)
(gdb) n
2408 pfree(relation); // 释放relation对应的Relcache
1175 }
(gdb) n
RelationDestroyRelation (relation=0x7f8e7f8237c8, remember_tupdesc=false) at relcache.c:2409
2409 }
(gdb) n
RelationClearRelation (relation=0x7f8e7f8237c8, rebuild=false) at relcache.c:2711
2711 }
(gdb) n
RelationForgetRelation (rid=131119) at relcache.c:2779
2779 }
(gdb) n
index_drop (indexId=131119, concurrent=false, concurrent_lock_mode=false) at index.c:2301
2301 indexRelation = table_open(IndexRelationId, RowExclusiveLock);
(gdb) n
2303 tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
(gdb) p *indexRelation
$206 = {rd_node = {spcNode = 1663, dbNode = 13835, relNode = 2610}, rd_smgr = 0x195c668, rd_refcnt = 1, rd_backend = -1,
rd_islocaltemp = false, rd_isnailed = false, rd_isvalid = true, rd_indexvalid = false, rd_statvalid = false,
rd_createSubid = 0, rd_newRelfilenodeSubid = 0, rd_firstRelfilenodeSubid = 0, rd_droppedSubid = 0, rd_rel = 0x7f8e7f9a4ba8,
rd_att = 0x7f8e7f9a4cb8, rd_id = 2610, rd_lockInfo = {lockRelId = {relId = 2610, dbId = 13835}}, rd_rules = 0x0,
rd_rulescxt = 0x0, trigdesc = 0x0, rd_rsdesc = 0x0, rd_fkeylist = 0x0, rd_fkeyvalid = false, rd_partkey = 0x0,
rd_partkeycxt = 0x0, rd_partdesc = 0x0, rd_pdcxt = 0x0, rd_partdesc_nodetached = 0x0, rd_pddcxt = 0x0,
rd_partdesc_nodetached_xmin = 0, rd_partcheck = 0x0, rd_partcheckvalid = false, rd_partcheckcxt = 0x0, rd_indexlist = 0x0,
rd_pkindex = 0, rd_replidindex = 0, rd_statlist = 0x0, rd_indexattr = 0x0, rd_keyattr = 0x0, rd_pkattr = 0x0,
rd_idattr = 0x0, rd_pubactions = 0x0, rd_options = 0x0, rd_amhandler = 3, rd_tableam = 0xaf8ae0 <heapam_methods>,
rd_index = 0x0, rd_indextuple = 0x0, rd_indexcxt = 0x0, rd_indam = 0x0, rd_opfamily = 0x0, rd_opcintype = 0x0,
rd_support = 0x0, rd_supportinfo = 0x0, rd_indoption = 0x0, rd_indexprs = 0x0, rd_indpred = 0x0, rd_exclops = 0x0,
rd_exclprocs = 0x0, rd_exclstrats = 0x0, rd_indcollation = 0x0, rd_opcoptions = 0x0, rd_amcache = 0x0, rd_fdwroutine = 0x0,
rd_toastoid = 0, pgstat_info = 0x19486d8}
(gdb) n
2304 if (!HeapTupleIsValid(tuple))
(gdb) n
2307 hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs,
...
(gdb) n
2310 CatalogTupleDelete(indexRelation, &tuple->t_self);
(gdb) s
CatalogTupleDelete (heapRel=0x7f8e7f9a4998, tid=0x7f8e7f8235fc) at indexing.c:352
352 simple_heap_delete(heapRel, tid);
(gdb) s
simple_heap_delete (relation=0x7f8e7f9a4998, tid=0x7f8e7f8235fc) at heapam.c:3112
3112 result = heap_delete(relation, tid,
(gdb) s
设置xmax和infomask标识信息
...
(gdb) s
compute_new_xmax_infomask (xmax=0, old_infomask=2307, old_infomask2=20, add_to_xmax=969, mode=LockTupleExclusive,
is_update=true, result_xmax=0x7ffdf4800e48, result_infomask=0x7ffdf4800e46, result_infomask2=0x7ffdf4800e44)
at heapam.c:4998
4998 new_infomask = 0;
(gdb) n
4999 new_infomask2 = 0;
(gdb)
5000 if (old_infomask & HEAP_XMAX_INVALID)
(gdb)
5010 if (is_update)
(gdb) n
5012 new_xmax = add_to_xmax;
(gdb) n
5013 if (mode == LockTupleExclusive)
(gdb) n
5014 new_infomask2 |= HEAP_KEYS_UPDATED;
(gdb) n
5245 *result_infomask = new_infomask;
(gdb) n
5246 *result_infomask2 = new_infomask2;
(gdb) n
5247 *result_xmax = new_xmax; // 969
(gdb) n
5248 }
(gdb) n
heap_delete (relation=0x7f8e7f9a4998, tid=0x7f8e7f8235fc, cid=0, crosscheck=0x0, wait=true, tmfd=0x7ffdf4800ef0,
changingPart=false) at heapam.c:2950
2950 START_CRIT_SECTION();
(gdb) n
2959 PageSetPrunable(page, xid);
(gdb) n
2961 if (PageIsAllVisible(page))
(gdb) p *(PageHeader)page
$227 = {pd_lsn = {xlogid = 0, xrecoff = 940180616}, pd_checksum = 0, pd_flags = 0, pd_lower = 452, pd_upper = 3496,
pd_special = 16384, pd_pagesize_version = 16388, pd_prune_xid = 764, pd_linp = 0x7f8e6a8ef798}
(gdb) n
2970 tp.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
(gdb) n
2971 tp.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
(gdb) n
2972 tp.t_data->t_infomask |= new_infomask;
(gdb) n
2973 tp.t_data->t_infomask2 |= new_infomask2;
(gdb) n
2974 HeapTupleHeaderClearHotUpdated(tp.t_data);
(gdb) p tp
$228 = {t_len = 166, t_self = {ip_blkid = {bi_hi = 0, bi_lo = 1}, ip_posid = 105}, t_tableOid = 2610, t_data = 0x7f8e6a8f0680}
(gdb) p *tp.t_data
$229 = {t_choice = {t_heap = {t_xmin = 964, t_xmax = 0, t_field3 = {t_cid = 5, t_xvac = 5}}, t_datum = {datum_len_ = 964,
datum_typmod = 0, datum_typeid = 5}}, t_ctid = {ip_blkid = {bi_hi = 0, bi_lo = 1}, ip_posid = 105}, t_infomask2 = 8212,
t_infomask = 259, t_hoff = 32 ' ', t_bits = 0x7f8e6a8f0697 "\377\377\003"}
(gdb) n
2975 HeapTupleHeaderSetXmax(tp.t_data, new_xmax); // 更新xmax 和 cid [命令ID]
(gdb) n
2976 HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo);
// 从pg_class 系统表中删除class_pkey条目
...
DeleteRelationTuple (relid=131119) at heap.c:1562
1562 pg_class_desc = table_open(RelationRelationId, RowExclusiveLock);
(gdb) n
1564 tup = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
(gdb) n
1565 if (!HeapTupleIsValid(tup))
(gdb) p *pg_class_desc
$232 = {rd_node = {spcNode = 1663, dbNode = 13835, relNode = 1259}, rd_smgr = 0x195c868, rd_refcnt = 2, rd_backend = -1,
rd_islocaltemp = false, rd_isnailed = true, rd_isvalid = true, rd_indexvalid = true, rd_statvalid = true, rd_createSubid = 0,
rd_newRelfilenodeSubid = 0, rd_firstRelfilenodeSubid = 0, rd_droppedSubid = 0, rd_rel = 0x7f8e7f88d958,
rd_att = 0x7f8e7f88dc78, rd_id = 1259, rd_lockInfo = {lockRelId = {relId = 1259, dbId = 13835}}, rd_rules = 0x0,
rd_rulescxt = 0x0, trigdesc = 0x0, rd_rsdesc = 0x0, rd_fkeylist = 0x0, rd_fkeyvalid = false, rd_partkey = 0x0,
rd_partkeycxt = 0x0, rd_partdesc = 0x0, rd_pdcxt = 0x0, rd_partdesc_nodetached = 0x0, rd_pddcxt = 0x0,
rd_partdesc_nodetached_xmin = 0, rd_partcheck = 0x0, rd_partcheckvalid = false, rd_partcheckcxt = 0x0,
rd_indexlist = 0x7f8e7f8b4428, rd_pkindex = 2662, rd_replidindex = 0, rd_statlist = 0x0, rd_indexattr = 0x0,
rd_keyattr = 0x0, rd_pkattr = 0x0, rd_idattr = 0x0, rd_pubactions = 0x0, rd_options = 0x0, rd_amhandler = 3,
rd_tableam = 0xaf8ae0 <heapam_methods>, rd_index = 0x0, rd_indextuple = 0x0, rd_indexcxt = 0x0, rd_indam = 0x0,
rd_opfamily = 0x0, rd_opcintype = 0x0, rd_support = 0x0, rd_supportinfo = 0x0, rd_indoption = 0x0, rd_indexprs = 0x0,
rd_indpred = 0x0, rd_exclops = 0x0, rd_exclprocs = 0x0, rd_exclstrats = 0x0, rd_indcollation = 0x0, rd_opcoptions = 0x0,
rd_amcache = 0x0, rd_fdwroutine = 0x0, rd_toastoid = 0, pgstat_info = 0x1948408}
(gdb) n
1569 CatalogTupleDelete(pg_class_desc, &tup->t_self);
(gdb) n
1571 ReleaseSysCache(tup);
(gdb) n
1573 table_close(pg_class_desc, RowExclusiveLock);
(gdb) n
1574 }