postgres源码解析40 表创建执行全流程梳理--4

本文讲解非系统表的创建逻辑(普通表和索引表),其入口函数为heap_create,内部公共接口函数为RelationBuildLocalRelation和RelationCreateStorage。相关知识回顾见:
postgres源码解析38 表创建执行全流程梳理–1
postgres源码解析38 表创建执行全流程梳理–2
postgres源码解析38 表创建执行全流程梳理–3

heap_create 执行逻辑

在这里插入图片描述
1)首先进行安全性检查,不允许在系统表中创建relations,判断是否需要创建持久化文件等;
2)根据表名、表空间、表对象标识符和文件节点relfilenode等信息调用 RelationBuildLocalRelation在内存中构建Relation,并插入全局relcache 哈希表中;
3)结合relation类型调用相应的接口函数进行relation的创建,[普通表/TOAST/物化视图: table_relation_set_new_filenode,索引/序列:RelationCreateStorage];
4) 对于无需创建物理存储文件的relation,若用户指定了表空间,则需要在 pg_shdepend 中记录该relation对此表空间的依赖信息(防止表空间被误删)。

RelationBuildLocalRelation

该函数目的是在内存中构建创建表的relcache Entry,并插入全局Relcache 哈希表中,用于加速后续对此表的访问。
1)如果不存在CacheMemoryContext,则创建此上下文,后续操作均在此上下文进行;
2)分配并初始化Relation结构体,结合入参的TupleDesc填充Relation结构体中rd_att字段:字段属性的详细信息;
3)分配Form_pg_class结构并赋给Relation结构体的rd_rel字段,再根据入参填充其内容:表名、命名空间、表类型、字段数目等;
4)调用 RelationInitLockInfo初始化relation描述符锁信息;
5)调用 RelationInitPhysicalAddr 初始化relation描述符对应的物理地址:spcNode/dbNode/relNode [表空间/数据库/表]
6)将上述构建好的RelCache Entry插入全局relcache 哈希表中,并增加该条目的引用计数
在这里插入图片描述

*
 *		RelationBuildLocalRelation
 *			Build a relcache entry for an about-to-be-created relation,
 *			and enter it into the relcache.
 */
Relation
RelationBuildLocalRelation(const char *relname,
						   Oid relnamespace,
						   TupleDesc tupDesc,
						   Oid relid,
						   Oid accessmtd,
						   Oid relfilenode,
						   Oid reltablespace,
						   bool shared_relation,
						   bool mapped_relation,
						   char relpersistence,
						   char relkind)
{
	Relation	rel;
	MemoryContext oldcxt;
	int			natts = tupDesc->natts;
	int			i;
	bool		has_not_null;
	bool		nailit;

	AssertArg(natts >= 0);

	/*
	 * check for creation of a rel that must be nailed in cache.
	 *
	 * XXX this list had better match the relations specially handled in
	 * RelationCacheInitializePhase2/3.
	 */
	switch (relid)
	{
		case DatabaseRelationId:
		case AuthIdRelationId:
		case AuthMemRelationId:
		case RelationRelationId:
		case AttributeRelationId:
		case ProcedureRelationId:
		case TypeRelationId:
			nailit = true;
			break;
		default:
			nailit = false;
			break;
	}

	/*
	 * check that hardwired list of shared rels matches what's in the
	 * bootstrap .bki file.  If you get a failure here during initdb, you
	 * probably need to fix IsSharedRelation() to match whatever you've done
	 * to the set of shared relations.
	 */
	if (shared_relation != IsSharedRelation(relid))
		elog(ERROR, "shared_relation flag for \"%s\" does not match IsSharedRelation(%u)",
			 relname, relid);

	/* Shared relations had better be mapped, too */
	Assert(mapped_relation || !shared_relation);

	/*
	 * switch to the cache context to create the relcache entry.
	 */
	if (!CacheMemoryContext)
		CreateCacheMemoryContext();

	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);

	/*
	 * allocate a new relation descriptor and fill in basic state fields.
	 */
	rel = (Relation) palloc0(sizeof(RelationData));

	/* make sure relation is marked as having no open file yet */
	rel->rd_smgr = NULL;

	/* mark it nailed if appropriate */
	rel->rd_isnailed = nailit;

	rel->rd_refcnt = nailit ? 1 : 0;

	/* it's being created in this transaction */
	rel->rd_createSubid = GetCurrentSubTransactionId();
	rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
	rel->rd_firstRelfilenodeSubid = InvalidSubTransactionId;
	rel->rd_droppedSubid = InvalidSubTransactionId;

	/*
	 * create a new tuple descriptor from the one passed in.  We do this
	 * partly to copy it into the cache context, and partly because the new
	 * relation can't have any defaults or constraints yet; they have to be
	 * added in later steps, because they require additions to multiple system
	 * catalogs.  We can copy attnotnull constraints here, however.
	 */
	rel->rd_att = CreateTupleDescCopy(tupDesc);
	rel->rd_att->tdrefcount = 1;	/* mark as refcounted */
	has_not_null = false;
	for (i = 0; i < natts; i++)
	{
		Form_pg_attribute satt = TupleDescAttr(tupDesc, i);
		Form_pg_attribute datt = TupleDescAttr(rel->rd_att, i);

		datt->attidentity = satt->attidentity;
		datt->attgenerated = satt->attgenerated;
		datt->attnotnull = satt->attnotnull;
		has_not_null |= satt->attnotnull;
	}

	if (has_not_null)
	{
		TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));

		constr->has_not_null = true;
		rel->rd_att->constr = constr;
	}

	/*
	 * initialize relation tuple form (caller may add/override data later)
	 */
	rel->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);

	namestrcpy(&rel->rd_rel->relname, relname);
	rel->rd_rel->relnamespace = relnamespace;

	rel->rd_rel->relkind = relkind;
	rel->rd_rel->relnatts = natts;
	rel->rd_rel->reltype = InvalidOid;
	/* needed when bootstrapping: */
	rel->rd_rel->relowner = BOOTSTRAP_SUPERUSERID;

	/* set up persistence and relcache fields dependent on it */
	rel->rd_rel->relpersistence = relpersistence;
	switch (relpersistence)
	{
		case RELPERSISTENCE_UNLOGGED:
		case RELPERSISTENCE_PERMANENT:
			rel->rd_backend = InvalidBackendId;
			rel->rd_islocaltemp = false;
			break;
		case RELPERSISTENCE_TEMP:
			Assert(isTempOrTempToastNamespace(relnamespace));
			rel->rd_backend = BackendIdForTempRelations();
			rel->rd_islocaltemp = true;
			break;
		default:
			elog(ERROR, "invalid relpersistence: %c", relpersistence);
			break;
	}

	/* if it's a materialized view, it's not populated initially */
	if (relkind == RELKIND_MATVIEW)
		rel->rd_rel->relispopulated = false;
	else
		rel->rd_rel->relispopulated = true;

	/* set replica identity -- system catalogs and non-tables don't have one */
	if (!IsCatalogNamespace(relnamespace) &&
		(relkind == RELKIND_RELATION ||
		 relkind == RELKIND_MATVIEW ||
		 relkind == RELKIND_PARTITIONED_TABLE))
		rel->rd_rel->relreplident = REPLICA_IDENTITY_DEFAULT;
	else
		rel->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;

	/*
	 * Insert relation physical and logical identifiers (OIDs) into the right
	 * places.  For a mapped relation, we set relfilenode to zero and rely on
	 * RelationInitPhysicalAddr to consult the map.
	 */
	rel->rd_rel->relisshared = shared_relation;

	RelationGetRelid(rel) = relid;

	for (i = 0; i < natts; i++)
		TupleDescAttr(rel->rd_att, i)->attrelid = relid;

	rel->rd_rel->reltablespace = reltablespace;

	if (mapped_relation)
	{
		rel->rd_rel->relfilenode = InvalidOid;
		/* Add it to the active mapping information */
		RelationMapUpdateMap(relid, relfilenode, shared_relation, true);
	}
	else
		rel->rd_rel->relfilenode = relfilenode;

	RelationInitLockInfo(rel);	/* see lmgr.c */

	RelationInitPhysicalAddr(rel);

	rel->rd_rel->relam = accessmtd;

	/*
	 * RelationInitTableAccessMethod will do syscache lookups, so we mustn't
	 * run it in CacheMemoryContext.  Fortunately, the remaining steps don't
	 * require a long-lived current context.
	 */
	MemoryContextSwitchTo(oldcxt);

	if (relkind == RELKIND_RELATION ||
		relkind == RELKIND_SEQUENCE ||
		relkind == RELKIND_TOASTVALUE ||
		relkind == RELKIND_MATVIEW)
		RelationInitTableAccessMethod(rel);

	/*
	 * Okay to insert into the relcache hash table.
	 *
	 * Ordinarily, there should certainly not be an existing hash entry for
	 * the same OID; but during bootstrap, when we create a "real" relcache
	 * entry for one of the bootstrap relations, we'll be overwriting the
	 * phony one created with formrdesc.  So allow that to happen for nailed
	 * rels.
	 */
	RelationCacheInsert(rel, nailit);

	/*
	 * Flag relation as needing eoxact cleanup (to clear rd_createSubid). We
	 * can't do this before storing relid in it.
	 */
	EOXactListAdd(rel);

	/* It's fully valid */
	rel->rd_isvalid = true;

	/*
	 * Caller expects us to pin the returned entry.
	 */
	RelationIncrementReferenceCount(rel);

	return rel;
}

RelationCreateStorage
物理文件的创建由存储管理器(smgr)负责,pg中所有对物理文件的操作均通过这一统一接口完成,而RelationCreateStorage 函数就是对这些接口函数的进一步封装,其执行流程如下:
1)对于持久化的relation,将标志needs_wal设置为true,表明需要写WAL日志;对于临时relation或者unlogged relation,则将其设置为false,无需写WAL日志;
2)根据输入的RelFileNode调用 smgropen返回 SMgrRelation对象,不存在会创建一个;
3)结合上述返回的 SMgrRelation和ForkNumber号调用 smgrcreate创建relation的物理文件;
4)如需写WAL日志,调用 log_smgrcreate函数记录下此relation的实际物理信息;
5)最后构造一个PendingRelDelete节点并将其添加至pendingDeletes链表头部,若事务最终回滚,则可根据此信息将已创建的文件删除;函数返回 SMgrRelation对象。

在这里插入图片描述

/*
 * RelationCreateStorage
 *		Create the on-disk storage backing a relation.
 *
 * Only the main fork is created here; any additional forks are created
 * lazily by the modules that need them.
 *
 * The operation is transactional: creation of a permanent relation is
 * WAL-logged, and an entry is queued on pendingDeletes so that the storage
 * is destroyed again if the transaction aborts later on.
 *
 * rnode identifies the physical relation, relpersistence is its
 * RELPERSISTENCE_xxx code (an unknown code raises ERROR).  The return value
 * is the SMgrRelation obtained from smgropen() for the new storage.
 */
SMgrRelation
RelationCreateStorage(RelFileNode rnode, char relpersistence)
{
	SMgrRelation reln;
	PendingRelDelete *undo;
	BackendId	backend = InvalidBackendId;
	bool		wal_logged = false;

	Assert(!IsInParallelMode());	/* couldn't update pendingSyncHash */

	/*
	 * Only temp relations belong to a particular backend, and only permanent
	 * relations have their creation WAL-logged; unlogged relations keep both
	 * defaults.
	 */
	if (relpersistence == RELPERSISTENCE_TEMP)
		backend = BackendIdForTempRelations();
	else if (relpersistence == RELPERSISTENCE_PERMANENT)
		wal_logged = true;
	else if (relpersistence != RELPERSISTENCE_UNLOGGED)
	{
		elog(ERROR, "invalid relpersistence: %c", relpersistence);
		return NULL;			/* placate compiler */
	}

	/* open the smgr relation and create its main fork */
	reln = smgropen(rnode, backend);
	smgrcreate(reln, MAIN_FORKNUM, false);

	if (wal_logged)
		log_smgrcreate(&reln->smgr_rnode.node, MAIN_FORKNUM);

	/*
	 * Remember the new storage at the head of the pendingDeletes list so it
	 * is removed if the transaction aborts.  The entry is allocated in
	 * TopMemoryContext.
	 */
	undo = (PendingRelDelete *)
		MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
	undo->relnode = rnode;
	undo->backend = backend;
	undo->atCommit = false;		/* delete if abort */
	undo->nestLevel = GetCurrentTransactionNestLevel();
	undo->next = pendingDeletes;
	pendingDeletes = undo;

	/* a permanent relation created while XLogIsNeeded() is false gets a pending sync */
	if (relpersistence == RELPERSISTENCE_PERMANENT && !XLogIsNeeded())
	{
		Assert(backend == InvalidBackendId);
		AddPendingSync(&rnode);
	}

	return reln;
}
  • 2
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值