Postgresql源码（45）SysCache内存结构与搜索流程分析

高铭杰

已于 2022-05-10 21:48:44 修改

阅读量899

点赞数

分类专栏： pgsql 文章标签： postgresql 哈希算法数据库

于 2022-05-09 21:36:04 首次发布

本文链接：https://blog.csdn.net/jackgo73/article/details/124674144

版权

pgsql 专栏收录该内容

273 篇文章 122 订阅

订阅专栏

本文详细介绍了PostgreSQL中系统缓存（CatCache）的查询步骤和内存结构，包括单条查询和多条查询的流程。查询首先通过计算哈希值定位到bucket，然后遍历bucket中的双向链表找到匹配的元组，未找到时会查询系统表并更新缓存。此外，还讨论了LRU策略和负缓存的概念。

摘要由CSDN通过智能技术生成

1 内存结构和查询步骤

查询步骤概要

使用SysCacheIdentifier在CatCache数组中找到对应的CatCache
计算hash，按数组index找到bucket
找到bucket后，在bucket双向链表中遍历找到CatCTup，元组记录在其中；找到后调整到双向链表头（LRU）

多条查询步骤概要

cc_lists用与多条数据查询
计算hash，按顺序匹配每个catclist，找到catclist
如果List可用直接返回。注意catclist中再结构体后面记录了该List指向的所有CatCTup的指针，catclist并不维护tuple内存信息，只是指向CatCTup结构，具体的元组信息（封装在CatCTup中）还是放在bucket中维护。

内存结构见下图：
请添加图片描述

2 单条查询步骤

提供了四个入口函数，用于不同查询条件个数的场景

extern HeapTuple SearchSysCache1(int cacheId,
								 Datum key1);
extern HeapTuple SearchSysCache2(int cacheId,
								 Datum key1, Datum key2);
extern HeapTuple SearchSysCache3(int cacheId,
								 Datum key1, Datum key2, Datum key3);
extern HeapTuple SearchSysCache4(int cacheId,
								 Datum key1, Datum key2, Datum key3, Datum key4);

最后全部调入

static inline HeapTuple
SearchCatCacheInternal(CatCache *cache,
					   int nkeys,
					   Datum v1,
					   Datum v2,
					   Datum v3,
					   Datum v4)
{
	Datum		arguments[CATCACHE_MAXKEYS];
	uint32		hashValue;
	Index		hashIndex;
	dlist_iter	iter;
	dlist_head *bucket;
	CatCTup    *ct;

	/*
	 * one-time startup overhead for each cache
	 */
	if (unlikely(cache->cc_tupdesc == NULL))
		CatalogCacheInitializeCache(cache);

	/* Initialize local parameter array */
	arguments[0] = v1;
	arguments[1] = v2;
	arguments[2] = v3;
	arguments[3] = v4;

【第一步】算hash找桶

	hashValue = CatalogCacheComputeHashValue(cache, nkeys, v1, v2, v3, v4);
	hashIndex = HASH_INDEX(hashValue, cache->cc_nbuckets);

【第二步】遍历桶的dlist，找到和nkeys都匹配的tuple

	bucket = &cache->cc_bucket[hashIndex];
	dlist_foreach(iter, bucket)
	{
		ct = dlist_container(CatCTup, cache_elem, iter.cur);

		if (ct->dead)
			continue;			/* ignore dead entries */

		if (ct->hash_value != hashValue)
			continue;			/* quickly skip entry if wrong hash val */

		if (!CatalogCacheCompareTuple(cache, nkeys, ct->keys, arguments))
			continue;

【第二步】找到了！放到dlist头部（LRU）

		dlist_move_head(bucket, &ct->cache_elem);

【第二步】找到的tuple是否有neg标记

找到了没有negative标记的，把REF++，返回TUPLE。
找到了有negative标记的，这种tuple是SearchCatCacheMiss函数查完系统表后，没有匹配的元组，就会在cache中增加一个negative的tuple，表示系统表中没有，省去了下次还要搜索系统表的操作。

		/*
		 * If it's a positive entry, bump its refcount and return it. If it's
		 * negative, we can report failure to the caller.
		 */
		if (!ct->negative)
		{
			ResourceOwnerEnlargeCatCacheRefs(CurrentResourceOwner);
			ct->refcount++;
			ResourceOwnerRememberCatCacheRef(CurrentResourceOwner, &ct->tuple);
			return &ct->tuple;
		}
		else
		{
			return NULL;
		}
	}

【第三步】没找到，去IO对应的系统表

如果去系统表中找到了，构造一个tuple放入bucket的dlist中。
如果去系统表中没找到，构造一个neg tuple放入bucket的dlist中，表示这条没有，下次直接返回不必查询系统表。

	return SearchCatCacheMiss(cache, nkeys, hashValue, hashIndex, v1, v2, v3, v4);
}

3 多条查询步骤SearchCatCacheList

与#2类似：

CatCList *
SearchCatCacheList(CatCache *cache,
				   int nkeys,
				   Datum v1,
				   Datum v2,
				   Datum v3)
{
	...
	lHashValue = CatalogCacheComputeHashValue(cache, nkeys, v1, v2, v3, v4);
	dlist_foreach(iter, &cache->cc_lists)
	{
		cl = dlist_container(CatCList, cache_elem, iter.cur);

		if (cl->dead)
			continue;			/* ignore dead entries */

		if (cl->hash_value != lHashValue)
			continue;			/* quickly skip entry if wrong hash val */

		/*
		 * see if the cached list matches our key.
		 */
		if (cl->nkeys != nkeys)
			continue;

		if (!CatalogCacheCompareTuple(cache, nkeys, cl->keys, arguments))
			continue;

与上面单条查询不同的是，这里没有bucket，需要按顺序遍历链表，找到hash匹配的。
找到后也是LRU到最前，然后返回即可。

		dlist_move_head(&cache->cc_lists, &cl->cache_elem);

		/* Bump the list's refcount and return it */
		ResourceOwnerEnlargeCatCacheListRefs(CurrentResourceOwner);
		cl->refcount++;
		ResourceOwnerRememberCatCacheListRef(CurrentResourceOwner, cl);
		return cl;
	}
	


	ctlist = NIL;

	PG_TRY();
	{
		ScanKeyData cur_skey[CATCACHE_MAXKEYS];
		Relation	relation;
		SysScanDesc scandesc;

		/*
		 * Ok, need to make a lookup in the relation, copy the scankey and
		 * fill out any per-call fields.
		 */
		memcpy(cur_skey, cache->cc_skey, sizeof(ScanKeyData) * cache->cc_nkeys);
		cur_skey[0].sk_argument = v1;
		cur_skey[1].sk_argument = v2;
		cur_skey[2].sk_argument = v3;
		cur_skey[3].sk_argument = v4;

		relation = table_open(cache->cc_reloid, AccessShareLock);

		scandesc = systable_beginscan(relation,
									  cache->cc_indexoid,
									  IndexScanOK(cache, cur_skey),
									  NULL,
									  nkeys,
									  cur_skey);

没找到，去系统表里面搜索。
注意搜完了都是添加到bucket中的，list需要的只是把指针记录下来。

		ordered = (scandesc->irel != NULL);

		while (HeapTupleIsValid(ntp = systable_getnext(scandesc)))
		{
			uint32		hashValue;
			Index		hashIndex;
			bool		found = false;
			dlist_head *bucket;

			/*
			 * See if there's an entry for this tuple already.
			 */
			ct = NULL;
			hashValue = CatalogCacheComputeTupleHashValue(cache, cache->cc_nkeys, ntp);
			hashIndex = HASH_INDEX(hashValue, cache->cc_nbuckets);

			bucket = &cache->cc_bucket[hashIndex];
			dlist_foreach(iter, bucket)
			{
				ct = dlist_container(CatCTup, cache_elem, iter.cur);

				if (ct->dead || ct->negative)
					continue;	/* ignore dead and negative entries */

				if (ct->hash_value != hashValue)
					continue;	/* quickly skip entry if wrong hash val */

				if (!ItemPointerEquals(&(ct->tuple.t_self), &(ntp->t_self)))
					continue;	/* not same tuple */

				/*
				 * Found a match, but can't use it if it belongs to another
				 * list already
				 */
				if (ct->c_list)
					continue;

				found = true;
				break;			/* A-OK */
			}

			if (!found)
			{
				ct = CatalogCacheCreateEntry(cache, ntp, arguments,
											 hashValue, hashIndex,
											 false);
			}
			ctlist = lappend(ctlist, ct);
			ct->refcount++;
		}

		systable_endscan(scandesc);

		table_close(relation, AccessShareLock);

		/* Now we can build the CatCList entry. */
		oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
		nmembers = list_length(ctlist);
		cl = (CatCList *)
			palloc(offsetof(CatCList, members) + nmembers * sizeof(CatCTup *));

		/* Extract key values */
		CatCacheCopyKeys(cache->cc_tupdesc, nkeys, cache->cc_keyno,
						 arguments, cl->keys);
		MemoryContextSwitchTo(oldcxt);

	}
	...

	cl->cl_magic = CL_MAGIC;
	cl->my_cache = cache;
	cl->refcount = 0;			/* for the moment */
	cl->dead = false;
	cl->ordered = ordered;
	cl->nkeys = nkeys;
	cl->hash_value = lHashValue;
	cl->n_members = nmembers;

	i = 0;
	foreach(ctlist_item, ctlist)
	{
		cl->members[i++] = ct = (CatCTup *) lfirst(ctlist_item);
		Assert(ct->c_list == NULL);
		ct->c_list = cl;
		/* release the temporary refcount on the member */
		Assert(ct->refcount > 0);
		ct->refcount--;
		/* mark list dead if any members already dead */
		if (ct->dead)
			cl->dead = true;
	}
	Assert(i == nmembers);

构造完成，挂到cc_lists前面，完成搜索。cl->refcount++

	dlist_push_head(&cache->cc_lists, &cl->cache_elem);

	/* Finally, bump the list's refcount and return it */
	cl->refcount++;
	ResourceOwnerRememberCatCacheListRef(CurrentResourceOwner, cl);

...
	return cl;
}

ps. 补充

PG的双向链表的几个宏实现很漂亮，有兴趣可以研究下。

ilist.h
https://github.com/postgres/postgres/blob/master/src/include/lib/ilist.h

使用时只需要将dlist_node放入struct的任意位置即可，该struct就具有了dlist的所有功能。

struct dlist_node
{
	dlist_node *prev;
	dlist_node *next;
};

typedef struct dlist_head
{
	dlist_node	head;
} dlist_head;

// 遍历
dlist_foreach(iter, bucket)
	{
		ct = dlist_container(CatCTup, cache_elem, iter.cur);
...
...