本小节将着重讲解PostgreSQL内存上下文的工作机制,包括初始化创建、内存上下文的分配与重分配
知识回顾:postgres 源码解析29 内存管理–1
源码解析
1 内存上下文的初始化
每个backend 进程在使用内存上下文之前必须进行初始化,该操作入口为MemoryContextInit,主要工作是创建TopMemoryContext和 ErrorContex,后续操作所需的内存空间将从TopMemoryContext下继续分配。
- TopMemoryContext:该节点分配后将一直存在,直至系统退出后释放。该节点下面分配的子节点占用的空间均来自本身TopMemoryContext占用空间。
- ErrorContex:该节点是TopMemoryContext第一个节点,是错误恢复处理的永久性内存上下文。
void
MemoryContextInit(void)
{
AssertState(TopMemoryContext == NULL);
/*
* First, initialize TopMemoryContext, which is the parent of all others.
*/
// 初始化TopMemoryContext,所有子节点的父节点
TopMemoryContext = AllocSetContextCreate((MemoryContext) NULL,
"TopMemoryContext",
ALLOCSET_DEFAULT_SIZES);
/*
* Not having any other place to point CurrentMemoryContext, make it point
* to TopMemoryContext. Caller should change this soon!
*/
CurrentMemoryContext = TopMemoryContext;
/*
* Initialize ErrorContext as an AllocSetContext with slow growth rate ---
* we don't really expect much to be allocated in it. More to the point,
* require it to contain at least 8K at all times. This is the only case
* where retained memory in a context is *essential* --- we want to be
* sure ErrorContext still has some memory even if we've run out
* elsewhere! Also, allow allocations in ErrorContext within a critical
* section. Otherwise a PANIC will cause an assertion failure in the error
* reporting code, before printing out the real cause of the failure.
*
* This should be the last step in this function, as elog.c assumes memory
* management works once ErrorContext is non-null.
*/
ErrorContext = AllocSetContextCreate(TopMemoryContext,
"ErrorContext",
8 * 1024,
8 * 1024,
8 * 1024);
MemoryContextAllowInCriticalSection(ErrorContext, true);
}
/*
2 内存上下文的创建
AllocSetContextCreateInternal总体流程:
1)首先判断在空闲内存块数组中是否存在所需内存块,存在则可使用,并将其从空闲数组中移除,指向父节点;
2)如未找到,则初始化一个指定大小的内存块,设置 AllocSet相关结构体信息,最后调用 MemoryContextCreate函数在父节点下创建内存上下文;
/*
* AllocSetContextCreateInternal
* Create a new AllocSet context.
*
* parent: parent context, or NULL if top-level context // 父节点上下文
* name: name of context (must be statically allocated) // debug 用
* minContextSize: minimum context size // 最小分配块大小
* initBlockSize: initial allocation block size // 初始分配块大小
* maxBlockSize: maximum allocation block size // 最大分配块大小
*
* Most callers should abstract the context size parameters using a macro
* such as ALLOCSET_DEFAULT_SIZES.
*
* Note: don't call this directly; go through the wrapper macro
* AllocSetContextCreate.
*/
MemoryContext
AllocSetContextCreateInternal(MemoryContext parent,
const char *name,
Size minContextSize,
Size initBlockSize,
Size maxBlockSize)
{
int freeListIndex;
Size firstBlockSize;
AllocSet set;
AllocBlock block;
/* Assert we padded AllocChunkData properly */
// 校验
StaticAssertStmt(ALLOC_CHUNKHDRSZ == MAXALIGN(ALLOC_CHUNKHDRSZ),
"sizeof(AllocChunkData) is not maxaligned");
StaticAssertStmt(offsetof(AllocChunkData, aset) + sizeof(MemoryContext) ==
ALLOC_CHUNKHDRSZ,
"padding calculation in AllocChunkData is wrong");
/*
* First, validate allocation parameters. Once these were regular runtime
* test and elog's, but in practice Asserts seem sufficient because nobody
* varies their parameters at runtime. We somewhat arbitrarily enforce a
* minimum 1K block size.
*/
Assert(initBlockSize == MAXALIGN(initBlockSize) &&
initBlockSize >= 1024);
Assert(maxBlockSize == MAXALIGN(maxBlockSize) &&
maxBlockSize >= initBlockSize &&
AllocHugeSizeIsValid(maxBlockSize)); /* must be safe to double */
Assert(minContextSize == 0 ||
(minContextSize == MAXALIGN(minContextSize) &&
minContextSize >= 1024 &&
minContextSize <= maxBlockSize));
/*
* Check whether the parameters match either available freelist. We do
* not need to demand a match of maxBlockSize.
*/
if (minContextSize == ALLOCSET_DEFAULT_MINSIZE &&
initBlockSize == ALLOCSET_DEFAULT_INITSIZE)
freeListIndex = 0;
else if (minContextSize == ALLOCSET_SMALL_MINSIZE &&
initBlockSize == ALLOCSET_SMALL_INITSIZE)
freeListIndex = 1;
else
freeListIndex = -1;
/*
* If a suitable freelist entry exists, just recycle that context.
*/
/// 在空闲链获取相应的上下文
if (freeListIndex >= 0)
{
AllocSetFreeList *freelist = &context_freelists[freeListIndex];
if (freelist->first_free != NULL)
{
/* Remove entry from freelist */
set = freelist->first_free;
freelist->first_free = (AllocSet) set->header.nextchild;
freelist->num_free--;
/* Update its maxBlockSize; everything else should be OK */
set->maxBlockSize = maxBlockSize;
/* Reinitialize its header, installing correct name and parent */
MemoryContextCreate((MemoryContext) set,
T_AllocSetContext,
&AllocSetMethods,
parent,
name);
((MemoryContext) set)->mem_allocated =
set->keeper->endptr - ((char *) set);
return (MemoryContext) set;
}
}
/* Determine size of initial block */
firstBlockSize = MAXALIGN(sizeof(AllocSetContext)) +
ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ;
if (minContextSize != 0)
firstBlockSize = Max(firstBlockSize, minContextSize);
else
firstBlockSize = Max(firstBlockSize, initBlockSize);
/*
* Allocate the initial block. Unlike other aset.c blocks, it starts with
* the context header and its block header follows that.
*/
set = (AllocSet) malloc(firstBlockSize);
if (set == NULL)
{
if (TopMemoryContext)
MemoryContextStats(TopMemoryContext);
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory"),
errdetail("Failed while creating memory context \"%s\".",
name)));
}
/*
* Avoid writing code that can fail between here and MemoryContextCreate;
* we'd leak the header/initial block if we ereport in this stretch.
*/
// 设置相关字段信息
/* Fill in the initial block's block header */
block = (AllocBlock) (((char *) set) + MAXALIGN(sizeof(AllocSetContext)));
block->aset = set;
block->freeptr = ((char *) block) + ALLOC_BLOCKHDRSZ;
block->endptr = ((char *) set) + firstBlockSize;
block->prev = NULL;
block->next = NULL;
/* Mark unallocated space NOACCESS; leave the block header alone. */
VALGRIND_MAKE_MEM_NOACCESS(block->freeptr, block->endptr - block->freeptr);
/* Remember block as part of block list */
set->blocks = block;
/* Mark block as not to be released at reset time */
set->keeper = block;
/* Finish filling in aset-specific parts of the context header */
MemSetAligned(set->freelist, 0, sizeof(set->freelist));
set->initBlockSize = initBlockSize;
set->maxBlockSize = maxBlockSize;
set->nextBlockSize = initBlockSize;
set->freeListIndex = freeListIndex;
/*
* Compute the allocation chunk size limit for this context. It can't be
* more than ALLOC_CHUNK_LIMIT because of the fixed number of freelists.
* If maxBlockSize is small then requests exceeding the maxBlockSize, or
* even a significant fraction of it, should be treated as large chunks
* too. For the typical case of maxBlockSize a power of 2, the chunk size
* limit will be at most 1/8th maxBlockSize, so that given a stream of
* requests that are all the maximum chunk size we will waste at most
* 1/8th of the allocated space.
*
* We have to have allocChunkLimit a power of two, because the requested
* and actually-allocated sizes of any chunk must be on the same side of
* the limit, else we get confused about whether the chunk is "big".
*
* Also, allocChunkLimit must not exceed ALLOCSET_SEPARATE_THRESHOLD.
*/
StaticAssertStmt(ALLOC_CHUNK_LIMIT == ALLOCSET_SEPARATE_THRESHOLD,
"ALLOC_CHUNK_LIMIT != ALLOCSET_SEPARATE_THRESHOLD");
set->allocChunkLimit = ALLOC_CHUNK_LIMIT;
while ((Size) (set->allocChunkLimit + ALLOC_CHUNKHDRSZ) >
(Size) ((maxBlockSize - ALLOC_BLOCKHDRSZ) / ALLOC_CHUNK_FRACTION))
set->allocChunkLimit >>= 1;
/* Finally, do the type-independent part of context creation */
MemoryContextCreate((MemoryContext) set,
T_AllocSetContext,
&AllocSetMethods,
parent,
name);
((MemoryContext) set)->mem_allocated = firstBlockSize;
return (MemoryContext) set;
}
3 MemoryContextCreate
该函数是内存上下文创建的真正执行函数,其将已申请的内存块链接至父节点的firstchild位置
/*
* MemoryContextCreate
* Context-type-independent part of context creation.
*
* This is only intended to be called by context-type-specific
* context creation routines, not by the unwashed masses.
*
* The memory context creation procedure goes like this:
* 1. Context-type-specific routine makes some initial space allocation,
* including enough space for the context header. If it fails,
* it can ereport() with no damage done.
* 2. Context-type-specific routine sets up all type-specific fields of
* the header (those beyond MemoryContextData proper), as well as any
* other management fields it needs to have a fully valid context.
* Usually, failure in this step is impossible, but if it's possible
* the initial space allocation should be freed before ereport'ing.
* 3. Context-type-specific routine calls MemoryContextCreate() to fill in
* the generic header fields and link the context into the context tree.
* 4. We return to the context-type-specific routine, which finishes
* up type-specific initialization. This routine can now do things
* that might fail (like allocate more memory), so long as it's
* sure the node is left in a state that delete will handle.
*
* node: the as-yet-uninitialized common part of the context header node.
* tag: NodeTag code identifying the memory context type.
* methods: context-type-specific methods (usually statically allocated).
* parent: parent context, or NULL if this will be a top-level context.
* name: name of context (must be statically allocated).
*
* Context routines generally assume that MemoryContextCreate can't fail,
* so this can contain Assert but not elog/ereport.
*/
void
MemoryContextCreate(MemoryContext node,
NodeTag tag,
const MemoryContextMethods *methods,
MemoryContext parent,
const char *name)
{
/* Creating new memory contexts is not allowed in a critical section */
Assert(CritSectionCount == 0);
/* Initialize all standard fields of memory context header */
node->type = tag;
node->isReset = true;
node->methods = methods;
node->parent = parent;
node->firstchild = NULL;
node->mem_allocated = 0;
node->prevchild = NULL;
node->name = name;
node->ident = NULL;
node->reset_cbs = NULL;
/* OK to link node into context tree */
if (parent)
{
node->nextchild = parent->firstchild;
if (parent->firstchild != NULL)
parent->firstchild->prevchild = node;
parent->firstchild = node;
/* inherit allowInCritSection flag from parent */
node->allowInCritSection = parent->allowInCritSection;
}
else
{
node->nextchild = NULL;
node->allowInCritSection = false;
}
VALGRIND_CREATE_MEMPOOL(node, 0, false);
}
4 内存上下文中内存的分配
该操作定义在MemoryContextAlloc函数中,其本质是调用上下文函数中alloc方法来实现指定内存的分配。
/*
* MemoryContextAlloc
* Allocate space within the specified context.
*
* This could be turned into a macro, but we'd have to import
* nodes/memnodes.h into postgres.h which seems a bad idea.
*/
void *
MemoryContextAlloc(MemoryContext context, Size size)
{
void *ret;
AssertArg(MemoryContextIsValid(context));
AssertNotInCriticalSection(context);
if (!AllocSizeIsValid(size))
elog(ERROR, "invalid memory alloc request size %zu", size);
context->isReset = false;
ret = context->methods->alloc(context, size);
if (unlikely(ret == NULL))
{
MemoryContextStats(TopMemoryContext); // OOM
/*
* Here, and elsewhere in this module, we show the target context's
* "name" but not its "ident" (if any) in user-visible error messages.
* The "ident" string might contain security-sensitive data, such as
* values in SQL commands.
*/
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory"),
errdetail("Failed on request of size %zu in memory context \"%s\".",
size, context->name)));
}
VALGRIND_MEMPOOL_ALLOC(context, ret, size);
return ret;
}
5 内存上下文的重分配
该操作定义在AllocSetReset函数中,其主要功能是充分配指定内存上下文的内存
/*
* AllocSetReset
* Frees all memory which is allocated in the given set.
*
* Actually, this routine has some discretion about what to do.
* It should mark all allocated chunks freed, but it need not necessarily
* give back all the resources the set owns. Our actual implementation is
* that we give back all but the "keeper" block (which we must keep, since
* it shares a malloc chunk with the context header). In this way, we don't
* thrash malloc() when a context is repeatedly reset after small allocations,
* which is typical behavior for per-tuple contexts.
*/
static void
AllocSetReset(MemoryContext context)
{
AllocSet set = (AllocSet) context;
AllocBlock block;
Size keepersize PG_USED_FOR_ASSERTS_ONLY
= set->keeper->endptr - ((char *) set);
AssertArg(AllocSetIsValid(set));
#ifdef MEMORY_CONTEXT_CHECKING
/* Check for corruption and leaks before freeing */
AllocSetCheck(context);
#endif
/* Clear chunk freelists */
MemSetAligned(set->freelist, 0, sizeof(set->freelist));
block = set->blocks;
/* New blocks list will be just the keeper block */
set->blocks = set->keeper;
while (block != NULL)
{
AllocBlock next = block->next;
if (block == set->keeper)
{
/* Reset the block, but don't return it to malloc */
char *datastart = ((char *) block) + ALLOC_BLOCKHDRSZ;
#ifdef CLOBBER_FREED_MEMORY
wipe_mem(datastart, block->freeptr - datastart);
#else
/* wipe_mem() would have done this */
VALGRIND_MAKE_MEM_NOACCESS(datastart, block->freeptr - datastart);
#endif
block->freeptr = datastart;
block->prev = NULL;
block->next = NULL;
}
else
{
/* Normal case, release the block */
context->mem_allocated -= block->endptr - ((char *) block);
#ifdef CLOBBER_FREED_MEMORY
wipe_mem(block, block->freeptr - ((char *) block));
#endif
free(block);
}
block = next;
}
Assert(context->mem_allocated == keepersize);
/* Reset block size allocation sequence, too */
set->nextBlockSize = set->initBlockSize;
}
6 内存上下文的释放
1 AllocSetFree
释放指定内存上下中的内存片操作定义在AllocSetFree函数中,其主要功能是释放指定上下文已分配的内存片,并将其添加至相应大小的Freelist链表中;但针对大于 allocChunkLimit的内存块会直接从上下文中释放。
/*
* AllocSetFree
* Frees allocated memory; memory is removed from the set.
*/
static void
AllocSetFree(MemoryContext context, void *pointer)
{
AllocSet set = (AllocSet) context;
AllocChunk chunk = AllocPointerGetChunk(pointer);
/* Allow access to private part of chunk header. */
VALGRIND_MAKE_MEM_DEFINED(chunk, ALLOCCHUNK_PRIVATE_LEN);
if (chunk->size > set->allocChunkLimit)
{
/*
* Big chunks are certain to have been allocated as single-chunk
* blocks. Just unlink that block and return it to malloc().
*/
AllocBlock block = (AllocBlock) (((char *) chunk) - ALLOC_BLOCKHDRSZ);
/*
* Try to verify that we have a sane block pointer: it should
* reference the correct aset, and freeptr and endptr should point
* just past the chunk.
*/
if (block->aset != set ||
block->freeptr != block->endptr ||
block->freeptr != ((char *) block) +
(chunk->size + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ))
elog(ERROR, "could not find block containing chunk %p", chunk);
/* OK, remove block from aset's list and free it */
if (block->prev)
block->prev->next = block->next;
else
set->blocks = block->next;
if (block->next)
block->next->prev = block->prev;
context->mem_allocated -= block->endptr - ((char *) block);
free(block);
}
else
{
/* Normal case, put the chunk into appropriate freelist */
int fidx = AllocSetFreeIndex(chunk->size);
chunk->aset = (void *) set->freelist[fidx];
set->freelist[fidx] = chunk;
}
}
2 AllocSetDelete
释放指定内存上下文所有的内存块,该函数定义在 AllocSetDelete中
/*
* AllocSetDelete
* Frees all memory which is allocated in the given set,
* in preparation for deletion of the set.
*
* Unlike AllocSetReset, this *must* free all resources of the set.
*/
static void
AllocSetDelete(MemoryContext context)
{
AllocSet set = (AllocSet) context;
AllocBlock block = set->blocks;
Size keepersize PG_USED_FOR_ASSERTS_ONLY
= set->keeper->endptr - ((char *) set);
AssertArg(AllocSetIsValid(set));
/*
* If the context is a candidate for a freelist, put it into that freelist
* instead of destroying it.
*/
// 首先找到空闲链表,进行内存释放
if (set->freeListIndex >= 0)
{
AllocSetFreeList *freelist = &context_freelists[set->freeListIndex];
/*
* Reset the context, if it needs it, so that we aren't hanging on to
* more than the initial malloc chunk.
*/
if (!context->isReset)
MemoryContextResetOnly(context);
/*
* If the freelist is full, just discard what's already in it. See
* comments with context_freelists[].
*/
if (freelist->num_free >= MAX_FREE_CONTEXTS)
{
while (freelist->first_free != NULL)
{
AllocSetContext *oldset = freelist->first_free;
freelist->first_free = (AllocSetContext *) oldset->header.nextchild;
freelist->num_free--;
/* All that remains is to free the header/initial block */
free(oldset);
}
Assert(freelist->num_free == 0);
}
/* Now add the just-deleted context to the freelist. */
set->header.nextchild = (MemoryContext) freelist->first_free;
freelist->first_free = set;
freelist->num_free++;
return;
}
/* Free all blocks, except the keeper which is part of context header */
//后续释放所有快
while (block != NULL)
{
AllocBlock next = block->next;
if (block != set->keeper)
context->mem_allocated -= block->endptr - ((char *) block);
if (block != set->keeper)
free(block);
block = next;
}
Assert(context->mem_allocated == keepersize);
/* Finally, free the context header, including the keeper block */
free(set);
}