Memory allocation strategies
c/c_api_internal.h
// Memory allocation strategies. kTfLiteMmapRo is for read-only memory-mapped
// data (or data externally allocated). kTfLiteArenaRw is arena allocated
// data. kTfLiteDynamic is for tensors that are allocated during evaluation.
typedef enum {
kTfLiteMemNone = 0,
kTfLiteMmapRo,
kTfLiteArenaRw,
kTfLiteArenaRwPersistent,
  kTfLiteDynamic,  // parallel to the kTfLiteArenaRw* types, but not implemented in the arena
} TfLiteAllocationType;
kTfLiteDynamic
kTfLiteDynamic is for tensors that are allocated during evaluation. Quite a lot of code still uses this type, as the grep results below show; a usage sketch follows the list.
./kernels/embedding_lookup_sparse.cc:111: output->allocation_type = kTfLiteDynamic;
./kernels/kernel_util.h:76: return tensor->allocation_type == kTfLiteDynamic;
./kernels/kernel_util.h:81: if (tensor->allocation_type != kTfLiteDynamic) {
./kernels/kernel_util.h:82: tensor->allocation_type = kTfLiteDynamic;
./kernels/transpose_conv.cc:127: im2col->allocation_type = kTfLiteDynamic;
./string_util.cc:113: tensor_buffer, bytes, kTfLiteDynamic, tensor->allocation,
./delegates/flex/kernel.cc:236: // mark them as kTfLiteDynamic.
./delegates/flex/buffer_map_test.cc:42: tensor->allocation_type = kTfLiteDynamic;
./delegates/flex/buffer_map_test.cc:60: tensor->allocation_type = kTfLiteDynamic;
./arena_planner_test.cc:386: (*graph.tensors())[1].allocation_type = kTfLiteDynamic;
./c/c_api_internal.c:75: if (t->allocation_type == kTfLiteDynamic && t->data.raw) {
./c/c_api_internal.c:105: if (tensor->allocation_type != kTfLiteDynamic) {
./c/c_api_internal.h:210:// data. kTfLiteDynamic is for tensors that are allocated during evaluation.
./c/c_api_internal.h:216: kTfLiteDynamic,
./c/c_api_internal.h:291:// types other than kTfLiteDynamic will be ignored.
./interpreter.cc:67:// Returns true if at least one tensor in the given list is kTfLiteDynamic.
./interpreter.cc:73: if (tensor.allocation_type == kTfLiteDynamic) {
./interpreter.cc:843: allocation_type = kTfLiteDynamic;
./interpreter.cc:867: tensor->allocation_type == kTfLiteDynamic ||
./interpreter.cc:880: // Realloc space for kTfLiteDynamic tensors.
./interpreter.cc:887: if (tensor->allocation_type != kTfLiteDynamic) {
./interpreter.h:516: // type kTfLiteDynamic it will also be allocated new memory.
./optional_debug_tools.cc:67: case kTfLiteDynamic:
./optional_debug_tools.cc:68: return "kTfLiteDynamic";
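A sketch of the typical usage pattern, assuming the usual Prepare/Eval kernel structure (the tensor lookups and the size are made up for illustration; include paths are written relative to the TF Lite source root, matching the grep output above): the kernel marks its output kTfLiteDynamic in Prepare() and resizes it during Eval(), at which point ResizeTensor() (re)allocates the tensor's own heap buffer instead of placing it in the arena.
#include "c/c_api_internal.h"
#include "kernels/kernel_util.h"

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TfLiteTensor* output = GetOutput(context, node, 0);
  // The output shape depends on runtime data, so defer allocation to Eval().
  SetTensorToDynamic(output);  // sets allocation_type = kTfLiteDynamic (kernel_util.h:81-82 above)
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  TfLiteTensor* output = GetOutput(context, node, 0);
  TfLiteIntArray* output_size = TfLiteIntArrayCreate(1);
  output_size->data[0] = 16;  // hypothetical size computed at runtime
  // For a kTfLiteDynamic tensor this realloc()s the tensor's own buffer
  // (see "Realloc space for kTfLiteDynamic tensors", interpreter.cc:880 above).
  TF_LITE_ENSURE_STATUS(context->ResizeTensor(context, output, output_size));
  // ... fill output->data.raw ...
  return kTfLiteOk;
}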
kTfLiteArenaRw and kTfLiteArenaRwPersistent
The two are different allocation strategies, but the allocation mechanism is the same: both are served from an arena. Memory allocated as kTfLiteArenaRwPersistent is never shared with other tensors, so its contents stay put and cannot be overwritten by anything else.
Class layout: ArenaPlanner derives from MemoryPlanner, and ArenaPlanner contains SimpleMemoryArena objects; SimpleMemoryArena performs the actual, concrete allocation and deallocation (sketched below).
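Roughly, the relationship looks like this (a sketch abridged from memory_planner.h and arena_planner.h; exact signatures may differ slightly between versions):
// memory_planner.h: the abstract interface the interpreter drives.
class MemoryPlanner {
 public:
  virtual ~MemoryPlanner() {}
  virtual TfLiteStatus PlanAllocations() = 0;
  virtual TfLiteStatus ExecuteAllocations(int first_node, int last_node) = 0;
  virtual TfLiteStatus ResetAllocations() = 0;
};

// arena_planner.h: the concrete planner. It owns two SimpleMemoryArena
// instances and delegates the byte-level work to them.
class ArenaPlanner : public MemoryPlanner {
  // ... (the full member list is shown further below)
  SimpleMemoryArena arena_;             // kTfLiteArenaRw tensors
  SimpleMemoryArena persistent_arena_;  // kTfLiteArenaRwPersistent tensors
};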
arena_planner.cc
// A memory planner that makes all the allocations using arenas.
//
// Before a model is executed by the interpreter, this class
1] determines when each tensor needs to be allocated and deallocated,
2] and preallocates all the necessary memory (the PlanAllocations phase).
3] It then assigns portions of this memory buffer to each tensor (the ExecuteAllocations phase).
4] Tensors may share some of the buffer if a tensor B is to be allocated after another tensor A has been deallocated.
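A simplified sketch of how the interpreter drives these two phases (the real call site is roughly Interpreter::PrepareOpsAndTensors in interpreter.cc; the constructor arguments shown here are an assumption based on the member list below and may differ between versions):
std::unique_ptr<MemoryPlanner> planner(new ArenaPlanner(
    &context_, std::move(graph_info),  // graph_info: a GraphInfo view of the model
    /*preserve_inputs=*/true, /*preserve_intermediates=*/false,
    /*tensor_alignment=*/64));
// Phase 1: walk the graph once and build the chronological alloc/dealloc queue.
TF_LITE_ENSURE_STATUS(planner->PlanAllocations());
// Phase 2: assign arena offsets for the nodes about to run (first_node..last_node),
// Commit() the arenas, and resolve tensor data pointers.
TF_LITE_ENSURE_STATUS(planner->ExecuteAllocations(first_node, last_node));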
AllocationInfo and ArenaAlloc
These two are easy to mix up. ArenaAlloc is defined in the lowest-level file and describes a single memory allocation (an offset and a size inside the arena);
AllocationInfo records which node requests the allocation or deallocation of which tensor.
struct AllocationInfo {
  // The node index requesting this allocation (the node asks for memory for the tensors it uses).
int node;
// The tensor index to be allocated or deallocated.
int tensor;
  // Whether to allocate or deallocate.
enum Type { ALLOC, DEALLOC } type;
};
// This little structure holds the offset and the size for a dynamic memory
// allocation in the memory arena. When the arena is committed and the
// underlying buffer is set, the alloc can be resolved into an actual memory
// pointer.
struct ArenaAlloc {
ArenaAlloc() : offset(0), size(0) {}
size_t offset;
size_t size;
inline bool operator<(const ArenaAlloc& other) const {
return offset < other.offset;
}
};
ArenaPlanner member variables
Through its member variables arena_ and persistent_arena_, the planner calls into SimpleMemoryArena (see the ResolveTensorAllocation sketch after the class below).
class ArenaPlanner : public MemoryPlanner {  // member variables only; methods omitted
TfLiteContext* context_;
std::unique_ptr<GraphInfo> graph_info_;
// Stores allocation data for all tensors.
std::vector<ArenaAlloc> allocs_;
// A chronological list of instructions to allocate and deallocate tensors,
// reflecting the way they are used in the graph.
std::vector<AllocationInfo> alloc_queue_;
// Raw memory buffer that is allocated for all temporary and graph outputs
// that are declared kTfLiteArenaRw.
SimpleMemoryArena arena_;
// Raw memory buffer that is allocated for persistent tensors that are
// declared as kTfLiteArenaRwPersistent.
SimpleMemoryArena persistent_arena_;
// Ensure that the memory self-allocated for inputs is never reused by the
// allocator. This allows for example, multiple runs without getting
// unpredictable results.
bool preserve_inputs_;
// If true, then no overlapping of memory areas is done, meaning intermediate
// results can be queried after running (modulo running delegates).
bool preserve_intermediates_;
// Number of bytes that tensor buffers should be aligned to.
int tensor_alignment_;
};
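For example, ResolveTensorAllocation() (paraphrased from arena_planner.cc; details may differ slightly by version) selects the arena by allocation_type and asks it to turn the recorded offset into an actual pointer:
TfLiteStatus ArenaPlanner::ResolveTensorAllocation(int tensor_index) {
  TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
  if (tensor.allocation_type == kTfLiteArenaRw) {
    // Zero-sized tensors are left with data.raw == nullptr.
    if (allocs_[tensor_index].size != 0) {
      TF_LITE_ENSURE_STATUS(arena_.ResolveAlloc(context_, allocs_[tensor_index],
                                                &tensor.data.raw));
    }
  }
  if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
    TF_LITE_ENSURE_STATUS(persistent_arena_.ResolveAlloc(
        context_, allocs_[tensor_index], &tensor.data.raw));
  }
  return kTfLiteOk;
}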
simple_memory_arena.h
ArenaPlanner is ultimately implemented on top of SimpleMemoryArena.
// This small class is responsible for allocating, deallocating and reusing
// dynamic memory from a common underlying buffer. The arena can be used in
// scenarios when the pattern of memory allocations and deallocations is
// repetitive, e.g. running NN inference in multiple iterations. Note that
// zero-sized allocations are explicitly allowed, and will resolve to null.
class SimpleMemoryArena {
  // member variables (the public interface is sketched after this class)
private:
bool committed_;
size_t arena_alignment_;
size_t high_water_mark_;
std::unique_ptr<char[]> underlying_buffer_;
size_t underlying_buffer_size_;
char* underlying_buffer_aligned_ptr_;
// TODO(maciekc): add list iterator to the ArenaAlloc to lookup quickly.
std::list<ArenaAlloc> allocs_;
};
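For reference, the public interface used by the planner and by the tests below looks roughly like this (abridged from simple_memory_arena.h):
class SimpleMemoryArena {
 public:
  explicit SimpleMemoryArena(size_t arena_alignment);

  // Bookkeeping only: record / remove an entry in allocs_.
  TfLiteStatus Allocate(TfLiteContext* context, size_t alignment, size_t size,
                        ArenaAlloc* new_alloc);
  TfLiteStatus Deallocate(TfLiteContext* context, const ArenaAlloc& alloc);

  size_t RequiredBufferSize();  // high-water mark plus alignment padding

  // Actually allocates (or grows) the underlying buffer with new char[].
  TfLiteStatus Commit(TfLiteContext* context);

  // Translates an ArenaAlloc into a pointer inside the committed buffer.
  TfLiteStatus ResolveAlloc(TfLiteContext* context, const ArenaAlloc& alloc,
                            char** output_ptr);

  TfLiteStatus Clear();
  // ...
};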
Allocate/Deallocate
Allocate() and Deallocate() never touch real memory (nothing is actually allocated or freed); they only update the bookkeeping list std::list<ArenaAlloc> allocs_.
In arena.Allocate(&context, 32, 2047, &allocs[0]) the arguments are the context, the required alignment (32 bytes), the requested size (2047 bytes), and the ArenaAlloc that receives the result. Each allocation starts on an aligned offset, so a 2047-byte block effectively occupies 2048 bytes before the next one can begin.
The result is written into the ArenaAlloc as an (offset, size) pair.
TEST(SimpleMemoryArenaTest, BasicArenaOperations) {
TfLiteContext context;
SimpleMemoryArena arena(64); //arena_alignment:64
ArenaAlloc allocs[6];
arena.Allocate(&context, 32, 2047, &allocs[0]);
arena.Allocate(&context, 32, 2047, &allocs[1]);
arena.Allocate(&context, 32, 2047, &allocs[2]);
arena.Deallocate(&context, allocs[0]); // deallocated, so this range can now be reused
arena.Allocate(&context, 32, 1023, &allocs[3]);
arena.Allocate(&context, 32, 2047, &allocs[4]);
arena.Deallocate(&context, allocs[1]);
arena.Allocate(&context, 32, 1023, &allocs[5]);
EXPECT_EQ(allocs[0].offset, 0);
EXPECT_EQ(allocs[1].offset, 2048);
EXPECT_EQ(allocs[2].offset, 4096);
EXPECT_EQ(allocs[3].offset, 0);
EXPECT_EQ(allocs[4].offset, 6144);
EXPECT_EQ(allocs[5].offset, 1024);
}
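The offsets asserted above fall out of a first-fit search over the offset-sorted alloc list. The standalone toy below is not TFLite code, just a simplified re-implementation of the same idea (the real Allocate() in simple_memory_arena.cc does more careful alignment and high-water-mark bookkeeping); it reproduces exactly the offsets from the test.
#include <cstddef>
#include <cstdio>
#include <list>

using std::size_t;

struct Alloc { size_t offset, size; };

size_t AlignTo(size_t alignment, size_t value) {
  return ((value + alignment - 1) / alignment) * alignment;
}

// First-fit: walk the offset-sorted list, take the first gap that is large
// enough, otherwise place the block past the last allocation.
size_t Allocate(std::list<Alloc>* allocs, size_t alignment, size_t size) {
  size_t current = 0;
  std::list<Alloc>::iterator it = allocs->begin();
  for (; it != allocs->end(); ++it) {
    if (AlignTo(alignment, current) + size <= it->offset) break;  // gap found
    current = it->offset + it->size;
  }
  size_t offset = AlignTo(alignment, current);
  Alloc a = {offset, size};
  allocs->insert(it, a);  // keep the list sorted by offset
  return offset;
}

void Deallocate(std::list<Alloc>* allocs, size_t offset) {
  allocs->remove_if([offset](const Alloc& a) { return a.offset == offset; });
}

int main() {
  std::list<Alloc> allocs;
  size_t a0 = Allocate(&allocs, 32, 2047);  // 0
  size_t a1 = Allocate(&allocs, 32, 2047);  // 2048
  size_t a2 = Allocate(&allocs, 32, 2047);  // 4096
  Deallocate(&allocs, a0);
  size_t a3 = Allocate(&allocs, 32, 1023);  // 0     (reuses a0's hole)
  size_t a4 = Allocate(&allocs, 32, 2047);  // 6144  (no hole is large enough)
  Deallocate(&allocs, a1);
  size_t a5 = Allocate(&allocs, 32, 1023);  // 1024  (fits after a3, before a2)
  std::printf("%zu %zu %zu %zu %zu %zu\n", a0, a1, a2, a3, a4, a5);
  return 0;
}
Note how a3 reuses the hole left by a0, and a5 lands in the remaining space between a3 and a2.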
commit
The memory is actually allocated only in Commit(); the underlying call is new char[].
TEST(SimpleMemoryArenaTest, TestAfterClear) {
TfLiteContext context;
SimpleMemoryArena arena(64);
ArenaAlloc allocs[9];
arena.Allocate(&context, 32, 2047, &allocs[0]);
arena.Allocate(&context, 32, 2047, &allocs[1]);
arena.Allocate(&context, 32, 2047, &allocs[2]);
arena.Commit(&context);
EXPECT_EQ(allocs[0].offset, 0);
EXPECT_EQ(allocs[1].offset, 2048);
EXPECT_EQ(allocs[2].offset, 4096);
}
Where the memory actually gets allocated (abridged; the elided part copies any live contents into the new buffer and updates the member pointers):
TfLiteStatus SimpleMemoryArena::Commit(TfLiteContext* context) {
  size_t required_size = RequiredBufferSize();  // derived from high_water_mark_
  if (required_size > underlying_buffer_size_) {
    char* new_alloc = new char[required_size];  // the actual allocation
    char* new_underlying_buffer_aligned_ptr = reinterpret_cast<char*>(
        AlignTo(arena_alignment_, reinterpret_cast<intptr_t>(new_alloc)));
    // ... copy existing contents if needed, then adopt the new buffer
    //     (underlying_buffer_.reset(new_alloc), update size and aligned ptr) ...
  }
  committed_ = true;
  return underlying_buffer_ != nullptr ? kTfLiteOk : kTfLiteError;
}
ResolveAlloc
To obtain the pointer for an allocation, resolve it against the committed buffer:
arena.ResolveAlloc(&context, alloc, &resolved_ptr);
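A minimal usage pattern, following simple_memory_arena_test.cc: the pointer is only meaningful after Commit(), and (per the class comment above) a zero-sized allocation resolves to nullptr.
char* resolved_ptr = nullptr;
ASSERT_EQ(arena.Commit(&context), kTfLiteOk);
ASSERT_EQ(arena.ResolveAlloc(&context, allocs[0], &resolved_ptr), kTfLiteOk);
// resolved_ptr now points into the arena's aligned underlying buffer,
// at offset allocs[0].offset.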
What does /*preserve_inputs=*/true mean?
If preserve_inputs_ is true, the memory holding the graph inputs is kept reserved so it can never be overwritten during the run. Note that this refers to the graph's inputs, not to the inputs of an individual node.
TEST_F(ArenaPlannerTest, SimpleGraphInputsPreserved) {
TestGraph graph({0, 1},
{
/* in, out, tmp */
{{0, 1}, {2}, {}}, // First op
{{2, 0}, {4, 5}, {}}, // Second op
{{4, 5}, {3}, {}} // Third op
},
{3});
SetGraph(&graph, /*preserve_inputs=*/true);
Execute(0, 10);
// Alloc(+) and dealloc(-) order: +0 +1 +2 +4 +5 -2 +3 -4 -5  (tensors 0 and 1 are never deallocated)
EXPECT_EQ(GetOffset(0), 0);
EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
EXPECT_EQ(GetOffset(4), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
// Because we are keeping the inputs alive until the end (due to
// preserve_inputs=true), the output tensor will not be able to use that
// space. It will end up using the same area as tensor #2.
EXPECT_EQ(GetOffset(3), GetOffsetAfter(1));
}
OptionalTensor
An optional tensor has index -1 (kOptionalTensor); no memory is allocated for it and it does not affect the alloc/dealloc schedule.
#define kOptionalTensor (-1)
// Queue all graph inputs for allocation. If preserve_inputs_ is true, make
// sure they never be overwritten.
for (int tensor_index : graph_info_->inputs()) {
if (tensor_index != kOptionalTensor) {
TF_LITE_ENSURE_STATUS(allocate(0, tensor_index));
}
}
TEST_F(ArenaPlannerTest, SimpleGraphWithOptionals) {
TestGraph graph({0, -1, 1},
{
/* in, out, tmp */
{{0, 1}, {2}, {}}, // First op
{{2, 0}, {4, 5}, {}}, // Second op
{{4, -1, 5}, {3}, {}} // Third op, with optional
},
{3});
SetGraph(&graph);
Execute(0, 10);
// Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +4 +5 -2 -0 +3 -4 -5
EXPECT_EQ(GetOffset(0), 0);
EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
EXPECT_EQ(GetOffset(4), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
EXPECT_EQ(GetOffset(3), 0);
}
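The "+N / -N" orderings in these test comments can be derived mechanically: allocate graph inputs up front, allocate each node's outputs when the node is processed, and deallocate a tensor right after its last consumer, skipping optional (-1) entries, graph outputs, and (when preserve_inputs is true) graph inputs. The standalone toy below (not TFLite code; it ignores temporaries) reproduces the orders quoted in the two tests above.
#include <cstdio>
#include <map>
#include <vector>

struct Node { std::vector<int> in, out; };

// Print the planner-style "+alloc / -dealloc" sequence for a graph without
// temporaries: graph inputs first, then per node its outputs, then every
// input tensor whose last consumer is this node (subject to the skip rules).
void PrintOrder(const std::vector<int>& graph_inputs,
                const std::vector<Node>& nodes,
                const std::vector<int>& graph_outputs,
                bool preserve_inputs) {
  std::map<int, int> last_use;  // tensor -> last node that reads it
  for (int n = 0; n < static_cast<int>(nodes.size()); ++n)
    for (int t : nodes[n].in)
      if (t != -1) last_use[t] = n;

  auto contains = [](const std::vector<int>& v, int t) {
    for (int x : v) if (x == t) return true;
    return false;
  };

  for (int t : graph_inputs)
    if (t != -1) std::printf("+%d ", t);  // optional inputs (-1) get nothing
  for (int n = 0; n < static_cast<int>(nodes.size()); ++n) {
    for (int t : nodes[n].out) std::printf("+%d ", t);
    for (int t : nodes[n].in) {
      if (t == -1 || last_use[t] != n) continue;     // optional / still needed
      if (contains(graph_outputs, t)) continue;      // graph outputs are kept
      if (preserve_inputs && contains(graph_inputs, t)) continue;
      std::printf("-%d ", t);
    }
  }
  std::printf("\n");
}

int main() {
  // SimpleGraphInputsPreserved: prints +0 +1 +2 +4 +5 -2 +3 -4 -5
  PrintOrder({0, 1}, {{{0, 1}, {2}}, {{2, 0}, {4, 5}}, {{4, 5}, {3}}}, {3}, true);
  // SimpleGraphWithOptionals: prints +0 +1 +2 -1 +4 +5 -2 -0 +3 -4 -5
  PrintOrder({0, -1, 1}, {{{0, 1}, {2}}, {{2, 0}, {4, 5}}, {{4, -1, 5}, {3}}},
             {3}, false);
  return 0;
}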
Variable tensors have to be kTfLiteArenaRwPersistent
TEST_F(ArenaPlannerTest, SimpleGraphWithPersistentTensor) {
TestGraph graph({0, -1, 1},
{
/* in, out, tmp */
{{0, 1}, {2}, {}}, // First op
{{2, 0}, {4}, {5}}, // Second op, with persistent
{{4, -1}, {3}, {}} // Third op, with optional
},
{3});
// Make #1 persistent so it goes into its own arena.
(*graph.tensors())[1].allocation_type = kTfLiteArenaRwPersistent;
// The only use case for kTfLiteArenaRwPersistent is variable tensor now.
graph.SetVariables({1});
SetGraph(&graph);
Execute(0, 10);
// Make sure #0 and #1 were given different memory locations (because they
// will both have offset=0, in different arenas.)
EXPECT_NE((*graph.tensors())[0].data.raw, (*graph.tensors())[1].data.raw);
// Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4
EXPECT_EQ(GetOffset(0), 0);
EXPECT_EQ(GetOffset(1), 0);
EXPECT_EQ(GetOffset(2), GetOffsetAfter(0));
EXPECT_EQ(GetOffset(5), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
EXPECT_EQ(GetOffset(3), 0);
}
kTfLiteArenaRwPersistent is where variable tensors are stored; at the moment variable tensors are its only use case (note the graph.SetVariables({1}) call in the test above).
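The requirement in the heading above comes from the interpreter: resetting variable tensors insists on the persistent allocation type. Paraphrased below from Interpreter::ResetVariableTensorsToZero in interpreter.cc (exact details may differ by version):
TfLiteStatus Interpreter::ResetVariableTensorsToZero() {
  for (auto& tensor : tensors_) {
    if (!tensor.is_variable) continue;
    // Variable tensors have to be `kTfLiteArenaRwPersistent`, and their data
    // has to be allocated already; the persistent arena is not shared, so the
    // contents survive across invocations.
    TF_LITE_ENSURE_EQ(&context_, tensor.allocation_type,
                      kTfLiteArenaRwPersistent);
    TF_LITE_ENSURE(&context_, tensor.data.raw != nullptr);
    memset(tensor.data.raw, 0, tensor.bytes);
  }
  return kTfLiteOk;
}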