DPDK Memory Management Analysis
1.1 Overview
DPDK builds its memory management on hugepages and NUMA awareness. Hugepages pin the memory in place (it cannot be swapped out) and also markedly improve TLB hit rates, since each TLB entry covers a much larger address range.
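To see the hugepage mechanics in isolation (outside DPDK), here is a minimal sketch that maps one anonymous hugepage through Linux's MAP_HUGETLB flag; the 2 MB size is an assumption for a typical x86 setup, and the mapping fails unless hugepages have been reserved beforehand:
#define _GNU_SOURCE
#include <sys/mman.h>
#include <stdio.h>

int main(void)
{
	size_t sz = 2 * 1024 * 1024; /* assumed 2 MB hugepage size */
	/* anonymous hugepage mapping; fails if no hugepages are reserved */
	void *p = mmap(NULL, sz, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	/* the backing hugepage is pinned and covered by a single TLB entry */
	munmap(p, sz);
	return 0;
}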
The memory-management initialization flow, simplified, is as follows (other initialization steps are covered in other articles):
int
rte_eal_init(int argc, char **argv)
{
// initialize rte_config and mmap rte_config->mem_config
if (rte_config_init() < 0) {
rte_eal_init_alert("Cannot init config");
return -1;
}
if (internal_conf->no_hugetlbfs == 0) {
/* rte_config isn't initialized yet */
ret = internal_conf->process_type == RTE_PROC_PRIMARY ?
// collect the system's available hugepages into internal_config->hugepage_info[]
eal_hugepage_info_init() :
eal_hugepage_info_read();
if (ret < 0) {
rte_eal_init_alert("Cannot get hugepage information.");
rte_errno = EACCES;
__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
return -1;
}
}
// memzone initialization
if (rte_eal_memzone_init() < 0) {
rte_eal_init_alert("Cannot init memzone");
rte_errno = ENODEV;
return -1;
}
// map hugepage memory and record it in rte_config->mem_config->memsegs
if (rte_eal_memory_init() < 0) {
rte_eal_init_alert("Cannot init memory");
rte_errno = ENOMEM;
return -1;
}
// insert rte_config->mem_config->memsegs into rte_config->mem_config->malloc_heaps[]
if (rte_eal_malloc_heap_init() < 0) {
rte_eal_init_alert("Cannot init malloc heap");
rte_errno = ENODEV;
return -1;
}
}
1.2 Analysis of rte_config_init
Primary process: maps /var/run/config, copies the contents of config->mem_config into it, and then records the mapping address in config->mem_config->mem_cfg_addr so that secondary processes can obtain the same virtual address.
Secondary process: the first attach reads the primary's mem_cfg_addr; the second attach maps config->mem_config at that same address.
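The two-step attach exists for pointer stability: every pointer stored inside the shared rte_mem_config is only valid if the secondary maps it at the very address the primary used. A condensed sketch of what rte_eal_config_attach() plus rte_eal_config_reattach() amount to (error handling omitted; assumes the same eal_runtime_config_path() helper used below):
/* first mapping: anywhere, only to read mem_cfg_addr */
int fd = open(eal_runtime_config_path(), O_RDWR);
struct rte_mem_config *tmp = mmap(NULL, sizeof(*tmp),
		PROT_READ, MAP_SHARED, fd, 0);
void *shared_addr = (void *)(uintptr_t)tmp->mem_cfg_addr;
munmap(tmp, sizeof(*tmp));

/* second mapping: ask for the primary's address and verify we got it,
 * so pointers inside mem_config stay valid in this process too */
struct rte_mem_config *mcfg = mmap(shared_addr, sizeof(*mcfg),
		PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (mcfg == MAP_FAILED || (void *)mcfg != shared_addr) {
	/* the address is already taken in this process: cannot attach */
	return -1;
}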
static int rte_config_init(void)
{
struct rte_config *config = rte_eal_get_configuration();
const struct internal_config *internal_conf =
eal_get_internal_configuration();
// process_type was determined in eal_parse_args
config->process_type = internal_conf->process_type;
switch (config->process_type) {
case RTE_PROC_PRIMARY: // primary process
if (rte_eal_config_create() < 0) // primary: create the file, mmap it, lock it, store some fields
return -1;
eal_mcfg_update_from_internal(); // copy some fields from internal_conf into rte_config->mem_config
break;
case RTE_PROC_SECONDARY: // secondary process
if (rte_eal_config_attach() < 0) // attach to the primary's config
return -1;
eal_mcfg_wait_complete(); // wait until rte_config->mem_config->magic becomes RTE_MAGIC
if (eal_mcfg_check_version() < 0) { // version check
RTE_LOG(ERR, EAL, "Primary and secondary process DPDK version mismatch\n");
return -1;
}
if (rte_eal_config_reattach() < 0) // re-attach at the address recorded by the primary
return -1;
if (!__rte_mp_enable()) { // enable the multiprocess channel
RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n");
return -1;
}
eal_mcfg_update_internal(); // copy some fields from rte_config->mem_config back into internal_conf
break;
case RTE_PROC_AUTO:
case RTE_PROC_INVALID:
RTE_LOG(ERR, EAL, "Invalid process type %d\n",
config->process_type);
return -1;
}
return 0;
}
/* create memory configuration in shared/mmap memory. Take out
* a write lock on the memsegs, so we can auto-detect primary/secondary.
* This means we never close the file while running (auto-close on exit).
* We also don't lock the whole file, so that in future we can use read-locks
* on other parts, e.g. memzones, to detect if there are running secondary
* processes. */
static int
rte_eal_config_create(void)
{
struct rte_config *config = rte_eal_get_configuration();
size_t page_sz = sysconf(_SC_PAGE_SIZE);
size_t cfg_len = sizeof(*config->mem_config);
size_t cfg_len_aligned = RTE_ALIGN(cfg_len, page_sz);
void *rte_mem_cfg_addr, *mapped_mem_cfg_addr;
int retval;
const struct internal_config *internal_conf =
eal_get_internal_configuration();
// runtime config path, e.g. /var/run/config
const char *pathname = eal_runtime_config_path();
if (internal_conf->no_shconf)
return 0;
/* map the config before hugepage address so that we don't waste a page */
if (internal_conf->base_virtaddr != 0)
rte_mem_cfg_addr = (void *)
RTE_ALIGN_FLOOR(internal_conf->base_virtaddr -
sizeof(struct rte_mem_config), page_sz);
else
rte_mem_cfg_addr = NULL;
// create /var/run/config
if (mem_cfg_fd < 0){
mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0600);
if (mem_cfg_fd < 0) {
RTE_LOG(ERR, EAL, "Cannot open '%s' for rte_mem_config\n",
pathname);
return -1;
}
}
// resize the file to sizeof(*config->mem_config)
retval = ftruncate(mem_cfg_fd, cfg_len);
if (retval < 0){
close(mem_cfg_fd);
mem_cfg_fd = -1;
RTE_LOG(ERR, EAL, "Cannot resize '%s' for rte_mem_config\n",
pathname);
return -1;
}
// take a write lock on the file
retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
if (retval < 0){
close(mem_cfg_fd);
mem_cfg_fd = -1;
RTE_LOG(ERR, EAL, "Cannot create lock on '%s'. Is another primary "
"process running?\n", pathname);
return -1;
}
/* reserve space for config */
rte_mem_cfg_addr = eal_get_virtual_area(rte_mem_cfg_addr,
&cfg_len_aligned, page_sz, 0, 0);
if (rte_mem_cfg_addr == NULL) {
RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n");
close(mem_cfg_fd);
mem_cfg_fd = -1;
return -1;
}
/* remap the actual file into the space we've just reserved */
mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr,
cfg_len_aligned, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0);
if (mapped_mem_cfg_addr == MAP_FAILED) {
munmap(rte_mem_cfg_addr, cfg_len);
close(mem_cfg_fd);
mem_cfg_fd = -1;
RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n");
return -1;
}
// copy config->mem_config into the mapped area rte_mem_cfg_addr
memcpy(rte_mem_cfg_addr, config->mem_config, sizeof(struct rte_mem_config));
// then point config->mem_config at the mapped area
config->mem_config = rte_mem_cfg_addr;
/* store address of the config in the config itself so that secondary
* processes could later map the config into this exact location
*/
config->mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr;
config->mem_config->dma_maskbits = 0;
return 0;
}
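A note on the wr_lock passed to fcntl() above: it is defined elsewhere in eal.c, and roughly it is a write lock covering only the memsegs field of the shared config, which is what leaves room for the read-locks on other fields mentioned in the comment:
/* write-lock only the memsegs portion of the shared config file */
static struct flock wr_lock = {
	.l_type = F_WRLCK,
	.l_whence = SEEK_SET,
	.l_start = offsetof(struct rte_mem_config, memsegs),
	.l_len = sizeof(((struct rte_mem_config *)0)->memsegs),
};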
1.3 Analysis of eal_hugepage_info_init
Collects all hugepages listed under /sys/kernel/mm/hugepages.
/*
* when we initialize the hugepage info, everything goes
* to socket 0 by default. it will later get sorted by memory
* initialization procedure.
*/
int
eal_hugepage_info_init(void)
{
struct hugepage_info *hpi, *tmp_hpi;
unsigned int i;
struct internal_config *internal_conf =
eal_get_internal_configuration();
// collect all hugepages
if (hugepage_info_init() < 0)
return -1;
/* for no shared files mode, we're done */
if (internal_conf->no_shconf)
return 0;
hpi = &internal_conf->hugepage_info[0];
// mmap a shared memory area
tmp_hpi = create_shared_memory(eal_hugepage_info_path(),
sizeof(internal_conf->hugepage_info));
if (tmp_hpi == NULL) {
RTE_LOG(ERR, EAL, "Failed to create shared memory!\n");
return -1;
}
// copy hpi to the shared memory
memcpy(tmp_hpi, hpi, sizeof(internal_conf->hugepage_info));
/* we've copied file descriptors along with everything else, but they
* will be invalid in secondary process, so overwrite them
*/
for (i = 0; i < RTE_DIM(internal_conf->hugepage_info); i++) {
struct hugepage_info *tmp = &tmp_hpi[i];
tmp->lock_descriptor = -1;
}
if (munmap(tmp_hpi, sizeof(internal_conf->hugepage_info)) < 0) {
RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n");
return -1;
}
return 0;
}
static int hugepage_info_init(void)
{
const char dirent_start_text[] = "hugepages-";
const size_t dirent_start_len = sizeof(dirent_start_text) - 1;
unsigned int i, num_sizes = 0;
uint64_t reusable_bytes;
unsigned int reusable_pages;
DIR *dir;
struct dirent *dirent;
struct internal_config *internal_conf =
eal_get_internal_configuration();
// open the /sys/kernel/mm/hugepages directory
dir = opendir(sys_dir_path);
if (dir == NULL) {
RTE_LOG(ERR, EAL,
"Cannot open directory %s to read system hugepage info\n",
sys_dir_path);
return -1;
}
// iterate over the directory entries
for (dirent = readdir(dir); dirent != NULL; dirent = readdir(dir)) {
struct hugepage_info *hpi;
// skip entries whose name does not start with "hugepages-"
if (strncmp(dirent->d_name, dirent_start_text,
dirent_start_len) != 0)
continue;
// stop once MAX_HUGEPAGE_SIZES (3) page sizes have been collected
if (num_sizes >= MAX_HUGEPAGE_SIZES)
break;
hpi = &internal_conf->hugepage_info[num_sizes];
// parse the page size into internal_conf->hugepage_info[num_sizes]
hpi->hugepage_sz =
rte_str_to_size(&dirent->d_name[dirent_start_len]);
/* first, check if we have a mountpoint */
// look in /proc/mounts for a hugetlbfs mountpoint of this size; if none, skip this size
if (get_hugepage_dir(hpi->hugepage_sz,
hpi->hugedir, sizeof(hpi->hugedir)) < 0) {
uint32_t num_pages;
// no mount found: log a notice
num_pages = get_num_hugepages(dirent->d_name,
hpi->hugepage_sz, 0);
if (num_pages > 0)
RTE_LOG(NOTICE, EAL,
"%" PRIu32 " hugepages of size "
"%" PRIu64 " reserved, but no mounted "
"hugetlbfs found for that size\n",
num_pages, hpi->hugepage_sz);
/* if we have kernel support for reserving hugepages
* through mmap, and we're in in-memory mode, treat this
* page size as valid. we cannot be in legacy mode at
* this point because we've checked this earlier in the
* init process.
*/
#ifdef MAP_HUGE_SHIFT
if (internal_conf->in_memory) {
RTE_LOG(DEBUG, EAL, "In-memory mode enabled, "
"hugepages of size %" PRIu64 " bytes "
"will be allocated anonymously\n",
hpi->hugepage_sz);
calc_num_pages(hpi, dirent, 0);
num_sizes++;
}
#endif
continue;
}
/* try to obtain a writelock */
hpi->lock_descriptor = open(hpi->hugedir, O_RDONLY);
/* if blocking lock failed */
if (flock(hpi->lock_descriptor, LOCK_EX) == -1) {
RTE_LOG(CRIT, EAL,
"Failed to lock hugepage directory!\n");
break;
}
/*
* Check for existing hugepage files and either remove them
* or count how many of them can be reused.
*/
reusable_pages = 0;
if (!internal_conf->hugepage_file.unlink_existing) {
reusable_bytes = 0;
if (inspect_hugedir(hpi->hugedir,
&reusable_bytes) < 0)
break;
RTE_ASSERT(reusable_bytes % hpi->hugepage_sz == 0);
reusable_pages = reusable_bytes / hpi->hugepage_sz;
} else if (clear_hugedir(hpi->hugedir) < 0) {
break;
}
// calc_num_pages tries to attribute pages to their NUMA sockets; if that
// fails, all pages are counted under hpi->num_pages[0]
calc_num_pages(hpi, dirent, reusable_pages);
num_sizes++;
}
closedir(dir);
/* something went wrong, and we broke from the for loop above */
if (dirent != NULL)
return -1;
internal_conf->num_hugepage_sizes = num_sizes;
/* sort the page directory entries by size, largest to smallest */
qsort(&internal_conf->hugepage_info[0], num_sizes,
sizeof(internal_conf->hugepage_info[0]), compare_hpi);
/* now we have all info, check we have at least one valid size */
// return 0 as soon as one size has usable pages
for (i = 0; i < num_sizes; i++) {
/* pages may no longer all be on socket 0, so check all */
unsigned int j, num_pages = 0;
struct hugepage_info *hpi = &internal_conf->hugepage_info[i];
for (j = 0; j < RTE_MAX_NUMA_NODES; j++)
num_pages += hpi->num_pages[j];
if (num_pages > 0)
return 0;
}
/* no valid hugepage mounts available, return error */
return -1;
}
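Independent of DPDK, the sysfs layout that hugepage_info_init walks can be inspected with a few lines of plain C; a standalone sketch (directory names such as hugepages-2048kB encode the page size):
#include <dirent.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	DIR *dir = opendir("/sys/kernel/mm/hugepages");
	struct dirent *de;

	if (dir == NULL)
		return 1;
	while ((de = readdir(dir)) != NULL) {
		/* entries look like "hugepages-2048kB" or "hugepages-1048576kB" */
		if (strncmp(de->d_name, "hugepages-", 10) == 0)
			printf("hugepage size directory: %s\n", de->d_name);
	}
	closedir(dir);
	return 0;
}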
1.4 Analysis of rte_eal_memzone_init
Initializes rte_config->mem_config->memzones and allocates its backing memory.
int rte_eal_memzone_init(void)
{
struct rte_mem_config *mcfg;
int ret = 0;
/* get pointer to global configuration */
mcfg = rte_eal_get_configuration()->mem_config;
rte_rwlock_write_lock(&mcfg->mlock);
// primary calls rte_fbarray_init; secondary attaches
if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
rte_fbarray_init(&mcfg->memzones, "memzone",
RTE_MAX_MEMZONE, sizeof(struct rte_memzone))) {
RTE_LOG(ERR, EAL, "Cannot allocate memzone list\n");
ret = -1;
} else if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
rte_fbarray_attach(&mcfg->memzones)) {
RTE_LOG(ERR, EAL, "Cannot attach to memzone list\n");
ret = -1;
}
rte_rwlock_write_unlock(&mcfg->mlock);
return ret;
}
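With the memzones fbarray in place, named zones can later be reserved through the public API; a minimal usage sketch (the name and size here are arbitrary):
#include <rte_memzone.h>

/* reserve a named 1 MB zone on any NUMA socket; returns NULL on failure */
const struct rte_memzone *mz = rte_memzone_reserve("example_mz",
		1 << 20, SOCKET_ID_ANY, 0);
if (mz != NULL)
	printf("memzone va=%p iova=0x%" PRIx64 "\n", mz->addr, mz->iova);
The rte_fbarray_init that the primary calls above is shown next.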
int rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
unsigned int elt_sz)
{
size_t page_sz, mmap_len;
char path[PATH_MAX];
struct used_mask *msk;
struct mem_area *ma = NULL;
void *data = NULL;
int fd = -1;
const struct internal_config *internal_conf =
eal_get_internal_configuration();
if (arr == NULL) {
rte_errno = EINVAL;
return -1;
}
// parameter validation
if (fully_validate(name, elt_sz, len))
return -1;
/* allocate mem area before doing anything */
// allocated up front so it can be linked into mem_area_tailq
ma = malloc(sizeof(*ma));
if (ma == NULL) {
rte_errno = ENOMEM;
return -1;
}
// get the page size
page_sz = rte_mem_page_size();
if (page_sz == (size_t)-1) {
free(ma);
return -1;
}
/* calculate our memory limits */
mmap_len = calc_data_size(page_sz, elt_sz, len);
// reserves mmap_len bytes of virtual address space via mmap
data = eal_get_virtual_area(NULL, &mmap_len, page_sz, 0, 0);
if (data == NULL) {
free(ma);
return -1;
}
rte_spinlock_lock(&mem_area_lock);
fd = -1;
if (internal_conf->no_shconf) {
/* remap virtual area as writable */
static const int flags = RTE_MAP_FORCE_ADDRESS |
RTE_MAP_PRIVATE | RTE_MAP_ANONYMOUS;
void *new_data = rte_mem_map(data, mmap_len,
RTE_PROT_READ | RTE_PROT_WRITE, flags, fd, 0);
if (new_data == NULL) {
RTE_LOG(DEBUG, EAL, "%s(): couldn't remap anonymous memory: %s\n",
__func__, rte_strerror(rte_errno));
goto fail;
}
} else {
eal_get_fbarray_path(path, sizeof(path), name);
/*
* Each fbarray is unique to process namespace, i.e. the
* filename depends on process prefix. Try to take out a lock
* and see if we succeed. If we don't, someone else is using it
* already.
*/
fd = eal_file_open(path, EAL_OPEN_CREATE | EAL_OPEN_READWRITE);
if (fd < 0) {
RTE_LOG(DEBUG, EAL, "%s(): couldn't open %s: %s\n",
__func__, path, rte_strerror(rte_errno));
goto fail;
} else if (eal_file_lock(
fd, EAL_FLOCK_EXCLUSIVE, EAL_FLOCK_RETURN)) {
RTE_LOG(DEBUG, EAL, "%s(): couldn't lock %s: %s\n",
__func__, path, rte_strerror(rte_errno));
rte_errno = EBUSY;
goto fail;
}
/* take out a non-exclusive lock, so that other processes could
* still attach to it, but no other process could reinitialize
* it.
*/
if (eal_file_lock(fd, EAL_FLOCK_SHARED, EAL_FLOCK_RETURN))
goto fail;
if (resize_and_map(fd, path, data, mmap_len))
goto fail;
}
// fill in ma so it can be linked into the tailq
ma->addr = data;
ma->len = mmap_len;
ma->fd = fd;
/* do not close fd - keep it until detach/destroy */
TAILQ_INSERT_TAIL(&mem_area_tailq, ma, next);
/* initialize the data */
// zero the area
memset(data, 0, mmap_len);
/* populate data structure */
// hook the mapped memory into the rte_fbarray
strlcpy(arr->name, name, sizeof(arr->name));
arr->data = data;
arr->len = len;
arr->elt_sz = elt_sz;
arr->count = 0;
msk = get_used_mask(data, elt_sz, len);
msk->n_masks = MASK_LEN_TO_IDX(RTE_ALIGN_CEIL(len, MASK_ALIGN));
rte_rwlock_init(&arr->rwlock);
rte_spinlock_unlock(&mem_area_lock);
return 0;
fail:
if (data)
rte_mem_unmap(data, mmap_len);
if (fd >= 0)
close(fd);
free(ma);
rte_spinlock_unlock(&mem_area_lock);
return -1;
}
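The fbarray is the indexed-array-plus-used-bitmask primitive that underpins both memzones and memseg lists; a small usage sketch of its public API (the element type here is made up for illustration):
struct my_elt { int x; };
struct rte_fbarray arr;

/* an array of 1024 fixed-size elements, named "example" */
rte_fbarray_init(&arr, "example", 1024, sizeof(struct my_elt));

int idx = rte_fbarray_find_next_free(&arr, 0); /* first free slot */
struct my_elt *e = rte_fbarray_get(&arr, idx); /* pointer into the mapping */
e->x = 42;
rte_fbarray_set_used(&arr, idx);               /* mark the slot used */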
1.5 Analysis of rte_eal_memory_init
int
rte_eal_memory_init(void)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
const struct internal_config *internal_conf =
eal_get_internal_configuration();
int retval;
RTE_LOG(DEBUG, EAL, "Setting up physically contiguous memory...\n");
if (!mcfg)
return -1;
/* lock mem hotplug here, to prevent races while we init */
rte_mcfg_mem_read_lock();
// set up the memseg lists (analyzed below)
if (rte_eal_memseg_init() < 0)
goto fail;
if (eal_memalloc_init() < 0)
goto fail;
retval = rte_eal_process_type() == RTE_PROC_PRIMARY ?
rte_eal_hugepage_init() :
rte_eal_hugepage_attach();
if (retval < 0)
goto fail;
if (internal_conf->no_shconf == 0 && rte_eal_memdevice_init() < 0)
goto fail;
return 0;
fail:
rte_mcfg_mem_read_unlock();
return -1;
}
For rte_eal_memseg_init, the analysis below follows the 64-bit primary-process path:
int
eal_dynmem_memseg_lists_init(void)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
struct memtype {
uint64_t page_sz;
int socket_id;
} *memtypes = NULL;
int i, hpi_idx, msl_idx, ret = -1; /* fail unless told to succeed */
struct rte_memseg_list *msl;
uint64_t max_mem, max_mem_per_type;
unsigned int max_seglists_per_type;
unsigned int n_memtypes, cur_type;
struct internal_config *internal_conf =
eal_get_internal_configuration();
/* no-huge does not need this at all */
if (internal_conf->no_hugetlbfs)
return 0;
/*
* figuring out amount of memory we're going to have is a long and very
* involved process. the basic element we're operating with is a memory
* type, defined as a combination of NUMA node ID and page size (so that
* e.g. 2 sockets with 2 page sizes yield 4 memory types in total).
*
* deciding amount of memory going towards each memory type is a
* balancing act between maximum segments per type, maximum memory per
* type, and number of detected NUMA nodes. the goal is to make sure
* each memory type gets at least one memseg list.
*
* the total amount of memory is limited by RTE_MAX_MEM_MB value.
*
* the total amount of memory per type is limited by either
* RTE_MAX_MEM_MB_PER_TYPE, or by RTE_MAX_MEM_MB divided by the number
* of detected NUMA nodes. additionally, maximum number of segments per
* type is also limited by RTE_MAX_MEMSEG_PER_TYPE. this is because for
* smaller page sizes, it can take hundreds of thousands of segments to
* reach the above specified per-type memory limits.
*
* additionally, each type may have multiple memseg lists associated
* with it, each limited by either RTE_MAX_MEM_MB_PER_LIST for bigger
* page sizes, or RTE_MAX_MEMSEG_PER_LIST segments for smaller ones.
*
* the number of memseg lists per type is decided based on the above
* limits, and also taking number of detected NUMA nodes, to make sure
* that we don't run out of memseg lists before we populate all NUMA
* nodes with memory.
*
* we do this in three stages. first, we collect the number of types.
* then, we figure out memory constraints and populate the list of
* would-be memseg lists. then, we go ahead and allocate the memseg
* lists.
*/
/* create space for mem types */
// one memtype per (page size, NUMA socket) combination
n_memtypes = internal_conf->num_hugepage_sizes * rte_socket_count();
memtypes = calloc(n_memtypes, sizeof(*memtypes));
if (memtypes == NULL) {
RTE_LOG(ERR, EAL, "Cannot allocate space for memory types\n");
return -1;
}
/* populate mem types */
cur_type = 0;
for (hpi_idx = 0; hpi_idx < (int) internal_conf->num_hugepage_sizes;
hpi_idx++) {
struct hugepage_info *hpi;
uint64_t hugepage_sz;
hpi = &internal_conf->hugepage_info[hpi_idx];
hugepage_sz = hpi->hugepage_sz;
for (i = 0; i < (int) rte_socket_count(); i++, cur_type++) {
int socket_id = rte_socket_id_by_idx(i);
#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
/* we can still sort pages by socket in legacy mode */
if (!internal_conf->legacy_mem && socket_id > 0)
break;
#endif
memtypes[cur_type].page_sz = hugepage_sz;
memtypes[cur_type].socket_id = socket_id;
RTE_LOG(DEBUG, EAL, "Detected memory type: "
"socket_id:%u hugepage_sz:%" PRIu64 "\n",
socket_id, hugepage_sz);
}
}
/* number of memtypes could have been lower due to no NUMA support */
n_memtypes = cur_type;
/* set up limits for types */
max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
max_mem_per_type = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20,
max_mem / n_memtypes);
/*
* limit maximum number of segment lists per type to ensure there's
* space for memseg lists for all NUMA nodes with all page sizes
*/
max_seglists_per_type = RTE_MAX_MEMSEG_LISTS / n_memtypes;
if (max_seglists_per_type == 0) {
RTE_LOG(ERR, EAL, "Cannot accommodate all memory types, please increase %s\n",
RTE_STR(RTE_MAX_MEMSEG_LISTS));
goto out;
}
/* go through all mem types and create segment lists */
msl_idx = 0;
for (cur_type = 0; cur_type < n_memtypes; cur_type++) {
unsigned int cur_seglist, n_seglists, n_segs;
unsigned int max_segs_per_type, max_segs_per_list;
struct memtype *type = &memtypes[cur_type];
uint64_t max_mem_per_list, pagesz;
int socket_id;
pagesz = type->page_sz;
socket_id = type->socket_id;
/*
* we need to create segment lists for this type. we must take
* into account the following things:
*
* 1. total amount of memory we can use for this memory type
* 2. total amount of memory per memseg list allowed
* 3. number of segments needed to fit the amount of memory
* 4. number of segments allowed per type
* 5. number of segments allowed per memseg list
* 6. number of memseg lists we are allowed to take up
*/
/* calculate how much segments we will need in total */
max_segs_per_type = max_mem_per_type / pagesz;
/* limit number of segments to maximum allowed per type */
max_segs_per_type = RTE_MIN(max_segs_per_type,
(unsigned int)RTE_MAX_MEMSEG_PER_TYPE);
/* limit number of segments to maximum allowed per list */
max_segs_per_list = RTE_MIN(max_segs_per_type,
(unsigned int)RTE_MAX_MEMSEG_PER_LIST);
/* calculate how much memory we can have per segment list */
max_mem_per_list = RTE_MIN(max_segs_per_list * pagesz,
(uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20);
/* calculate how many segments each segment list will have */
n_segs = RTE_MIN(max_segs_per_list, max_mem_per_list / pagesz);
/* calculate how many segment lists we can have */
n_seglists = RTE_MIN(max_segs_per_type / n_segs,
max_mem_per_type / max_mem_per_list);
/* limit number of segment lists according to our maximum */
n_seglists = RTE_MIN(n_seglists, max_seglists_per_type);
RTE_LOG(DEBUG, EAL, "Creating %i segment lists: "
"n_segs:%i socket_id:%i hugepage_sz:%" PRIu64 "\n",
n_seglists, n_segs, socket_id, pagesz);
/* create all segment lists */
for (cur_seglist = 0; cur_seglist < n_seglists; cur_seglist++) {
if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
RTE_LOG(ERR, EAL,
"No more space in memseg lists, please increase %s\n",
RTE_STR(RTE_MAX_MEMSEG_LISTS));
goto out;
}
msl = &mcfg->memsegs[msl_idx++];
if (eal_memseg_list_init(msl, pagesz, n_segs,
socket_id, cur_seglist, true))
goto out;
if (eal_memseg_list_alloc(msl, 0)) {
RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
goto out;
}
}
}
/* we're successful */
ret = 0;
out:
free(memtypes);
return ret;
}
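As a worked example (assuming the default config values RTE_MAX_MEM_MB = 524288, RTE_MAX_MEM_MB_PER_TYPE = 65536, RTE_MAX_MEMSEG_LISTS = 128, RTE_MAX_MEMSEG_PER_TYPE = 32768, RTE_MAX_MEMSEG_PER_LIST = 8192 and RTE_MAX_MEM_MB_PER_LIST = 32768), a single socket with only 2 MB pages yields one memtype: max_mem_per_type = min(64 GB, 512 GB / 1) = 64 GB; max_segs_per_type = min(64 GB / 2 MB, 32768) = 32768; max_segs_per_list = min(32768, 8192) = 8192; max_mem_per_list = min(8192 * 2 MB, 32 GB) = 16 GB; n_segs = 8192; n_seglists = min(32768 / 8192, 64 GB / 16 GB) = 4. The loop therefore creates 4 memseg lists of 8192 segments each, i.e. 16 GB of reserved VA per list.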
eal_memseg_list_init reaches rte_fbarray_init via eal_memseg_list_init_named:
int
eal_memseg_list_init_named(struct rte_memseg_list *msl, const char *name,
uint64_t page_sz, int n_segs, int socket_id, bool heap)
{
// this mmaps n_segs * sizeof(struct rte_memseg) bytes
if (rte_fbarray_init(&msl->memseg_arr, name, n_segs,
sizeof(struct rte_memseg))) {
RTE_LOG(ERR, EAL, "Cannot allocate memseg list: %s\n",
rte_strerror(rte_errno));
return -1;
}
msl->page_sz = page_sz;
msl->socket_id = socket_id;
msl->base_va = NULL;
msl->heap = heap;
RTE_LOG(DEBUG, EAL,
"Memseg list allocated at socket %i, page size 0x%"PRIx64"kB\n",
socket_id, page_sz >> 10);
return 0;
}
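For context, eal_memseg_list_init itself is only a thin wrapper that derives the fbarray name from the page size, socket id and per-type list index before delegating here; roughly (reconstructed from the upstream source):
int
eal_memseg_list_init(struct rte_memseg_list *msl, uint64_t page_sz,
		int n_segs, int socket_id, int type_msl_idx, bool heap)
{
	char name[RTE_FBARRAY_NAME_LEN];

	/* e.g. "memseg-2048k-0-0" for 2 MB pages, socket 0, list 0 */
	snprintf(name, sizeof(name), MEMSEG_LIST_FMT,
			page_sz >> 10, socket_id, type_msl_idx);

	return eal_memseg_list_init_named(
			msl, name, page_sz, n_segs, socket_id, heap);
}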
eal_memseg_list_alloc then reserves the VA space for the segments themselves:
int
eal_memseg_list_alloc(struct rte_memseg_list *msl, int reserve_flags)
{
size_t page_sz, mem_sz;
void *addr;
page_sz = msl->page_sz;
mem_sz = page_sz * msl->memseg_arr.len;
// internally this mmaps the VA range again
addr = eal_get_virtual_area(
msl->base_va, &mem_sz, page_sz, 0, reserve_flags);
if (addr == NULL) {
#ifndef RTE_EXEC_ENV_WINDOWS
/* The hint would be misleading on Windows, because address
* is by default system-selected (base VA = 0).
* However, this function is called from many places,
* including common code, so don't duplicate the message.
*/
if (rte_errno == EADDRNOTAVAIL)
RTE_LOG(ERR, EAL, "Cannot reserve %llu bytes at [%p] - "
"please use '--" OPT_BASE_VIRTADDR "' option\n",
(unsigned long long)mem_sz, msl->base_va);
#endif
return -1;
}
msl->base_va = addr;
msl->len = mem_sz;
RTE_LOG(DEBUG, EAL, "VA reserved for memseg list at %p, size %zx\n",
addr, mem_sz);
return 0;
}
eal_memalloc_init effectively runs alloc_list, where fd_list is a global variable:
static int
alloc_list(int list_idx, int len)
{
int *data;
int i;
const struct internal_config *internal_conf =
eal_get_internal_configuration();
/* single-file segments mode does not need fd list */
if (!internal_conf->single_file_segments) {
/* ensure we have space to store fd per each possible segment */
data = malloc(sizeof(int) * len);
if (data == NULL) {
RTE_LOG(ERR, EAL, "Unable to allocate space for file descriptors\n");
return -1;
}
/* set all fd's as invalid */
for (i = 0; i < len; i++)
data[i] = -1;
fd_list[list_idx].fds = data;
fd_list[list_idx].len = len;
} else {
fd_list[list_idx].fds = NULL;
fd_list[list_idx].len = 0;
}
fd_list[list_idx].count = 0;
fd_list[list_idx].memseg_list_fd = -1;
return 0;
}
rte_eal_hugepage_init is where hugepages actually get mapped. We first analyze legacy mode.
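Which branch runs is driven by EAL options: --legacy-mem selects this static pre-allocation model, while --no-huge (typically with -m to size the heap in MB) takes the hugetlbfs-disabled branch at the top of the function. For example:
./dpdk-app -l 0-1 --legacy-mem
./dpdk-app -l 0-1 --no-huge -m 512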
static int
eal_legacy_hugepage_init(void)
{
struct rte_mem_config *mcfg;
struct hugepage_file *hugepage = NULL, *tmp_hp = NULL;
struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];
struct internal_config *internal_conf =
eal_get_internal_configuration();
uint64_t memory[RTE_MAX_NUMA_NODES];
unsigned hp_offset;
int i, j;
int nr_hugefiles, nr_hugepages = 0;
void *addr;
memset(used_hp, 0, sizeof(used_hp));
/* get pointer to global configuration */
// get the global mem_config; this area is shared between processes
mcfg = rte_eal_get_configuration()->mem_config;
/* hugetlbfs can be disabled */
if (internal_conf->no_hugetlbfs) {
void *prealloc_addr;
size_t mem_sz;
struct rte_memseg_list *msl;
int n_segs, fd, flags;
#ifdef MEMFD_SUPPORTED
int memfd;
#endif
uint64_t page_sz;
/* nohuge mode is legacy mode */
internal_conf->legacy_mem = 1;
/* nohuge mode is single-file segments mode */
internal_conf->single_file_segments = 1;
/* create a memseg list */
msl = &mcfg->memsegs[0];
mem_sz = internal_conf->memory;
page_sz = RTE_PGSIZE_4K;
n_segs = mem_sz / page_sz;
if (eal_memseg_list_init_named(
msl, "nohugemem", page_sz, n_segs, 0, true)) {
return -1;
}
/* set up parameters for anonymous mmap */
fd = -1;
flags = MAP_PRIVATE | MAP_ANONYMOUS;
#ifdef MEMFD_SUPPORTED
/* create a memfd and store it in the segment fd table */
memfd = memfd_create("nohuge", 0);
if (memfd < 0) {
RTE_LOG(DEBUG, EAL, "Cannot create memfd: %s\n",
strerror(errno));
RTE_LOG(DEBUG, EAL, "Falling back to anonymous map\n");
} else {
/* we got an fd - now resize it */
if (ftruncate(memfd, internal_conf->memory) < 0) {
RTE_LOG(ERR, EAL, "Cannot resize memfd: %s\n",
strerror(errno));
RTE_LOG(ERR, EAL, "Falling back to anonymous map\n");
close(memfd);
} else {
/* creating memfd-backed file was successful.
* we want changes to memfd to be visible to
* other processes (such as vhost backend), so
* map it as shared memory.
*/
RTE_LOG(DEBUG, EAL, "Using memfd for anonymous memory\n");
fd = memfd;
flags = MAP_SHARED;
}
}
#endif
/* preallocate address space for the memory, so that it can be
* fit into the DMA mask.
*/
if (eal_memseg_list_alloc(msl, 0)) {
RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
return -1;
}
prealloc_addr = msl->base_va;
addr = mmap(prealloc_addr, mem_sz, PROT_READ | PROT_WRITE,
flags | MAP_FIXED, fd, 0);
if (addr == MAP_FAILED || addr != prealloc_addr) {
RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
strerror(errno));
munmap(prealloc_addr, mem_sz);
return -1;
}
/* we're in single-file segments mode, so only the segment list
* fd needs to be set up.
*/
if (fd != -1) {
if (eal_memalloc_set_seg_list_fd(0, fd) < 0) {
RTE_LOG(ERR, EAL, "Cannot set up segment list fd\n");
/* not a serious error, proceed */
}
}
eal_memseg_list_populate(msl, addr, n_segs);
if (mcfg->dma_maskbits &&
rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) {
RTE_LOG(ERR, EAL,
"%s(): couldn't allocate memory due to IOVA exceeding limits of current DMA mask.\n",
__func__);
if (rte_eal_iova_mode() == RTE_IOVA_VA &&
rte_eal_using_phys_addrs())
RTE_LOG(ERR, EAL,
"%s(): Please try initializing EAL with --iova-mode=pa parameter.\n",
__func__);
goto fail;
}
return 0;
}
/* calculate total number of hugepages available. at this point we haven't
* yet started sorting them so they all are on socket 0 */
/* total them up; calc_num_pages earlier stored every count in num_pages[0] */
for (i = 0; i < (int) internal_conf->num_hugepage_sizes; i++) {
/* meanwhile, also initialize used_hp hugepage sizes in used_hp */
used_hp[i].hugepage_sz = internal_conf->hugepage_info[i].hugepage_sz;
nr_hugepages += internal_conf->hugepage_info[i].num_pages[0];
}
/*
* allocate a memory area for hugepage table.
* this isn't shared memory yet. due to the fact that we need some
* processing done on these pages, shared memory will be created
* at a later stage.
*/
tmp_hp = malloc(nr_hugepages * sizeof(struct hugepage_file));
if (tmp_hp == NULL)
goto fail;
memset(tmp_hp, 0, nr_hugepages * sizeof(struct hugepage_file));
hp_offset = 0; /* where we start the current page size entries */
huge_register_sigbus();
/* make a copy of socket_mem, needed for balanced allocation. */
for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
memory[i] = internal_conf->socket_mem[i];
/* map all hugepages and sort them */
for (i = 0; i < (int)internal_conf->num_hugepage_sizes; i++) {
unsigned pages_old, pages_new;
struct hugepage_info *hpi;
/*
* we don't yet mark hugepages as used at this stage, so
* we just map all hugepages available to the system
* all hugepages are still located on socket 0
*/
hpi = &internal_conf->hugepage_info[i];
if (hpi->num_pages[0] == 0)
continue;
/* map all hugepages available */
pages_old = hpi->num_pages[0];
// map all available hugepages here
pages_new = map_all_hugepages(&tmp_hp[hp_offset], hpi, memory);
if (pages_new < pages_old) {
RTE_LOG(DEBUG, EAL,
"%d not %d hugepages of size %u MB allocated\n",
pages_new, pages_old,
(unsigned)(hpi->hugepage_sz / 0x100000));
int pages = pages_old - pages_new;
nr_hugepages -= pages;
hpi->num_pages[0] = pages_new;
if (pages_new == 0)
continue;
}
// record physical addresses
if (rte_eal_using_phys_addrs() &&
rte_eal_iova_mode() != RTE_IOVA_VA) {
/* find physical addresses for each hugepage */
if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
RTE_LOG(DEBUG, EAL, "Failed to find phys addr "
"for %u MB pages\n",
(unsigned int)(hpi->hugepage_sz / 0x100000));
goto fail;
}
} else {
/* set physical addresses for each hugepage */
if (set_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
RTE_LOG(DEBUG, EAL, "Failed to set phys addr "
"for %u MB pages\n",
(unsigned int)(hpi->hugepage_sz / 0x100000));
goto fail;
}
}
// parse the numa_maps file, match the addresses, then set each page's socket id
if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){
RTE_LOG(DEBUG, EAL, "Failed to find NUMA socket for %u MB pages\n",
(unsigned)(hpi->hugepage_sz / 0x100000));
goto fail;
}
qsort(&tmp_hp[hp_offset], hpi->num_pages[0],
sizeof(struct hugepage_file), cmp_physaddr);
/* we have processed a num of hugepages of this size, so inc offset */
// pages of this size have been mapped, so advance the offset
hp_offset += hpi->num_pages[0];
}
huge_recover_sigbus();
// compute the hugepage memory size
if (internal_conf->memory == 0 && internal_conf->force_sockets == 0)
internal_conf->memory = eal_get_hugepage_mem_size();
// record the number of hugepage files
nr_hugefiles = nr_hugepages;
/* clean out the numbers of pages */
for (i = 0; i < (int) internal_conf->num_hugepage_sizes; i++)
for (j = 0; j < RTE_MAX_NUMA_NODES; j++)
internal_conf->hugepage_info[i].num_pages[j] = 0;
/* get hugepages for each socket */
for (i = 0; i < nr_hugefiles; i++) {
int socket = tmp_hp[i].socket_id;
/* find a hugepage info with right size and increment num_pages */
const int nb_hpsizes = RTE_MIN(MAX_HUGEPAGE_SIZES,
(int)internal_conf->num_hugepage_sizes);
for (j = 0; j < nb_hpsizes; j++) {
if (tmp_hp[i].size ==
internal_conf->hugepage_info[j].hugepage_sz) {
internal_conf->hugepage_info[j].num_pages[socket]++;
}
}
}
/* make a copy of socket_mem, needed for number of pages calculation */
for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
memory[i] = internal_conf->socket_mem[i];
/* calculate final number of pages */
nr_hugepages = eal_dynmem_calc_num_pages_per_socket(memory,
internal_conf->hugepage_info, used_hp,
internal_conf->num_hugepage_sizes);
/* error if not enough memory available */
if (nr_hugepages < 0)
goto fail;
/* reporting in! */
for (i = 0; i < (int) internal_conf->num_hugepage_sizes; i++) {
for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
if (used_hp[i].num_pages[j] > 0) {
RTE_LOG(DEBUG, EAL,
"Requesting %u pages of size %uMB"
" from socket %i\n",
used_hp[i].num_pages[j],
(unsigned)
(used_hp[i].hugepage_sz / 0x100000),
j);
}
}
}
/* create shared memory */
hugepage = create_shared_memory(eal_hugepage_data_path(),
nr_hugefiles * sizeof(struct hugepage_file));
if (hugepage == NULL) {
RTE_LOG(ERR, EAL, "Failed to create shared memory!\n");
goto fail;
}
memset(hugepage, 0, nr_hugefiles * sizeof(struct hugepage_file));
/*
* unmap pages that we won't need (looks at used_hp).
* also, sets final_va to NULL on pages that were unmapped.
*/
if (unmap_unneeded_hugepages(tmp_hp, used_hp,
internal_conf->num_hugepage_sizes) < 0) {
RTE_LOG(ERR, EAL, "Unmapping and locking hugepages failed!\n");
goto fail;
}
/*
* copy stuff from malloc'd hugepage* to the actual shared memory.
* this procedure only copies those hugepages that have orig_va
* not NULL. has overflow protection.
*/
if (copy_hugepages_to_shared_mem(hugepage, nr_hugefiles,
tmp_hp, nr_hugefiles) < 0) {
RTE_LOG(ERR, EAL, "Copying tables to shared memory failed!\n");
goto fail;
}
#ifndef RTE_ARCH_64
/* for legacy 32-bit mode, we did not preallocate VA space, so do it */
if (internal_conf->legacy_mem &&
prealloc_segments(hugepage, nr_hugefiles)) {
RTE_LOG(ERR, EAL, "Could not preallocate VA space for hugepages\n");
goto fail;
}
#endif
/* remap all pages we do need into memseg list VA space, so that those
* pages become first-class citizens in DPDK memory subsystem
*/
if (remap_needed_hugepages(hugepage, nr_hugefiles)) {
RTE_LOG(ERR, EAL, "Couldn't remap hugepage files into memseg lists\n");
goto fail;
}
/* free the hugepage backing files */
if (internal_conf->hugepage_file.unlink_before_mapping &&
unlink_hugepage_files(tmp_hp, internal_conf->num_hugepage_sizes) < 0) {
RTE_LOG(ERR, EAL, "Unlinking hugepage files failed!\n");
goto fail;
}
/* free the temporary hugepage table */
free(tmp_hp);
tmp_hp = NULL;
munmap(hugepage, nr_hugefiles * sizeof(struct hugepage_file));
hugepage = NULL;
/* we're not going to allocate more pages, so release VA space for
* unused memseg lists
*/
for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
struct rte_memseg_list *msl = &mcfg->memsegs[i];
size_t mem_sz;
/* skip inactive lists */
if (msl->base_va == NULL)
continue;
/* skip lists where there is at least one page allocated */
if (msl->memseg_arr.count > 0)
continue;
/* this is an unused list, deallocate it */
mem_sz = msl->len;
munmap(msl->base_va, mem_sz);
msl->base_va = NULL;
msl->heap = 0;
/* destroy backing fbarray */
rte_fbarray_destroy(&msl->memseg_arr);
}
if (mcfg->dma_maskbits &&
rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) {
RTE_LOG(ERR, EAL,
"%s(): couldn't allocate memory due to IOVA exceeding limits of current DMA mask.\n",
__func__);
goto fail;
}
return 0;
fail:
huge_recover_sigbus();
free(tmp_hp);
if (hugepage != NULL)
munmap(hugepage, nr_hugefiles * sizeof(struct hugepage_file));
return -1;
}
1.6 Analysis of rte_eal_malloc_heap_init
int rte_eal_malloc_heap_init(void)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
unsigned int i;
const struct internal_config *internal_conf =
eal_get_internal_configuration();
if (internal_conf->match_allocations)
RTE_LOG(DEBUG, EAL, "Hugepages will be freed exactly as allocated.\n");
if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
/* assign min socket ID to external heaps */
mcfg->next_socket_id = EXTERNAL_HEAP_MIN_SOCKET_ID;
/* assign names to default DPDK heaps */
for (i = 0; i < rte_socket_count(); i++) {
struct malloc_heap *heap = &mcfg->malloc_heaps[i];
char heap_name[RTE_HEAP_NAME_MAX_LEN];
int socket_id = rte_socket_id_by_idx(i);
snprintf(heap_name, sizeof(heap_name),
"socket_%i", socket_id);
strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN);
heap->socket_id = socket_id;
}
}
// register the multiprocess request handlers
if (register_mp_requests()) {
RTE_LOG(ERR, EAL, "Couldn't register malloc multiprocess actions\n");
rte_mcfg_mem_read_unlock();
return -1;
}
/* unlock mem hotplug here. it's safe for primary as no requests can
* even come before primary itself is fully initialized, and secondaries
* do not need to initialize the heap.
*/
rte_mcfg_mem_read_unlock();
/* secondary process does not need to initialize anything */
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
return 0;
/* add all IOVA-contiguous areas to the heap */
// each memseg is put into a malloc_heap: one malloc_elem at the start and one
// at the end of the area; the trailing elem points back to the leading one,
// which is then inserted into the free list
/* rte_memseg_contig_walk executes malloc_add_seg for each contiguous run:
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
int i, ms_idx, ret = 0;
for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
struct rte_memseg_list *msl = &mcfg->memsegs[i];
const struct rte_memseg *ms;
struct rte_fbarray *arr;
if (msl->memseg_arr.count == 0)
continue;
arr = &msl->memseg_arr;
ms_idx = rte_fbarray_find_next_used(arr, 0);
while (ms_idx >= 0) {
int n_segs;
size_t len;
ms = rte_fbarray_get(arr, ms_idx);
/* find how many more segments there are, starting with
* this one.
*/
n_segs = rte_fbarray_find_contig_used(arr, ms_idx);
len = n_segs * msl->page_sz;
// this operates on rte_config->mem_config->memsegs[i]; ms came from rte_fbarray_get(arr, ms_idx)
ret = func(msl, ms, len, arg);
if (ret)
return ret;
ms_idx = rte_fbarray_find_next_used(arr,
ms_idx + n_segs);
}
}
*/
return rte_memseg_contig_walk(malloc_add_seg, NULL);
}
/*
* Expand the heap with a memory area.
*/
static struct malloc_elem * malloc_heap_add_memory(struct malloc_heap *heap, struct rte_memseg_list *msl,
void *start, size_t len, bool dirty)
{
struct malloc_elem *elem = start;
// initialize elem with header and trailer; note the msl: later allocations come from this same msl
malloc_elem_init(elem, heap, msl, len, elem, len, dirty);
// insert into the heap
malloc_elem_insert(elem);
// merge with adjacent free elements
elem = malloc_elem_join_adjacent_free(elem);
// insert into the free list
malloc_elem_free_list_insert(elem);
return elem;
}
static int malloc_add_seg(const struct rte_memseg_list *msl,
const struct rte_memseg *ms, size_t len, void *arg __rte_unused)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
struct rte_memseg_list *found_msl;
struct malloc_heap *heap;
int msl_idx, heap_idx;
if (msl->external)
return 0;
// find the index of the heap in mcfg->malloc_heaps whose socket id equals msl->socket_id
heap_idx = malloc_socket_to_heap_id(msl->socket_id);
if (heap_idx < 0) {
RTE_LOG(ERR, EAL, "Memseg list has invalid socket id\n");
return -1;
}
heap = &mcfg->malloc_heaps[heap_idx];
/* msl is const, so find it */
msl_idx = msl - mcfg->memsegs;
if (msl_idx < 0 || msl_idx >= RTE_MAX_MEMSEG_LISTS)
return -1;
// note: use the writable msl from mcfg
found_msl = &mcfg->memsegs[msl_idx];
// put the memseg into the malloc_heap
malloc_heap_add_memory(heap, found_msl, ms->addr, len,
ms->flags & RTE_MEMSEG_FLAG_DIRTY);
heap->total_size += len;
RTE_LOG(DEBUG, EAL, "Added %zuM to heap on socket %i\n", len >> 20,
msl->socket_id);
return 0;
}
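Once the walk finishes, every memseg is threaded into its per-socket heap and ordinary allocations can be served from it; a minimal usage sketch of the resulting allocator API:
#include <rte_malloc.h>

/* 4 KB, cache-line aligned, taken from the caller's NUMA socket heap */
void *buf = rte_malloc("example", 4096, RTE_CACHE_LINE_SIZE);
rte_free(buf); /* rte_free(NULL) is a no-op, so no check is needed */

/* or pin the allocation to the heap of a specific socket */
void *buf0 = rte_malloc_socket("example", 4096, RTE_CACHE_LINE_SIZE, 0);
rte_free(buf0);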