DPDK所有的宏定义基本都在rte_config.h(需要编译DPDK之后才会在目录中生成)中,所以第一个头文件应该为#include <rte_config.h>
DPDK首先初始化了Environment Abstraction Layer(EAL),EAL主要提供了以下功能:
• Intel® DPDK loading and launching
• Support for multi-process and multi-thread execution types
• Core affinity/assignment procedures
• System memory allocation/de-allocation
• Atomic/lock operations
• Time reference
• PCI bus access
• Trace and debug functions
• CPU feature identification
• Interrupt handling
• Alarm operations
主要的初始化动作:
配置初始化
内存初始化
内存池初始化
队列初始化
告警初始化
中断初始化
PCI 初始化
定时器初始化
检测内存本地化(NUMA)
插件初始化
主线程初始化
轮询设备初始化
建立主从线程通道
将从线程设置在等待模式
PCI 设备的探测与初始化
EAL初始化参数:
- -c COREMASK:要使用的CPU core的16进制掩码。注意core编号在不同的平台上不一样,需要事先确定好。
- -n NUM:每个处理器socket的内存通道数
- -b domain:bus:devid.func:网口黑名单,EAL不能使用的PCI设备(可以同时存在多个-b选项)
- --socket-mem:在指定socket上分配大页内存
- -m MB:指定分配的大页内存数(单位MB),不限处理器的socket。建议使用--socket-mem代替这个参数
- -r NUM:内存的rank数
- -v:显示程序版本号
- --huge-dir:大页内存的挂载点
- --file-prefix:大页内存文件的前缀
- --proc-type:进程类型(primary,secondary,auto)
- --xen-dom0:支持程序在Xen Domain0中非大页内存下运行
- --vmware-tsc-map:使用VMware TSC代替本地的RDTSC
- --base-virtaddr:指定虚拟地址的基址
- --vfio-intr:指定VFIO使用的中断类型(如果不使用VFIO则无效)
-c是必须的,其它都是可选的。
初始化程序
主要是在eal.c文件的rte_eal_init(int argc, char **argv)
/**
 * Initialize the Environment Abstraction Layer (EAL).
 *
 * Runs the one-time start-up sequence: early logging, lcore detection,
 * command-line parsing, hugepage/memory/memzone/tailq setup, logs, alarms,
 * timers, plugins, master-thread setup, device and interrupt init, creation
 * of one thread per slave lcore, and finally PCI probing.
 *
 * Most failures in the individual steps abort the process through
 * rte_panic(); a failure while parsing the arguments calls exit(1).
 *
 * @param argc
 *   Number of entries in argv.
 * @param argv
 *   Command-line arguments; argv[0] is used to derive the log identifier.
 * @return
 *   -1 if the function has already been called once in this process;
 *   otherwise the non-negative value returned by eal_parse_args()
 *   (NOTE(review): in upstream DPDK this is the number of parsed
 *   arguments -- confirm against eal_parse_args()).
 */
int rte_eal_init(int argc, char **argv)
{
	int i, fctret, ret;
	pthread_t thread_id;
	static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0);
	const char *logid;
	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
	char thread_name[RTE_MAX_THREAD_NAME_LEN];

	/* The EAL may only be initialized once per process. */
	if (!rte_atomic32_test_and_set(&run_once))
		return -1;

	/* Log identifier: basename of argv[0] (text after the last '/'). */
	logid = strrchr(argv[0], '/');
	logid = strdup(logid ? logid + 1: argv[0]);

	thread_id = pthread_self();

	if (rte_eal_log_early_init() < 0)
		rte_panic("Cannot init early logs\n");

	eal_log_level_parse(argc, argv);

	/* set log level as early as possible */
	rte_set_log_level(internal_config.log_level);

	/* Detect the lcores (CPUs) present in the system. */
	if (rte_eal_cpu_init() < 0)
		rte_panic("Cannot detect lcores\n");

	/* Parse the command line into internal_config. */
	fctret = eal_parse_args(argc, argv);
	if (fctret < 0)
		exit(1);

	/*
	 * Collect hugepage information for the later memory initialization.
	 * Skipped when hugetlbfs is disabled, for secondary processes, and
	 * when Xen dom0 support is enabled.
	 */
	if (internal_config.no_hugetlbfs == 0 &&
			internal_config.process_type != RTE_PROC_SECONDARY &&
			internal_config.xen_dom0_support == 0 &&
			eal_hugepage_info_init() < 0)
		rte_panic("Cannot get hugepage information\n");

	/*
	 * No memory amount requested and no per-socket memory forced:
	 * when running without hugetlbfs, fall back to the default size.
	 */
	if (internal_config.memory == 0 && internal_config.force_sockets == 0) {
		if (internal_config.no_hugetlbfs)
			internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE;
	}

	if (internal_config.vmware_tsc_map == 1) {
#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
		rte_cycles_vmware_tsc_map = 1;
		RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, "
				"you must have monitor_control.pseudo_perfctr = TRUE\n");
#else
		RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because "
				"RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n");
#endif
	}

	/* Seed the pseudo-random generator from the TSC. */
	rte_srand(rte_rdtsc());

	/*
	 * Per the original notes: creates the .rte_config file (under
	 * /var/run or the user's home directory) that stores the memory
	 * configuration (struct rte_mem_config); a secondary process waits
	 * here until the primary has finished its memory initialization.
	 */
	rte_config_init();

	/*
	 * Per the original notes: scans all PCI devices in the system and
	 * links a device structure for each one into the device list.
	 */
	if (rte_eal_pci_init() < 0)
		rte_panic("Cannot init PCI\n");

#ifdef VFIO_PRESENT
	if (rte_eal_vfio_setup() < 0)
		rte_panic("Cannot init VFIO\n");
#endif

#ifdef RTE_LIBRTE_IVSHMEM
	if (rte_eal_ivshmem_init() < 0)
		rte_panic("Cannot init IVSHMEM\n");
#endif

	/*
	 * Per the original notes: initializes rte_config->mem_config and
	 * maps the hugepages to rte_mapXX files under the hugetlbfs mount.
	 */
	if (rte_eal_memory_init() < 0)
		rte_panic("Cannot init memory\n");

	/* the directories are locked during eal_hugepage_info_init */
	eal_hugedirs_unlock();

	/* Initialize the memory available to memzones. */
	if (rte_eal_memzone_init() < 0)
		rte_panic("Cannot init memzone\n");

	if (rte_eal_tailqs_init() < 0)
		rte_panic("Cannot init tail queues for objects\n");

#ifdef RTE_LIBRTE_IVSHMEM
	if (rte_eal_ivshmem_obj_init() < 0)
		rte_panic("Cannot init IVSHMEM objects\n");
#endif

	if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0)
		rte_panic("Cannot init logs\n");

	if (rte_eal_alarm_init() < 0)
		rte_panic("Cannot init interrupt-handling thread\n");

	/* Timers. */
	if (rte_eal_timer_init() < 0)
		rte_panic("Cannot init HPET or TSC timers\n");

	/* Check that the master lcore's socket has memory. */
	eal_check_mem_on_local_socket();

	if (eal_plugins_init() < 0)
		rte_panic("Cannot init plugins\n");

	/* Set up the master thread on the master lcore (CPU binding). */
	eal_thread_init_master(rte_config.master_lcore);

	ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);

	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%x;cpuset=[%s%s])\n",
		rte_config.master_lcore, (int)thread_id, cpuset,
		ret == 0 ? "" : "...");

	if (rte_eal_dev_init() < 0)
		rte_panic("Cannot init pmd devices\n");

	/*
	 * Per the original notes: creates the pipe used to communicate with
	 * the receive driver and starts the interrupt-handling thread.
	 */
	if (rte_eal_intr_init() < 0)
		rte_panic("Cannot init interrupt-handling thread\n");

	/* Spawn one thread for every slave lcore. */
	RTE_LCORE_FOREACH_SLAVE(i) {

		/*
		 * create communication pipes between master thread
		 * and children
		 */
		if (pipe(lcore_config[i].pipe_master2slave) < 0)
			rte_panic("Cannot create pipe\n");
		if (pipe(lcore_config[i].pipe_slave2master) < 0)
			rte_panic("Cannot create pipe\n");

		lcore_config[i].state = WAIT;

		/* create a thread for each lcore */
		ret = pthread_create(&lcore_config[i].thread_id, NULL,
				     eal_thread_loop, NULL);
		if (ret != 0)
			rte_panic("Cannot create thread\n");

		/* Set thread_name for aid in debugging. */
		snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
			"lcore-slave-%d", i);
		ret = rte_thread_setname(lcore_config[i].thread_id,
						thread_name);
		if (ret != 0)
			RTE_LOG(DEBUG, EAL,
				"Cannot set name for lcore thread\n");
	}

	/*
	 * Launch a dummy function on all slave lcores, so that master lcore
	 * knows they are all ready when this function returns.
	 */
	rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
	rte_eal_mp_wait_lcore();

	/* Probe & Initialize PCI devices */
	if (rte_eal_pci_probe())
		rte_panic("Cannot probe PCI\n");

	rte_eal_mcfg_complete();

	/*
	 * Hand the argument-parsing result back to the caller; without this
	 * the int function would fall off its end with no return value.
	 */
	return fctret;
}