VPP源码(版本18.01)从三个维度进行分析,仅作为新手学习入门的几个点:
1. 进程启动流程加载plugin流程;
2. 命令行打开业务开关流程;
3. 报文收发包及转发流程。
一. 进程启动流程:
1. vpp -c /etc/vpp/startup.conf运行阶段,加载很多.so库。
vlib_plugin_early_init:356: plugin path /home/share/vpp/build-root/install-vpp_debug-native/vpp/lib64/vpp_plugins
load_one_plugin:184: Loaded plugin: acl_plugin.so (Access Control Lists)
load_one_plugin:184: Loaded plugin: dpdk_plugin.so (Data Plane Development Kit (DPDK))
load_one_plugin:184: Loaded plugin: flowprobe_plugin.so (Flow per Packet)
load_one_plugin:184: Loaded plugin: flowtable_plugin.so (sample of flowtable)
load_one_plugin:184: Loaded plugin: gtpu_plugin.so (GTPv1-U)
load_one_plugin:184: Loaded plugin: ila_plugin.so (Identifier-locator addressing for IPv6)
load_one_plugin:184: Loaded plugin: ioam_plugin.so (Inbound OAM)
load_one_plugin:114: Plugin disabled (default): ixge_plugin.so
load_one_plugin:184: Loaded plugin: kubeproxy_plugin.so (kube-proxy data plane)
load_one_plugin:184: Loaded plugin: l2e_plugin.so (L2 Emulation)
load_one_plugin:184: Loaded plugin: lb_plugin.so (Load Balancer)
load_one_plugin:184: Loaded plugin: libsixrd_plugin.so (IPv6 Rapid Deployment on IPv4 Infrastructure (RFC5969))
load_one_plugin:184: Loaded plugin: memif_plugin.so (Packet Memory Interface (experimetal))
load_one_plugin:184: Loaded plugin: nat_plugin.so (Network Address Translation)
load_one_plugin:184: Loaded plugin: pktdump_plugin.so (Sample of VPP Plugin)
load_one_plugin:184: Loaded plugin: pppoe_plugin.so (PPPoE)
load_one_plugin:184: Loaded plugin: stn_plugin.so (VPP Steals the NIC for Container integration)
load_one_plugin:63: Loaded plugin: /usr/lib/vpp_api_test_plugins/vxlan_gpe_ioam_export_test_plugin.so
load_one_plugin:63: Loaded plugin: /usr/lib/vpp_api_test_plugins/kubeproxy_test_plugin.so
load_one_plugin:63: Loaded plugin: /usr/lib/vpp_api_test_plugins/gtpu_test_plugin.so
load_one_plugin:63: Loaded plugin: /usr/lib/vpp_api_test_plugins/stn_test_plugin.so
load_one_plugin:63: Loaded plugin: /usr/lib/vpp_api_test_plugins/acl_test_plugin.so
load_one_plugin:63: Loaded plugin: /usr/lib/vpp_api_test_plugins/ioam_export_test_plugin.so
load_one_plugin:63: Loaded plugin: /usr/lib/vpp_api_test_plugins/udp_ping_test_plugin.so
load_one_plugin:63: Loaded plugin: /usr/lib/vpp_api_test_plugins/lb_test_plugin.so
load_one_plugin:63: Loaded plugin: /usr/lib/vpp_api_test_plugins/ioam_pot_test_plugin.so
load_one_plugin:63: Loaded plugin: /usr/lib/vpp_api_test_plugins/nat_test_plugin.so
load_one_plugin:63: Loaded plugin: /usr/lib/vpp_api_test_plugins/ioam_vxlan_gpe_test_plugin.so
load_one_plugin:63: Loaded plugin: /usr/lib/vpp_api_test_plugins/pppoe_test_plugin.so
load_one_plugin:63: Loaded plugin: /usr/lib/vpp_api_test_plugins/dpdk_test_plugin.so
load_one_plugin:63: Loaded plugin: /usr/lib/vpp_api_test_plugins/ioam_trace_test_plugin.so
load_one_plugin:63: Loaded plugin: /usr/lib/vpp_api_test_plugins/flowprobe_test_plugin.so
load_one_plugin:63: Loaded plugin: /usr/lib/vpp_api_test_plugins/memif_test_plugin.so
vlib_pci_bind_to_uio: Skipping PCI device 0000:0b:00.0 as host interface eth1 is up
dpdk_config:1240: EAL init args: -c e -n 4 --huge-dir /run/vpp/hugepages --file-prefix vpp -b 0000:0b:00.0 --master-lcore 1 --socket-mem 64
EAL: 512 hugepages of size 2097152 reserved, but no mounted hugetlbfs found for that size
EAL: VFIO support initialized
EAL: Invalid NUMA socket, default to 0
EAL: Invalid NUMA socket, default to 0
EAL: Invalid NUMA socket, default to 0
DPDK physical memory layout:
Segment 0: IOVA:0x100000000, len:1073741824, virt:0x7fc500000000, socket_id:0, hugepage_sz:1073741824, nchannel:0, nrank:0
unix_physmem_region_iommu_register: ioctl (VFIO_IOMMU_MAP_DMA): Invalid argument
0: dpdk_ipsec_process:1011: not enough DPDK crypto resources, default to OpenSSL
0: dpdk_lib_init:225: DPDK drivers found 3 ports...
_______ _ _ _____ ___
__/ __/ _ \ (_)__ | | / / _ \/ _ \
_/ _// // / / / _ \ | |/ / ___/ ___/
/_/ /____(_)_/\___/ |___/_/ /_/
先看下vpp源码是如何加载.so库的:main函数初始化阶段,先调用了vlib_plugin_early_init函数
/* Resolve the plugin search path (falling back to the compiled-in
 * default vlib_plugin_path), create the name->index hash, and load all
 * plugins found. Called once from vlib_unix_main() before the regular
 * configuration functions run. Returns 0 on success. */
int
vlib_plugin_early_init (vlib_main_t * vm)
{
plugin_main_t *pm = &vlib_plugin_main;
/* Only use the default path when none was configured via startup.conf. */
if (pm->plugin_path == 0)
pm->plugin_path = format (0, "%s%c", vlib_plugin_path, 0);
clib_warning ("plugin path %s", pm->plugin_path);
pm->plugin_by_name_hash = hash_create_string (0, sizeof (uword));
pm->vlib_main = vm;
return vlib_load_new_plugins (pm, 1 /* from_early_init */ );
}
接下来在vlib_load_new_plugins函数遍历所有插件路径,并在load_one_plugin加载.so
/* Scan every directory in pm->plugin_path for "*.so" files, record each
 * previously-unseen plugin in pm->plugin_info, sort the vector by name,
 * then attempt to load every entry via load_one_plugin(). Entries that
 * fail to load are pruned and the name hash is rebuilt. Returns 0. */
int
vlib_load_new_plugins (plugin_main_t * pm, int from_early_init)
{
DIR *dp;
struct dirent *entry;
struct stat statb;
uword *p;
plugin_info_t *pi;
u8 **plugin_path;
u32 *load_fail_indices = 0;
int i;
plugin_path = split_plugin_path (pm);
for (i = 0; i < vec_len (plugin_path); i++)
{
dp = opendir ((char *) plugin_path[i]);
if (dp == 0)
continue;
while ((entry = readdir (dp)))
{
u8 *plugin_name;
u8 *filename;
/* Optional filter: require the configured name prefix to match. */
if (pm->plugin_name_filter)
{
int j;
for (j = 0; j < vec_len (pm->plugin_name_filter); j++)
if (entry->d_name[j] != pm->plugin_name_filter[j])
goto next;
}
filename = format (0, "%s/%s%c", plugin_path[i], entry->d_name, 0);
/* Only accept .so */
char *ext = strrchr ((const char *) filename, '.');
/* unreadable */
if (!ext || (strcmp (ext, ".so") != 0) ||
stat ((char *) filename, &statb) < 0)
{
ignore:
vec_free (filename);
continue;
}
/* a dir or other things which aren't plugins */
if (!S_ISREG (statb.st_mode))
goto ignore;
plugin_name = format (0, "%s%c", entry->d_name, 0);
/* Have we seen this plugin already? */
p = hash_get_mem (pm->plugin_by_name_hash, plugin_name);
if (p == 0)
{
/* No, add it to the plugin vector */
vec_add2 (pm->plugin_info, pi, 1);
pi->name = plugin_name;
pi->filename = filename;
pi->file_info = statb;
hash_set_mem (pm->plugin_by_name_hash, plugin_name,
pi - pm->plugin_info);
}
next:
;
}
closedir (dp);
vec_free (plugin_path[i]);
}
vec_free (plugin_path);
/*
* Sort the plugins by name. This is important.
* API traces contain absolute message numbers.
* Loading plugins in directory (vs. alphabetical) order
* makes trace replay incredibly fragile.
*/
vec_sort_with_function (pm->plugin_info, plugin_name_sort_cmp);
/*
* Attempt to load the plugins
*/
for (i = 0; i < vec_len (pm->plugin_info); i++)
{
pi = vec_elt_at_index (pm->plugin_info, i);
if (load_one_plugin (pm, pi, from_early_init))
{
/* Make a note of any which fail to load */
vec_add1 (load_fail_indices, i);
hash_unset_mem (pm->plugin_by_name_hash, pi->name);
vec_free (pi->name);
vec_free (pi->filename);
}
}
/* Remove plugin info vector elements corresponding to load failures */
if (vec_len (load_fail_indices) > 0)
{
/* Walk from the back so earlier recorded indices stay valid. */
for (i = vec_len (load_fail_indices) - 1; i >= 0; i--)
vec_delete (pm->plugin_info, 1, load_fail_indices[i]);
vec_free (load_fail_indices);
}
/* Recreate the plugin name hash */
for (i = 0; i < vec_len (pm->plugin_info); i++)
{
pi = vec_elt_at_index (pm->plugin_info, i);
hash_unset_mem (pm->plugin_by_name_hash, pi->name);
/* Re-map name -> (possibly shifted) vector index after deletions. */
hash_set_mem (pm->plugin_by_name_hash, pi->name, pi - pm->plugin_info);
}
return 0;
}
在load_one_plugin函数中,先读取可执行程序的ELF header信息(代码参考elf.c);
并显式调用动态链接库接口(dlopen,dlsym)加载插件;
/*
 * Validate and load a single plugin shared object.
 *
 * Steps: parse the ELF file and require a ".vlib_plugin_registration"
 * section of exactly sizeof (vlib_plugin_registration_t); honor the
 * per-plugin enable/disable configuration from startup.conf; check the
 * plugin's required application-version prefix; then dlopen() the .so,
 * look up its registration symbol and, if set, run its early-init hook.
 *
 * Returns 0 on success, -1 when the file is rejected (caller prunes it).
 * Hard failures after dlopen() terminate the process via os_exit().
 *
 * Fixes vs. the quoted original: mangled HTML entities restored —
 * "&sect;ion"/"&reg->" had been rendered as "§ion"/"®->", which does
 * not compile.
 */
static int
load_one_plugin (plugin_main_t * pm, plugin_info_t * pi, int from_early_init)
{
  void *handle;
  clib_error_t *error;
  elf_main_t em = { 0 };
  elf_section_t *section;
  u8 *data;
  char *version_required;
  vlib_plugin_registration_t *reg;
  plugin_config_t *pc = 0;
  uword *p;

  if (elf_read_file (&em, (char *) pi->filename))
    return -1;

  /* A real vlib plugin carries the registration section placed there by
     the VLIB_PLUGIN_REGISTER() macro. */
  error = elf_get_section_by_name (&em, ".vlib_plugin_registration",
				   &section);
  if (error)
    {
      /* NOTE(review): em is not freed on this early-out path — looks
         like a small leak inherited from upstream; confirm whether
         elf_main_free (&em) is safe to add here. */
      clib_warning ("Not a plugin: %s\n", (char *) pi->name);
      return -1;
    }

  data = elf_get_section_contents (&em, section->index, 1);
  reg = (vlib_plugin_registration_t *) data;

  if (vec_len (data) != sizeof (*reg))
    {
      clib_warning ("vlib_plugin_registration size mismatch in plugin %s\n",
		    (char *) pi->name);
      goto error;
    }

  /* Apply startup.conf per-plugin configuration, if present. */
  p = hash_get_mem (pm->config_index_by_name, pi->name);
  if (p)
    {
      pc = vec_elt_at_index (pm->configs, p[0]);
      if (pc->is_disabled)
	{
	  clib_warning ("Plugin disabled: %s", pi->name);
	  goto error;
	}
      /* Default-disabled plugins need an explicit "enable" stanza. */
      if (reg->default_disabled && pc->is_enabled == 0)
	{
	  clib_warning ("Plugin disabled (default): %s", pi->name);
	  goto error;
	}
    }
  else if (reg->default_disabled)
    {
      clib_warning ("Plugin disabled (default): %s", pi->name);
      goto error;
    }

  /* Version gate: a plugin may pin a required vpp version prefix. */
  version_required = str_array_to_vec ((char *) &reg->version_required,
				       sizeof (reg->version_required));
  if ((strlen (version_required) > 0) &&
      (strncmp (vlib_plugin_app_version, version_required,
		strlen (version_required))))
    {
      clib_warning ("Plugin %s version mismatch: %s != %s",
		    pi->name, vlib_plugin_app_version, reg->version_required);
      if (!(pc && pc->skip_version_check == 1))
	{
	  vec_free (version_required);
	  goto error;
	}
    }

  vec_free (version_required);
  vec_free (data);
  elf_main_free (&em);

  handle = dlopen ((char *) pi->filename, RTLD_LAZY);
  if (handle == 0)
    {
      clib_warning ("%s", dlerror ());
      clib_warning ("Failed to load plugin '%s'", pi->name);
      os_exit (1);
    }
  pi->handle = handle;

  /* Re-resolve the registration through the loaded image. */
  reg = dlsym (pi->handle, "vlib_plugin_registration");
  if (reg == 0)
    {
      /* This should never happen unless somebody changes registration macro */
      clib_warning ("Missing plugin registration in plugin '%s'", pi->name);
      os_exit (1);
    }
  pi->reg = reg;
  pi->version = str_array_to_vec ((char *) &reg->version,
				  sizeof (reg->version));

  /* Optionally run the plugin's early init hook, located by symbol name. */
  if (reg->early_init)
    {
      clib_error_t *(*ei) (vlib_main_t *);
      void *h;

      h = dlsym (pi->handle, reg->early_init);
      if (h)
	{
	  ei = h;
	  error = (*ei) (pm->vlib_main);
	  if (error)
	    {
	      clib_error_report (error);
	      os_exit (1);
	    }
	}
      else
	clib_warning ("Plugin %s: early init function %s set but not found",
		      (char *) pi->name, reg->early_init);
    }

  if (reg->description)
    clib_warning ("Loaded plugin: %s (%s)", pi->name, reg->description);
  else
    clib_warning ("Loaded plugin: %s", pi->name);

  return 0;

error:
  vec_free (data);
  elf_main_free (&em);
  return -1;
}
业务注册时使用 VLIB_PLUGIN_REGISTER注册插件section
/* Define the per-plugin registration object; the __section__ attribute
 * places it in the dedicated ".vlib_plugin_registration" ELF section so
 * load_one_plugin() can locate it by section name before dlopen()ing
 * the shared object. */
#define VLIB_PLUGIN_REGISTER() \
vlib_plugin_registration_t vlib_plugin_registration \
__attribute__((__section__(".vlib_plugin_registration")))
2. 线程及协程初始化
vpp加载完插件后,首先在主线程使用协程机制来处理业务(协程是一种轻量级的执行单元,拥有自己独立的栈)
int
vlib_unix_main (int argc, char *argv[])
{
vlib_main_t *vm = &vlib_global_main; /* one and only time for this! */
unformat_input_t input;
clib_error_t *e;
int i;
vm->argv = (u8 **) argv;
vm->name = argv[0];
vm->heap_base = clib_mem_get_heap ();
ASSERT (vm->heap_base);
unformat_init_command_line (&input, (char **) vm->argv);
if ((e = vlib_plugin_config (vm, &input)))
{
clib_error_report (e);
return 1;
}
unformat_free (&input);
i = vlib_plugin_early_init (vm);
if (i)
return i;
unformat_init_command_line (&input, (char **) vm->argv);
if (vm->init_functions_called == 0)
vm->init_functions_called = hash_create (0, /* value bytes */ 0);
e = vlib_call_all_config_functions (vm, &input, 1 /* early */ );
if (e != 0)
{
clib_error_report (e);
return 1;
}
unformat_free (&input);
vlib_thread_stack_init (0);
__os_thread_index = 0;
vm->thread_index = 0;
i = clib_calljmp (thread0, (uword) vm,
(void *) (vlib_thread_stacks[0] + ?*二维数组*/
VLIB_THREAD_STACK_SIZE));/*协程,calljmp调用完继续执行,此时main和协程分开执行*/
return i;
}
vpp使用clib_setjmp, clib_longjmp, clib_calljmp实现协程机制;其中几个函数的作用是:
clib_setjmp 设置跳转的标签
clib_longjmp跳转指定的标签
clib_calljmp(入口函数,参数,栈)
在thread0主线程的协程内初始化工作线程
/* Called early in the init sequence */
/* Discover online CPU cores/sockets from sysfs, pin the main thread to
 * tm->main_lcore, pre-fill the thread-0 slot of vlib_worker_threads,
 * then assign cores to every registered thread type and compute
 * tm->n_vlib_mains. Returns 0 on success or a clib error when the
 * requested cores are unavailable. */
clib_error_t *
vlib_thread_init (vlib_main_t * vm)
{
vlib_thread_main_t *tm = &vlib_thread_main;
vlib_worker_thread_t *w;
vlib_thread_registration_t *tr;
u32 n_vlib_mains = 1;
u32 first_index = 1;
u32 i;
uword *avail_cpu;
/* get bitmaps of active cpu cores and sockets */
tm->cpu_core_bitmap =
clib_sysfs_list_to_bitmap ("/sys/devices/system/cpu/online");
tm->cpu_socket_bitmap =
clib_sysfs_list_to_bitmap ("/sys/devices/system/node/online");
avail_cpu = clib_bitmap_dup (tm->cpu_core_bitmap);
/* skip cores */
for (i = 0; i < tm->skip_cores; i++)
{
uword c = clib_bitmap_first_set (avail_cpu);
if (c == ~0)
return clib_error_return (0, "no available cpus to skip");
avail_cpu = clib_bitmap_set (avail_cpu, c, 0);
}
/* grab cpu for main thread */
if (!tm->main_lcore)
{
/* No main core configured: take the first still-available one. */
tm->main_lcore = clib_bitmap_first_set (avail_cpu);
if (tm->main_lcore == (u8) ~ 0)
return clib_error_return (0, "no available cpus to be used for the"
" main thread");
}
else
{
if (clib_bitmap_get (avail_cpu, tm->main_lcore) == 0)
return clib_error_return (0, "cpu %u is not available to be used"
" for the main thread", tm->main_lcore);
}
/* The main core is no longer available to workers. */
avail_cpu = clib_bitmap_set (avail_cpu, tm->main_lcore, 0);
/* assume that there is socket 0 only if there is no data from sysfs */
if (!tm->cpu_socket_bitmap)
tm->cpu_socket_bitmap = clib_bitmap_set (0, 0, 1);
/* pin main thread to main_lcore */
if (tm->cb.vlib_thread_set_lcore_cb)
{
tm->cb.vlib_thread_set_lcore_cb (0, tm->main_lcore);
}
else
{
cpu_set_t cpuset;
CPU_ZERO (&cpuset);
CPU_SET (tm->main_lcore, &cpuset);
pthread_setaffinity_np (pthread_self (), sizeof (cpu_set_t), &cpuset);
}
/* as many threads as stacks... */
vec_validate_aligned (vlib_worker_threads, vec_len (vlib_thread_stacks) - 1,
CLIB_CACHE_LINE_BYTES);
/* Preallocate thread 0 */
_vec_len (vlib_worker_threads) = 1;
w = vlib_worker_threads;
w->thread_mheap = clib_mem_get_heap ();
w->thread_stack = vlib_thread_stacks[0];
w->lcore_id = tm->main_lcore;
w->lwp = syscall (SYS_gettid);
w->thread_id = pthread_self ();
tm->n_vlib_mains = 1;
/* Optionally apply the configured scheduling policy/priority. */
if (tm->sched_policy != ~0)
{
struct sched_param sched_param;
if (!sched_getparam (w->lwp, &sched_param))
{
if (tm->sched_priority != ~0)
sched_param.sched_priority = tm->sched_priority;
sched_setscheduler (w->lwp, tm->sched_policy, &sched_param);
}
}
/* assign threads to cores and set n_vlib_mains */
tr = tm->next;
while (tr)
{
vec_add1 (tm->registrations, tr);
tr = tr->next;
}
vec_sort_with_function (tm->registrations, sort_registrations_by_no_clone);
for (i = 0; i < vec_len (tm->registrations); i++)
{
int j;
tr = tm->registrations[i];
tr->first_index = first_index;
first_index += tr->count;
/* Only data-structure-cloning threads count as vlib mains. */
n_vlib_mains += (tr->no_data_structure_clone == 0) ? tr->count : 0;
/* construct coremask */
if (tr->use_pthreads || !tr->count)
continue;
if (tr->coremask)
{
uword c;
/* *INDENT-OFF* */
clib_bitmap_foreach (c, tr->coremask, ({
if (clib_bitmap_get(avail_cpu, c) == 0)
return clib_error_return (0, "cpu %u is not available to be used"
" for the '%s' thread",c, tr->name);
avail_cpu = clib_bitmap_set(avail_cpu, c, 0);
}));
/* *INDENT-ON* */
}
else
{
/* No explicit coremask: grab the next free cores in order. */
for (j = 0; j < tr->count; j++)
{
uword c = clib_bitmap_first_set (avail_cpu);
if (c == ~0)
return clib_error_return (0,
"no available cpus to be used for"
" the '%s' thread", tr->name);
avail_cpu = clib_bitmap_set (avail_cpu, c, 0);
tr->coremask = clib_bitmap_set (tr->coremask, c, 1);
}
}
}
clib_bitmap_free (avail_cpu);
tm->n_vlib_mains = n_vlib_mains;
vec_validate_aligned (vlib_worker_threads, first_index - 1,
CLIB_CACHE_LINE_BYTES);
return 0;
}
vlib_main函数内有初始化vlib_worker_threads工作线程。相关配置可参考:/etc/vpp/startup.conf
工作线程注册函数为VLIB_MAIN_LOOP_ENTER_FUNCTION并在start_workers启动工作线程
VLIB_MAIN_LOOP_ENTER_FUNCTION (start_workers);
/* Main-loop-enter hook: create every registered worker thread. When
 * data-structure clones are needed (n_vlib_mains > 1), fork a per-worker
 * vlib_main_t clone (node runtimes, frames, error counters, buffer free
 * lists) on each worker's heap; then launch all threads and release them
 * through an initial barrier. Returns 0. */
static clib_error_t *
start_workers (vlib_main_t * vm)
{
int i, j;
vlib_worker_thread_t *w;
vlib_main_t *vm_clone;
void *oldheap;
vlib_thread_main_t *tm = &vlib_thread_main;
vlib_thread_registration_t *tr;
vlib_node_runtime_t *rt;
u32 n_vlib_mains = tm->n_vlib_mains;
u32 worker_thread_index;
u8 *main_heap = clib_mem_get_per_cpu_heap ();
mheap_t *main_heap_header = mheap_header (main_heap);
vec_reset_length (vlib_worker_threads);
/* Set up the main thread */
vec_add2_aligned (vlib_worker_threads, w, 1, CLIB_CACHE_LINE_BYTES);
w->elog_track.name = "main thread";
elog_track_register (&vm->elog_main, &w->elog_track);
if (vec_len (tm->thread_prefix))
{
w->name = format (0, "%v_main%c", tm->thread_prefix, '\0');
vlib_set_thread_name ((char *) w->name);
}
/*
* Truth of the matter: we always use at least two
* threads. So, make the main heap thread-safe
* and make the event log thread-safe.
*/
main_heap_header->flags |= MHEAP_FLAG_THREAD_SAFE;
vm->elog_main.lock =
clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES);
vm->elog_main.lock[0] = 0;
if (n_vlib_mains > 1)
{
/* Replace hand-crafted length-1 vector with a real vector */
vlib_mains = 0;
vec_validate_aligned (vlib_mains, tm->n_vlib_mains - 1,
CLIB_CACHE_LINE_BYTES);
_vec_len (vlib_mains) = 0;
vec_add1_aligned (vlib_mains, vm, CLIB_CACHE_LINE_BYTES);
/* Shared barrier counters live on the main thread's slot. */
vlib_worker_threads->wait_at_barrier =
clib_mem_alloc_aligned (sizeof (u32), CLIB_CACHE_LINE_BYTES);
vlib_worker_threads->workers_at_barrier =
clib_mem_alloc_aligned (sizeof (u32), CLIB_CACHE_LINE_BYTES);
vlib_worker_threads->node_reforks_required =
clib_mem_alloc_aligned (sizeof (u32), CLIB_CACHE_LINE_BYTES);
/* Ask for an initial barrier sync */
*vlib_worker_threads->workers_at_barrier = 0;
*vlib_worker_threads->wait_at_barrier = 1;
/* Without update or refork */
*vlib_worker_threads->node_reforks_required = 0;
vm->need_vlib_worker_thread_node_runtime_update = 0;
/* init timing */
vm->barrier_epoch = 0;
vm->barrier_no_close_before = 0;
worker_thread_index = 1;
for (i = 0; i < vec_len (tm->registrations); i++)
{
vlib_node_main_t *nm, *nm_clone;
vlib_buffer_main_t *bm_clone;
vlib_buffer_free_list_t *fl_clone, *fl_orig;
vlib_buffer_free_list_t *orig_freelist_pool;
int k;
tr = tm->registrations[i];
if (tr->count == 0)
continue;
/* One vlib_worker_thread_t (and clone, if needed) per instance. */
for (k = 0; k < tr->count; k++)
{
vlib_node_t *n;
vec_add2 (vlib_worker_threads, w, 1);
if (tr->mheap_size)
w->thread_mheap =
mheap_alloc (0 /* use VM */ , tr->mheap_size);
else
w->thread_mheap = main_heap;
w->thread_stack =
vlib_thread_stack_init (w - vlib_worker_threads);
w->thread_function = tr->function;
w->thread_function_arg = w;
w->instance_id = k;
w->registration = tr;
w->elog_track.name =
(char *) format (0, "%s %d", tr->name, k + 1);
vec_add1 (w->elog_track.name, 0);
elog_track_register (&vm->elog_main, &w->elog_track);
if (tr->no_data_structure_clone)
continue;
/* Fork vlib_global_main et al. Look for bugs here */
/* Allocate the clone on the worker's own heap. */
oldheap = clib_mem_set_heap (w->thread_mheap);
vm_clone = clib_mem_alloc (sizeof (*vm_clone));
clib_memcpy (vm_clone, vlib_mains[0], sizeof (*vm_clone));
vm_clone->thread_index = worker_thread_index;
vm_clone->heap_base = w->thread_mheap;
vm_clone->mbuf_alloc_list = 0;
vm_clone->init_functions_called =
hash_create (0, /* value bytes */ 0);
vm_clone->pending_rpc_requests = 0;
vec_validate (vm_clone->pending_rpc_requests, 0);
_vec_len (vm_clone->pending_rpc_requests) = 0;
memset (&vm_clone->random_buffer, 0,
sizeof (vm_clone->random_buffer));
nm = &vlib_mains[0]->node_main;
nm_clone = &vm_clone->node_main;
/* fork next frames array, preserving node runtime indices */
nm_clone->next_frames = vec_dup (nm->next_frames);
for (j = 0; j < vec_len (nm_clone->next_frames); j++)
{
vlib_next_frame_t *nf = &nm_clone->next_frames[j];
u32 save_node_runtime_index;
u32 save_flags;
save_node_runtime_index = nf->node_runtime_index;
save_flags = nf->flags & VLIB_FRAME_NO_FREE_AFTER_DISPATCH;
vlib_next_frame_init (nf);
nf->node_runtime_index = save_node_runtime_index;
nf->flags = save_flags;
}
/* fork the frame dispatch queue */
nm_clone->pending_frames = 0;
vec_validate (nm_clone->pending_frames, 10); /* $$$$$?????? */
_vec_len (nm_clone->pending_frames) = 0;
/* fork nodes */
nm_clone->nodes = 0;
/* Allocate all nodes in single block for speed */
n = clib_mem_alloc_no_fail (vec_len (nm->nodes) * sizeof (*n));
for (j = 0; j < vec_len (nm->nodes); j++)
{
clib_memcpy (n, nm->nodes[j], sizeof (*n));
/* none of the copied nodes have enqueue rights given out */
n->owner_node_index = VLIB_INVALID_NODE_INDEX;
memset (&n->stats_total, 0, sizeof (n->stats_total));
memset (&n->stats_last_clear, 0,
sizeof (n->stats_last_clear));
vec_add1 (nm_clone->nodes, n);
n++;
}
nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] =
vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]);
vec_foreach (rt,
nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL])
{
vlib_node_t *n = vlib_get_node (vm, rt->node_index);
rt->thread_index = vm_clone->thread_index;
/* copy initial runtime_data from node */
if (n->runtime_data && n->runtime_data_bytes > 0)
clib_memcpy (rt->runtime_data, n->runtime_data,
clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
n->runtime_data_bytes));
}
nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] =
vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]);
vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
{
vlib_node_t *n = vlib_get_node (vm, rt->node_index);
rt->thread_index = vm_clone->thread_index;
/* copy initial runtime_data from node */
if (n->runtime_data && n->runtime_data_bytes > 0)
clib_memcpy (rt->runtime_data, n->runtime_data,
clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
n->runtime_data_bytes));
}
nm_clone->processes = vec_dup (nm->processes);
/* zap the (per worker) frame freelists, etc */
nm_clone->frame_sizes = 0;
nm_clone->frame_size_hash = hash_create (0, sizeof (uword));
/* Packet trace buffers are guaranteed to be empty, nothing to do here */
clib_mem_set_heap (oldheap);
vec_add1_aligned (vlib_mains, vm_clone, CLIB_CACHE_LINE_BYTES);
vm_clone->error_main.counters =
vec_dup (vlib_mains[0]->error_main.counters);
vm_clone->error_main.counters_last_clear =
vec_dup (vlib_mains[0]->error_main.counters_last_clear);
/* Fork the vlib_buffer_main_t free lists, etc. */
bm_clone = vec_dup (vm_clone->buffer_main);
vm_clone->buffer_main = bm_clone;
orig_freelist_pool = bm_clone->buffer_free_list_pool;
bm_clone->buffer_free_list_pool = 0;
/* *INDENT-OFF* */
pool_foreach (fl_orig, orig_freelist_pool,
({
pool_get_aligned (bm_clone->buffer_free_list_pool,
fl_clone, CLIB_CACHE_LINE_BYTES);
ASSERT (fl_orig - orig_freelist_pool
== fl_clone - bm_clone->buffer_free_list_pool);
fl_clone[0] = fl_orig[0];
fl_clone->buffers = 0;
fl_clone->n_alloc = 0;
}));
/* *INDENT-ON* */
worker_thread_index++;
}
}
}
else
{
/* only have non-data-structure copy threads to create... */
for (i = 0; i < vec_len (tm->registrations); i++)
{
tr = tm->registrations[i];
for (j = 0; j < tr->count; j++)
{
vec_add2 (vlib_worker_threads, w, 1);
if (tr->mheap_size)
w->thread_mheap =
mheap_alloc (0 /* use VM */ , tr->mheap_size);
else
w->thread_mheap = main_heap;
w->thread_stack =
vlib_thread_stack_init (w - vlib_worker_threads);
w->thread_function = tr->function;
w->thread_function_arg = w;
w->instance_id = j;
w->elog_track.name =
(char *) format (0, "%s %d", tr->name, j + 1);
w->registration = tr;
vec_add1 (w->elog_track.name, 0);
elog_track_register (&vm->elog_main, &w->elog_track);
}
}
}
/* Launch all worker threads, on pthreads or pinned to their cores. */
worker_thread_index = 1;
for (i = 0; i < vec_len (tm->registrations); i++)
{
clib_error_t *err;
int j;
tr = tm->registrations[i];
if (tr->use_pthreads || tm->use_pthreads)
{
for (j = 0; j < tr->count; j++)
{
w = vlib_worker_threads + worker_thread_index++;
err = vlib_launch_thread_int (vlib_worker_thread_bootstrap_fn,
w, 0);
if (err)
clib_error_report (err);
}
}
else
{
uword c;
/* *INDENT-OFF* */
clib_bitmap_foreach (c, tr->coremask, ({
w = vlib_worker_threads + worker_thread_index++;
err = vlib_launch_thread_int (vlib_worker_thread_bootstrap_fn,
w, c);
if (err)
clib_error_report (err);
}));
/* *INDENT-ON* */
}
}
/* Wait for all workers to reach the initial barrier, then release. */
vlib_worker_thread_barrier_sync (vm);
vlib_worker_thread_barrier_release (vm);
return 0;
}
vpp为一个主线程,多个工作线程,并且在线程内有协程机制;
充分利用了多核线程亲和性和协程切换的低开销。
3. 静态节点注册和动态节点编排
静态node通过VLIB_REGISTER_NODE注册,在编译时加载;
main初始化流程在vlib_register_all_static_nodes内初始化,并在register_node进行编排
/* Register all nodes declared at compile time via VLIB_REGISTER_NODE.
 * A "null-node" is registered first so node index 0 is never occupied
 * by a real node; then the static registration linked list is walked. */
void
vlib_register_all_static_nodes (vlib_main_t * vm)
{
vlib_node_registration_t *r;
static char *null_node_error_strings[] = {
"blackholed packets",
};
static vlib_node_registration_t null_node_reg = {
.function = null_node_fn,
.vector_size = sizeof (u32),
.name = "null-node",
.n_errors = 1,
.error_strings = null_node_error_strings,
};
/* make sure that node index 0 is not used by
real node */
register_node (vm, &null_node_reg);
/* Walk the list built by the VLIB_REGISTER_NODE constructors. */
r = vm->node_main.node_registrations;
while (r)
{
register_node (vm, r);
r = r->next_registration;
}
}
动态节点是可以动态加载的,即在程序运行时加载:
vpp内vnet_config_add_feature,vnet_config_del_feature,find_config_with_features有实现。
/* Find or create a vnet_config_t matching feature_vector.
 * Chains the feature nodes into the graph while building a "config
 * string" (next-index plus per-feature opaque data for each feature,
 * terminated by the end-node next index), then uses that string as a
 * hash key so identical configurations are shared. On a hash hit the
 * feature vector is freed and the existing config returned; on a miss
 * ownership of feature_vector and config_string passes to the new
 * config, and a heap copy of the string is made for buffers to read. */
static vnet_config_t *
find_config_with_features (vlib_main_t * vm,
vnet_config_main_t * cm,
vnet_config_feature_t * feature_vector)
{
u32 last_node_index = ~0;
vnet_config_feature_t *f;
u32 *config_string;
uword *p;
vnet_config_t *c;
/* Reuse the scratch vector to avoid reallocating on every call. */
config_string = cm->config_string_temp;
cm->config_string_temp = 0;
if (config_string)
_vec_len (config_string) = 0;
vec_foreach (f, feature_vector)
{
/* Connect node graph. */
f->next_index = add_next (vm, cm, last_node_index, f->node_index);
last_node_index = f->node_index;
/* Store next index in config string. */
vec_add1 (config_string, f->next_index);
/* Store feature config. */
vec_add (config_string, f->feature_config, vec_len (f->feature_config));
}
/* Terminate config string with next for end node. */
if (last_node_index == ~0 || last_node_index != cm->end_node_index)
{
u32 next_index = add_next (vm, cm, last_node_index, cm->end_node_index);
vec_add1 (config_string, next_index);
}
/* See if config string is unique. */
p = hash_get_mem (cm->config_string_hash, config_string);
if (p)
{
/* Not unique. Share existing config. */
cm->config_string_temp = config_string; /* we'll use it again later. */
free_feature_vector (feature_vector);
c = pool_elt_at_index (cm->config_pool, p[0]);
}
else
{
u32 *d;
pool_get (cm->config_pool, c);
c->index = c - cm->config_pool;
c->features = feature_vector;
c->config_string_vector = config_string;
/* Allocate copy of config string in heap.
VLIB buffers will maintain pointers to heap as they read out
configuration data. */
c->config_string_heap_index
= heap_alloc (cm->config_string_heap, vec_len (config_string) + 1,
c->config_string_heap_handle);
/* First element in heap points back to pool index. */
d =
vec_elt_at_index (cm->config_string_heap,
c->config_string_heap_index);
d[0] = c->index;
clib_memcpy (d + 1, config_string, vec_bytes (config_string));
hash_set_mem (cm->config_string_hash, config_string, c->index);
c->reference_count = 0; /* will be incremented by caller. */
}
return c;
}
二、命令行配置分析
命令行是在协程中实现,代码如下:
/* Called in main stack. */
/* Start (or restart) a process node: arm p->return_longjmp so the
 * process can suspend/return to the scheduler, then run its bootstrap
 * on the process's private stack via clib_calljmp(). Returns the
 * process's suspend/return code. */
static_always_inline uword
vlib_process_startup (vlib_main_t * vm, vlib_process_t * p, vlib_frame_t * f)
{
vlib_process_bootstrap_args_t a;
uword r;
a.vm = vm;
a.process = p;
a.frame = f;
/* Direct call yields VLIB_PROCESS_RETURN_LONGJMP_RETURN; a later
clib_longjmp from inside the process lands here with its own code. */
r = clib_setjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_RETURN);
if (r == VLIB_PROCESS_RETURN_LONGJMP_RETURN)
r = clib_calljmp (vlib_process_bootstrap, pointer_to_uword (&a),
(void *) p->stack + (1 << p->log2_n_stack_bytes));
return r;
}
unix_cli_process是CLI会话process类型node的回调函数;业务模块通过VLIB_CLI_COMMAND注册的命令,最终都在该node中经由unix_cli_process_input被调用
/** Store a new CLI session.
 * Creates (or recycles) a process node running unix_cli_process for the
 * session, hooks the session fd into the file poller, and starts the
 * process.
 * @param cm The CLI main.
 * @param name The name of the session.
 * @param fd The file descriptor for the session I/O.
 * @return The session ID (index into cm->cli_file_pool).
 */
static u32
unix_cli_file_add (unix_cli_main_t * cm, char *name, int fd)
{
unix_main_t *um = &unix_main;
clib_file_main_t *fm = &file_main;
unix_cli_file_t *cf;
clib_file_t template = { 0 };
vlib_main_t *vm = um->vlib_main;
vlib_node_t *n;
name = (char *) format (0, "unix-cli-%s", name);
if (vec_len (cm->unused_cli_process_node_indices) > 0)
{
/* Recycle a previously retired CLI process node. */
uword l = vec_len (cm->unused_cli_process_node_indices);
/* Find node and give it new name. */
n = vlib_get_node (vm, cm->unused_cli_process_node_indices[l - 1]);
vec_free (n->name);
n->name = (u8 *) name;
vlib_node_set_state (vm, n->index, VLIB_NODE_STATE_POLLING);
_vec_len (cm->unused_cli_process_node_indices) = l - 1;
}
else
{
/* No recyclable node: register a fresh process node. */
static vlib_node_registration_t r = {
.function = unix_cli_process,
.type = VLIB_NODE_TYPE_PROCESS,
.process_log2_n_stack_bytes = 16,
};
r.name = name;
vlib_register_node (vm, &r);
vec_free (name);
n = vlib_get_node (vm, r.index);
}
pool_get (cm->cli_file_pool, cf);
memset (cf, 0, sizeof (*cf));
/* Register the session fd with the file poller; private_data is the
pool index so the callbacks can find this session. */
template.read_function = unix_cli_read_ready;
template.write_function = unix_cli_write_ready;
template.error_function = unix_cli_error_detected;
template.file_descriptor = fd;
template.private_data = cf - cm->cli_file_pool;
cf->process_node_index = n->index;
cf->clib_file_index = clib_file_add (fm, &template);
cf->output_vector = 0;
cf->input_vector = 0;
vlib_start_process (vm, n->runtime_index);
vlib_process_t *p = vlib_get_process_from_node (vm, n);
p->output_function = unix_vlib_cli_output;
p->output_function_arg = cf - cm->cli_file_pool;
return cf - cm->cli_file_pool;
}
三、报文转发
#0 dpdk_device_input (dm=0x7fc770b66900 <dpdk_main>, xd=0x7fc771c7cc40,
node=0x7fc771c50228, thread_index=2, queue_id=0, maybe_multiseg=1)
at /home/share/vpp/build-data/../src/plugins/dpdk/device/node.c:224
#1 0x00007fc76fd6082b in dpdk_input_avx2 (vm=0x7fc771c62f78,
node=0x7fc771c50228, f=0x0)
at /home/share/vpp/build-data/../src/plugins/dpdk/device/node.c:606
#2 0x00007fc7b32b2c9a in dispatch_node (vm=0x7fc771c62f78, node=0x7fc771c50228,
type=VLIB_NODE_TYPE_INPUT, dispatch_state=VLIB_NODE_STATE_POLLING,
frame=0x0, last_time_stamp=7989642574756)
at /home/share/vpp/build-data/../src/vlib/main.c:988
#3 0x00007fc7b32b4a79 in vlib_main_or_worker_loop (vm=0x7fc771c62f78, is_main=0)
at /home/share/vpp/build-data/../src/vlib/main.c:1506
#4 0x00007fc7b32b5520 in vlib_worker_loop (vm=0x7fc771c62f78)
at /home/share/vpp/build-data/../src/vlib/main.c:1634
#5 0x00007fc7b32f6de7 in vlib_worker_thread_fn (arg=0x7fc770ef4580)
at /home/share/vpp/build-data/../src/vlib/threads.c:1736
#6 0x00007fc7b21fbdc4 in clib_calljmp ()
at /home/share/vpp/build-data/../src/vppinfra/longjmp.S:110
#7 0x00007fc60cb8fd70 in ?? ()
#8 0x00007fc7b32f1e56 in vlib_worker_thread_bootstrap_fn (arg=0x7fc770ef4580)
at /home/share/vpp/build-data/../src/vlib/threads.c:680
vpp可以使用dpdk收发包,也可以不使用;从以上运行栈来看,工作线程也使用协程机制。node处理报文流程如下:
1. 业务通过VLIB_REGISTER_NODE注册的node,vpp是先获取的node index;
2. VLIB_NODE_TYPE_PRE_INPUT 类似epoll机制检测io有无数据;
3. VLIB_NODE_TYPE_INPUT 类型节点对应dpdk_device_input,其内部调用dpdk_rx_burst接口进行收包处理
/*
 * This function is used when there are no worker threads.
 * The main thread performs IO and forwards the packets.
 *
 * Polls RX queue 'queue_id' of device 'xd' via dpdk_rx_burst (), converts
 * each received rte_mbuf to its vlib_buffer_t (vlib_buffer_from_rte_mbuf),
 * initializes buffer metadata from a per-thread template, and enqueues the
 * buffers to the next graph node (ethernet-input by default).
 *
 * Parameters:
 *   dm             - dpdk main struct
 *   xd             - device to poll
 *   node           - this input node's runtime (errors, tracing, frames)
 *   thread_index   - current thread index (selects template / trace vectors)
 *   queue_id       - RX queue to poll
 *   maybe_multiseg - compile-time specialization flag; when non-zero,
 *                    chained (nb_segs > 1) mbufs are walked with
 *                    dpdk_process_subseq_segs ()
 *
 * Returns the number of mbufs processed (mb_index).
 */
static_always_inline u32
dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd,
vlib_node_runtime_t * node, u32 thread_index, u16 queue_id,
int maybe_multiseg)
{
u32 n_buffers;
u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
u32 n_left_to_next, *to_next;
u32 mb_index;
vlib_main_t *vm = vlib_get_main ();
uword n_rx_bytes = 0;
u32 n_trace, trace_cnt __attribute__ ((unused));
vlib_buffer_free_list_t *fl;
/* Per-thread buffer template, copied into every received buffer below. */
vlib_buffer_t *bt = vec_elt_at_index (dm->buffer_templates, thread_index);
/* Nothing to do if the interface is administratively down. */
if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0)
return 0;
/* Pull a burst of mbufs from the NIC into xd->rx_vectors[queue_id]. */
n_buffers = dpdk_rx_burst (dm, xd, queue_id);
if (n_buffers == 0)
{
return 0;
}
/* If packet tracing is active, record the buffer indices of the first
   min (n_trace, n_buffers) packets; they are traced via dpdk_rx_trace ()
   after the dispatch loops below. */
vec_reset_length (xd->d_trace_buffers[thread_index]);
trace_cnt = n_trace = vlib_get_trace_count (vm, node);
if (n_trace > 0)
{
u32 n = clib_min (n_trace, n_buffers);
mb_index = 0;
while (n--)
{
struct rte_mbuf *mb = xd->rx_vectors[queue_id][mb_index++];
vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb);/* convert mbuf to vlib buffer */
vec_add1 (xd->d_trace_buffers[thread_index],
vlib_get_buffer_index (vm, b));
}
}
fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
/* Update buffer template */
vnet_buffer (bt)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index;
bt->error = node->errors[DPDK_ERROR_NONE];
/* as DPDK is allocating empty buffers from mempool provided before interface
start for each queue, it is safe to store this in the template */
bt->buffer_pool_index = xd->buffer_pool_for_queue[queue_id];
mb_index = 0;
/* Dual-loop dispatch: a quad loop handling 4 packets per iteration,
   followed by a single-packet loop for the remainder or when the
   outgoing frame is nearly full. */
while (n_buffers > 0)
{
vlib_buffer_t *b0, *b1, *b2, *b3;
u32 bi0, next0;
u32 bi1, next1;
u32 bi2, next2;
u32 bi3, next3;
u8 error0, error1, error2, error3;
i16 offset0, offset1, offset2, offset3;
u64 or_ol_flags;
vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Quad loop: requires >= 12 pending mbufs so the +8..+11 prefetches
   below stay in bounds. */
while (n_buffers >= 12 && n_left_to_next >= 4)
{
struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
/* prefetches are interleaved with the rest of the code to reduce
pressure on L1 cache */
dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 8]);
dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 4]);
mb0 = xd->rx_vectors[queue_id][mb_index];
mb1 = xd->rx_vectors[queue_id][mb_index + 1];
mb2 = xd->rx_vectors[queue_id][mb_index + 2];
mb3 = xd->rx_vectors[queue_id][mb_index + 3];
ASSERT (mb0);
ASSERT (mb1);
ASSERT (mb2);
ASSERT (mb3);
if (maybe_multiseg)
{
if (PREDICT_FALSE (mb0->nb_segs > 1))
dpdk_prefetch_buffer (mb0->next);
if (PREDICT_FALSE (mb1->nb_segs > 1))
dpdk_prefetch_buffer (mb1->next);
if (PREDICT_FALSE (mb2->nb_segs > 1))
dpdk_prefetch_buffer (mb2->next);
if (PREDICT_FALSE (mb3->nb_segs > 1))
dpdk_prefetch_buffer (mb3->next);
}
/* process four mbufs at a time */
b0 = vlib_buffer_from_rte_mbuf (mb0);
b1 = vlib_buffer_from_rte_mbuf (mb1);
b2 = vlib_buffer_from_rte_mbuf (mb2);
b3 = vlib_buffer_from_rte_mbuf (mb3);
dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 9]);
dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 5]);
/* Stamp the template metadata into all four buffer headers. */
clib_memcpy64_x4 (b0, b1, b2, b3, bt);
dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 10]);
dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 7]);
bi0 = vlib_get_buffer_index (vm, b0);
bi1 = vlib_get_buffer_index (vm, b1);
bi2 = vlib_get_buffer_index (vm, b2);
bi3 = vlib_get_buffer_index (vm, b3);
to_next[0] = bi0;
to_next[1] = bi1;
to_next[2] = bi2;
to_next[3] = bi3;
to_next += 4;
n_left_to_next -= 4;
/* An interface-level next-node override takes precedence over the
   per-packet ethertype-based next-node selection. */
if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
{
next0 = next1 = next2 = next3 = xd->per_interface_next_index;
}
else
{
next0 = dpdk_rx_next_from_etype (mb0);
next1 = dpdk_rx_next_from_etype (mb1);
next2 = dpdk_rx_next_from_etype (mb2);
next3 = dpdk_rx_next_from_etype (mb3);
}
dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 11]);
dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 6]);
/* Take the error path only when at least one of the four packets
   reports a bad IP checksum from the NIC offload. */
or_ol_flags = (mb0->ol_flags | mb1->ol_flags |
mb2->ol_flags | mb3->ol_flags);
if (PREDICT_FALSE (or_ol_flags & PKT_RX_IP_CKSUM_BAD))
{
dpdk_rx_error_from_mb (mb0, &next0, &error0);
dpdk_rx_error_from_mb (mb1, &next1, &error1);
dpdk_rx_error_from_mb (mb2, &next2, &error2);
dpdk_rx_error_from_mb (mb3, &next3, &error3);
b0->error = node->errors[error0];
b1->error = node->errors[error1];
b2->error = node->errors[error2];
b3->error = node->errors[error3];
}
/* Per packet: advance current_data by the amount the chosen next
   node expects, set flags for that node, and record l2/l3 header
   offsets relative to the start of the mbuf headroom. */
offset0 = device_input_next_node_advance[next0];
b0->current_data = mb0->data_off + offset0 - RTE_PKTMBUF_HEADROOM;
b0->flags |= device_input_next_node_flags[next0];
vnet_buffer (b0)->l3_hdr_offset = b0->current_data;
vnet_buffer (b0)->l2_hdr_offset =
mb0->data_off - RTE_PKTMBUF_HEADROOM;
b0->current_length = mb0->data_len - offset0;
n_rx_bytes += mb0->pkt_len;
offset1 = device_input_next_node_advance[next1];
b1->current_data = mb1->data_off + offset1 - RTE_PKTMBUF_HEADROOM;
b1->flags |= device_input_next_node_flags[next1];
vnet_buffer (b1)->l3_hdr_offset = b1->current_data;
vnet_buffer (b1)->l2_hdr_offset =
mb1->data_off - RTE_PKTMBUF_HEADROOM;
b1->current_length = mb1->data_len - offset1;
n_rx_bytes += mb1->pkt_len;
offset2 = device_input_next_node_advance[next2];
b2->current_data = mb2->data_off + offset2 - RTE_PKTMBUF_HEADROOM;
b2->flags |= device_input_next_node_flags[next2];
vnet_buffer (b2)->l3_hdr_offset = b2->current_data;
vnet_buffer (b2)->l2_hdr_offset =
mb2->data_off - RTE_PKTMBUF_HEADROOM;
b2->current_length = mb2->data_len - offset2;
n_rx_bytes += mb2->pkt_len;
offset3 = device_input_next_node_advance[next3];
b3->current_data = mb3->data_off + offset3 - RTE_PKTMBUF_HEADROOM;
b3->flags |= device_input_next_node_flags[next3];
vnet_buffer (b3)->l3_hdr_offset = b3->current_data;
vnet_buffer (b3)->l2_hdr_offset =
mb3->data_off - RTE_PKTMBUF_HEADROOM;
b3->current_length = mb3->data_len - offset3;
n_rx_bytes += mb3->pkt_len;
/* Process subsequent segments of multi-segment packets */
if (maybe_multiseg)
{
dpdk_process_subseq_segs (vm, b0, mb0, fl);
dpdk_process_subseq_segs (vm, b1, mb1, fl);
dpdk_process_subseq_segs (vm, b2, mb2, fl);
dpdk_process_subseq_segs (vm, b3, mb3, fl);
}
/*
 * Turn this on if you run into
 * "bad monkey" contexts, and you want to know exactly
 * which nodes they've visited... See main.c...
 */
VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1);
VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b2);
VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b3);
/* Do we have any driver RX features configured on the interface? */
vnet_feature_start_device_input_x4 (xd->vlib_sw_if_index,
&next0, &next1, &next2, &next3,
b0, b1, b2, b3);
/* Enqueue; the macro fixes up the frame if any nextN differs from
   next_index. */
vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
to_next, n_left_to_next,
bi0, bi1, bi2, bi3,
next0, next1, next2, next3);
n_buffers -= 4;
mb_index += 4;
}
/* Single-packet loop for the leftovers. */
while (n_buffers > 0 && n_left_to_next > 0)
{
struct rte_mbuf *mb0 = xd->rx_vectors[queue_id][mb_index];
if (PREDICT_TRUE (n_buffers > 3))
{
dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 2]);
dpdk_prefetch_ethertype (xd->rx_vectors[queue_id]
[mb_index + 1]);
}
ASSERT (mb0);
b0 = vlib_buffer_from_rte_mbuf (mb0);
/* Prefetch one next segment if it exists. */
if (PREDICT_FALSE (mb0->nb_segs > 1))
dpdk_prefetch_buffer (mb0->next);
/* Stamp the template metadata into the buffer header. */
clib_memcpy (b0, bt, CLIB_CACHE_LINE_BYTES);
bi0 = vlib_get_buffer_index (vm, b0);
to_next[0] = bi0;
to_next++;
n_left_to_next--;
if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
next0 = xd->per_interface_next_index;
else
next0 = dpdk_rx_next_from_etype (mb0);
/* Note: unlike the quad loop, the error check here is unconditional
   (no ol_flags fast-path test). */
dpdk_rx_error_from_mb (mb0, &next0, &error0);
b0->error = node->errors[error0];
offset0 = device_input_next_node_advance[next0];
b0->current_data = mb0->data_off + offset0 - RTE_PKTMBUF_HEADROOM;
b0->flags |= device_input_next_node_flags[next0];
vnet_buffer (b0)->l3_hdr_offset = b0->current_data;
vnet_buffer (b0)->l2_hdr_offset =
mb0->data_off - RTE_PKTMBUF_HEADROOM;
b0->current_length = mb0->data_len - offset0;
n_rx_bytes += mb0->pkt_len;
/* Process subsequent segments of multi-segment packets */
dpdk_process_subseq_segs (vm, b0, mb0, fl);
/*
 * Turn this on if you run into
 * "bad monkey" contexts, and you want to know exactly
 * which nodes they've visited... See main.c...
 */
VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
/* Do we have any driver RX features configured on the interface? */
vnet_feature_start_device_input_x1 (xd->vlib_sw_if_index, &next0,
b0);
vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
to_next, n_left_to_next,
bi0, next0);
n_buffers--;
mb_index++;
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
/* Emit the trace records collected before dispatch, then deduct the
   number of packets actually traced from the remaining trace budget. */
if (PREDICT_FALSE (vec_len (xd->d_trace_buffers[thread_index]) > 0))
{
dpdk_rx_trace (dm, node, xd, queue_id,
xd->d_trace_buffers[thread_index],
vec_len (xd->d_trace_buffers[thread_index]));
vlib_set_trace_count (vm, node,
n_trace -
vec_len (xd->d_trace_buffers[thread_index]));
}
/* Bump the interface RX combined (packets + bytes) counters. */
vlib_increment_combined_counter
(vnet_get_main ()->interface_main.combined_sw_if_counters
+ VNET_INTERFACE_COUNTER_RX,
thread_index, xd->vlib_sw_if_index, mb_index, n_rx_bytes);
vnet_device_increment_rx_packets (thread_index, mb_index);
return mb_index;
}
新的体会:技术积累需要循序渐进,日拱一卒,不期速成。然而技术的积累需要满足当前市场环境的需求,也要避免重复造轮子。很多开源软件设计思想有很大的参考价值。