1. 前言
profile是BCC tools中较常用的一个工具,本文通过对该工具较为深入的分析,帮助大家更深入地了解bcc tool的写法以及原理。
欢迎转载,本文原址:eBPF—BCC: profile源码解读
2. 源码解析
2.1 bpf code
# initialize BPF & perf_events
# Build the BPF object from the program source held in bpf_text.
# NOTE(review): only the BPF() construction appears in this excerpt; the
# perf_events setup mentioned above is not shown here.
b = BPF(text=bpf_text)
2.2 创建map
该BPF program有2个map,会先通过bpf syscall在kernel中申请:counts是以struct key_t为key的hash map,stack_traces用来存储栈回溯。
/*
 * Key type of the `counts` hash map declared below.
 *
 * NOTE(review): the strace log in this article reports key_size=48 for
 * `counts`, while the fields shown here would occupy 40 bytes if
 * TASK_COMM_LEN is 16 -- the log may come from a different version of the
 * tool; confirm.
 */
struct key_t {
u32 pid;
u64 kernel_ip;
/* presumably ids referring to entries in stack_traces -- confirm */
int user_stack_id;
int kernel_stack_id;
/* presumably the task's command name -- confirm against the fill site */
char name[TASK_COMM_LEN];
};
/* Hash map named `counts`, keyed by struct key_t. */
BPF_HASH(counts, struct key_t);
/* Stack-trace map named `stack_traces`, sized by STACK_STORAGE_SIZE. */
BPF_STACK_TRACE(stack_traces, STACK_STORAGE_SIZE);
/* log of `strace -e bpf,perf_event_open,ioctl profile-bpfcc` */
bpf(BPF_MAP_CREATE, {
map_type=BPF_MAP_TYPE_STACK_TRACE, key_size=4, value_size=1016, max_entries=16384, map_flags=0, inner_map_fd=0, map_name="stack_traces", map_ifindex=0, btf_fd=0, btf_key_type_id=0, btf_value_type_id=0}, 120) = 4
bpf(BPF_MAP_CREATE, {
map_type=BPF_MAP_TYPE_HASH, key_size=48, value_size=8, max_entries=10240, map_flags=0, inner_map_fd=0, map_name="counts", map_ifindex=0, btf_fd=3, btf_key_type_id=2, btf_value_type_id=6}, 120) = 5
先来看一下如何在kernel中申请map
/*
 * map_create() - handle a map-creation request described by @attr.
 *
 * Steps, in order: validate @attr, the file flags and the NUMA node;
 * allocate the map through find_and_alloc_map(); copy the map name;
 * optionally attach BTF type information; call the security hook; allocate
 * an id; allocate an fd.
 *
 * Returns the result of bpf_map_new_fd() on success, or a negative error
 * code if any step fails.
 */
static int map_create(union bpf_attr *attr)
{
int numa_node = bpf_map_attr_numa_node(attr);
struct bpf_map_memory mem;
struct bpf_map *map;
int f_flags;
int err;
/* CHECK_ATTR is defined elsewhere; any non-zero result becomes -EINVAL. */
err = CHECK_ATTR(BPF_MAP_CREATE);
if (err)
return -EINVAL;
f_flags = bpf_get_file_flag(attr->map_flags);
if (f_flags < 0)
return f_flags;
/* A NUMA node, if requested, must be a valid index of an online node. */
if (numa_node != NUMA_NO_NODE &&
((unsigned int)numa_node >= nr_node_ids ||
!node_online(numa_node)))
return -EINVAL;
/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
map = find_and_alloc_map(attr); // allocate the map according to attr->map_type (the id is assigned later by bpf_map_alloc_id())
if (IS_ERR(map))
return PTR_ERR(map);
err = bpf_obj_name_cpy(map->name, attr->map_name);
if (err)
goto free_map;
/* Both counters start at 1. */
atomic_set(&map->refcnt, 1);
atomic_set(&map->usercnt, 1);
/* BTF info is optional, but if any type id is given the value id is mandatory. */
if (attr->btf_key_type_id || attr->btf_value_type_id) {
struct btf *btf;
if (!attr->btf_value_type_id) {
err = -EINVAL;
goto free_map;
}
btf = btf_get_by_fd(attr->btf_fd);
if (IS_ERR(btf)) {
err = PTR_ERR(btf);
goto free_map;
}
err = map_check_btf(map, btf, attr->btf_key_type_id,
attr->btf_value_type_id);
if (err) {
/* btf has not been stored in map->btf yet, so drop it directly. */
btf_put(btf);
goto free_map;
}
map->btf = btf;
map->btf_key_type_id = attr->btf_key_type_id;
map->btf_value_type_id = attr->btf_value_type_id;
} else {
/* NOTE(review): presumably marks "no spin lock in the value" -- confirm. */
map->spin_lock_off = -EINVAL;
}
err = security_bpf_map_alloc(map);
if (err)
goto free_map;
err = bpf_map_alloc_id(map); // allocate an id
if (err)
goto free_map_sec;
err = bpf_map_new_fd(map, f_flags); // allocate an fd
if (err < 0) {
/* failed to allocate fd.
* bpf_map_put_with_uref() is needed because the above
* bpf_map_alloc_id() has published the map
* to the userspace and the userspace may
* have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
*/
bpf_map_put_with_uref(map);
return err;
}
return err;
/* Error unwinding: labels fall through, undoing steps in reverse order. */
free_map_sec:
security_bpf_map_free(map);
free_map:
btf_put(map->btf);
/*
 * The memory charge is moved into the local `mem` before the map is freed
 * and finished afterwards -- presumably because map->memory is no longer
 * accessible once map_free() has run; confirm.
 */
bpf_map_charge_move(&mem, &map->memory);
map->ops->map_free(map);
bpf_map_charge_finish(&mem);
return err;
}
static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
const struct bpf_map_ops *ops;
u32 type = attr->map_type;
struct