前面章节分析了ovs与datapath通过netlink通信机制,本章节重点分析下ovs与datapath的具体交互流程。
1.交互消息分类
datapath在初始化时注册了6类genl family消息,用于ovs用户态与datapath内核模块之间的消息传递。
/* Module aliases for the six generic netlink families exported by the
 * openvswitch module, so the kernel can auto-load the module when
 * userspace requests one of these genl family names. */
MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_CT_LIMIT_FAMILY);
我们以OVS_DATAPATH_FAMILY为例看一下消息的注册内容
/* genetlink operations for OVS_DATAPATH_FAMILY: the four datapath
 * (bridge) commands NEW/DEL/GET/SET. All attributes are validated
 * against datapath_policy; only GET is allowed without CAP_NET_ADMIN. */
static const struct genl_ops dp_datapath_genl_ops[] = {
{ .cmd = OVS_DP_CMD_NEW,
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = datapath_policy,
.doit = ovs_dp_cmd_new
},
{ .cmd = OVS_DP_CMD_DEL,
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = datapath_policy,
.doit = ovs_dp_cmd_del
},
{ .cmd = OVS_DP_CMD_GET,
.flags = 0, /* OK for unprivileged users. */
.policy = datapath_policy,
.doit = ovs_dp_cmd_get,
.dumpit = ovs_dp_cmd_dump /* GET also supports dump (list all datapaths) */
},
{ .cmd = OVS_DP_CMD_SET,
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = datapath_policy,
.doit = ovs_dp_cmd_set,
},
};
/* Generic netlink family definition for OVS_DATAPATH_FAMILY, wiring the
 * dp_datapath_genl_ops table above to the family name userspace resolves. */
static struct genl_family dp_datapath_genl_family __ro_after_init = {
.hdrsize = sizeof(struct ovs_header), /* every message starts with an ovs_header */
.name = OVS_DATAPATH_FAMILY,
.version = OVS_DATAPATH_VERSION,
.maxattr = OVS_DP_ATTR_MAX,
.netnsok = true, /* family is network-namespace aware */
.parallel_ops = true, /* handlers may run concurrently (no genl serialization) */
.ops = dp_datapath_genl_ops,
.n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
.mcgrps = &ovs_dp_datapath_multicast_group, /* notifications go to this mc group */
.n_mcgrps = 1,
.module = THIS_MODULE,
};
共注册了4类cmd操作NEW/DEL/GET/SET
2 datapath处理消息流程
2.1 OVS_DP_CMD_NEW
首先分析创建bridge桥的cmd处理流程OVS_DP_CMD_NEW
OVS_DP_CMD_NEW
|---ovs_dp_cmd_new
|---ovs_flow_tbl_init
|---new_vport
|---ovs_vport_add
|---ovs_vport_lookup //根据端口类型查找注册的ops,internal类型的端口注册的是 ovs_internal_vport_ops
|---internal_dev_create
|---ovs_vport_alloc 创建vport,注册ops最关键的是send函数internal_dev_recv
|---alloc_netdev 创建netdev内核中的dev (vport->dev = dev internal_dev->vport = vport) ,建立一一对应关系
|---do_setup 最关键的是注册netdev_ops (internal_dev_netdev_ops),其中有ndo_start_xmit 为internal_dev_xmit
|---register_netdevice 把vport->dev注册到内核,由内核管理
vport与对应的内核dev的关系
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
struct vport_parms parms;
struct sk_buff *reply;
struct datapath *dp;
struct vport *vport;
struct ovs_net *ovs_net;
int err, i;
err = -EINVAL;
if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) //必须协带name和pid两个属性
goto err;
reply = ovs_dp_cmd_alloc_info();
if (!reply)
return -ENOMEM;
err = -ENOMEM;
dp = kzalloc(sizeof(*dp), GFP_KERNEL);
if (dp == NULL)
goto err_free_reply;
ovs_dp_set_net(dp, sock_net(skb->sk));
/* Allocate table. */
err = ovs_flow_tbl_init(&dp->table); 初始化flow table实例
if (err)
goto err_free_dp;
dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
if (!dp->stats_percpu) {
err = -ENOMEM;
goto err_destroy_table;
}
dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
sizeof(struct hlist_head),
GFP_KERNEL);
if (!dp->ports) {
err = -ENOMEM;
goto err_destroy_percpu;
}
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
INIT_HLIST_HEAD(&dp->ports[i]);
err = ovs_meters_init(dp);
if (err)
goto err_destroy_ports_array;
/* Set up our datapath device. */
parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
parms.type = OVS_VPORT_TYPE_INTERNAL; //internal类型的端口
parms.options = NULL;
parms.dp = dp; //分配的dp结构
parms.port_no = OVSP_LOCAL; //端口号为0
parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
ovs_dp_change(dp, a); //记录用户属性user_features
/* So far only local changes have been made, now need the lock. */
ovs_lock();
vport = new_vport(&parms); 创建vport口,后面会重点分析创建vport口的过程
if (IS_ERR(vport)) {
err = PTR_ERR(vport);
if (err == -EBUSY)
err = -EEXIST;
if (err == -EEXIST) {
/* An outdated user space instance that does not understand
* the concept of user_features has attempted to create a new
* datapath and is likely to reuse it. Drop all user features.
*/
if (info->genlhdr->version < OVS_DP_VER_FEATURES)
ovs_dp_reset_user_features(skb, info);
}
goto err_destroy_meters;
}
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
info->snd_seq, 0, OVS_DP_CMD_NEW);
BUG_ON(err < 0);
ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
ovs_unlock();
ovs_notify(&dp_datapath_genl_family, reply, info);
return 0;
err:
return err;
}
/* Create a vport described by @parms and, on success, insert it into
 * the owning datapath's per-bridge vport hash table (keyed by port
 * number). Returns the new vport or an ERR_PTR() from ovs_vport_add().
 */
static struct vport *new_vport(const struct vport_parms *parms)
{
	struct vport *vport = ovs_vport_add(parms);

	if (IS_ERR(vport))
		return vport;

	/* Link the vport into the datapath bridge's hash chain. */
	hlist_add_head_rcu(&vport->dp_hash_node,
			   vport_hash_bucket(parms->dp, vport->port_no));

	return vport;
}
/* Create a vport of the requested type and add it to the per-netns
 * vport hash table. If no vport_ops is registered for the type, try
 * loading the matching vport module and ask the caller to retry
 * (-EAGAIN). NOTE(review): the ovs_unlock()/ovs_lock() pair below
 * implies ovs_mutex is held on entry — confirm against callers. */
struct vport *ovs_vport_add(const struct vport_parms *parms)
{
struct vport_ops *ops;
struct vport *vport;
/*
Look up the vport_ops registered for this port type. For an internal-type
port the registered ops are:
static struct vport_ops ovs_internal_vport_ops = {
.type = OVS_VPORT_TYPE_INTERNAL,
.create = internal_dev_create,
.destroy = internal_dev_destroy,
.send = internal_dev_recv,
};*/
ops = ovs_vport_lookup(parms);
if (ops) {
struct hlist_head *bucket;
if (!try_module_get(ops->owner))
return ERR_PTR(-EAFNOSUPPORT);
vport = ops->create(parms); //internal_dev_create for an internal-type port
if (IS_ERR(vport)) {
module_put(ops->owner);
return vport;
}
bucket = hash_bucket(ovs_dp_get_net(vport->dp),
ovs_vport_name(vport)); //ovs_vport_name() returns vport->dev->name, the kernel net-device name
hlist_add_head_rcu(&vport->hash_node, bucket); //add the vport to the per-netns management hash list
return vport;
}
/* Unlock to attempt module load and return -EAGAIN if load
 * was successful as we need to restart the port addition
 * workflow.
 */
ovs_unlock();
request_module("vport-type-%d", parms->type);
ovs_lock();
if (!ovs_vport_lookup(parms))
return ERR_PTR(-EAFNOSUPPORT);
else
return ERR_PTR(-EAGAIN);
}
2.2 OVS_VPORT_CMD_NEW
接下来再分析一下vport的创建流程,前面部分与上面分析的OVS_DP_CMD_NEW类似,只是在根据端口类型与内核进行关联的时候有所区别。
ovs_vport_cmd_new
|---new_vport
|---ovs_vport_add 创建成功后,会把vport加入dp链表
|--- ovs_vport_lookup 根据要创建的port类型查找注册的ops
|---vport = ops->create(parms); 调用的是 netdev_create 具体的注册位置在vport-netdev.c
|---ovs_vport_alloc 创建vport结构,注册send函数
|---ovs_netdev_link
|---dev_get_by_name(ovs_dp_get_net(vport->dp), name); 通过name查询dev
|---netdev_rx_handler_register 挂载接收函数netdev_frame_hook和参数vport
|---dev_set_promiscuity 打开混杂模式
|---vport->dev->priv_flags |= IFF_OVS_DATAPATH; 设置dev的flags
netdev_rx_handler_register()
是linux内核实现的一个函数,为网络设备 dev 注册一个 rx_handler(这里是 netdev_frame_hook),rx_handler_data 指向随 handler 一起保存的私有数据(这里是 vport)。这个 handler 以后会被 __netif_receive_skb() 调用,也就是说,调用 netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook, vport) 之后,该设备收到 packet 时会调用 netdev_frame_hook 函数进行处理。