roce分为软件实现的roce和硬件实现的roce,硬件roce就是在普通网卡中添加一个模块,其中pf和vf 都可以使用roce
由于roce是网卡的一部分,因此roce的初始化包含在网卡的初始化过程中。例如下面这个roce驱动就会把
自己的初始化函数注册到网卡驱动中:
/*
 * Client operations table for the hns RoCE v2 driver: wires the
 * init_instance / uninit_instance hooks of struct hnae3_client_ops to
 * this driver's implementations.
 */
static const struct hnae3_client_ops hns_roce_hw_v2_ops = {
	.uninit_instance = hns_roce_hw_v2_uninit_instance,
	.init_instance   = hns_roce_hw_v2_init_instance,
};
/*
 * Client descriptor handed to hnae3_register_client(): a RoCE-type
 * client named "hns_roce_hw_v2" that uses hns_roce_hw_v2_ops as its
 * callback table.
 */
static struct hnae3_client hns_roce_hw_v2_client = {
	.type = HNAE3_CLIENT_ROCE,
	.name = "hns_roce_hw_v2",
	.ops  = &hns_roce_hw_v2_ops,
};
/*
 * Init entry point (marked __init): registers hns_roce_hw_v2_client via
 * hnae3_register_client() and returns that call's result unchanged.
 */
static int __init hns_roce_hw_v2_init(void)
{
	int rc;

	rc = hnae3_register_client(&hns_roce_hw_v2_client);
	return rc;
}
执行hnae3_register_client 后,就把roce的初始化函数添加到网卡驱动中
int hnae3_register_client(struct hnae3_client *client)
{
struct hnae3_client *client_tmp;
struct hnae3_ae_dev *ae_dev;
int ret = 0;
mutex_lock(&hnae3_common_lock);
/* one system should only have one client for every type */
#检查hnae3_client_list中是否已经存在相同type的client,如果已经存在,则不能重复注册,直接退出
list_for_each_entry(client_tmp, &hnae3_client_list, node) {
if (client_tmp->type == client->type)
goto exit;
}
#把client加入到hnae3_client_list链表的尾部
list_add_tail(&client->node, &hnae3_client_list);
#为网卡的每个口都添加roce的client
/* initialize the client on every matched port */
list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) {
/* if the client could not be initialized on current port, for
* any error reasons, move on to next available port
*/
ret = hnae3_match_n_instantiate(client, ae_dev, true);
if (ret)
dev_err(&ae_dev->pdev->dev,
"match
}
将roce添加到网卡驱动中后,就会调用ib_register_device来注册roce驱动
这个函数第一个参数是要注册的device,一般在注册之前会将这个device 结构体的成员变量填充好。
int ib_register_device(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *))
{
int ret;
struct ib_client *client;
struct ib_udata uhw = {.outlen = 0, .inlen = 0};
struct device *parent = device->dev.parent;
WARN_ON_ONCE(device->dma_device);
#看这个设备是否提供dma操作的函数,这里dma操作分为一致性dma和流式dma
if (device->dev.dma_ops) {
/*
* The caller provided custom DMA operations. Copy the
* DMA-related fields that are used by e.g. dma_alloc_coherent()
* into device->dev.
*/
device->dma_device = &device->dev;
if (!device->dev.dma_mask) {
if (parent)
device->dev.dma_mask = parent->dma_mask;
else
WARN_ON_ONCE(true);
}
mutex_lock(&device_mutex);
#如果设备名中包含'%',则调用alloc_name为ib设备分配name
if (strchr(device->name, '%')) {
ret = alloc_name(device->name);
if (ret)
goto out;
}
#检查device是否提供基础的roce功能
if (ib_device_check_mandatory(device)) {
ret = -EINVAL;
goto out;
}
#读取每个port的immutable数据,并建立每个port的pkey列表
ret = read_port_immutable(device);
if (ret) {
pr_warn("Couldn't create per port immutable data %s\n",
device->name);
goto out;
}
ret = setup_port_pkey_list(device);
if (ret) {
pr_warn("Couldn't create per port_pkey_list\n");
goto out;
}
#为ib设备建立cache
ret = ib_cache_setup_one(device);
if (ret) {
pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n");
goto port_cleanup;
}
#将设备注册到rdma cgroup
ret = ib_device_register_rdmacg(device);
if (ret) {
pr_warn("Couldn't register device with rdma cgroup\n");
goto cache_cleanup;
}
#查询设备的hw和attrs信息
memset(&device->attrs, 0, sizeof(device->attrs));
ret = device->query_device(device, &device->attrs, &uhw);
if (ret) {
pr_warn("Couldn't query the device attributes\n");
goto cg_cleanup;
}
#注册sysfs信息
ret = ib_device_register_sysfs(device, port_callback);
if (ret) {
pr_warn("Couldn't register device %s with driver model\n",
device->name);
goto cg_cleanup;
}
device->reg_state = IB_DEV_REGISTERED;
#遍历client_list,为每个client添加context;add_client_context返回0且client提供了add回调时,调用client->add
list_for_each_entry(client, &client_list, list)
if (!add_client_context(device, client) && client->add)
client->add(device);
device->index = __dev_new_index();
down_write(&lists_rwsem);
#所有的roce设备都放在&device_list 中
list_add_tail(&device->core_list, &device_list);
up_write(&lists_rwsem);
mutex_unlock(&device_mutex);
return 0;
}