roce驱动的路径在drivers/infiniband/hw/hns,其入口函数在hns_roce_main.c 中:
/*
 * Platform driver description for the RoCE device: it names the probe and
 * remove callbacks and supplies both a DT (of_match_table) and an ACPI
 * (acpi_match_table) id table for device matching.
 */
static struct platform_driver hns_roce_driver = {
	.probe	= hns_roce_probe,
	.remove	= hns_roce_remove,
	.driver	= {
		.name			= DRV_NAME,
		.of_match_table		= hns_roce_of_match,
		.acpi_match_table	= ACPI_PTR(hns_roce_acpi_match),
	},
};

/* Hook hns_roce_driver up as this module's platform driver. */
module_platform_driver(hns_roce_driver);
其调用module_platform_driver 来初始化roce。以ACPI 为例,如果设备匹配hns_roce_acpi_match 中的id,则调用hns_roce_probe:
/*
 * ACPI ids handled by this driver. The driver_data of each entry carries a
 * pointer to the hardware-specific description (here hns_roce_hw_v1), which
 * hns_roce_get_cfg() later casts back to a struct hns_roce_hw pointer.
 */
static const struct acpi_device_id hns_roce_acpi_match[] = {
	{
		.id		= "HISI00D1",
		.driver_data	= (kernel_ulong_t)&hns_roce_hw_v1,
	},
	{ /* sentinel */ },
};
/*
 * hns_roce_probe - platform probe callback of the RoCE driver (excerpt).
 *
 * Allocates the hns_roce_dev, ties it to the platform device, configures the
 * DMA mask and then reads the firmware-provided configuration through
 * hns_roce_get_cfg().
 *
 * NOTE(review): this excerpt stops right after the hns_roce_get_cfg() call;
 * the error_failed_get_cfg label targeted by the gotos and the rest of the
 * function are not part of the quoted text.
 */
static int hns_roce_probe(struct platform_device *pdev)
{
int ret;
struct hns_roce_dev *hr_dev;
struct device *dev = &pdev->dev;
/*
 * Allocate a struct hns_roce_dev and do its basic initialization; at the end
 * platform_set_drvdata(pdev, hr_dev) stores hr_dev as the drvdata of the
 * corresponding platform_device.
 */
hr_dev = (struct hns_roce_dev *)ib_alloc_device(sizeof(*hr_dev));
if (!hr_dev)
return -ENOMEM;
/*
 * Zero everything in hr_dev that follows the leading ib_device; the ib_device
 * part is skipped because (per the article's author) ib_alloc_device() has
 * already initialized it.
 */
memset((u8 *)hr_dev + sizeof(struct ib_device), 0,
sizeof(struct hns_roce_dev) - sizeof(struct ib_device));
hr_dev->pdev = pdev;
platform_set_drvdata(pdev, hr_dev);
/*
 * Try a 64-bit DMA mask first and fall back to 32 bits; fail only when both
 * are rejected. Author's remark: the ULL suffix is not really needed here,
 * a plain 64 would do.
 */
if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64ULL)) &&
dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32ULL))) {
dev_err(dev, "Not usable DMA addressing mode\n");
ret = -EIO;
goto error_failed_get_cfg;
}
/*
 * Read the configuration; among other things this picks up the driver_data
 * of the matched acpi_device_id.
 */
ret = hns_roce_get_cfg(hr_dev);
if (ret) {
dev_err(dev, "Get Configuration failed!\n");
goto error_failed_get_cfg;
}
}
/**
 * hns_roce_get_cfg - read the RoCE device configuration from DT or ACPI
 * @hr_dev: RoCE device; hr_dev->pdev must already point at the platform device
 *
 * Fills in, in this order:
 *   - hr_dev->hw: hardware-specific data attached to the matched OF/ACPI id
 *   - hr_dev->reg_base: mapping of MEM resource 0
 *   - hr_dev->ib_dev.node_guid: GUID_LEN bytes of the "node-guid" property
 *   - hr_dev->iboe.netdevs[] / phy_port[]: one pair per "eth-handle" entry
 *     that resolves to a platform device, and hr_dev->caps.num_ports
 *   - hr_dev->cmd_mod, hr_dev->loop_idc
 *   - hr_dev->irq_names[] and hr_dev->irq[]
 *
 * Return: 0 on success, a negative errno otherwise.
 */
static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev)
{
	int i;
	int ret;
	u8 phy_port;
	int port_cnt = 0;
	struct device *dev = &hr_dev->pdev->dev;
	struct device_node *net_node;
	struct net_device *netdev = NULL;
	struct platform_device *pdev = NULL;
	struct resource *res;

	/*
	 * Check if we are compatible with the underlying SoC.
	 *
	 * The firmware flavour is detected here: a non-NULL dev_of_node()
	 * means the device was described by DT, otherwise a true
	 * is_acpi_device_node() means it was described by ACPI.
	 */
	if (dev_of_node(dev)) {
		const struct of_device_id *of_id;

		of_id = of_match_node(hns_roce_of_match, dev->of_node);
		if (!of_id) {
			dev_err(dev, "device is not compatible!\n");
			return -ENXIO;
		}
		hr_dev->hw = (struct hns_roce_hw *)of_id->data;
		if (!hr_dev->hw) {
			dev_err(dev, "couldn't get H/W specific DT data!\n");
			return -ENXIO;
		}
	} else if (is_acpi_device_node(dev->fwnode)) {
		const struct acpi_device_id *acpi_id;

		/* Look up the matching id entry to get at its driver_data. */
		acpi_id = acpi_match_device(hns_roce_acpi_match, dev);
		if (!acpi_id) {
			dev_err(dev, "device is not compatible!\n");
			return -ENXIO;
		}
		/*
		 * Point hr_dev->hw at acpi_id->driver_data; for the id table
		 * of this driver that is hns_roce_hw_v1.
		 */
		hr_dev->hw = (struct hns_roce_hw *) acpi_id->driver_data;
		if (!hr_dev->hw) {
			dev_err(dev, "couldn't get H/W specific ACPI data!\n");
			return -ENXIO;
		}
	} else {
		dev_err(dev, "can't read compatibility data from DT or ACPI\n");
		return -ENXIO;
	}

	/* Get the register memory resource that has to be mapped ... */
	res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0);
	if (!res) {
		dev_err(dev, "memory resource not found!\n");
		return -EINVAL;
	}
	/* ... and map it to a virtual address. */
	hr_dev->reg_base = devm_ioremap_resource(dev, res);
	if (IS_ERR(hr_dev->reg_base))
		return PTR_ERR(hr_dev->reg_base);

	/*
	 * Read the node_guid of the IB device from the DT or ACPI. The
	 * device_property_* helper is used because it works for both.
	 */
	ret = device_property_read_u8_array(dev, "node-guid",
					    (u8 *)&hr_dev->ib_dev.node_guid,
					    GUID_LEN);
	if (ret) {
		dev_err(dev, "couldn't get node_guid from DT or ACPI!\n");
		return ret;
	}

	/* Get the RoCE associated ethernet ports or netdevices. */
	for (i = 0; i < HNS_ROCE_MAX_PORTS; i++) {
		if (dev_of_node(dev)) {
			net_node = of_parse_phandle(dev->of_node, "eth-handle",
						    i);
			if (!net_node)
				continue;
			pdev = of_find_device_by_node(net_node);
			/*
			 * of_parse_phandle() hands the node back with its
			 * refcount raised; the node is only needed for the
			 * lookup above, so drop the reference here instead of
			 * leaking one per port.
			 */
			of_node_put(net_node);
		} else if (is_acpi_device_node(dev->fwnode)) {
			struct acpi_reference_args args;
			struct fwnode_handle *fwnode;

			/* Fetch the i-th "eth-handle" reference into args. */
			ret = acpi_node_get_property_reference(dev->fwnode,
							       "eth-handle",
							       i, &args);
			if (ret)
				continue;
			fwnode = acpi_fwnode_handle(args.adev);
			/* Find the platform_device behind that reference. */
			pdev = hns_roce_find_pdev(fwnode);
		} else {
			dev_err(dev, "cannot read data from DT or ACPI\n");
			return -ENXIO;
		}

		/*
		 * NOTE(review): the platform device lookups above presumably
		 * return pdev with a device reference held that is never
		 * released; left as is because netdev is kept in hr_dev->iboe
		 * beyond this function - confirm the intended lifetime.
		 */
		if (pdev) {
			/* The net_device is stored as the pdev's drvdata. */
			netdev = platform_get_drvdata(pdev);
			phy_port = (u8)i;
			/*
			 * With a valid net_device, remember it together with
			 * its physical port index in hr_dev->iboe.
			 */
			if (netdev) {
				hr_dev->iboe.netdevs[port_cnt] = netdev;
				hr_dev->iboe.phy_port[port_cnt] = phy_port;
			} else {
				dev_err(dev, "no netdev found with pdev %s\n",
					pdev->name);
				return -ENODEV;
			}
			port_cnt++;
		}
	}

	if (port_cnt == 0) {
		dev_err(dev, "unable to get eth-handle for available ports!\n");
		return -EINVAL;
	}

	hr_dev->caps.num_ports = port_cnt;

	/* cmd issue mode: 0 is poll, 1 is event */
	hr_dev->cmd_mod = 1;
	hr_dev->loop_idc = 0;

	/* Read the interrupt names from the DT or ACPI. */
	ret = device_property_read_string_array(dev, "interrupt-names",
						hr_dev->irq_names,
						HNS_ROCE_MAX_IRQ_NUM);
	if (ret < 0) {
		dev_err(dev, "couldn't get interrupt names from DT or ACPI!\n");
		return ret;
	}

	/*
	 * Fetch the interrupt numbers.
	 *
	 * NOTE(review): every failure is reported as -EINVAL, so a specific
	 * error code returned by platform_get_irq() is not passed on.
	 */
	for (i = 0; i < HNS_ROCE_MAX_IRQ_NUM; i++) {
		hr_dev->irq[i] = platform_get_irq(hr_dev->pdev, i);
		if (hr_dev->irq[i] <= 0) {
			dev_err(dev, "platform get of irq[=%d] failed!\n", i);
			return -EINVAL;
		}
	}

	return 0;
}
我们看看device_property_read_u8_array的实现
/*
 * Call site quoted again from hns_roce_get_cfg(): read the node_guid of IB
 * device from the DT or ACPI into hr_dev->ib_dev.node_guid (GUID_LEN bytes).
 */
ret = device_property_read_u8_array(dev, "node-guid",
(u8 *)&hr_dev->ib_dev.node_guid,
GUID_LEN);
/**
 * device_property_read_u8_array - read u8 values of a device property
 * @dev: device whose firmware node is queried
 * @propname: name of the property
 * @val: destination buffer, handed through unchanged
 * @nval: number of elements, handed through unchanged
 *
 * Thin wrapper: resolves the firmware node of @dev with dev_fwnode() and
 * forwards everything to fwnode_property_read_u8_array().
 *
 * Return: whatever fwnode_property_read_u8_array() returns.
 */
int device_property_read_u8_array(struct device *dev, const char *propname,
				  u8 *val, size_t nval)
{
	struct fwnode_handle *fwnode = dev_fwnode(dev);

	return fwnode_property_read_u8_array(fwnode, propname, val, nval);
}
可见device_property_read_u8_array 对dt和acpi都适用,因为它先通过dev_fwnode(dev) 取得fwnode,再交给fwnode_property_read_u8_array 处理:
/*
 * dev_fwnode - pick the firmware node handle of a device.
 *
 * When CONFIG_OF is enabled and the device has an of_node, the fwnode
 * embedded in that of_node is returned; in every other case dev->fwnode is
 * returned as is.
 */
static inline struct fwnode_handle *dev_fwnode(struct device *dev)
{
	if (IS_ENABLED(CONFIG_OF) && dev->of_node)
		return &dev->of_node->fwnode;

	return dev->fwnode;
}
可以看到,如果使能了CONFIG_OF 且dev->of_node 非空,则返回of_node 中内嵌的fwnode,否则返回dev->fwnode,因此dt和acpi两种情况下都能得到正确的fwnode
总的来说本例中device_property_read_u8_array 就是读取"node-guid"的值
linux-81uf:/sys/class/infiniband/hns_0 # cat node_guid
009a:cd00:0001:0203
/*
 * Platform driver description for the RoCE device: it names the probe and
 * remove callbacks and supplies both a DT (of_match_table) and an ACPI
 * (acpi_match_table) id table for device matching.
 */
static struct platform_driver hns_roce_driver = {
	.probe	= hns_roce_probe,
	.remove	= hns_roce_remove,
	.driver	= {
		.name			= DRV_NAME,
		.of_match_table		= hns_roce_of_match,
		.acpi_match_table	= ACPI_PTR(hns_roce_acpi_match),
	},
};

/* Hook hns_roce_driver up as this module's platform driver. */
module_platform_driver(hns_roce_driver);
其调用module_platform_driver 来初始化roce。以ACPI 为例,如果设备匹配hns_roce_acpi_match 中的id,则调用hns_roce_probe:
/*
 * ACPI ids handled by this driver. The driver_data of each entry carries a
 * pointer to the hardware-specific description (here hns_roce_hw_v1), which
 * hns_roce_get_cfg() later casts back to a struct hns_roce_hw pointer.
 */
static const struct acpi_device_id hns_roce_acpi_match[] = {
	{
		.id		= "HISI00D1",
		.driver_data	= (kernel_ulong_t)&hns_roce_hw_v1,
	},
	{ /* sentinel */ },
};
/*
 * hns_roce_probe - platform probe callback of the RoCE driver (excerpt).
 *
 * Allocates the hns_roce_dev, ties it to the platform device, configures the
 * DMA mask and then reads the firmware-provided configuration through
 * hns_roce_get_cfg().
 *
 * NOTE(review): this excerpt stops right after the hns_roce_get_cfg() call;
 * the error_failed_get_cfg label targeted by the gotos and the rest of the
 * function are not part of the quoted text.
 */
static int hns_roce_probe(struct platform_device *pdev)
{
int ret;
struct hns_roce_dev *hr_dev;
struct device *dev = &pdev->dev;
/*
 * Allocate a struct hns_roce_dev and do its basic initialization; at the end
 * platform_set_drvdata(pdev, hr_dev) stores hr_dev as the drvdata of the
 * corresponding platform_device.
 */
hr_dev = (struct hns_roce_dev *)ib_alloc_device(sizeof(*hr_dev));
if (!hr_dev)
return -ENOMEM;
/*
 * Zero everything in hr_dev that follows the leading ib_device; the ib_device
 * part is skipped because (per the article's author) ib_alloc_device() has
 * already initialized it.
 */
memset((u8 *)hr_dev + sizeof(struct ib_device), 0,
sizeof(struct hns_roce_dev) - sizeof(struct ib_device));
hr_dev->pdev = pdev;
platform_set_drvdata(pdev, hr_dev);
/*
 * Try a 64-bit DMA mask first and fall back to 32 bits; fail only when both
 * are rejected. Author's remark: the ULL suffix is not really needed here,
 * a plain 64 would do.
 */
if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64ULL)) &&
dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32ULL))) {
dev_err(dev, "Not usable DMA addressing mode\n");
ret = -EIO;
goto error_failed_get_cfg;
}
/*
 * Read the configuration; among other things this picks up the driver_data
 * of the matched acpi_device_id.
 */
ret = hns_roce_get_cfg(hr_dev);
if (ret) {
dev_err(dev, "Get Configuration failed!\n");
goto error_failed_get_cfg;
}
}
/**
 * hns_roce_get_cfg - read the RoCE device configuration from DT or ACPI
 * @hr_dev: RoCE device; hr_dev->pdev must already point at the platform device
 *
 * Fills in, in this order:
 *   - hr_dev->hw: hardware-specific data attached to the matched OF/ACPI id
 *   - hr_dev->reg_base: mapping of MEM resource 0
 *   - hr_dev->ib_dev.node_guid: GUID_LEN bytes of the "node-guid" property
 *   - hr_dev->iboe.netdevs[] / phy_port[]: one pair per "eth-handle" entry
 *     that resolves to a platform device, and hr_dev->caps.num_ports
 *   - hr_dev->cmd_mod, hr_dev->loop_idc
 *   - hr_dev->irq_names[] and hr_dev->irq[]
 *
 * Return: 0 on success, a negative errno otherwise.
 */
static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev)
{
	int i;
	int ret;
	u8 phy_port;
	int port_cnt = 0;
	struct device *dev = &hr_dev->pdev->dev;
	struct device_node *net_node;
	struct net_device *netdev = NULL;
	struct platform_device *pdev = NULL;
	struct resource *res;

	/*
	 * Check if we are compatible with the underlying SoC.
	 *
	 * The firmware flavour is detected here: a non-NULL dev_of_node()
	 * means the device was described by DT, otherwise a true
	 * is_acpi_device_node() means it was described by ACPI.
	 */
	if (dev_of_node(dev)) {
		const struct of_device_id *of_id;

		of_id = of_match_node(hns_roce_of_match, dev->of_node);
		if (!of_id) {
			dev_err(dev, "device is not compatible!\n");
			return -ENXIO;
		}
		hr_dev->hw = (struct hns_roce_hw *)of_id->data;
		if (!hr_dev->hw) {
			dev_err(dev, "couldn't get H/W specific DT data!\n");
			return -ENXIO;
		}
	} else if (is_acpi_device_node(dev->fwnode)) {
		const struct acpi_device_id *acpi_id;

		/* Look up the matching id entry to get at its driver_data. */
		acpi_id = acpi_match_device(hns_roce_acpi_match, dev);
		if (!acpi_id) {
			dev_err(dev, "device is not compatible!\n");
			return -ENXIO;
		}
		/*
		 * Point hr_dev->hw at acpi_id->driver_data; for the id table
		 * of this driver that is hns_roce_hw_v1.
		 */
		hr_dev->hw = (struct hns_roce_hw *) acpi_id->driver_data;
		if (!hr_dev->hw) {
			dev_err(dev, "couldn't get H/W specific ACPI data!\n");
			return -ENXIO;
		}
	} else {
		dev_err(dev, "can't read compatibility data from DT or ACPI\n");
		return -ENXIO;
	}

	/* Get the register memory resource that has to be mapped ... */
	res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0);
	if (!res) {
		dev_err(dev, "memory resource not found!\n");
		return -EINVAL;
	}
	/* ... and map it to a virtual address. */
	hr_dev->reg_base = devm_ioremap_resource(dev, res);
	if (IS_ERR(hr_dev->reg_base))
		return PTR_ERR(hr_dev->reg_base);

	/*
	 * Read the node_guid of the IB device from the DT or ACPI. The
	 * device_property_* helper is used because it works for both.
	 */
	ret = device_property_read_u8_array(dev, "node-guid",
					    (u8 *)&hr_dev->ib_dev.node_guid,
					    GUID_LEN);
	if (ret) {
		dev_err(dev, "couldn't get node_guid from DT or ACPI!\n");
		return ret;
	}

	/* Get the RoCE associated ethernet ports or netdevices. */
	for (i = 0; i < HNS_ROCE_MAX_PORTS; i++) {
		if (dev_of_node(dev)) {
			net_node = of_parse_phandle(dev->of_node, "eth-handle",
						    i);
			if (!net_node)
				continue;
			pdev = of_find_device_by_node(net_node);
			/*
			 * of_parse_phandle() hands the node back with its
			 * refcount raised; the node is only needed for the
			 * lookup above, so drop the reference here instead of
			 * leaking one per port.
			 */
			of_node_put(net_node);
		} else if (is_acpi_device_node(dev->fwnode)) {
			struct acpi_reference_args args;
			struct fwnode_handle *fwnode;

			/* Fetch the i-th "eth-handle" reference into args. */
			ret = acpi_node_get_property_reference(dev->fwnode,
							       "eth-handle",
							       i, &args);
			if (ret)
				continue;
			fwnode = acpi_fwnode_handle(args.adev);
			/* Find the platform_device behind that reference. */
			pdev = hns_roce_find_pdev(fwnode);
		} else {
			dev_err(dev, "cannot read data from DT or ACPI\n");
			return -ENXIO;
		}

		/*
		 * NOTE(review): the platform device lookups above presumably
		 * return pdev with a device reference held that is never
		 * released; left as is because netdev is kept in hr_dev->iboe
		 * beyond this function - confirm the intended lifetime.
		 */
		if (pdev) {
			/* The net_device is stored as the pdev's drvdata. */
			netdev = platform_get_drvdata(pdev);
			phy_port = (u8)i;
			/*
			 * With a valid net_device, remember it together with
			 * its physical port index in hr_dev->iboe.
			 */
			if (netdev) {
				hr_dev->iboe.netdevs[port_cnt] = netdev;
				hr_dev->iboe.phy_port[port_cnt] = phy_port;
			} else {
				dev_err(dev, "no netdev found with pdev %s\n",
					pdev->name);
				return -ENODEV;
			}
			port_cnt++;
		}
	}

	if (port_cnt == 0) {
		dev_err(dev, "unable to get eth-handle for available ports!\n");
		return -EINVAL;
	}

	hr_dev->caps.num_ports = port_cnt;

	/* cmd issue mode: 0 is poll, 1 is event */
	hr_dev->cmd_mod = 1;
	hr_dev->loop_idc = 0;

	/* Read the interrupt names from the DT or ACPI. */
	ret = device_property_read_string_array(dev, "interrupt-names",
						hr_dev->irq_names,
						HNS_ROCE_MAX_IRQ_NUM);
	if (ret < 0) {
		dev_err(dev, "couldn't get interrupt names from DT or ACPI!\n");
		return ret;
	}

	/*
	 * Fetch the interrupt numbers.
	 *
	 * NOTE(review): every failure is reported as -EINVAL, so a specific
	 * error code returned by platform_get_irq() is not passed on.
	 */
	for (i = 0; i < HNS_ROCE_MAX_IRQ_NUM; i++) {
		hr_dev->irq[i] = platform_get_irq(hr_dev->pdev, i);
		if (hr_dev->irq[i] <= 0) {
			dev_err(dev, "platform get of irq[=%d] failed!\n", i);
			return -EINVAL;
		}
	}

	return 0;
}
我们看看device_property_read_u8_array的实现
/*
 * Call site quoted again from hns_roce_get_cfg(): read the node_guid of IB
 * device from the DT or ACPI into hr_dev->ib_dev.node_guid (GUID_LEN bytes).
 */
ret = device_property_read_u8_array(dev, "node-guid",
(u8 *)&hr_dev->ib_dev.node_guid,
GUID_LEN);
/**
 * device_property_read_u8_array - read u8 values of a device property
 * @dev: device whose firmware node is queried
 * @propname: name of the property
 * @val: destination buffer, handed through unchanged
 * @nval: number of elements, handed through unchanged
 *
 * Thin wrapper: resolves the firmware node of @dev with dev_fwnode() and
 * forwards everything to fwnode_property_read_u8_array().
 *
 * Return: whatever fwnode_property_read_u8_array() returns.
 */
int device_property_read_u8_array(struct device *dev, const char *propname,
				  u8 *val, size_t nval)
{
	struct fwnode_handle *fwnode = dev_fwnode(dev);

	return fwnode_property_read_u8_array(fwnode, propname, val, nval);
}
可见device_property_read_u8_array 对dt和acpi都适用,因为它先通过dev_fwnode(dev) 取得fwnode,再交给fwnode_property_read_u8_array 处理:
/*
 * dev_fwnode - pick the firmware node handle of a device.
 *
 * When CONFIG_OF is enabled and the device has an of_node, the fwnode
 * embedded in that of_node is returned; in every other case dev->fwnode is
 * returned as is.
 */
static inline struct fwnode_handle *dev_fwnode(struct device *dev)
{
	if (IS_ENABLED(CONFIG_OF) && dev->of_node)
		return &dev->of_node->fwnode;

	return dev->fwnode;
}
可以看到,如果使能了CONFIG_OF 且dev->of_node 非空,则返回of_node 中内嵌的fwnode,否则返回dev->fwnode,因此dt和acpi两种情况下都能得到正确的fwnode
总的来说本例中device_property_read_u8_array 就是读取"node-guid"的值
linux-81uf:/sys/class/infiniband/hns_0 # cat node_guid
009a:cd00:0001:0203