dpdk架构学习-pci设备与驱动

1、设备的驱动注册 (用户态驱动,主动注册驱动)

/*
 * Driver descriptor for the igb PMD: a PMD_PDEV-type driver whose init
 * hook is rte_igb_pmd_init.
 */
static struct rte_driver pmd_igb_drv = {
	.type = PMD_PDEV,
	.init = rte_igb_pmd_init,
};
/*
 * Declare and define devinitfn_<d>(), a function that hands &d to
 * rte_eal_driver_register().  The "constructor" attribute asks the
 * compiler to run it automatically before main(); "used" keeps the
 * function from being discarded even though nothing calls it by name.
 */
#define PMD_REGISTER_DRIVER(d)\
void devinitfn_ ##d(void);\
void __attribute__((constructor, used)) devinitfn_ ##d(void)\
{\
	rte_eal_driver_register(&d);\
}
/*
 * Register a driver: append it to the tail of dev_driver_list.
 * No duplicate check is performed here.
 */
void
rte_eal_driver_register(struct rte_driver *driver)
{
	TAILQ_INSERT_TAIL(&dev_driver_list, driver, next);// hang the driver on the list
}
/* Expands to the constructor devinitfn_pmd_igb_drv(), which registers pmd_igb_drv. */
PMD_REGISTER_DRIVER(pmd_igb_drv);

调用流程:
rte_eal_init->rte_eal_dev_init->driver.init(for循环初始化dev_driver_list)。

driver.init: 调用如下(以igb_ethdev举例)

/*
 * Ethernet driver descriptor for igb.  The unnamed nested initializer
 * fills the first member of struct eth_driver (the embedded PCI driver).
 */
static struct eth_driver rte_igb_pmd = {
	{
		.name = "rte_igb_pmd",
		.id_table = pci_id_igb_map,
		.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
	},
	/* per-device init hook and size of the private area allocated per port */
	.eth_dev_init = eth_igb_dev_init,
	.dev_private_size = sizeof(struct e1000_adapter),
};

driver.init就是rte_igb_pmd_init函数
/*
 * Init hook of pmd_igb_drv: registers the igb Ethernet driver.
 * Both arguments are unused; always returns 0.
 */
static int
rte_igb_pmd_init(const char *name __rte_unused, const char *params __rte_unused)
{
	rte_eth_driver_register(&rte_igb_pmd);
	return 0;
}

/*
 * Init hook of pmd_igb_drv: registers the igb Ethernet driver.
 * Both arguments are unused; always returns 0.
 */
static int
rte_igb_pmd_init(const char *name __rte_unused, const char *params __rte_unused)
{
	rte_eth_driver_register(&rte_igb_pmd);
	return 0;
}
/*
 * Register an Ethernet driver as a PCI driver.
 * The embedded PCI driver's devinit hook is pointed at the generic
 * rte_eth_dev_init before the PCI driver is registered.
 */
void
rte_eth_driver_register(struct eth_driver *eth_drv)
{
	eth_drv->pci_drv.devinit = rte_eth_dev_init;
	rte_eal_pci_register(&eth_drv->pci_drv);
}

/* Register a PCI driver: append it to the tail of pci_driver_list. */
void
rte_eal_pci_register(struct rte_pci_driver *driver)
{
	TAILQ_INSERT_TAIL(&pci_driver_list, driver, next);
}

即通过  PMD_REGISTER_DRIVER 注册 struct rte_driver驱动, 再通过rte_eal_init->rte_eal_dev_init->driver.init(for循环初始化dev_driver_list)注册  struct eth_driver,再注册 rte_pci_driver.

struct rte_driver 作为一个引线,为了引出 struct eth_driver。

数据结构层次如下:
/*
 * Ethernet PMD descriptor.  pci_drv is the first member, so a pointer to
 * it has the same address as the enclosing struct eth_driver.
 */
struct eth_driver {
	struct rte_pci_driver pci_drv;    /**< The PMD is also a PCI driver. */
	eth_dev_init_t eth_dev_init;      /**< Device init function. */
	unsigned int dev_private_size;    /**< Size of device private data. */
};

/* Generic PCI driver descriptor, linked into a TAILQ of registered drivers. */
struct rte_pci_driver {
	TAILQ_ENTRY(rte_pci_driver) next;       /**< Next in list. */
	const char *name;                       /**< Driver name. */
	pci_devinit_t *devinit;                 /**< Device init. function. */
	struct rte_pci_id *id_table;            /**< ID table, NULL terminated. */
	uint32_t drv_flags;                     /**< Flags controlling handling of device. */
};
其中struct rte_pci_id定义如下,为了记录pci设备信息:
/* The four 16-bit IDs that identify a PCI device; each may be PCI_ANY_ID. */
struct rte_pci_id {
	uint16_t vendor_id;           /**< Vendor ID or PCI_ANY_ID. */
	uint16_t device_id;           /**< Device ID or PCI_ANY_ID. */
	uint16_t subsystem_vendor_id; /**< Subsystem vendor ID or PCI_ANY_ID. */
	uint16_t subsystem_device_id; /**< Subsystem device ID or PCI_ANY_ID. */
};

对外操作统一呈现为 struct rte_pci_driver 结构进行操作,屏蔽差异,实现统一化(使用C语言实现了c++的继承)

2、设备注册
调用流程:
rte_eal_init->rte_eal_pci_init->pci_scan->pci_scan_one

/*
 * Walk /sys/bus/pci/devices and hand every entry to pci_scan_one().
 *
 *  1. Each directory entry name is passed to parse_pci_addr_format() to
 *     obtain the device's domain, bus, devid and function; dot-entries
 *     and entries for which that call reports failure are skipped.
 *  2. pci_scan_one() is called with the entry's full sysfs path and the
 *     parsed address; it is the function that links the device into
 *     pci_device_list.
 *
 * Returns 0 on success, -1 if the directory cannot be opened or if
 * pci_scan_one() fails for any entry (scanning stops at the first failure).
 */
static int pci_scan(void)
{
	char dirname[PATH_MAX];
	struct dirent *entry;
	uint16_t domain;
	uint8_t bus, devid, function;
	int ret = 0;
	DIR *dir;
#define SYSFS_PCI_DEVICES "/sys/bus/pci/devices"

	dir = opendir(SYSFS_PCI_DEVICES);
	if (dir == NULL) {
		RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n",
			__func__, strerror(errno));
		return -1;
	}

	for (entry = readdir(dir); entry != NULL; entry = readdir(dir)) {
		/* skip ".", ".." and any other dot-entry */
		if (entry->d_name[0] == '.')
			continue;

		if (parse_pci_addr_format(entry->d_name, sizeof(entry->d_name),
				&domain, &bus, &devid, &function) != 0)
			continue;

		snprintf(dirname, sizeof(dirname), "%s/%s", SYSFS_PCI_DEVICES,
			 entry->d_name);
		if (pci_scan_one(dirname, domain, bus, devid, function) < 0) {
			ret = -1;
			break;
		}
	}

	/* the directory handle is released on both the success and failure path */
	closedir(dir);
	return ret;
}

/*
 * Build a struct rte_pci_device for one sysfs device directory and link
 * it into pci_device_list.
 *
 * dirname is the device's sysfs directory; domain/bus/devid/function are
 * stored as the device address.  The vendor, device, subsystem_vendor and
 * subsystem_device files are mandatory; max_vfs and numa_node are
 * optional; the resource file is parsed by pci_parse_sysfs_resource().
 *
 * Returns 0 when the device was added, -1 on allocation or parse failure
 * (the partially filled device is freed in that case).
 */
static int
pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
	     uint8_t devid, uint8_t function)
{
	char path[PATH_MAX];
	unsigned long val;
	struct rte_pci_device *dev;
	struct rte_pci_device *cur = NULL;

	dev = malloc(sizeof(*dev));
	if (dev == NULL)
		return -1;

	memset(dev, 0, sizeof(*dev));
	dev->addr.domain = domain;
	dev->addr.bus = bus;
	dev->addr.devid = devid;
	dev->addr.function = function;

	/* vendor id */
	snprintf(path, sizeof(path), "%s/vendor", dirname);
	if (eal_parse_sysfs_value(path, &val) < 0)
		goto fail;
	dev->id.vendor_id = (uint16_t)val;

	/* device id */
	snprintf(path, sizeof(path), "%s/device", dirname);
	if (eal_parse_sysfs_value(path, &val) < 0)
		goto fail;
	dev->id.device_id = (uint16_t)val;

	/* subsystem vendor id */
	snprintf(path, sizeof(path), "%s/subsystem_vendor",
		 dirname);
	if (eal_parse_sysfs_value(path, &val) < 0)
		goto fail;
	dev->id.subsystem_vendor_id = (uint16_t)val;

	/* subsystem device id */
	snprintf(path, sizeof(path), "%s/subsystem_device",
		 dirname);
	if (eal_parse_sysfs_value(path, &val) < 0)
		goto fail;
	dev->id.subsystem_device_id = (uint16_t)val;

	/* max_vfs is optional: stays 0 if the file is missing or unparsable */
	dev->max_vfs = 0;
	snprintf(path, sizeof(path), "%s/max_vfs", dirname);
	if (access(path, F_OK) == 0 &&
	    eal_parse_sysfs_value(path, &val) == 0)
		dev->max_vfs = (uint16_t)val;

	/* numa node: -1 when the file is not readable (no NUMA support) */
	snprintf(path, sizeof(path), "%s/numa_node",
		 dirname);
	if (access(path, R_OK) == 0) {
		if (eal_parse_sysfs_value(path, &val) < 0)
			goto fail;
		dev->numa_node = val;
	} else {
		dev->numa_node = -1;
	}

	/* memory resources */
	snprintf(path, sizeof(path), "%s/resource", dirname);
	if (pci_parse_sysfs_resource(path, dev) < 0) {
		RTE_LOG(ERR, EAL, "%s(): cannot parse resource\n", __func__);
		goto fail;
	}

	/*
	 * Device is valid: insert it in front of the first list element for
	 * which pci_addr_comparison() returns zero, keeping the list sorted.
	 * An empty list simply falls through to the tail insert.
	 */
	TAILQ_FOREACH(cur, &pci_device_list, next) {
		if (!pci_addr_comparison(&dev->addr, &cur->addr)) {
			TAILQ_INSERT_BEFORE(cur, dev, next);
			return 0;
		}
	}
	TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
	return 0;

fail:
	free(dev);
	return -1;
}

简单总结:
    利用linux 内核PCI的扫描结果,读取这些pci设备的信息到用户态来,然后记录这些设备信息,形成用户态的设备信息,实现用户态的pci设备。

函数pci_parse_sysfs_resource 会将pci设备的地址空间(bar空间)地址记录下来。

/*
 * Parse a PCI device's sysfs "resource" file into dev->mem_resource[].
 *
 * Exactly PCI_MAX_RESOURCE lines are read.  Each line must split into
 * three space-separated hexadecimal fields: start address, end address
 * and flags.  For lines whose flags contain IORESOURCE_MEM the physical
 * address and length are recorded; other lines are left untouched.
 *
 * Returns 0 on success, -1 if the file cannot be opened, a line is
 * missing, or a line is malformed.
 */
static int
pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev)
{
	FILE *fp;
	char line[BUFSIZ];
	union pci_resource_info {
		struct {
			char *phys_addr;
			char *end_addr;
			char *flags;
		};
		char *ptrs[PCI_RESOURCE_FMT_NVAL];
	} fields;
	int idx;
	int ret = -1;
	uint64_t start, end, flags;

	fp = fopen(filename, "r");
	if (fp == NULL) {
		RTE_LOG(ERR, EAL, "Cannot open sysfs resource\n");
		return -1;
	}

	for (idx = 0; idx<PCI_MAX_RESOURCE; idx++) {

		if (fgets(line, sizeof(line), fp) == NULL) {
			RTE_LOG(ERR, EAL,
				"%s(): cannot read resource\n", __func__);
			goto out;
		}

		if (rte_strsplit(line, sizeof(line), fields.ptrs, 3, ' ') != 3) {
			RTE_LOG(ERR, EAL,
				"%s(): bad resource format\n", __func__);
			goto out;
		}

		/*
		 * Convert the three string fields: BAR start address, BAR end
		 * address and BAR flags.  errno is cleared first so that a
		 * failure in any of the conversions can be detected afterwards.
		 */
		errno = 0;
		start = strtoull(fields.phys_addr, NULL, 16);
		end = strtoull(fields.end_addr, NULL, 16);
		flags = strtoull(fields.flags, NULL, 16);
		if (errno != 0) {
			RTE_LOG(ERR, EAL,
				"%s(): bad resource format\n", __func__);
			goto out;
		}

		/* only memory resources are recorded */
		if ((flags & IORESOURCE_MEM) == 0)
			continue;

		dev->mem_resource[idx].phys_addr = start;
		dev->mem_resource[idx].len = end - start + 1;
		/* not mapped for now; the virtual address is left unset here */
		dev->mem_resource[idx].addr = NULL;
	}
	ret = 0;

out:
	fclose(fp);
	return ret;
}
cat /sys/bus/pci/devices/0000:00:08.0/resource
0x0000000000001000 0x000000000000103f 0x0000000000040101
0x00000000c0040000 0x00000000c0040fff 0x0000000000040200
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000440000000 0x0000000440003fff 0x000000000014220c
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x00000000c0000000 0x00000000c003ffff 0x000000000004e200
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 0x0000000000000000

前 6 行为 PCI 设备的 6 个 BAR,还是以 Intel 82599 为例,前两个 BAR 为 Memory BAR,中间两个 BAR 为 IO BAR,最后两个 BAR 为 MSI-X BAR。其中,每个 BAR 又分为 3 列:

第 1 列为 PCI BAR 的起始地址
第 2 列为 PCI BAR 的终止地址
第 3 列为 PCI BAR 的标识

3、设备与驱动绑定
调用流程:
rte_eal_init->rte_eal_pci_probe->pci_probe_all_drivers->rte_eal_pci_probe_one_driver
–pci_map_device
–dr->devinit(dr, dev);

/*
里面的设备有白名单和黑名单之分,暂不研究。
1、设备与驱动进行匹配
2、如果有uio_dev,需要将设备内存空间mmap到用户态来(pci_map_device)

*/

在函数pci_map_device里面,做了如下事情:
1、查找pci设备是否创建了uio设备
2、将/sys/bus/pci/devices/xxxx:xx:xx.x/uio/uiox 设备节点重新mknod 为 /dev/uiox
3、定义一个struct mapped_pci_resource *uio_res, 打开/dev/uiox 获取pci设备的物理信息等信息,然后与设备里面的物理信息等比较,看是否匹配。如果匹配就进行mmap操作,并且TAILQ_INSERT_TAIL(pci_res_list, uio_res, next);将uio_res 信息加入队列。
完成 设备注册 没有完成的操作 dev->mem_resource[i].addr = mapaddr;。
4、最后初始化设备dr->devinit(dr, dev),即执行函数 rte_eth_dev_init

例如:
在函数pci_get_uio_dev 中,通过函数 pci_mknod_uio_dev 为 /sys/bus/pci/devices/0000:01:00.0/uio/uio1 创建 /dev/uio1 设备,创建 /dev/uio1 设备的流程是:
先获取 /sys/bus/pci/devices/0000:01:00.0/uio/uio1/dev 的主设备号和次设备号,然后dev = makedev(major, minor),再mknod("/dev/uio1", S_IFCHR | S_IRUSR | S_IWUSR, dev);
[ 其中/sys/bus/pci/devices/0000:01:00.0/uio/uio1 就是/sys/class/uio/uio1]

/*
 * Generic PCI devinit hook installed for every Ethernet PMD.
 *
 * Allocates an rte_eth_dev named after the PCI address, allocates the
 * driver's private area (primary process only), fills in the common
 * fields and then calls the PMD's own eth_dev_init hook.
 *
 * Returns 0 on success, -ENOMEM if no ethdev could be allocated, or the
 * non-zero value returned by the PMD's eth_dev_init on failure.
 */
static int
rte_eth_dev_init(struct rte_pci_driver *pci_drv,
		 struct rte_pci_device *pci_dev)
{
	struct eth_driver    *eth_drv;
	struct rte_eth_dev *eth_dev;
	char ethdev_name[RTE_ETH_NAME_MAX_LEN];

	int diag;

	/*
	 * "Inheritance" in C: the PCI driver is embedded in struct
	 * eth_driver, so the pointer is cast back to the enclosing type.
	 */
	eth_drv = (struct eth_driver *)pci_drv;

	/* Create unique Ethernet device name using PCI address */
	snprintf(ethdev_name, RTE_ETH_NAME_MAX_LEN, "%d:%d.%d",
			pci_dev->addr.bus, pci_dev->addr.devid, pci_dev->addr.function);

	eth_dev = rte_eth_dev_allocate(ethdev_name);
	if (eth_dev == NULL)
		return -ENOMEM;

	if (rte_eal_process_type() == RTE_PROC_PRIMARY){
		eth_dev->data->dev_private = rte_zmalloc("ethdev private structure",
				  eth_drv->dev_private_size,
				  RTE_CACHE_LINE_SIZE);
		if (eth_dev->data->dev_private == NULL)
			rte_panic("Cannot allocate memzone for private port data\n");
	}
	eth_dev->pci_dev = pci_dev;
	eth_dev->driver = eth_drv;
	eth_dev->data->rx_mbuf_alloc_failed = 0;

	/* init user callbacks */
	TAILQ_INIT(&(eth_dev->callbacks));

	/*
	 * Set the default MTU.
	 */
	eth_dev->data->mtu = ETHER_MTU;

	/* Invoke PMD device initialization function (eth_igb_dev_init for igb) */
	diag = (*eth_drv->eth_dev_init)(eth_drv, eth_dev);
	if (diag == 0)
		return (0);

	/* Both IDs are printed in hex so that the "0x" prefix is truthful. */
	PMD_DEBUG_TRACE("driver %s: eth_dev_init(vendor_id=0x%x device_id=0x%x)"
			" failed\n", pci_drv->name,
			(unsigned) pci_dev->id.vendor_id,
			(unsigned) pci_dev->id.device_id);
	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		rte_free(eth_dev->data->dev_private);
		/* the slot is handed back below; do not leave a dangling pointer in it */
		eth_dev->data->dev_private = NULL;
	}
	/* give the port slot taken by rte_eth_dev_allocate() back */
	nb_ports--;
	return diag;
}

感觉从设备的注册,驱动的注册,到真正的执行驱动初始化,绕了很大的一个圈,特别是:

rte_eal_pci_probe_one_driver
->
dr->devinit(即 eth_drv->pci_drv.devinit = rte_eth_dev_init)->
(*eth_drv->eth_dev_init)(eth_drv, eth_dev);
(static struct eth_driver rte_igb_pmd = {
	{
		.name = "rte_igb_pmd",
		.id_table = pci_id_igb_map,
		.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
	},
	.eth_dev_init = eth_igb_dev_init,//这个
	.dev_private_size = sizeof(struct e1000_adapter),
};)

/*
 * Generic PCI devinit hook installed for every Ethernet PMD.
 *
 * Allocates an rte_eth_dev named after the PCI address, allocates the
 * driver's private area (primary process only), fills in the common
 * fields and then calls the PMD's own eth_dev_init hook.
 *
 * Returns 0 on success, -ENOMEM if no ethdev could be allocated, or the
 * non-zero value returned by the PMD's eth_dev_init on failure.
 */
static int
rte_eth_dev_init(struct rte_pci_driver *pci_drv,
		 struct rte_pci_device *pci_dev)
{
	struct eth_driver    *eth_drv;
	struct rte_eth_dev *eth_dev;
	char ethdev_name[RTE_ETH_NAME_MAX_LEN];

	int diag;

	/* the PCI driver is embedded in struct eth_driver; cast back to it */
	eth_drv = (struct eth_driver *)pci_drv;

	/* Create unique Ethernet device name using PCI address */
	snprintf(ethdev_name, RTE_ETH_NAME_MAX_LEN, "%d:%d.%d",
			pci_dev->addr.bus, pci_dev->addr.devid, pci_dev->addr.function);

	/* reserve an ethdev slot for this device */
	eth_dev = rte_eth_dev_allocate(ethdev_name);
	if (eth_dev == NULL)
		return -ENOMEM;

	if (rte_eal_process_type() == RTE_PROC_PRIMARY){
		eth_dev->data->dev_private = rte_zmalloc("ethdev private structure",
				  eth_drv->dev_private_size,
				  RTE_CACHE_LINE_SIZE);
		if (eth_dev->data->dev_private == NULL)
			rte_panic("Cannot allocate memzone for private port data\n");
	}
	eth_dev->pci_dev = pci_dev;
	eth_dev->driver = eth_drv;
	eth_dev->data->rx_mbuf_alloc_failed = 0;

	/* init user callbacks */
	TAILQ_INIT(&(eth_dev->callbacks));

	/*
	 * Set the default MTU.
	 */
	eth_dev->data->mtu = ETHER_MTU;

	/* Invoke PMD device initialization function */
	diag = (*eth_drv->eth_dev_init)(eth_drv, eth_dev);
	if (diag == 0)
		return (0);

	/* Both IDs are printed in hex so that the "0x" prefix is truthful. */
	PMD_DEBUG_TRACE("driver %s: eth_dev_init(vendor_id=0x%x device_id=0x%x)"
			" failed\n", pci_drv->name,
			(unsigned) pci_dev->id.vendor_id,
			(unsigned) pci_dev->id.device_id);
	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		rte_free(eth_dev->data->dev_private);
		/* the slot is handed back below; do not leave a dangling pointer in it */
		eth_dev->data->dev_private = NULL;
	}
	/* give the port slot taken by rte_eth_dev_allocate() back */
	nb_ports--;
	return diag;
}

/*
 * Hand out the next unused entry of rte_eth_devices[] under the given name.
 *
 * Returns NULL when RTE_MAX_ETHPORTS ports are already in use or when
 * rte_eth_dev_allocated() reports that the name is taken.  On success the
 * entry's data pointer, name and port_id are set and nb_ports is
 * incremented.
 */
struct rte_eth_dev *
rte_eth_dev_allocate(const char *name)
{
	struct rte_eth_dev *port;

	if (nb_ports == RTE_MAX_ETHPORTS) {
		PMD_DEBUG_TRACE("Reached maximum number of Ethernet ports\n");
		return NULL;
	}

	/* allocate the shared data array on first use */
	if (rte_eth_dev_data == NULL)
		rte_eth_dev_data_alloc();

	if (rte_eth_dev_allocated(name) != NULL) {
		PMD_DEBUG_TRACE("Ethernet Device with name %s already allocated!\n", name);
		return NULL;
	}

	/* the device comes from the static array, indexed by port number */
	port = &rte_eth_devices[nb_ports];
	port->data = &rte_eth_dev_data[nb_ports];
	snprintf(port->data->name, sizeof(port->data->name), "%s", name);

	/* the current port count becomes this device's port id */
	port->data->port_id = nb_ports;
	nb_ports++;
	return port;
}

我的版本是18.04的,新一点的20.02版本的驱动注册就没有那么绕,直接是使用宏

RTE_PMD_REGISTER_PCI(net_ixgbe, rte_ixgbe_pmd);进行注册
/*
 * Simplified sketch of the PCI PMD registration macro: store the driver
 * name in pci_drv and register it on the PCI bus.
 *
 * nm is a bare token (e.g. net_ixgbe) and is stringified here.  Every
 * line of the body carries a continuation backslash so that the whole
 * block belongs to the macro.
 * NOTE(review): the upstream DPDK macro additionally wraps this body in
 * a constructor function; confirm against rte_bus_pci.h before reuse.
 */
#define RTE_PMD_REGISTER_PCI(nm, pci_drv) \
 {\
    (pci_drv).driver.name = #nm; \
    rte_pci_register(&(pci_drv)); \
}
/*
 * Register a PCI driver: append it to the PCI bus's driver list and
 * record the bus in the driver.
 */
void rte_pci_register(struct rte_pci_driver *driver)
{
    TAILQ_INSERT_TAIL(&rte_pci_bus.driver_list, driver, next);
    driver->bus = &rte_pci_bus;
}

新版的sdk引入了bus的概念。
struct rte_pci_bus rte_pci_bus = {
.bus = {
.scan = rte_pci_scan, /* 扫描pci设备,加入设备链表(实现原理和老版本一样) /
.probe = rte_pci_proble, /
设备与driver进行匹配 */


},
}

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值