spdk-20.10: analysis of the io_channel and poll group mechanism

Analysis of the 20.10 polling model and the concrete role of struct nvme_bdev_poll_group

During bdev initialization, the nvme subsystem registers:
	spdk_io_device_register(&g_nvme_bdev_ctrlrs, bdev_nvme_poll_group_create_cb,
				bdev_nvme_poll_group_destroy_cb,
				sizeof(struct nvme_bdev_poll_group),  "bdev_nvme_poll_groups");
				
	This shows that the group belongs to the io_device g_nvme_bdev_ctrlrs.
	SPDK currently has several such groups: bdev_aio_group_channel, nvme_bdev_poll_group, bdev_rbd_group_channel, bdev_uring_group_channel.
	These groups effectively classify the block devices by type.
	
	Here we are mainly interested in nvme_bdev_poll_group.
	Its definition:
	
struct nvme_bdev_poll_group {
	struct spdk_nvme_poll_group		*group;
	struct spdk_poller			*poller;
	bool					collect_spin_stat;
	uint64_t				spin_ticks;
	uint64_t				start_ticks;
	uint64_t				end_ticks;
#ifdef	SPDK_CONFIG_APP_RW
	uint64_t				save_start_ticks;
	uint64_t				num_poll_timeout;
#endif
};

The poller polls this group exclusively; it is registered in bdev_nvme_poll_group_create_cb, and we use a non-timed (zero-period) poller.
bdev_nvme_create_cb calls pg_ch = spdk_get_io_channel(&g_nvme_bdev_ctrlrs); to obtain the thread-local spdk_io_channel from the registered io_device.
Each io_device may have at most one spdk_io_channel per spdk thread: on the first get, one is created and ch->dev = dev; the dev pointer is how an existing channel is recognized.
When a thread reuses its existing spdk_io_channel, ch->ref++ and the same ch is returned directly.
During creation, ch = calloc(1, sizeof(*ch) + dev->ctx_size); dev->ctx_size is the size of the lower-layer channel context supplied at registration, so different io_devices carry different lower-layer channels (see the layout sketch below).
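A minimal standalone sketch of this layout (the fake_ names are ours, not SPDK's): the channel header and the io_device-specific context share one allocation, and the ctx pointer handed to create_cb is simply the address right after the header, which is also what spdk_io_channel_get_ctx returns.

#include <stdint.h>
#include <stdlib.h>

/* Not the real SPDK definition, just enough fields to show the layout. */
struct fake_io_channel {
	void		*dev;	/* which io_device this channel belongs to */
	uint32_t	ref;	/* per-thread reference count */
	/* dev->ctx_size bytes of device context follow the header */
};

static struct fake_io_channel *
fake_alloc_channel(uint32_t ctx_size)
{
	/* mirrors ch = calloc(1, sizeof(*ch) + dev->ctx_size) */
	return calloc(1, sizeof(struct fake_io_channel) + ctx_size);
}

static void *
fake_get_ctx(struct fake_io_channel *ch)
{
	/* the same pointer arithmetic spdk_io_channel_get_ctx() performs */
	return (uint8_t *)ch + sizeof(*ch);
}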

The io_devices registered at the bdev layer and the nvme layer are:
	spdk_io_device_register(&g_bdev_mgr, bdev_mgmt_channel_create,
				bdev_mgmt_channel_destroy,
				sizeof(struct spdk_bdev_mgmt_channel),
				"bdev_mgr");
	Registers an io_device keyed by g_bdev_mgr, named bdev_mgr: the generic block-layer management device used to manage bdevs.
	
	
	spdk_io_device_register(__bdev_to_io_dev(bdev),
				bdev_channel_create, bdev_channel_destroy,
				sizeof(struct spdk_bdev_channel),
				bdev_name);
	Registers an io_device keyed by __bdev_to_io_dev(bdev), named bdev_name; its channel is the spdk_bdev_channel used to submit reads, writes and other I/O (see the macro sketch below).
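Why the key is __bdev_to_io_dev(bdev) rather than the bdev pointer itself: as far as I recall, bdev.c derives the io_device key by offsetting the bdev pointer by one byte, so the bdev pointer and its io_device key never collide (treat the exact definition below as approximate):

#define __bdev_to_io_dev(bdev)		(((char *)bdev) + 1)
#define __bdev_from_io_dev(io_dev)	((struct spdk_bdev *)(((char *)io_dev) - 1))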
	
	spdk_io_device_register(nvme_bdev_ctrlr, bdev_nvme_create_cb, bdev_nvme_destroy_cb,
				sizeof(struct nvme_io_channel),
				name);
	Registers an io_device keyed by the nvme_bdev_ctrlr, named name, used for I/O at the nvme layer (its channel context is sketched below).
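The ctx_size here is sizeof(struct nvme_io_channel); reconstructing its relevant fields from the flow traced later in this note (a sketch: the actual 20.10 definition may have extra fields or a different order):

/* Per-thread NVMe channel context: one qpair per thread, a pointer to the
 * shared poll group, and a queue of resets waiting behind an active one. */
struct nvme_io_channel {
	struct spdk_nvme_qpair		*qpair;
	struct nvme_bdev_poll_group	*group;
	TAILQ_HEAD(, spdk_bdev_io)	pending_resets;
};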
-------------------------------------------------------------------------------------------------------
The spdk_io_channel structure:
Calling spdk_get_io_channel creates and returns an spdk_io_channel. It is a resource internal to each spdk_thread; it must be obtained and released on its owning thread rather than accessed from outside.


spdk_get_io_channel(void *io_device): the argument is the io_device key
1. Walk g_io_devices; if the device is not found, return immediately.
2. thread = _get_thread(); the caller must be an spdk_thread and the thread must not be in the exit state, otherwise return.
3. TAILQ_FOREACH(ch, &thread->io_channels, tailq) { search the thread-local list first; on a hit, reuse the channel and ref++.
4. Otherwise create a new one: ch = calloc(1, sizeof(*ch) + dev->ctx_size);				ctx_size was supplied at registration
5. rc = dev->create_cb(io_device, (uint8_t *)ch + sizeof(*ch));			creates the subordinate (lower-layer) channel


void
spdk_io_device_register(void *io_device, spdk_io_channel_create_cb create_cb,
			spdk_io_channel_destroy_cb destroy_cb, uint32_t ctx_size,
			const char *name)
Arguments: io_device, create_cb, destroy_cb, ctx_size, name

1. thread = spdk_get_thread();     					must run in an spdk_thread context
2. TAILQ_FOREACH(tmp, &g_io_devices, tailq) {		searches g_io_devices so an already-registered device is not registered twice
3. dev = calloc(1, sizeof(struct io_device));		allocates a dev and records io_device, create_cb, destroy_cb and the other key fields
4. TAILQ_INSERT_TAIL(&g_io_devices, dev, tailq);	appends dev to g_io_devices


void
spdk_put_io_channel(struct spdk_io_channel *ch)		the argument is the spdk_io_channel
1. thread = spdk_get_thread();						must run in an spdk_thread context
2. if (ch->thread != thread) {						the channel must be released by the thread that owns it
3. ch->ref--; 										drop the reference count
4. if (ch->ref == 0) {
rc = spdk_thread_send_msg(thread, put_io_channel, ch); when the refcount reaches 0, destruction is deferred via a message to the owning thread (see the lifecycle sketch below)
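Putting the three functions above together, a minimal usage sketch running on an spdk_thread (g_my_dev_tag, my_thread_ctx and the callbacks are hypothetical names for illustration, not SPDK symbols):

#include "spdk/thread.h"

static int g_my_dev_tag;	/* any unique address works as the io_device key */

struct my_thread_ctx {
	uint64_t ios_submitted;
};

static int
my_channel_create(void *io_device, void *ctx_buf)
{
	struct my_thread_ctx *ctx = ctx_buf;	/* ctx_buf is the per-channel area after the header */

	ctx->ios_submitted = 0;
	return 0;
}

static void
my_channel_destroy(void *io_device, void *ctx_buf)
{
	/* release whatever my_channel_create acquired */
}

static void
example(void)
{
	struct spdk_io_channel *ch;
	struct my_thread_ctx *ctx;

	spdk_io_device_register(&g_my_dev_tag, my_channel_create, my_channel_destroy,
				sizeof(struct my_thread_ctx), "my_dev");

	ch = spdk_get_io_channel(&g_my_dev_tag);	/* first get on this thread runs create_cb */
	ctx = spdk_io_channel_get_ctx(ch);
	ctx->ios_submitted++;

	spdk_put_io_channel(ch);			/* ref hits 0 -> destroy_cb via thread message */
	spdk_io_device_unregister(&g_my_dev_tag, NULL);
}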
----------------------------------------------------------------------------------------
The main call and implementation flow:

1. int spdk_bdev_open_ext(const char *bdev_name, bool write, spdk_bdev_event_cb_t event_cb,		opens the block device; desc = calloc(1, sizeof(*desc)); allocates a descriptor
		   void *event_ctx, struct spdk_bdev_desc **_desc)

2. spdk_bdev_get_io_channel(struct spdk_bdev_desc *desc)										takes the desc and creates the io_channel resources
	spdk_get_io_channel(__bdev_to_io_dev(spdk_bdev_desc_get_bdev(desc)));					key passed in is __bdev_to_io_dev(bdev)
		ch = calloc(1, sizeof(*ch) + dev->ctx_size);
		TAILQ_INSERT_TAIL(&thread->io_channels, ch, tailq);									ch is appended to thread->io_channels
		rc = dev->create_cb(io_device, (uint8_t *)ch + sizeof(*ch));
		bdev_channel_create
			ch->channel = bdev->fn_table->get_io_channel(bdev->ctxt);
			bdev_nvme_get_io_channel(void *ctx)
				struct nvme_bdev *nvme_bdev = ctx;
					return spdk_get_io_channel(nvme_bdev->nvme_ns->ctrlr);					key passed in is the struct nvme_bdev_ctrlr
								ch = calloc(1, sizeof(*ch) + dev->ctx_size);
								TAILQ_INSERT_TAIL(&thread->io_channels, ch, tailq);			ch is appended to thread->io_channels
								rc = dev->create_cb(io_device, (uint8_t *)ch + sizeof(*ch));
									bdev_nvme_create_cb
										ch->qpair = spdk_nvme_ctrlr_alloc_io_qpair(nvme_bdev_ctrlr->ctrlr, &opts, sizeof(opts));   allocates the qpair
										pg_ch = spdk_get_io_channel(&g_nvme_bdev_ctrlrs);											key passed in is g_nvme_bdev_ctrlrs
											ch = calloc(
											TAILQ_INSERT_TAIL(&thread->io_channels, ch, tailq);			pg_ch is appended to thread->io_channels
											rc = dev->create_cb(
												bdev_nvme_poll_group_create_cb
													struct nvme_bdev_poll_group *group = ctx_buf;								takes the pre-allocated nvme_bdev_poll_group out of ctx_buf
													group->group = spdk_nvme_poll_group_create(group);
															struct spdk_nvme_poll_group *group;									creates the spdk_nvme_poll_group
															group = calloc(1, sizeof(*group));
															group->ctx = ctx;													stores the nvme_bdev_poll_group in ctx
															STAILQ_INIT(&group->tgroups);										initializes the queue of spdk_nvme_transport_poll_groups
													group->poller = SPDK_POLLER_REGISTER(bdev_nvme_poll, group, g_opts.nvme_ioq_poll_period_us);  registers the poller with bdev_nvme_poll as fn (sketched after this trace)
										ch->group = spdk_io_channel_get_ctx(pg_ch);												stores the group in the nvme_io_channel (must happen before the add below)
										if (spdk_nvme_poll_group_add(ch->group->group, ch->qpair) != 0)							sets qpair->poll_group = tgroup; qpair->poll_group_tailq_head = &tgroup->disconnected_qpairs;
										rc = spdk_nvme_ctrlr_connect_io_qpair(nvme_bdev_ctrlr->ctrlr, ch->qpair);
											rc = transport->ops.ctrlr_connect_qpair(ctrlr, qpair);
												nvme_pcie_ctrlr_connect_qpair
													_nvme_pcie_ctrlr_create_io_qpair(ctrlr, qpair, qpair->id);					creates the qpair's cq and sq
											nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);									sets the qpair state to NVME_QPAIR_CONNECTED
											rc = nvme_poll_group_connect_qpair(qpair);
												rc = tgroup->transport->ops.poll_group_connect_qpair(qpair);
												qpair->poll_group_tailq_head = &tgroup->connected_qpairs;
												STAILQ_REMOVE(&tgroup->disconnected_qpairs, qpair, spdk_nvme_qpair, poll_group_stailq);
												STAILQ_INSERT_TAIL(&tgroup->connected_qpairs, qpair, poll_group_stailq);
										TAILQ_INIT(&ch->pending_resets); 	initializes the pending-resets queue
			
			mgmt_io_ch = spdk_get_io_channel(&g_bdev_mgr);
				ch = calloc(1, sizeof(*ch) + dev->ctx_size);
				TAILQ_INSERT_TAIL(&thread->io_channels, ch, tailq);									ch is appended to thread->io_channels
				rc = dev->create_cb(io_device, (uint8_t *)ch + sizeof(*ch));
					bdev_mgmt_channel_create														creates the mgmt channel and initializes its queue resources: TAILQ_INIT(&ch->shared_resources); ...
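For completeness, the poller body registered above: each invocation drains completions from every connected qpair in the group. A simplified sketch of bdev_nvme_poll (the 20.10 body also keeps the spin-stat tick accounting from the struct at the top of this note, omitted here, so treat details as approximate):

static int
bdev_nvme_poll(void *arg)
{
	struct nvme_bdev_poll_group *group = arg;
	int64_t num_completions;

	/* 0 = no per-qpair completion limit; the callback handles qpairs
	 * that have fallen back to the disconnected list. */
	num_completions = spdk_nvme_poll_group_process_completions(group->group, 0,
			  bdev_nvme_disconnected_qpair_cb);

	return num_completions > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}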
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------



Block-layer and subsystem initialization:
spdk_subsystem_init
	subsystem_sort					walks g_subsystems_deps and inserts any subsystem not yet placed; the goal is that a subsystem initializes only after every subsystem it depends on
									bdev currently depends on: accel, vmd, sock
	spdk_subsystem_init_next
		g_next_subsystem->init()	initializes the subsystems one by one; jumping straight to bdev here. This is a recursive, callback-chained call (sketched below)
			bdev_subsystem_initialize
				spdk_bdev_initialize(bdev_initialize_complete, NULL);
					g_bdev_mgr.bdev_io_pool = spdk_mempool_create   creates 3 pools
					g_bdev_mgr.buf_small_pool =
					g_bdev_mgr.buf_large_pool = 
					g_bdev_mgr.zero_buffer = 						allocates one zero buffer
					spdk_io_device_register(&g_bdev_mgr				registers g_bdev_mgr as the management io_device; ctx_size is sizeof(struct spdk_bdev_mgmt_channel)
					rc = bdev_modules_init();						initializes the subordinate modules
						rc = module->module_init();					bdev's modules include aio, iscsi, malloc, nvme, ocssd, uring, etc., close to 30 in total
							bdev_nvme_library_init					here we only care about nvme
							g_bdev_nvme_init_thread = spdk_get_thread();		records the pointer of the initializing thread
							spdk_io_device_register(&g_nvme_bdev_ctrlrs,		registers the io_device keyed by g_nvme_bdev_ctrlrs; ctx_size is sizeof(struct nvme_bdev_poll_group)
					bdev_module_action_complete();
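As noted above, the "loop" over subsystems is really a chain of callbacks: each subsystem's init() eventually calls spdk_subsystem_init_next() again. A simplified sketch of that pattern (finish_with_error/finish_ok are hypothetical stand-ins for the completion callback, not SPDK symbols, and the real body differs in details):

void
spdk_subsystem_init_next(int rc)
{
	if (rc != 0) {
		finish_with_error(rc);			/* hypothetical: report failure to the caller */
		return;
	}

	/* advance to the next subsystem in dependency-sorted order */
	g_next_subsystem = (g_next_subsystem == NULL) ?
			   TAILQ_FIRST(&g_subsystems) :
			   TAILQ_NEXT(g_next_subsystem, tailq);

	if (g_next_subsystem == NULL) {
		finish_ok();				/* hypothetical: every subsystem is up */
		return;
	}

	if (g_next_subsystem->init != NULL) {
		g_next_subsystem->init();		/* async; it calls spdk_subsystem_init_next() when done */
	} else {
		spdk_subsystem_init_next(0);
	}
}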
							


----------------------------------------------------------------------------------------------------------------

Block-device resource allocation and registration:
spdk_bdev_register completes the registration of a block device (a minimal registration sketch follows this trace).


rc = spdk_bdev_register(&bdev->disk);
	int rc = bdev_init(bdev);
			spdk_bdev_get_by_name(bdev->name)							checks whether the bdev is already registered; if it exists, return -EEXIST;
			bdev_name = spdk_sprintf_alloc("bdev_%s", bdev->name);		generates a formatted name for the bdev: bdev_%s
			
	bdev_start(bdev);
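For reference, what a module hands to spdk_bdev_register: a minimal hypothetical sketch (the my_* names are ours; the fn_table fields shown are the ones this note relies on, notably get_io_channel, which bdev_channel_create calls through bdev->fn_table):

#include "spdk/bdev_module.h"

static int my_destruct(void *ctx) { return 0; }

static void
my_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);	/* stub */
}

static bool my_io_type_supported(void *ctx, enum spdk_bdev_io_type type) { return false; }

static struct spdk_io_channel *
my_get_io_channel(void *ctx)
{
	return spdk_get_io_channel(ctx);	/* ctx must be a registered io_device */
}

static const struct spdk_bdev_fn_table my_fn_table = {
	.destruct		= my_destruct,
	.submit_request		= my_submit_request,
	.io_type_supported	= my_io_type_supported,
	.get_io_channel		= my_get_io_channel,
};

/* A real module also sets .module (its spdk_bdev_module) and .ctxt. */
static struct spdk_bdev g_my_bdev = {
	.name		= "my_bdev0",
	.product_name	= "example",
	.blocklen	= 512,
	.blockcnt	= 1024,
	.fn_table	= &my_fn_table,
};

/* rc = spdk_bdev_register(&g_my_bdev); */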






After the ctrlr's init sequence finishes, ctrlr->state == NVME_CTRLR_STATE_READY and the attach flow runs:
probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);

attach_cb
	prchk_flags = ctx->prchk_flags[i];
	name = strdup(ctx->names[i]);				when the nvme_probe_ctx is non-NULL, name and prchk_flags are taken from it
	nvme_bdev_ctrlr_create(ctrlr, name, trid, prchk_flags);				starts creating the nvme_bdev_ctrlr
			nvme_bdev_ctrlr = calloc(1, sizeof(*nvme_bdev_ctrlr));
			nvme_bdev_ctrlr->num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr);
			nvme_bdev_ctrlr->namespaces = calloc(nvme_bdev_ctrlr->num_ns, sizeof(struct nvme_bdev_ns *));			supports multiple namespaces; allocates the array of per-namespace pointers
			trid_entry = calloc(1, sizeof(*trid_entry));															allocates a struct nvme_bdev_ctrlr_trid
			trid_entry->trid = *trid;
			nvme_bdev_ctrlr->namespaces[i] = calloc(1, sizeof(struct nvme_bdev_ns));								allocates each namespace
			nvme_bdev_ctrlr->thread = spdk_get_thread();															the nvme_bdev_ctrlr belongs to this thread
			nvme_bdev_ctrlr->ctrlr = ctrlr;																			links to the spdk_nvme_ctrlr
			nvme_bdev_ctrlr->name = strdup(name);
			nvme_bdev_ctrlr->prchk_flags = prchk_flags;																stores the name and prchk_flags passed in
			spdk_io_device_register(nvme_bdev_ctrlr, bdev_nvme_create_cb, bdev_nvme_destroy_cb,						registers the io_device
				sizeof(struct nvme_io_channel),
				name);
			TAILQ_INSERT_TAIL(&g_nvme_bdev_ctrlrs, nvme_bdev_ctrlr, tailq);											g_nvme_bdev_ctrlrs tracks every nvme_bdev_ctrlr
	nvme_ctrlr_populate_namespaces(nvme_bdev_ctrlr, NULL);
			nvme_ctrlr_populate_namespace(nvme_bdev_ctrlr, ns, ctx);
					nvme_ctrlr_populate_standard_namespace
						bdev = calloc(1, sizeof(*bdev));
						bdev->disk.name = spdk_sprintf_alloc("%sn%d", nvme_bdev_ctrlr->name, spdk_nvme_ns_get_id(ns));
						bdev->disk.ctxt = bdev;
						rc = spdk_bdev_register(&bdev->disk);
								int rc = bdev_init(bdev);
									spdk_bdev_get_by_name(bdev->name)							checks whether the bdev is already registered; if it exists, return -EEXIST;
									bdev_name = spdk_sprintf_alloc("bdev_%s", bdev->name);		generates a formatted name for the bdev: bdev_%s
									bdev->internal.status = SPDK_BDEV_STATUS_READY;
									spdk_io_device_register(__bdev_to_io_dev(bdev),
										bdev_channel_create, bdev_channel_destroy,
										sizeof(struct spdk_bdev_channel),
										bdev_name);
								bdev_start(bdev);
									TAILQ_INSERT_TAIL(&g_bdev_mgr.bdevs, bdev, internal.link);
									bdev_examine(bdev);
						nvme_bdev_attach_bdev_to_ns(nvme_ns, bdev);
							nvme_ns->ctrlr->ref++;
							TAILQ_INSERT_TAIL(&nvme_ns->bdevs, nvme_disk, tailq);
						nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, 0);
							ns->populated = true;
								ns->ctrlr->ref++;
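How attach_cb gets invoked in the first place: the driver enumerates controllers and, once a ctrlr reaches READY, calls the attach callback with its trid and opts. A minimal usage sketch of that entry point (the callback bodies here are stubs):

#include "spdk/nvme.h"

static bool
probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	 struct spdk_nvme_ctrlr_opts *opts)
{
	return true;	/* attach to every controller found */
}

static void
attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
{
	/* this is where bdev_nvme starts nvme_bdev_ctrlr_create(ctrlr, name, trid, prchk_flags) */
}

static int
probe_example(void *cb_ctx)
{
	/* NULL trid -> enumerate the local PCIe bus */
	return spdk_nvme_probe(NULL, cb_ctx, probe_cb, attach_cb, NULL);
}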
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------