对于Nvme SSD,我们有的时候会用到ioctl系统调用,该调用的流程是怎样的呢?
首先,在注册nvme设备的时候,会初始化该设备的注册了file operations:
static const struct file_operations nvme_dev_fops = { .owner = THIS_MODULE, .open = nvme_dev_open, .release = nvme_dev_release, .unlocked_ioctl = nvme_dev_ioctl, .compat_ioctl = nvme_dev_ioctl, };
在nvme_dev_ioctl里,存在switch语句,列举ioctl的几种cmd,其中我们主要关注的是:NVME_IOCTL_ADMIN_CMD和NVME_IO_CMD。
static long nvme_dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct nvme_ctrl *ctrl = file->private_data; void __user *argp = (void __user *)arg; switch (cmd) { case NVME_IOCTL_ADMIN_CMD: return nvme_user_cmd(ctrl, NULL, argp); case NVME_IOCTL_IO_CMD: return nvme_dev_user_cmd(ctrl, argp); case NVME_IOCTL_RESET: dev_warn(ctrl->device, "resetting controller\n"); return ctrl->ops->reset_ctrl(ctrl); case NVME_IOCTL_SUBSYS_RESET: return nvme_reset_subsystem(ctrl); case NVME_IOCTL_RESCAN: nvme_queue_scan(ctrl); return 0; default: return -ENOTTY; } }
对于ssd的读写命令,显然是要走 NVME_IOCTL_IO_CMD这一分支,该分支的函数主要做的事情是填充了nvme_command c命令:
static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, struct nvme_passthru_cmd __user *ucmd) { struct nvme_passthru_cmd cmd; struct nvme_command c; unsigned timeout = 0; int status; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (copy_from_user(&cmd, ucmd, sizeof(cmd))) return -EFAULT; if (cmd.flags) return -EINVAL; memset(&c, 0, sizeof(c)); c.common.opcode = cmd.opcode; c.common.flags = cmd.flags; c.common.nsid = cpu_to_le32(cmd.nsid); c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); c.common.cdw10[0] = cpu_to_le32(cmd.cdw10); c.common.cdw10[1] = cpu_to_le32(cmd.cdw11); c.common.cdw10[2] = cpu_to_le32(cmd.cdw12); c.common.cdw10[3] = cpu_to_le32(cmd.cdw13); c.common.cdw10[4] = cpu_to_le32(cmd.cdw14); c.common.cdw10[5] = cpu_to_le32(cmd.cdw15); if (cmd.timeout_ms) timeout = msecs_to_jiffies(cmd.timeout_ms); status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, (void __user *)(uintptr_t)cmd.addr, cmd.data_len, &cmd.result, timeout); if (status >= 0) { if (put_user(cmd.result, &ucmd->result)) return -EFAULT; } return status; }
int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, void __user *ubuffer, unsigned bufflen, u32 *result, unsigned timeout) { return __nvme_submit_user_cmd(q, cmd, ubuffer, bufflen, NULL, 0, 0, result, timeout); } int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, void __user *ubuffer, unsigned bufflen, void __user *meta_buffer, unsigned meta_len, u32 meta_seed, u32 *result, unsigned timeout) { bool write = nvme_is_write(cmd); struct nvme_ns *ns = q->queuedata; struct gendisk *disk = ns ? ns->disk : NULL; struct request *req; struct bio *bio = NULL; void *meta = NULL; int ret; req = nvme_alloc_request(q, cmd, 0, NVME_QID_ANY); if (IS_ERR(req)) return PTR_ERR(req); req->timeout = timeout ? timeout : ADMIN_TIMEOUT; if (ubuffer && bufflen) { ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, GFP_KERNEL); if (ret) goto out; bio = req->bio; if (!disk) goto submit; bio->bi_bdev = bdget_disk(disk, 0); if (!bio->bi_bdev) { ret = -ENODEV; goto out_unmap; } if (meta_buffer && meta_len) { struct bio_integrity_payload *bip; meta = kmalloc(meta_len, GFP_KERNEL); if (!meta) { ret = -ENOMEM; goto out_unmap; } if (write) { if (copy_from_user(meta, meta_buffer, meta_len)) { ret = -EFAULT; goto out_free_meta; } } bip = bio_integrity_alloc(bio, GFP_KERNEL, 1); if (IS_ERR(bip)) { ret = PTR_ERR(bip); goto out_free_meta; } bip->bip_iter.bi_size = meta_len; bip->bip_iter.bi_sector = meta_seed; ret = bio_integrity_add_page(bio, virt_to_page(meta), meta_len, offset_in_page(meta)); if (ret != meta_len) { ret = -ENOMEM; goto out_free_meta; } } } submit: blk_execute_rq(req->q, disk, req, 0); if (nvme_req(req)->flags & NVME_REQ_CANCELLED) ret = -EINTR; else ret = nvme_req(req)->status; if (result) *result = le32_to_cpu(