在nvme_scan_ns_list->nvme_validate_ns->nvme_alloc_ns 中会通过alloc_disk_node来申请一个gendisk来描述一个独立的磁盘或者分区。
disk = alloc_disk_node(0, node);
if (!disk)
goto out_free_id;
disk->fops = &nvme_fops;
disk->private_data = ns;
disk->queue = ns->queue;
disk->flags = GENHD_FL_EXT_DEVT;
memcpy(disk->disk_name, disk_name, DISK_NAME_LEN);
ns->disk = disk;
这里给disk->fops赋值的是nvme_fops,代表对这个block device的操作。
/*
 * Block device operations for an NVMe namespace disk; this table is what
 * gets assigned to disk->fops when the gendisk is set up.
 */
static const struct block_device_operations nvme_fops = {
.owner = THIS_MODULE,
.ioctl = nvme_ioctl,
.compat_ioctl = nvme_compat_ioctl,
.open = nvme_open,
.release = nvme_release,
.getgeo = nvme_getgeo,
.revalidate_disk= nvme_revalidate_disk,
.pr_ops = &nvme_pr_ops,
};
最后调用device_add_disk(ctrl->device, ns->disk);来添加这个disk.
还会为这个disk在sysfs中对应目录的kobject创建一个attribute group。
if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
&nvme_ns_attr_group))
pr_warn("%s: failed to create sysfs group for identification\n",
ns->disk->disk_name);
这个attribute group提供如下属性:
/*
 * Attribute group created on the namespace disk's device kobject.
 * The attributes are listed in nvme_ns_attrs; nvme_ns_attrs_are_visible
 * is supplied as the visibility callback.
 */
static const struct attribute_group nvme_ns_attr_group = {
.attrs = nvme_ns_attrs,
.is_visible = nvme_ns_attrs_are_visible,
};
/* "nsid" device attribute: S_IRUGO mode, read via nsid_show, no store callback. */
static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL);
/*
 * NULL-terminated attribute list used by nvme_ns_attr_group.
 * NOTE(review): dev_attr_wwid, dev_attr_uuid and dev_attr_eui are defined
 * outside this excerpt.
 */
static struct attribute *nvme_ns_attrs[] = {
&dev_attr_wwid.attr,
&dev_attr_uuid.attr,
&dev_attr_eui.attr,
&dev_attr_nsid.attr,
NULL,
};
这里以nsid为例,首先通过nvme_get_ns_from_dev得到这个dev的namespace,然后输出namespace id即ns->ns_id
static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
return sprintf(buf, "%d\n", ns->ns_id);
}
在nvme_alloc_ns调用__nvme_revalidate_disk 来设置这个block的capacity
/*
 * Apply the identify-namespace data in @id to the namespace behind @disk.
 *
 * Updates the namespace's LBA shift, metadata size and extended-metadata
 * flag. Then, with the request queue frozen, it re-evaluates the integrity
 * profile, sets the queue's logical block size, sets the disk capacity and
 * configures discard when the controller's oncs has the DSM bit set.
 */
static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
{
struct nvme_ns *ns = disk->private_data;
u8 lbaf, pi_type;
u16 old_ms;
unsigned short bs;
/* Remember the previous metadata size so a change can be detected below. */
old_ms = ns->ms;
/* Index into id->lbaf[], taken from the masked flbas field. */
lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
ns->lba_shift = id->lbaf[lbaf].ds;
ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
/* The extended-metadata flag only counts when there is metadata at all. */
ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
/*
 * If identify namespace failed, use default 512 byte block size so
 * block layer can use before failing read/write for 0 capacity.
 */
/*
 * Derive the block size: lba_shift was loaded from id->lbaf[lbaf].ds; if
 * that field is 0 (no block size reported), default to 512 bytes (shift 9).
 */
if (ns->lba_shift == 0)
ns->lba_shift = 9;
/*
 * NOTE(review): bs is an unsigned short, so a shift of 16 or more would not
 * fit - confirm the possible range of ds.
 */
bs = 1 << ns->lba_shift;
/* XXX: PI implementation requires metadata equal t10 pi tuple size */
pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
id->dps & NVME_NS_DPS_PI_MASK : 0;
/* Every queue/disk parameter change below happens with the queue frozen. */
blk_mq_freeze_queue(disk->queue);
/*
 * Drop an already registered integrity profile when the PI type, the
 * metadata size or the block size changed, or when metadata is extended.
 */
if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
ns->ms != old_ms ||
bs != queue_logical_block_size(disk->queue) ||
(ns->ms && ns->ext)))
blk_integrity_unregister(disk);
ns->pi_type = pi_type;
/*
 * Set the block device's logical block size; bs is 1 << ns->lba_shift.
 * The helper stores the value in q->limits.logical_block_size, where q
 * is ns->queue.
 */
blk_queue_logical_block_size(ns->queue, bs);
/* Non-extended metadata with no profile registered: register one now. */
if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
nvme_init_integrity(ns);
/*
 * The key step: set_capacity() stores the size in disk->part0.nr_sects.
 * Capacity is forced to 0 when the namespace has metadata, is not the
 * "8 bytes of metadata with a PI type" case, and has no integrity profile
 * registered. Otherwise nsze is scaled from logical blocks to 512-byte
 * units.
 * NOTE(review): the shift count assumes lba_shift >= 9 - confirm.
 */
if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
set_capacity(disk, 0);
else
set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
/* Configure discard only when the controller's oncs has the DSM bit set. */
if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
nvme_config_discard(ns);
blk_mq_unfreeze_queue(disk->queue);
}
这个函数里调用了几个关键的函数,我们一个一个来看。
/*
 * Return the integrity descriptor embedded in the disk's request queue,
 * or NULL when no integrity profile has been set on it.
 */
static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
{
	struct blk_integrity *integrity = &disk->queue->integrity;

	return integrity->profile ? integrity : NULL;
}
其实就是看bi->profile是否为NULL:如果为NULL,blk_get_integrity就返回NULL,后面会调用nvme_init_integrity来注册integrity profile。nvme_init_integrity的实现如下:
{
struct blk_integrity integrity;
memset(&integrity, 0, sizeof(integrity));
switch (ns->pi_type) {
case NVME_NS_DPS_PI_TYPE3:
integrity.profile = &t10_pi_type3_crc;
integrity.tag_size = sizeof(u16) + sizeof(u32);
integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
break;
case NVME_NS_DPS_PI_TYPE1:
case NVME_NS_DPS_PI_TYPE2:
integrity.profile = &t10_pi_type1_crc;
integrity.tag_size = sizeof(u16);
integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
break;
default:
integrity.profile = NULL;
break;
}
integrity.tuple_size = ns->ms;
blk_integrity_register(ns->disk, &integrity);
blk_queue_max_integrity_segments(ns->queue, 1);
}
但是如果想让bi不为null,需要打开CONFIG_BLK_DEV_INTEGRITY
通过set_capacity 设定block的capacity,很简单只有一句话
/*
 * Set the capacity of @disk by storing @size in the nr_sects field of the
 * disk's part0 entry.
 * NOTE(review): the caller shown in these notes passes a count of 512-byte
 * units - confirm the unit against the gendisk definition.
 */
static inline void set_capacity(struct gendisk *disk, sector_t size)
{
disk->part0.nr_sects = size;
}
/*
 * Return the logical block size recorded in @q's limits.
 *
 * Falls back to 512 when @q is NULL or when no logical block size has
 * been set (the field is 0).
 */
static inline unsigned short queue_logical_block_size(struct request_queue *q)
{
	if (!q || !q->limits.logical_block_size)
		return 512;

	return q->limits.logical_block_size;
}
通过queue_logical_block_size来得到block size,如果limits.logical_block_size为0(即还没有设置)的话,默认用512,这正好和__nvme_revalidate_disk中的
if (ns->lba_shift == 0)
ns->lba_shift = 9;
相呼应。
与queue_logical_block_size对应的是通过
/*
 * Set the logical block size of queue @q to @size.
 *
 * The physical block size is raised to @size if it is currently smaller.
 * The minimum I/O size is then raised to the physical block size if it is
 * currently smaller than that.
 */
void blk_queue_logical_block_size(struct request_queue *q, unsigned short size)
{
	q->limits.logical_block_size = size;

	if (size > q->limits.physical_block_size)
		q->limits.physical_block_size = size;

	if (q->limits.physical_block_size > q->limits.io_min)
		q->limits.io_min = q->limits.physical_block_size;
}
来设定block size。
其次需要注意的是在设定这个block device的相关参数的时候需要通过blk_mq_freeze_queue(disk->queue);/blk_mq_unfreeze_queue(disk->queue); 保护起来.
disk = alloc_disk_node(0, node);
if (!disk)
goto out_free_id;
disk->fops = &nvme_fops;
disk->private_data = ns;
disk->queue = ns->queue;
disk->flags = GENHD_FL_EXT_DEVT;
memcpy(disk->disk_name, disk_name, DISK_NAME_LEN);
ns->disk = disk;
这里给disk->fops赋值的是nvme_fops,代表对这个block device的操作。
/*
 * Block device operations for an NVMe namespace disk; this table is what
 * gets assigned to disk->fops when the gendisk is set up.
 */
static const struct block_device_operations nvme_fops = {
.owner = THIS_MODULE,
.ioctl = nvme_ioctl,
.compat_ioctl = nvme_compat_ioctl,
.open = nvme_open,
.release = nvme_release,
.getgeo = nvme_getgeo,
.revalidate_disk= nvme_revalidate_disk,
.pr_ops = &nvme_pr_ops,
};
最后调用device_add_disk(ctrl->device, ns->disk);来添加这个disk.
还会为这个disk在sysfs中对应目录的kobject创建一个attribute group。
if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
&nvme_ns_attr_group))
pr_warn("%s: failed to create sysfs group for identification\n",
ns->disk->disk_name);
这个attribute group提供如下属性:
/*
 * Attribute group created on the namespace disk's device kobject.
 * The attributes are listed in nvme_ns_attrs; nvme_ns_attrs_are_visible
 * is supplied as the visibility callback.
 */
static const struct attribute_group nvme_ns_attr_group = {
.attrs = nvme_ns_attrs,
.is_visible = nvme_ns_attrs_are_visible,
};
/* "nsid" device attribute: S_IRUGO mode, read via nsid_show, no store callback. */
static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL);
/*
 * NULL-terminated attribute list used by nvme_ns_attr_group.
 * NOTE(review): dev_attr_wwid, dev_attr_uuid and dev_attr_eui are defined
 * outside this excerpt.
 */
static struct attribute *nvme_ns_attrs[] = {
&dev_attr_wwid.attr,
&dev_attr_uuid.attr,
&dev_attr_eui.attr,
&dev_attr_nsid.attr,
NULL,
};
这里以nsid为例,首先通过nvme_get_ns_from_dev得到这个dev的namespace,然后输出namespace id即ns->ns_id
static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
return sprintf(buf, "%d\n", ns->ns_id);
}
在nvme_alloc_ns调用__nvme_revalidate_disk 来设置这个block的capacity
/*
 * Apply the identify-namespace data in @id to the namespace behind @disk.
 *
 * Updates the namespace's LBA shift, metadata size and extended-metadata
 * flag. Then, with the request queue frozen, it re-evaluates the integrity
 * profile, sets the queue's logical block size, sets the disk capacity and
 * configures discard when the controller's oncs has the DSM bit set.
 */
static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
{
struct nvme_ns *ns = disk->private_data;
u8 lbaf, pi_type;
u16 old_ms;
unsigned short bs;
/* Remember the previous metadata size so a change can be detected below. */
old_ms = ns->ms;
/* Index into id->lbaf[], taken from the masked flbas field. */
lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
ns->lba_shift = id->lbaf[lbaf].ds;
ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
/* The extended-metadata flag only counts when there is metadata at all. */
ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
/*
 * If identify namespace failed, use default 512 byte block size so
 * block layer can use before failing read/write for 0 capacity.
 */
/*
 * Derive the block size: lba_shift was loaded from id->lbaf[lbaf].ds; if
 * that field is 0 (no block size reported), default to 512 bytes (shift 9).
 */
if (ns->lba_shift == 0)
ns->lba_shift = 9;
/*
 * NOTE(review): bs is an unsigned short, so a shift of 16 or more would not
 * fit - confirm the possible range of ds.
 */
bs = 1 << ns->lba_shift;
/* XXX: PI implementation requires metadata equal t10 pi tuple size */
pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
id->dps & NVME_NS_DPS_PI_MASK : 0;
/* Every queue/disk parameter change below happens with the queue frozen. */
blk_mq_freeze_queue(disk->queue);
/*
 * Drop an already registered integrity profile when the PI type, the
 * metadata size or the block size changed, or when metadata is extended.
 */
if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
ns->ms != old_ms ||
bs != queue_logical_block_size(disk->queue) ||
(ns->ms && ns->ext)))
blk_integrity_unregister(disk);
ns->pi_type = pi_type;
/*
 * Set the block device's logical block size; bs is 1 << ns->lba_shift.
 * The helper stores the value in q->limits.logical_block_size, where q
 * is ns->queue.
 */
blk_queue_logical_block_size(ns->queue, bs);
/* Non-extended metadata with no profile registered: register one now. */
if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
nvme_init_integrity(ns);
/*
 * The key step: set_capacity() stores the size in disk->part0.nr_sects.
 * Capacity is forced to 0 when the namespace has metadata, is not the
 * "8 bytes of metadata with a PI type" case, and has no integrity profile
 * registered. Otherwise nsze is scaled from logical blocks to 512-byte
 * units.
 * NOTE(review): the shift count assumes lba_shift >= 9 - confirm.
 */
if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
set_capacity(disk, 0);
else
set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
/* Configure discard only when the controller's oncs has the DSM bit set. */
if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
nvme_config_discard(ns);
blk_mq_unfreeze_queue(disk->queue);
}
这个函数里调用了几个关键的函数,我们一个一个来看。
/*
 * Return the integrity descriptor embedded in the disk's request queue,
 * or NULL when no integrity profile has been set on it.
 */
static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
{
	struct blk_integrity *integrity = &disk->queue->integrity;

	return integrity->profile ? integrity : NULL;
}
其实就是看bi->profile是否为NULL:如果为NULL,blk_get_integrity就返回NULL,后面会调用nvme_init_integrity来注册integrity profile。nvme_init_integrity的实现如下:
{
struct blk_integrity integrity;
memset(&integrity, 0, sizeof(integrity));
switch (ns->pi_type) {
case NVME_NS_DPS_PI_TYPE3:
integrity.profile = &t10_pi_type3_crc;
integrity.tag_size = sizeof(u16) + sizeof(u32);
integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
break;
case NVME_NS_DPS_PI_TYPE1:
case NVME_NS_DPS_PI_TYPE2:
integrity.profile = &t10_pi_type1_crc;
integrity.tag_size = sizeof(u16);
integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
break;
default:
integrity.profile = NULL;
break;
}
integrity.tuple_size = ns->ms;
blk_integrity_register(ns->disk, &integrity);
blk_queue_max_integrity_segments(ns->queue, 1);
}
但是如果想让bi不为null,需要打开CONFIG_BLK_DEV_INTEGRITY
通过set_capacity 设定block的capacity,很简单只有一句话
/*
 * Set the capacity of @disk by storing @size in the nr_sects field of the
 * disk's part0 entry.
 * NOTE(review): the caller shown in these notes passes a count of 512-byte
 * units - confirm the unit against the gendisk definition.
 */
static inline void set_capacity(struct gendisk *disk, sector_t size)
{
disk->part0.nr_sects = size;
}
/*
 * Return the logical block size recorded in @q's limits.
 *
 * Falls back to 512 when @q is NULL or when no logical block size has
 * been set (the field is 0).
 */
static inline unsigned short queue_logical_block_size(struct request_queue *q)
{
	if (!q || !q->limits.logical_block_size)
		return 512;

	return q->limits.logical_block_size;
}
通过queue_logical_block_size来得到block size,如果limits.logical_block_size为0(即还没有设置)的话,默认用512,这正好和__nvme_revalidate_disk中的
if (ns->lba_shift == 0)
ns->lba_shift = 9;
相呼应。
与queue_logical_block_size对应的是通过
/*
 * Set the logical block size of queue @q to @size.
 *
 * The physical block size is raised to @size if it is currently smaller.
 * The minimum I/O size is then raised to the physical block size if it is
 * currently smaller than that.
 */
void blk_queue_logical_block_size(struct request_queue *q, unsigned short size)
{
	q->limits.logical_block_size = size;

	if (size > q->limits.physical_block_size)
		q->limits.physical_block_size = size;

	if (q->limits.physical_block_size > q->limits.io_min)
		q->limits.io_min = q->limits.physical_block_size;
}
来设定block size。
其次需要注意的是在设定这个block device的相关参数的时候需要通过blk_mq_freeze_queue(disk->queue);/blk_mq_unfreeze_queue(disk->queue); 保护起来.