Device Mapper将虚拟块设备的逻辑地址范围划分为若干段,每一段按照特定的规则映射到底层设备。
映射设备
/*
* DM core internal structure that is used directly by dm.c and dm-rq.c.
* DM targets must _not_ dereference a mapped_device to directly access its members!
*/
struct mapped_device {
struct mutex suspend_lock;
struct mutex table_devices_lock;
struct list_head table_devices;
/*
* The current mapping (struct dm_table *).
* Use dm_get_live_table{_fast} or take suspend_lock for
* dereference.
*/
void __rcu *map;
/* Bit flags; dm_wq_work tests DMF_BLOCK_IO_FOR_SUSPEND in this word. */
unsigned long flags;
/* Protect queue and type against concurrent access. */
struct mutex type_lock;
enum dm_queue_mode type;
/* NUMA node passed to alloc_disk_node() in alloc_dev. */
int numa_node_id;
/* Request queue; allocated in alloc_dev, configured in dm_setup_md_queue. */
struct request_queue *queue;
/* NOTE(review): presumably a reference count of holders - confirm. */
atomic_t holders;
/* NOTE(review): presumably the number of current opens - confirm. */
atomic_t open_count;
struct dm_target *immutable_target;
struct target_type *immutable_target_type;
char name[16];
/* Disk allocated in alloc_dev; its disk_name is set to "dm-<minor>". */
struct gendisk *disk;
struct dax_device *dax_dev;
/*
* A list of ios that arrived while we were suspended.
*/
/* Work item initialised in alloc_dev with dm_wq_work as its handler. */
struct work_struct work;
wait_queue_head_t wait;
/* Held around bio_list_pop() on 'deferred' in dm_wq_work. */
spinlock_t deferred_lock;
struct bio_list deferred;
void *interface_ptr;
/*
* Event handling.
*/
wait_queue_head_t eventq;
atomic_t event_nr;
atomic_t uevent_seq;
struct list_head uevent_list;
spinlock_t uevent_lock; /* Protect access to uevent_list */
/* the number of internal suspends */
unsigned internal_suspend_count;
/*
* io objects are allocated from here.
*/
struct bio_set io_bs;
struct bio_set bs;
/*
* Processing queue (flush)
*/
/* Created in alloc_dev via alloc_workqueue("kdmflush", ...). */
struct workqueue_struct *wq;
/*
* freeze/thaw support require holding onto a super block
*/
struct super_block *frozen_sb;
/* forced geometry settings */
struct hd_geometry geometry;
/* kobject and completion */
struct dm_kobject_holder kobj_holder;
struct block_device *bdev;
struct dm_stats stats;
/* for blk-mq request-based DM support */
struct blk_mq_tag_set *tag_set;
bool init_tio_pdu:1;
/* SRCU state initialised with init_srcu_struct() in alloc_dev. */
struct srcu_struct io_barrier;
};
目标设备
/*
* Per-target state. Each dm_target points back to the dm_table it
* belongs to and to the target_type describing its implementation.
*/
struct dm_target {
/* Back-pointer to the table holding this target. */
struct dm_table *table;
/* Target type (see struct target_type). */
struct target_type *type;
/* target limits */
/* NOTE(review): presumably the start sector and length, in sectors, of the range this target covers - confirm. */
sector_t begin;
sector_t len;
/* If non-zero, maximum size of I/O submitted to a target. */
uint32_t max_io_len;
/*
* A number of zero-length barrier bios that will be submitted
* to the target for the purpose of flushing cache.
*
* The bio number can be accessed with dm_bio_get_target_bio_nr.
* It is a responsibility of the target driver to remap these bios
* to the real underlying devices.
*/
unsigned num_flush_bios;
/*
* The number of discard bios that will be submitted to the target.
* The bio number can be accessed with dm_bio_get_target_bio_nr.
*/
unsigned num_discard_bios;
/*
* The number of secure erase bios that will be submitted to the target.
* The bio number can be accessed with dm_bio_get_target_bio_nr.
*/
unsigned num_secure_erase_bios;
/*
* The number of WRITE SAME bios that will be submitted to the target.
* The bio number can be accessed with dm_bio_get_target_bio_nr.
*/
unsigned num_write_same_bios;
/*
* The number of WRITE ZEROES bios that will be submitted to the target.
* The bio number can be accessed with dm_bio_get_target_bio_nr.
*/
unsigned num_write_zeroes_bios;
/*
* The minimum number of extra bytes allocated in each io for the
* target to use.
*/
unsigned per_io_data_size;
/* target specific data */
void *private;
/* Used to provide an error string from the ctr */
char *error;
/*
* Set if this target needs to receive flushes regardless of
* whether or not its underlying devices have support.
*/
bool flush_supported:1;
/*
* Set if this target needs to receive discards regardless of
* whether or not its underlying devices have support.
*/
bool discards_supported:1;
/*
* Set if the target required discard bios to be split
* on max_io_len boundary.
*/
bool split_discard_bios:1;
};
映射规则
映射表
/*
* Mapping table of a mapped device: holds the array of targets together
* with the index data used to look them up.
*/
struct dm_table {
/* The mapped device this table belongs to. */
struct mapped_device *md;
enum dm_queue_mode type;
/* btree table */
unsigned int depth;
unsigned int counts[MAX_DEPTH]; /* in nodes */
sector_t *index[MAX_DEPTH];
/* NOTE(review): presumably targets in use vs. slots allocated in 'targets'/'highs' - confirm. */
unsigned int num_targets;
unsigned int num_allocated;
/* NOTE(review): presumably the highest sector of each target, used as lookup keys - confirm. */
sector_t *highs;
/* Array of targets making up this table. */
struct dm_target *targets;
struct target_type *immutable_target_type;
bool integrity_supported:1;
bool singleton:1;
unsigned integrity_added:1;
/*
* Indicates the rw permissions for the new logical
* device. This should be a combination of FMODE_READ
* and FMODE_WRITE.
*/
fmode_t mode;
/* a list of devices used by this table */
struct list_head devices;
/* events get handed up using this callback */
void (*event_fn)(void *);
/* NOTE(review): presumably the argument passed to event_fn - confirm. */
void *event_context;
struct dm_md_mempools *mempools;
struct list_head target_callbacks;
};
目标类型
/*
* Information about a target type: its identity (name, module, version)
* and the table of callbacks that implement it.
*/
struct target_type {
uint64_t features;
const char *name;
struct module *module;
/* NOTE(review): presumably {major, minor, patchlevel} - confirm. */
unsigned version[3];
/* NOTE(review): ctr/dtr are presumably the target constructor/destructor (dm_target.error is documented as "an error string from the ctr"). */
dm_ctr_fn ctr;
dm_dtr_fn dtr;
/* Mapping callback; the request-based variants follow it. */
dm_map_fn map;
dm_clone_and_map_request_fn clone_and_map_rq;
dm_release_clone_request_fn release_clone_rq;
/* I/O completion callbacks. */
dm_endio_fn end_io;
dm_request_endio_fn rq_end_io;
/* Suspend/resume callbacks. */
dm_presuspend_fn presuspend;
dm_presuspend_undo_fn presuspend_undo;
dm_postsuspend_fn postsuspend;
dm_preresume_fn preresume;
dm_resume_fn resume;
dm_status_fn status;
dm_message_fn message;
dm_prepare_ioctl_fn prepare_ioctl;
/* Only present when zoned block device support is configured. */
#ifdef CONFIG_BLK_DEV_ZONED
dm_report_zones_fn report_zones;
#endif
dm_busy_fn busy;
dm_iterate_devices_fn iterate_devices;
dm_io_hints_fn io_hints;
/* DAX callbacks. */
dm_dax_direct_access_fn direct_access;
dm_dax_copy_iter_fn dax_copy_from_iter;
dm_dax_copy_iter_fn dax_copy_to_iter;
/* For internal device-mapper use. */
struct list_head list;
};
dm模块初始化
module_init(dm_init)
dm_init调用_inits函数指针数组里的函数
创建映射设备
分配映射设备描述符
DM_DEV_CREATE
dev_create
–>dm_create
–>alloc_dev
–>dm_sysfs_init
alloc_dev
/*
* Allocate and initialise a blank device with a given minor.
*
* NOTE: this is an abridged excerpt. The declarations of md, r and
* numa_node_id, all error handling and the return statement are not
* shown here.
*/
static struct mapped_device *alloc_dev(int minor)
{
/* Allocate the mapped_device itself. */
md = kvzalloc_node(sizeof(*md), GFP_KERNEL, numa_node_id);
/* Initialise the SRCU state embedded in the device. */
r = init_srcu_struct(&md->io_barrier);
/* Allocate the request queue and point its private data back at md. */
md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id);
md->queue->queuedata = md;
md->queue->backing_dev_info->congested_data = md;
/* Allocate the gendisk. */
md->disk = alloc_disk_node(1, md->numa_node_id);
/* md->work will be handled by dm_wq_work. */
INIT_WORK(&md->work, dm_wq_work);
/* Wire the disk to the DM block operations, the queue and md. */
md->disk->fops = &dm_blk_dops;
md->disk->queue = md->queue;
md->disk->private_data = md;
/* The disk is named after its minor number: "dm-<minor>". */
sprintf(md->disk->disk_name, "dm-%d", minor);
/* Create the "kdmflush" workqueue. */
md->wq = alloc_workqueue("kdmflush", WQ_MEM_RECLAIM, 0);
}
加载映射表
DM_TABLE_LOAD
table_load
–>populate_table
–>dm_table_complete //映射表的类型???
–>dm_setup_md_queue
/*
 * Configure the request queue of @md according to the queue mode
 * reported by dm_get_md_type(), then compute the queue limits for table
 * @t, apply them to the queue and call blk_register_queue() on the disk.
 *
 * Returns 0 on success, or the non-zero error code returned by
 * dm_mq_init_request_queue() or dm_calculate_queue_limits().
 */
int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
{
	struct queue_limits limits;
	enum dm_queue_mode type = dm_get_md_type(md);
	int r;

	if (type == DM_TYPE_REQUEST_BASED) {
		/* Request-based device: set up the blk-mq request queue. */
		r = dm_mq_init_request_queue(md, t);
		if (r) {
			DMERR("Cannot initialize queue for request-based dm-mq mapped device");
			return r;
		}
	} else if (type == DM_TYPE_BIO_BASED ||
		   type == DM_TYPE_DAX_BIO_BASED ||
		   type == DM_TYPE_NVME_BIO_BASED) {
		/* Every bio-based flavour enters through dm_make_request. */
		dm_init_normal_md_queue(md);
		blk_queue_make_request(md->queue, dm_make_request);
	} else if (type == DM_TYPE_NONE) {
		/* Warn once, then fall through to the common limits setup below. */
		WARN_ON_ONCE(true);
	}

	r = dm_calculate_queue_limits(t, &limits);
	if (r) {
		DMERR("Cannot calculate initial queue limits");
		return r;
	}

	dm_table_set_restrictions(t, md->queue, &limits);
	blk_register_queue(md->disk);

	return 0;
}
恢复映射设备
DM_DEV_SUSPEND
数据关系图
以dm-name 0 4096 linear /dev/sda 0为例,成功创建好dm-0设备后,其在内存中的数据结构关系如图
请求执行
dm_make_request
–>queue_io–>dm_wq_work
–>dm_process_bio–>__split_and_process_bio–>generic_make_request
dm_wq_work
/*
 * Work handler for md->work: pops bios off md->deferred one at a time
 * and resubmits them. It stops when the list is empty or when
 * DMF_BLOCK_IO_FOR_SUSPEND is set in md->flags.
 */
static void dm_wq_work(struct work_struct *work)
{
	struct mapped_device *md;
	struct dm_table *live_map;
	struct bio *bio;
	int idx;

	md = container_of(work, struct mapped_device, work);
	live_map = dm_get_live_table(md, &idx);

	for (;;) {
		/* The flag is re-checked before every bio. */
		if (test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))
			break;

		/* The lock is held only while popping from the list. */
		spin_lock_irq(&md->deferred_lock);
		bio = bio_list_pop(&md->deferred);
		spin_unlock_irq(&md->deferred_lock);

		if (!bio)
			break;

		if (dm_request_based(md))
			(void) generic_make_request(bio);
		else
			(void) dm_process_bio(md, live_map, bio);
	}

	dm_put_live_table(md, idx);
}
dm设备在处理bio时,会创建bio的备份(克隆),然后通过target_type的map方法将克隆bio的bi_disk指向映射到的底层设备,再调用generic_make_request将其下发到该设备的request_queue中。