字符设备与块设备I/O操作有一下不同:
1:块设备只能以块为单位接受输入和返回输出,而字符设备以字节为单位。大多数设备是字符设备,因为他们不需要缓冲而且不以固定块大小进行操作。
2:块设备对应I/O操作有对应的缓冲区,因此他们可以选择以什么顺序进行访问,字符设备无缓冲并且直接进行读写
3:字符设备只能顺序的读写,而块设备能够随机的访问。
弄懂Linux块设备驱动程序,必须理解块设备驱动相关的几个结构体
struct block_device_operations {
int (*open) (struct block_device *, fmode_t);
int (*release) (struct gendisk *, fmode_t);
int (*locked_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*direct_access) (struct block_device *, sector_t,
void **, unsigned long *);
int (*media_changed) (struct gendisk *);
unsigned long long (*set_capacity) (struct gendisk *,
unsigned long long);
int (*revalidate_disk) (struct gendisk *);
int (*getgeo)(struct block_device *, struct hd_geometry *);
struct module *owner;
};
和字符设备驱动一样,块设备也有对应的operatios结构体,与字符设备不同的是,该结构体指针不是注册block_device的时候传入的,注册block_device函数式register_blkdev(unsigned int major, const char * name),而block_device_operations 是在给gdisk结构体里的变量初始化的时候赋值调用的,disk->fops = &xd_fops;
struct request_queue
{
/*
* Together with queue_head for cacheline sharing
*/
struct list_head queue_head;
struct request *last_merge;
struct elevator_queue *elevator;
/*
* the queue request freelist, one for reads and one for writes
*/
struct request_list rq;
request_fn_proc *request_fn;
make_request_fn *make_request_fn;
prep_rq_fn *prep_rq_fn;
unplug_fn *unplug_fn;
merge_bvec_fn *merge_bvec_fn;
prepare_flush_fn *prepare_flush_fn;
softirq_done_fn *softirq_done_fn;
rq_timed_out_fn *rq_timed_out_fn;
dma_drain_needed_fn *dma_drain_needed;
lld_busy_fn *lld_busy_fn;
/*
* Dispatch queue sorting
*/
sector_t end_sector;
struct request *boundary_rq;
/*
* Auto-unplugging state
*/
struct timer_list unplug_timer;
int unplug_thresh; /* After this many requests */
unsigned long unplug_delay; /* After this many jiffies */
struct work_struct unplug_work;
struct backing_dev_info backing_dev_info;
/*
* The queue owner gets to use this for whatever they like.
* ll_rw_blk doesn't touch it.
*/
void *queuedata;
/*
* queue needs bounce pages for pages above this limit
*/
gfp_t bounce_gfp;
/*
* various queue flags, see QUEUE_* below
*/
unsigned long queue_flags;
/*
* protects queue structures from reentrancy. ->__queue_lock should
* _never_ be used directly, it is queue private. always use
* ->queue_lock.
*/
spinlock_t __queue_lock; //保护队列的自旋锁
spinlock_t *queue_lock;
/*
* queue kobject
*/
struct kobject kobj;
/*
* queue settings
*/
unsigned long nr_requests; /* Max # of requests */
unsigned int nr_congestion_on;
unsigned int nr_congestion_off;
unsigned int nr_batching;
void *dma_drain_buffer;
unsigned int dma_drain_size;
unsigned int dma_pad_mask;
unsigned int dma_alignment;
struct blk_queue_tag *queue_tags;
struct list_head tag_busy_list;
unsigned int nr_sorted;
unsigned int in_flight[2];
unsigned int rq_timeout;
struct timer_list timeout;
struct list_head timeout_list;
struct queue_limits limits;
/*
* sg stuff
*/
unsigned int sg_timeout;
unsigned int sg_reserved_size;
int node;
#ifdef CONFIG_BLK_DEV_IO_TRACE
struct blk_trace *blk_trace;
#endif
/*
* reserved for flush operations
*/
unsigned int ordered, next_ordered, ordseq;
int orderr, ordcolor;
struct request pre_flush_rq, bar_rq, post_flush_rq;
struct request *orig_bar_rq;
struct mutex sysfs_lock;
#if defined(CONFIG_BLK_DEV_BSG)
struct bsg_class_device bsg_dev;
#endif
};
一个块请求队列是一个块I/O请求的队列。请求队列跟踪等候的块I/O请求,它存储用于描叙这个设备能够支持的请求的类型信息,他们的最大大小,多少不同的段可以进入一个请求,硬件扇区的大小,对齐要求等参数,其结果是:如果请求队列被配置正确了,它不会交个该设备一个不能处理的请求。请求队列还实习了一个插入接口,这个接口允许使用多个I/O调度器(也称作电梯)的工作方式是以最优的方式向驱动提交I/O请求,在linux中提供了四种电梯调度算法,在noop-iosched.c, as-iosched.c, deadline-iosched.c, cfq-iosched.c分别实现了上述四种调度算法。
struct request {
struct list_head queuelist; //链表结构体
struct call_single_data csd;
int cpu;
struct request_queue *q;
unsigned int cmd_flags;
enum rq_cmd_type_bits cmd_type;
unsigned long atomic_flags;
/* the following two fields are internal, NEVER access directly */
sector_t __sector; /* sector cursor */
unsigned int __data_len; /* total data len */
struct bio *bio;
struct bio *biotail;
struct hlist_node hash; /* merge hash */
/*
* The rb_node is only used inside the io scheduler, requests
* are pruned when moved to the dispatch queue. So let the
* completion_data share space with the rb_node.
*/
union {
struct rb_node rb_node; /* sort/lookup */
void *completion_data;
};
/*
* two pointers are available for the IO schedulers, if they need
* more they have to dynamically allocate it.
*/
void *elevator_private;
void *elevator_private2;
struct gendisk *rq_disk;
unsigned long start_time;
/* Number of scatter-gather DMA addr+len pairs after
* physical address coalescing is performed.
*/
unsigned short nr_phys_segments;
unsigned short ioprio;
void *special; /* opaque pointer available for LLD use */
char *buffer; /* kaddr of the current segment if available */
int tag;
int errors;
int ref_count;
/*
* when request is used as a packet command carrier
*/
unsigned short cmd_len;
unsigned char __cmd[BLK_MAX_CDB];
unsigned char *cmd;
unsigned int extra_len; /* length of alignment and padding */
unsigned int sense_len;
unsigned int resid_len; /* residual count */
void *sense;
unsigned long deadline;
struct list_head timeout_list;
unsigned int timeout;
int retries;
/*
* completion callback.
*/
rq_end_io_fn *end_io;
void *end_io_data;
/* for bidi */
struct request *next_rq;
};
在Linux扇区中,都是以512Byte为一个扇区,如果硬件的扇区不是以512字节为一个扇区,则需要进行调整,
struct gendisk {
/* major, first_minor and minors are input parameters only,
* don't use directly. Use disk_devt() and disk_max_parts().
*/
int major; /* major number of driver */
int first_minor;
int minors; /* maximum number of minors, =1 for
* disks that can't be partitioned. */
char disk_name[DISK_NAME_LEN]; /* name of major driver */
char *(*devnode)(struct gendisk *gd, mode_t *mode);
/* Array of pointers to partitions indexed by partno.
* Protected with matching bdev lock but stat and other
* non-critical accesses use RCU. Always access through
* helpers.
*/
struct disk_part_tbl *part_tbl;
struct hd_struct part0;
const struct block_device_operations *fops;
struct request_queue *queue;
void *private_data;
int flags;
struct device *driverfs_dev; // FIXME: remove
struct kobject *slave_dir;
struct timer_rand_state *random;
atomic_t sync_io; /* RAID */
struct work_struct async_notify;
#ifdef CONFIG_BLK_DEV_INTEGRITY
struct blk_integrity *integrity;
#endif
int node_id;
};
在Linux内核中使用gendisk表示一个独立的磁盘设备,其中minors表示该磁盘中的分区数,用struct gendisk* allock_gendisk(int minors)来进行设置磁盘的分区数量。
struct bio {
sector_t bi_sector; /* device address in 512 byte
sectors */
struct bio *bi_next; /* request queue link */
struct block_device *bi_bdev;
unsigned long bi_flags; /* status, command, etc */
unsigned long bi_rw; /* bottom bits READ/WRITE,
* top bits priority
*/
unsigned short bi_vcnt; /* how many bio_vec's */
unsigned short bi_idx; /* current index into bvl_vec */
/* Number of segments in this BIO after
* physical address coalescing is performed.
*/
unsigned int bi_phys_segments;
unsigned int bi_size; /* residual I/O count */
/*
* To keep track of the max segment size, we account for the
* sizes of the first and last mergeable segments in this bio.
*/
unsigned int bi_seg_front_size;
unsigned int bi_seg_back_size;
unsigned int bi_max_vecs; /* max bvl_vecs we can hold */
unsigned int bi_comp_cpu; /* completion CPU */
atomic_t bi_cnt; /* pin count */
struct bio_vec *bi_io_vec; /* the actual vec list */
bio_end_io_t *bi_end_io;
void *bi_private;
#if defined(CONFIG_BLK_DEV_INTEGRITY)
struct bio_integrity_payload *bi_integrity; /* data integrity */
#endif
bio_destructor_t *bi_destructor; /* destructor */
/*
* We can inline a number of vecs at the end of the bio, to avoid
* double allocations for a small number of bio_vecs. This member
* MUST obviously be kept at the very end of the bio.
*/
struct bio_vec bi_inline_vecs[0];
};
通常一个bio对应一个I/O请求,IO调度器可以将连续的BIO进行合并成一个请求,所以一个请求可以包含多个bio,其中request 和 io的关系如下图
</pre><p></p><p></p><p><span style="color:#ff0000;">块设备驱动程序的例子参考linux代码中的 drivers\block\xd.c和 drivers\block\z2ram.c; 阅读分析原始代码是王道</span></p><p><span style="color:#000000;">一下这个例子是用内存来模拟磁盘</span></p><p><pre class="cpp" name="code">#include <linux/module.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/timer.h>
#include <linux/genhd.h>
#include <linux/hdreg.h>
#include <linux/ioport.h>
#include <linux/init.h>
#include <linux/wait.h>
#include <linux/blkdev.h>
#include <linux/blkpg.h>
#include <linux/delay.h>
#include <linux/io.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/dma.h>
static struct gendisk *ramblock_disk;
static request_queue_t *ramblock_queue;
static int major;
static DEFINE_SPINLOCK(ramblock_lock);
#define RAMBLOCK_SIZE (1024*1024)
static unsigned char *ramblock_buf;
static int ramblock_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
/* 容量=heads*cylinders*sectors*512 */
geo->heads = 2;
geo->cylinders = 32;
geo->sectors = RAMBLOCK_SIZE/2/32/512;
return 0;
}
static struct block_device_operations ramblock_fops = {
.owner = THIS_MODULE,
.getgeo = ramblock_getgeo,
};
static void do_ramblock_request(request_queue_t * q)
{
static int r_cnt = 0;
static int w_cnt = 0;
struct request *req;
//printk("do_ramblock_request %d\n", ++cnt);
while ((req = elv_next_request(q)) != NULL) {
/* 数据传输三要素: 源,目的,长度 */
/* 源/目的: */
unsigned long offset = req->sector * 512;
/* 目的/源: */
// req->buffer
/* 长度: */
unsigned long len = req->current_nr_sectors * 512;
if (rq_data_dir(req) == READ)
{
//printk("do_ramblock_request read %d\n", ++r_cnt);
memcpy(req->buffer, ramblock_buf+offset, len); //磁盘中的读操作与此不相同
}
else
{
//printk("do_ramblock_request write %d\n", ++w_cnt);
memcpy(ramblock_buf+offset, req->buffer, len); //磁盘中的写操作与此不相同
}
end_request(req, 1);
}
}
static int ramblock_init(void)
{
/* 1. 分配一个gendisk结构体 */
ramblock_disk = alloc_disk(16); /* 次设备号个数: 分区个数+1 */
/* 2. 设置 */
/* 2.1 分配/设置队列: 提供读写能力 */
ramblock_queue = blk_init_queue(do_ramblock_request, &ramblock_lock);
ramblock_disk->queue = ramblock_queue;
/* 2.2 设置其他属性: 比如容量 */
major = register_blkdev(0, "ramblock"); /* cat /proc/devices */
ramblock_disk->major = major;
ramblock_disk->first_minor = 0;
sprintf(ramblock_disk->disk_name, "ramblock");
ramblock_disk->fops = &ramblock_fops;
set_capacity(ramblock_disk, RAMBLOCK_SIZE / 512);
/* 3. 硬件相关操作 */
ramblock_buf = kzalloc(RAMBLOCK_SIZE, GFP_KERNEL);
/* 4. 注册 */
add_disk(ramblock_disk);
return 0;
}
static void ramblock_exit(void)
{
unregister_blkdev(major, "ramblock");
del_gendisk(ramblock_disk);
put_disk(ramblock_disk);
blk_cleanup_queue(ramblock_queue);
kfree(ramblock_buf);
}
module_init(ramblock_init);
module_exit(ramblock_exit);
MODULE_LICENSE("GPL");
上述代码编译加载后,运行mount /dev/ramblock /mnt,然后对mnt目录下进行文件的拷贝操作即是在虚拟磁盘上的操作。