linux驱动——块设备驱动

在字符设备发出读写请求的时候,实际上硬件IO就发生了,但是块设备不同,块设备利用一块系统内存作为缓冲区,当用户进程对设备的请求能够满足用户的要求时,就返回请求的数据,如果不能就调用请求函数来进行实际的IO操作。块设备是针对慢速设备设计的,防止消耗过多的CPU时间来等待。与字符设备根本的区别在于是否能够被随机访问(在访问设备的时候随意地从一个位置跳转到另一个位置);
简单来讲,块设备通过系统缓存进行读取,不直接和物理磁盘进行读取,他是将读写放入到队列中,优化之后再执行,而字符设备可以直接读取物理磁盘不经过系统缓存;

			框架:
			应用层:open    read    write
			—————————————————— 文件的读写
			文件系统:vfat      ext2     ext3     yaffs2    (把文件的读写转换为扇区的读写)
			———————ll_rw_block——————扇区的读写       ll_rw_block(1.把读写放入队列 2.调用队列的处理函数(优化/调整顺序/合并))
							块设备驱动程序
			——————————————————
			硬件:硬盘、FLASH

ll_rw_block

/*
 * ll_rw_block - start read/write I/O on an array of buffer heads
 * @rw:  READ, WRITE or SWRITE (SWRITE sleeps on the buffer lock instead
 *       of skipping buffers that are already locked)
 * @nr:  number of entries in @bhs
 * @bhs: buffers to submit
 *
 * Writes are only submitted for buffers that are still dirty; reads
 * only for buffers that are not already up to date.  Buffers that need
 * no I/O are simply unlocked and skipped.
 */
void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])    /* rw: read/write; nr: size of bhs[]; bhs: the I/O buffers */
{
	int i;

	for (i = 0; i < nr; i++) {
		struct buffer_head *bh = bhs[i];

		if (rw == SWRITE)
			lock_buffer(bh);	/* SWRITE: wait until we own the buffer lock */
		else if (test_set_buffer_locked(bh))
			continue;		/* already locked elsewhere: skip this buffer */

		if (rw == WRITE || rw == SWRITE) {
			/* write only if the buffer was still dirty (clears the flag) */
			if (test_clear_buffer_dirty(bh)) {
				bh->b_end_io = end_buffer_write_sync;
				get_bh(bh);
				submit_bh(WRITE, bh);
				continue;
			}
		} else {
			/* read only if the buffer's contents are stale */
			if (!buffer_uptodate(bh)) {
				bh->b_end_io = end_buffer_read_sync;
				get_bh(bh);
				submit_bh(rw, bh);
				continue;
			}
		}
		unlock_buffer(bh);	/* no I/O needed for this buffer */
	}
}

submit_bh函数

/*
 * submit_bh - build a one-segment bio from a buffer_head and submit it
 * @rw: READ or WRITE (upgraded to WRITE_BARRIER for ordered buffers)
 * @bh: buffer head; must be locked, mapped, and have b_end_io set
 *
 * Returns 0, or -EOPNOTSUPP if the device rejected a barrier write.
 */
int submit_bh(int rw, struct buffer_head * bh)
{
	struct bio *bio;     /* the bh is used to construct a bio (block input/output) */
	int ret = 0;

	BUG_ON(!buffer_locked(bh));
	BUG_ON(!buffer_mapped(bh));
	BUG_ON(!bh->b_end_io);

	if (buffer_ordered(bh) && (rw == WRITE))
		rw = WRITE_BARRIER;

	/*
	 * Only clear out a write error when rewriting, should this
	 * include WRITE_SYNC as well?
	 */
	if (test_set_buffer_req(bh) && (rw == WRITE || rw == WRITE_BARRIER))
		clear_buffer_write_io_error(bh);

	/*
	 * from here on down, it's all bio -- do the initial mapping,
	 * submit_bio -> generic_make_request may further map this bio around
	 */
	bio = bio_alloc(GFP_NOIO, 1);

	/* block number -> starting 512-byte sector number */
	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio->bi_bdev = bh->b_bdev;
	/* single segment pointing at the bh's page/offset/length */
	bio->bi_io_vec[0].bv_page = bh->b_page;
	bio->bi_io_vec[0].bv_len = bh->b_size;
	bio->bi_io_vec[0].bv_offset = bh_offset(bh);

	bio->bi_vcnt = 1;
	bio->bi_idx = 0;
	bio->bi_size = bh->b_size;

	bio->bi_end_io = end_bio_bh_io_sync;
	bio->bi_private = bh;

	bio_get(bio);	/* hold an extra ref so we can inspect flags after submission */
	submit_bio(rw, bio);

	if (bio_flagged(bio, BIO_EOPNOTSUPP))
		ret = -EOPNOTSUPP;

	bio_put(bio);
	return ret;
}

submit_bio函数:

/*
 * submit_bio - account for the I/O and hand the bio to the block layer
 * @rw:  READ/WRITE flags, OR-ed into bio->bi_rw
 * @bio: fully constructed bio to submit
 */
void submit_bio(int rw, struct bio *bio)
{
	int count = bio_sectors(bio);

	BIO_BUG_ON(!bio->bi_size);
	BIO_BUG_ON(!bio->bi_io_vec);
	bio->bi_rw |= rw;
	/* VM page-in/page-out accounting */
	if (rw & WRITE) {
		count_vm_events(PGPGOUT, count);
	} else {
		task_io_account_read(bio->bi_size);
		count_vm_events(PGPGIN, count);
	}

	if (unlikely(block_dump)) {
		char b[BDEVNAME_SIZE];
		printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
			current->comm, current->pid,
			(rw & WRITE) ? "WRITE" : "READ",
			(unsigned long long)bio->bi_sector,
			bdevname(bio->bi_bdev,b));
	}

	generic_make_request(bio);    /* generic request construction: build a request from the bio and queue it */
}

generic_make_request函数:

/*
 * generic_make_request - deliver a bio to the device's request queue
 * @bio: bio to deliver (bi_next must be NULL on entry)
 *
 * Uses a per-task bio list (current->bio_list / bio_tail) to flatten
 * recursion: if a make_request handler is already active on this task,
 * the new bio is appended to the list and processed by the outer call,
 * keeping stack depth bounded for stacked devices (md/dm).
 */
void generic_make_request(struct bio *bio)
{
	if (current->bio_tail) {
		/* make_request is active */
		*(current->bio_tail) = bio;
		bio->bi_next = NULL;
		current->bio_tail = &bio->bi_next;
		return;
	}
	/* following loop may be a bit non-obvious, and so deserves some
	 * explanation.
	 * Before entering the loop, bio->bi_next is NULL (as all callers
	 * ensure that) so we have a list with a single bio.
	 * We pretend that we have just taken it off a longer list, so
	 * we assign bio_list to the next (which is NULL) and bio_tail
	 * to &bio_list, thus initialising the bio_list of new bios to be
	 * added.  __generic_make_request may indeed add some more bios
	 * through a recursive call to generic_make_request.  If it
	 * did, we find a non-NULL value in bio_list and re-enter the loop
	 * from the top.  In this case we really did just take the bio
	 * of the top of the list (no pretending) and so fixup bio_list and
	 * bio_tail or bi_next, and call into __generic_make_request again.
	 *
	 * The loop was structured like this to make only one call to
	 * __generic_make_request (which is important as it is large and
	 * inlined) and to keep the structure simple.
	 */
	BUG_ON(bio->bi_next);
	do {
		current->bio_list = bio->bi_next;
		if (bio->bi_next == NULL)
			current->bio_tail = &current->bio_list;
		else
			bio->bi_next = NULL;
		__generic_make_request(bio);
		bio = current->bio_list;
	} while (bio);
	current->bio_tail = NULL; /* deactivate */
}

__generic_make_request函数

/*
 * __generic_make_request - validate a bio and pass it to the queue
 * @bio: bio to route to its device's request queue
 *
 * Checks the bio against the device/partition size, remaps partition
 * sectors to whole-disk sectors, then calls the queue's
 * make_request_fn (default __make_request).  The loop re-runs when a
 * stacking driver's make_request_fn returns non-zero, meaning the bio
 * was redirected to another device.
 */
static inline void __generic_make_request(struct bio *bio)
{
	request_queue_t *q;
	sector_t maxsector;
	sector_t old_sector;
	int ret, nr_sectors = bio_sectors(bio);
	dev_t old_dev;

	might_sleep();
	/* Test device or partition size, when known. */
	maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
	if (maxsector) {
		sector_t sector = bio->bi_sector;

		if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
			/*
			 * This may well happen - the kernel calls bread()
			 * without checking the size of the device, e.g., when
			 * mounting a device.
			 */
			handle_bad_sector(bio);
			goto end_io;
		}
	}

	/*
	 * Resolve the mapping until finished. (drivers are
	 * still free to implement/resolve their own stacking
	 * by explicitly returning 0)
	 *
	 * NOTE: we don't repeat the blk_size check for each new device.
	 * Stacking drivers are expected to know what they are doing.
	 */
	old_sector = -1;
	old_dev = 0;
	do {
		char b[BDEVNAME_SIZE];

		q = bdev_get_queue(bio->bi_bdev);                     /* fetch the request queue (request_queue_t) for this block device */
		if (!q) {
			printk(KERN_ERR
			       "generic_make_request: Trying to access "
				"nonexistent block-device %s (%Lu)\n",
				bdevname(bio->bi_bdev, b),
				(long long) bio->bi_sector);
end_io:
			bio_endio(bio, bio->bi_size, -EIO);
			break;
		}

		if (unlikely(bio_sectors(bio) > q->max_hw_sectors)) {
			printk("bio too big device %s (%u > %u)\n", 
				bdevname(bio->bi_bdev, b),
				bio_sectors(bio),
				q->max_hw_sectors);
			goto end_io;
		}

		if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
			goto end_io;

		if (should_fail_request(bio))
			goto end_io;

		/*
		 * If this device has partitions, remap block n
		 * of partition p to block n+start(p) of the disk.
		 */
		blk_partition_remap(bio);

		if (old_sector != -1)
			blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, 
					    old_sector);

		blk_add_trace_bio(q, bio, BLK_TA_QUEUE);

		old_sector = bio->bi_sector;
		old_dev = bio->bi_bdev->bd_dev;

		/* re-check the (now remapped) sector range against the whole disk */
		maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
		if (maxsector) {
			sector_t sector = bio->bi_sector;

			if (maxsector < nr_sectors ||
					maxsector - nr_sectors < sector) {
				/*
				 * This may well happen - partitions are not
				 * checked to make sure they are within the size
				 * of the whole device.
				 */
				handle_bad_sector(bio);
				goto end_io;
			}
		}

		ret = q->make_request_fn(q, bio);         /* invoke the queue's request-construction function */
	} while (ret);
}

make_request_fn的默认请求函数是__make_request

/*
 * __make_request - default make_request_fn: merge or enqueue a bio
 * @q:   the device's request queue
 * @bio: incoming bio
 *
 * First asks the elevator (I/O scheduler) to merge the bio into an
 * existing request on the queue (at its back or front).  If no merge
 * is possible, a fresh request is allocated, initialised from the bio
 * and added to the queue.  Sync bios additionally unplug the queue so
 * the driver's request_fn runs immediately.
 */
static int __make_request(request_queue_t *q, struct bio *bio)
{
	struct request *req;
	int el_ret, nr_sectors, barrier, err;
	const unsigned short prio = bio_prio(bio);
	const int sync = bio_sync(bio);
	int rw_flags;

	nr_sectors = bio_sectors(bio);

	/*
	 * low level driver can indicate that it wants pages above a
	 * certain limit bounced to low memory (ie for highmem, or even
	 * ISA dma in theory)
	 */
	blk_queue_bounce(q, &bio);

	barrier = bio_barrier(bio);
	if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
		err = -EOPNOTSUPP;
		goto end_io;
	}

	spin_lock_irq(q->queue_lock);

	/* barriers never merge; an empty queue has nothing to merge with */
	if (unlikely(barrier) || elv_queue_empty(q))
		goto get_rq;

	el_ret = elv_merge(q, &req, bio);                         /* elevator: try to merge this bio into an existing request on q */
	switch (el_ret) {
		case ELEVATOR_BACK_MERGE:
			/* append the bio to the tail of request req */
			BUG_ON(!rq_mergeable(req));

			if (!ll_back_merge_fn(q, req, bio))
				break;

			blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);

			req->biotail->bi_next = bio;
			req->biotail = bio;
			req->nr_sectors = req->hard_nr_sectors += nr_sectors;
			req->ioprio = ioprio_best(req->ioprio, prio);
			drive_stat_acct(req, nr_sectors, 0);
			if (!attempt_back_merge(q, req))
				elv_merged_request(q, req, el_ret);
			goto out;

		case ELEVATOR_FRONT_MERGE:
			/* prepend the bio to the head of request req */
			BUG_ON(!rq_mergeable(req));

			if (!ll_front_merge_fn(q, req, bio))
				break;

			blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);

			bio->bi_next = req->bio;
			req->bio = bio;

			/*
			 * may not be valid. if the low level driver said
			 * it didn't need a bounce buffer then it better
			 * not touch req->buffer either...
			 */
			req->buffer = bio_data(bio);
			req->current_nr_sectors = bio_cur_sectors(bio);
			req->hard_cur_sectors = req->current_nr_sectors;
			req->sector = req->hard_sector = bio->bi_sector;
			req->nr_sectors = req->hard_nr_sectors += nr_sectors;
			req->ioprio = ioprio_best(req->ioprio, prio);
			drive_stat_acct(req, nr_sectors, 0);
			if (!attempt_front_merge(q, req))
				elv_merged_request(q, req, el_ret);
			goto out;

		/* ELV_NO_MERGE: elevator says don't/can't merge. */
		default:
			;
	}

get_rq:
	/*
	 * This sync check and mask will be re-done in init_request_from_bio(),
	 * but we need to set it earlier to expose the sync flag to the
	 * rq allocator and io schedulers.
	 */
	rw_flags = bio_data_dir(bio);
	if (sync)
		rw_flags |= REQ_RW_SYNC;

	/*
	 * Grab a free request. This is might sleep but can not fail.
	 * Returns with the queue unlocked.
	 */
	req = get_request_wait(q, rw_flags, bio);

	/*
	 * After dropping the lock and possibly sleeping here, our request
	 * may now be mergeable after it had proven unmergeable (above).
	 * We don't worry about that case for efficiency. It won't happen
	 * often, and the elevators are able to handle it.
	 */
	init_request_from_bio(req, bio);     /* merge failed: build a brand-new request from the bio */

	spin_lock_irq(q->queue_lock);
	if (elv_queue_empty(q))  
		blk_plug_device(q);
	add_request(q, req);                /* could not merge: add the new request to the queue */
out:
	if (sync)
		__generic_unplug_device(q);      /* run the queue now for sync I/O; otherwise it runs later at unplug time */

	spin_unlock_irq(q->queue_lock);
	return 0;

end_io:
	bio_endio(bio, nr_sectors << 9, err);
	return 0;
}

电梯调度算法:为了一次尽可能地合并;
执行队列__generic_unplug_device函数:

/*
 * __generic_unplug_device - "unplug" the queue and start processing it
 * @q: request queue; no-op if the queue is stopped or was not plugged
 */
void __generic_unplug_device(request_queue_t *q)
{
	if (unlikely(blk_queue_stopped(q)))
		return;

	if (!blk_remove_plug(q))
		return;

	q->request_fn(q);        /* call the queue's request handler (the driver's strategy routine) */
}

编写块设备驱动程序:
1.分配gendisk结构体:alloc_disk
2.设置结构体
2.1分配/设置队列:request_queue_t //提供读写能力
blk_init_queue
2.2设置gendisk其他信息 //提供其他属性:容量
3.注册结构体:add_disk

参考文件:
drivers/block/xd.c
drivers/block/z2ram.c

从驱动入口module_init(xd_init);
xd_init中驱动注册:
module_init(xd_init)的xd_init

/*
 * xd_init - XT hard disk driver initialisation
 *
 * Reference example of the standard block-driver setup sequence:
 * register_blkdev -> blk_init_queue -> alloc_disk + fill gendisk ->
 * add_disk, with goto-based unwinding of every step on failure.
 */
static int __init xd_init(void)
{
	u_char i,controller;
	unsigned int address;
	int err;

#ifdef MODULE
	{
		/* shift module parameters up one slot; xd[0] becomes the count */
		u_char count = 0;
		for (i = 4; i > 0; i--)
			if (((xd[i] = xd[i-1]) >= 0) && !count)
				count = i;
		if ((xd[0] = count))
			do_xd_setup(xd);
	}
#endif

	init_timer (&xd_watchdog_int); xd_watchdog_int.function = xd_watchdog;

	if (!xd_dma_buffer)
		xd_dma_buffer = (char *)xd_dma_mem_alloc(xd_maxsectors * 0x200);
	if (!xd_dma_buffer) {
		printk(KERN_ERR "xd: Out of memory.\n");
		return -ENOMEM;
	}

	err = -EBUSY;
	if (register_blkdev(XT_DISK_MAJOR, "xd"))                       /* register the block device major */
		goto out1;

	err = -ENOMEM;
	xd_queue = blk_init_queue(do_xd_request, &xd_lock);             /* allocate the request queue, passing the handler do_xd_request */
	if (!xd_queue)
		goto out1a;

	if (xd_detect(&controller,&address)) {

		printk("Detected a%s controller (type %d) at address %06x\n",
			xd_sigs[controller].name,controller,address);
		if (!request_region(xd_iobase,4,"xd")) {
			printk("xd: Ports at 0x%x are not available\n",
				xd_iobase);
			goto out2;
		}
		if (controller)
			xd_sigs[controller].init_controller(address);
		xd_drives = xd_initdrives(xd_sigs[controller].init_drive);
		
		printk("Detected %d hard drive%s (using IRQ%d & DMA%d)\n",
			xd_drives,xd_drives == 1 ? "" : "s",xd_irq,xd_dma);
	}

	err = -ENODEV;
	if (!xd_drives)
		goto out3;

	for (i = 0; i < xd_drives; i++) {
		XD_INFO *p = &xd_info[i];
		struct gendisk *disk = alloc_disk(64);                  /* allocate a gendisk with 64 minors per drive */
		if (!disk)
			goto Enomem;
		p->unit = i;
		disk->major = XT_DISK_MAJOR;
		disk->first_minor = i<<6;
		sprintf(disk->disk_name, "xd%c", i+'a');
		disk->fops = &xd_fops;
		disk->private_data = p;
		disk->queue = xd_queue;                                 /* all drives share the one allocated request queue */
		set_capacity(disk, p->heads * p->cylinders * p->sectors);
		printk(" %s: CHS=%d/%d/%d\n", disk->disk_name,
			p->cylinders, p->heads, p->sectors);
		xd_gendisk[i] = disk;
	}

	err = -EBUSY;
	if (request_irq(xd_irq,xd_interrupt_handler, 0, "XT hard disk", NULL)) {
		printk("xd: unable to get IRQ%d\n",xd_irq);
		goto out4;
	}

	if (request_dma(xd_dma,"xd")) {
		printk("xd: unable to get DMA%d\n",xd_dma);
		goto out5;
	}

	/* xd_maxsectors depends on controller - so set after detection */
	blk_queue_max_sectors(xd_queue, xd_maxsectors);

	for (i = 0; i < xd_drives; i++)
		add_disk(xd_gendisk[i]);							/* register each disk with the block layer */

	return 0;

out5:
	free_irq(xd_irq, NULL);
out4:
	for (i = 0; i < xd_drives; i++)
		put_disk(xd_gendisk[i]);
out3:
	release_region(xd_iobase,4);
out2:
	blk_cleanup_queue(xd_queue);
out1a:
	unregister_blkdev(XT_DISK_MAJOR, "xd");
out1:
	if (xd_dma_buffer)
		xd_dma_mem_free((unsigned long)xd_dma_buffer,
				xd_maxsectors * 0x200);
	return err;
Enomem:
	err = -ENOMEM;
	while (i--)
		put_disk(xd_gendisk[i]);
	goto out3;
}

编写块驱动程序基本框架:
内存模拟块设备

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/timer.h>
#include <linux/genhd.h>
#include <linux/hdreg.h>
#include <linux/ioport.h>
#include <linux/init.h>
#include <linux/wait.h>
#include <linux/blkdev.h>
#include <linux/blkpg.h>
#include <linux/delay.h>
#include <linux/io.h>

#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/dma.h>

static struct gendisk *ramblock_disk;     //gendisk结构体
static request_queue_t *ramblock_queue;   //分配设置队列返回的值
static int major;  //主设备号,可自己确定也可以由系统自动分配
static DEFINE_SPINLOCK(ramblock_lock);    //自旋锁
#define RAMBLOCK_SIZE 1024*1024//容量的大小
static unsigned char *ramblock_buf;   //硬件操作缓冲区

/*
 * Report a fake CHS geometry so partitioning tools (fdisk) can work
 * on this RAM-backed disk.  The invariant is:
 *   capacity = heads * cylinders * sectors * 512 == RAMBLOCK_SIZE
 */
static int ramblock_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
	const int heads = 2;
	const int cylinders = 32;

	geo->heads     = heads;
	geo->cylinders = cylinders;
	/* derive sectors-per-track from the fixed total capacity */
	geo->sectors   = RAMBLOCK_SIZE / heads / cylinders / 512;
	return 0;
}

/*
 * Block device operations.  This RAM disk needs no open/release/ioctl;
 * only getgeo is supplied so fdisk can obtain a geometry to partition.
 */
static struct block_device_operations ramblock_fops = {
	.owner	= THIS_MODULE,
	.getgeo = ramblock_getgeo,            /* provided so the device can be partitioned with fdisk */
};

/*
 * Request-queue handler: called by the block layer when queued
 * requests should be processed.  Because the "disk" is just RAM,
 * every request is served immediately with a memcpy.
 *
 * (Fixed: removed an unused `static int cnt` local from the original.)
 */
static void do_ramblock_request(request_queue_t *q)
{
	struct request *req;

	printk("do_ramblock_request\n");

	/* Take requests off the queue (already sorted/merged by the
	 * elevator algorithm) until it is empty. */
	while ((req = elv_next_request(q)) != NULL) {
		/* A transfer needs three things: source, destination, length. */
		unsigned long offset = req->sector * 512;		/* byte offset on the "disk" */
		unsigned long len = req->current_nr_sectors * 512;	/* bytes to transfer */

		if (rq_data_dir(req) == READ)
			memcpy(req->buffer, ramblock_buf + offset, len);	/* read: disk -> request buffer */
		else
			memcpy(ramblock_buf + offset, req->buffer, len);	/* write: request buffer -> disk */

		end_request(req, 1);	/* 0 = fail, 1 = success */
	}
}


//入口函数
static int ramblock_init(void){
/*1.分配一个gendisk结构体*/
ramblock_disk = alloc_disk(16);    //次设备号个数:分区个数+1,写16最多只能创建15个分区

/*2.设置*/
/*2.1  分配/设置一个队列   提供读写能力*/
ramblock_queue = blk_init_queue(do_ramblock_request,&ramblock_lock);   //do_ramblock__request处理队列函数,ramblock_lock自旋锁
ramblock_disk->queue = ramblock_queue;      	   //设置队列
/*2.2  设置其他属性,比如容量*/
major= register_blkdev(0,"ramblock");     //注册一个主设备号,与字符设备驱动相比少了fileoperation
ramblock_disk->major = major;             //主设备号
ramblock_disk->first_minor = 0;           //第一个次设备号
sprintf(ramblock_disk->disk_name, "ramblock");     //名字
ramblock_disk->fops = &ramblock_fops;              //一个空的操作函数      

set_capacity(ramblock_disk, RAMBLOCK_SIZE/512);    //设置扇区容量,内核中认为扇区是512字节


/*3.硬件相关操作*/
ramblock_buf = kzalloc(RAMBLOCK_SIZE,GFP_KERNEL);


/*4.注册*/
add_disk(ramblock_disk);

return 0;
}

//出口函数
/*
 * Module exit: tear everything down in the reverse order of
 * ramblock_init.
 *
 * (Fixed: the original called unregister_blkdev before del_gendisk,
 * dropping the major number while the disk was still registered; the
 * disk must be removed first.)
 */
static void ramblock_exit(void)
{
	del_gendisk(ramblock_disk);		/* remove the disk from the system */
	put_disk(ramblock_disk);		/* drop our gendisk reference */
	blk_cleanup_queue(ramblock_queue);	/* release the request queue */
	unregister_blkdev(major, "ramblock");	/* give back the major number */
	kfree(ramblock_buf);			/* free the backing RAM buffer */
}



module_init(ramblock_init);
module_exit(ramblock_exit);
MODULE_LICENSE("GPL");

修改Makefile,将驱动文件以模块的方式编译进内核;

测试步骤:
1.insmod ramblock.ko
2.ls /dev/ramblock*
3.分区:fdisk /dev/ramblock
4.格式化:mkdosfs /dev/ramblock
5.挂接:mount /dev/ramblock /temp/ 挂接到temp目录中去
6.读写文件:cd /temp 在里面读写文件
7.umount /temp
8.cat /dev/ramblock > /mnt/ramblock.bin 将整个磁盘映像拷贝到/mnt/ramblock.bin
9.在pc上查看ramblock.bin sudo mount -o loop ramblock.bin /mnt (-o loop 将普通文件当做块设备进行挂接)

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值