Linux驱动开发---块设备驱动

块设备驱动(Linux kernel 4.9.x)

主要结构

  1. gendisk结构体:表示一个独立的磁盘设备(或分区)
1.1 定义如下:
/*
 * Excerpt of struct gendisk as quoted by the article (kernel 4.9.x).
 * The "..." line marks members the article left out, so this is not the
 * complete kernel definition.
 */
struct gendisk {
		/* major, first_minor and minors are input parameters only,
		 * don't use directly.  Use disk_devt() and disk_max_parts().
		 */
		int major;						/* major number of driver */
		int first_minor;
		int minors;                     /* maximum number of minors, =1 for
											 * disks that can't be partitioned. */

		char disk_name[DISK_NAME_LEN];	/* name of major driver */
		char *(*devnode)(struct gendisk *gd, umode_t *mode);

		unsigned int events;			/* supported events */
		unsigned int async_events;		/* async events, subset of all */

		/* Array of pointers to partitions indexed by partno.
		 * Protected with matching bdev lock but stat and other
		 * non-critical accesses use RCU.  Always access through
		 * helpers.
		 */
		struct disk_part_tbl __rcu *part_tbl;
		struct hd_struct part0;

		/* Operations table, request queue and driver-private pointer; the
		 * brd.c example in this article fills in all three before add_disk().
		 */
		const struct block_device_operations *fops;
		struct request_queue *queue;
		void *private_data;
		...
	};
	
1.2 一组操作gendisk函数
	/* Allocate a gendisk; the argument is the number of minors */
	struct gendisk *alloc_disk(int minors);	
	/* Add the gendisk (add_disk() is the variant without a parent device argument) */
	void device_add_disk(struct device *parent, struct gendisk *disk); 
	void add_disk(struct gendisk *disk);
	/* Set the gendisk capacity (size is a sector_t) */
	void set_capacity(struct gendisk *disk, sector_t size);	
	/* Delete the gendisk; the article's teardown example follows this call
	 * with put_disk() to drop the remaining reference */
	void del_gendisk(struct gendisk *gp);
  2. request、request_queue、bio结构
2.1 request结构体
/*
 * Excerpt of struct request as quoted by the article (kernel 4.9.x).
 * Each "..." line marks members the article left out, so the layout shown
 * here is not the complete kernel definition.
 */
struct request {
	struct list_head queuelist;
	union {
		struct call_single_data csd;
		u64 fifo_time;
	};

	struct request_queue *q;
	struct blk_mq_ctx *mq_ctx;

	int cpu;
	unsigned cmd_type;
	unsigned int cmd_flags;		/* op and common flags */
	req_flags_t rq_flags;
	unsigned long atomic_flags;

	/* the following two fields are internal, NEVER access directly */
	unsigned int __data_len;	/* total data len */
	sector_t __sector;		/* sector cursor */

	/* bio / biotail: presumably the first and last bio attached to this request */
	struct bio *bio;
	struct bio *biotail;
	...
	...
	struct gendisk *rq_disk;
	struct hd_struct *part;
	unsigned long start_time;
	...
	...
	unsigned short ioprio;

	void *special;		/* opaque pointer available for LLD use */

	int tag;
	int errors;

	/*
	 * when request is used as a packet command carrier
	 */
	unsigned char __cmd[BLK_MAX_CDB];
	unsigned char *cmd;
	unsigned short cmd_len;

	unsigned int extra_len;	/* length of alignment and padding */
	unsigned int sense_len;
	unsigned int resid_len;	/* residual count */
	void *sense;

	unsigned long deadline;
	struct list_head timeout_list;
	unsigned int timeout;
	int retries;

	/*
	 * completion callback.
	 */
	rq_end_io_fn *end_io;
	void *end_io_data;

	/* for bidi */
	struct request *next_rq;
};

2.2 request_queue结构体
/*
 * Excerpt of struct request_queue as quoted by the article (kernel 4.9.x).
 * Each "..." line marks members the article left out, so this is not the
 * complete kernel definition.
 */
struct request_queue {
	/*
	* Together with queue_head for cacheline sharing
	*/
	struct list_head	queue_head;
	struct request		*last_merge;
	struct elevator_queue	*elevator;
	int			nr_rqs[2];	/* # allocated [a]sync rqs */
	int			nr_rqs_elvpriv;	/* # allocated rqs w/ elvpriv */

	/*
	* If blkcg is not used, @q->root_rl serves all requests.  If blkcg
	* is used, root blkg allocates from @q->root_rl and all other
	* blkgs from their own blkg->rl.  Which one to use should be
	* determined using bio_request_list().
	*/
	struct request_list	root_rl;

	/*
	* Driver-supplied callbacks. In this article's examples,
	* blk_init_queue() is passed the request handler (z2ram.c) and
	* blk_queue_make_request() is passed the make-request handler (brd.c).
	*/
	request_fn_proc		*request_fn;
	make_request_fn		*make_request_fn;
	prep_rq_fn		*prep_rq_fn;
	unprep_rq_fn		*unprep_rq_fn;
	softirq_done_fn		*softirq_done_fn;
	rq_timed_out_fn		*rq_timed_out_fn;
	dma_drain_needed_fn	*dma_drain_needed;
	lld_busy_fn		*lld_busy_fn;
	init_rq_fn		*init_rq_fn;
	exit_rq_fn		*exit_rq_fn;

	struct blk_mq_ops	*mq_ops;

	unsigned int		*mq_map;

	/* sw queues */
	struct blk_mq_ctx __percpu	*queue_ctx;
	unsigned int		nr_queues;

	/* hw dispatch queues */
	struct blk_mq_hw_ctx	**queue_hw_ctx;
	unsigned int		nr_hw_queues;

	/*
	* Dispatch queue sorting
	*/
	sector_t		end_sector;
	struct request		*boundary_rq;
	...
	/*
	* The queue owner gets to use this for whatever they like.
	* ll_rw_blk doesn't touch it.
	*/
	void			*queuedata;

	/*
	* various queue flags, see QUEUE_* below
	*/
	unsigned long		queue_flags;

	/*
	* ida allocated id for this queue.  Used to index queues from
	* ioctx.
	*/
	int			id;

	/*
	* queue needs bounce pages for pages above this limit
	*/
	gfp_t			bounce_gfp;

	/*
	* protects queue structures from reentrancy. ->__queue_lock should
	* _never_ be used directly, it is queue private. always use
	* ->queue_lock.
	*/
	spinlock_t		__queue_lock;
	spinlock_t		*queue_lock;

	/*
	* queue kobject
	*/
	struct kobject kobj;

	/*
	* mq queue kobject
	*/
	struct kobject mq_kobj;
	...
	...
	/*
	* queue settings
	*/
	unsigned long		nr_requests;	/* Max # of requests */
	unsigned int		nr_congestion_on;
	unsigned int		nr_congestion_off;
	unsigned int		nr_batching;

	unsigned int		dma_drain_size;
	void			*dma_drain_buffer;
	unsigned int		dma_pad_mask;
	unsigned int		dma_alignment;

	struct blk_queue_tag	*queue_tags;
	struct list_head	tag_busy_list;

	unsigned int		nr_sorted;
	unsigned int		in_flight[2];
	/*
	* Number of active block driver functions for which blk_drain_queue()
	* must wait. Must be incremented around functions that unlock the
	* queue_lock internally, e.g. scsi_request_fn().
	*/
	unsigned int		request_fn_active;

	unsigned int		rq_timeout;
	struct timer_list	timeout;
	struct work_struct	timeout_work;
	struct list_head	timeout_list;

	struct list_head	icq_list;
	...
	...
	/* Queue limits; the brd.c example in this article writes limits.discard_* directly */
	struct queue_limits	limits;

	/*
	* sg stuff
	*/
	unsigned int		sg_timeout;
	unsigned int		sg_reserved_size;
	int			node;
#ifdef CONFIG_BLK_DEV_IO_TRACE
	struct blk_trace	*blk_trace;
#endif
	/*
	* for flush operations
	*/
	struct blk_flush_queue	*fq;

	struct list_head	requeue_list;
	spinlock_t		requeue_lock;
	struct delayed_work	requeue_work;

	struct mutex		sysfs_lock;

	int			bypass_depth;
	atomic_t		mq_freeze_depth;
	...
	...
	struct rcu_head		rcu_head;
	wait_queue_head_t	mq_freeze_wq;
	struct percpu_ref	q_usage_counter;
	struct list_head	all_q_node;

	struct blk_mq_tag_set	*tag_set;
	struct list_head	tag_set_list;
	struct bio_set		*bio_split;

	bool			mq_sysfs_init_done;

	size_t			cmd_size;
	void			*rq_alloc_data;
};

2.3 bio结构体
/*
 * Excerpt of struct bio as quoted by the article (kernel 4.9.x).
 * The "..." lines mark members the article left out, so this is not the
 * complete kernel definition.
 */
struct bio {
	struct bio		*bi_next;	/* request queue link */
	struct block_device	*bi_bdev;
	int			bi_error;
	unsigned int		bi_opf;		/* bottom bits req flags,
						* top bits REQ_OP. Use
						* accessors.
						*/
	unsigned short		bi_flags;	/* status, command, etc */
	unsigned short		bi_ioprio;

	struct bvec_iter	bi_iter;

	/* Number of segments in this BIO after
	* physical address coalescing is performed.
	*/
	unsigned int		bi_phys_segments;

	/*
	* To keep track of the max segment size, we account for the
	* sizes of the first and last mergeable segments in this bio.
	*/
	unsigned int		bi_seg_front_size;
	unsigned int		bi_seg_back_size;

	atomic_t		__bi_remaining;

	bio_end_io_t		*bi_end_io;

	void			*bi_private;
	...
	...

	unsigned short		bi_vcnt;	/* how many bio_vec's */

	/*
	* Everything starting with bi_max_vecs will be preserved by bio_reset()
	*/

	unsigned short		bi_max_vecs;	/* max bvl_vecs we can hold */

	atomic_t		__bi_cnt;	/* pin count */

	struct bio_vec		*bi_io_vec;	/* the actual vec list */

	struct bio_set		*bi_pool;

	/*
	* We can inline a number of vecs at the end of the bio, to avoid
	* double allocations for a small number of bio_vecs. This member
	* MUST obviously be kept at the very end of the bio.
	*/
	struct bio_vec		bi_inline_vecs[0];
};

2.4 bio_vec结构体
/*
 * One element of a bio's vector list (struct bio points at an array of
 * these through bi_io_vec).
 */
struct bio_vec {
	struct page	*bv_page;	/* page this segment refers to */
	unsigned int	bv_len;		/* presumably the segment length in bytes - confirm */
	unsigned int	bv_offset;	/* presumably the byte offset within bv_page - confirm */
};
  3. block_device_operations结构体:类似于字符设备中的file_operations结构体
/*
 * Table of driver callbacks for a block device, as quoted by the article
 * (kernel 4.9.x). A driver points gendisk->fops at one of these. Note that
 * it has no read/write members.
 */
struct block_device_operations {
		int (*open) (struct block_device *, fmode_t);
		void (*release) (struct gendisk *, fmode_t);
		int (*rw_page)(struct block_device *, sector_t, struct page *, bool);
		int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
		int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
		long (*direct_access)(struct block_device *, sector_t, void **, pfn_t *,
				long);
		unsigned int (*check_events) (struct gendisk *disk,
						unsigned int clearing);
		/* ->media_changed() is DEPRECATED, use ->check_events() instead */
		int (*media_changed) (struct gendisk *);
		void (*unlock_native_capacity) (struct gendisk *);
		int (*revalidate_disk) (struct gendisk *);
		/* fills in a struct hd_geometry describing the drive */
		int (*getgeo)(struct block_device *, struct hd_geometry *);
		/* this callback is with swap_lock and sometimes page table lock held */
		void (*swap_slot_free_notify) (struct block_device *, unsigned long);
		struct module *owner;
		const struct pr_ops *pr_ops;
};

其中int (*getgeo)(struct block_device *, struct hd_geometry *);是用来获取驱动器的信息,hd_geometry结构体中包含磁头、扇区、柱面等信息。

驱动的注册/注销、加载/卸载函数

  1. 注册与注销
/* Register a block device major number under the given name; returns an int status. */
int register_blkdev(unsigned int major, const char *name);
/* Unregister the major number again. Declared void in kernel 4.9 <linux/fs.h>: there is no status to check. */
void unregister_blkdev(unsigned int major, const char *name);
  2. 加载
    1)注册块设备;
    2)
    分配gendisk、初始化(设置)gendisk;
    设置gendisk容量;
    初始化请求队列、设置队列;
    3)添加gendisk。
    NOTE: 2)项内容并无前后顺序。

  3. 卸载
    1)释放gendisk(若有对gendisk的引用,也要释放);
    2)移除请求队列;
    3)注销块设备;
    NOTE: 注销块设备与前两步之间并无严格的前后顺序;但通常应先调用del_gendisk()删除gendisk,再移除请求队列。

  4. 实例:以drivers/block/z2ram.c为例(省略返回值判断)

/*
 * Module init for the z2ram driver, abridged by the article ("..." lines
 * stand for omitted code, including the error checks and the err label).
 * Visible sequence: register the major, allocate the gendisk, create the
 * request queue, fill in the gendisk, add it, then register the device
 * number region.
 */
static int __init z2_init(void)
{
	int ret;
	...
	
	if (register_blkdev(Z2RAM_MAJOR, DEVICE_NAME))			/* register the block device */
	goto err;

	ret = -ENOMEM;
	z2ram_gendisk = alloc_disk(1);							/* allocate the gendisk */
	...

	z2_queue = blk_init_queue(do_z2_request, &z2ram_lock);	/* initialise the request queue */
	...
	
	z2ram_gendisk->major = Z2RAM_MAJOR;
	z2ram_gendisk->first_minor = 0;
	z2ram_gendisk->fops = &z2_fops;
	sprintf(z2ram_gendisk->disk_name, "z2ram");

	/* the queue is attached before the disk is added */
	z2ram_gendisk->queue = z2_queue;
	add_disk(z2ram_gendisk);								/* add the gendisk */
	blk_register_region(MKDEV(Z2RAM_MAJOR, 0), Z2MINOR_COUNT, THIS_MODULE,
				z2_find, NULL, NULL);

	return 0;
	...
}

/*
 * Module exit for the z2ram driver, abridged by the article ("..." lines
 * stand for omitted code, which presumably is where i and j are used).
 * Undoes z2_init(): region, major number, gendisk, then the request queue.
 */
static void __exit z2_exit(void)
{
	int i, j;
	blk_unregister_region(MKDEV(Z2RAM_MAJOR, 0), Z2MINOR_COUNT);
	unregister_blkdev(Z2RAM_MAJOR, DEVICE_NAME);		/* unregister the block device */
	del_gendisk(z2ram_gendisk);							/* delete the gendisk */
	put_disk(z2ram_gendisk);							/* drop the reference to the gendisk */
	blk_cleanup_queue(z2_queue);						/* remove the request queue */
	...
	...
	return;
} 

块设备的open、release、ioctl等函数

该部分指的是:前面介绍的block_device_operations结构体下指向的函数。

块设备的I/O请求处理

  1. 使用请求队列:对于机械的磁盘设备,有助于提高系统的性能。
  2. 不使用请求队列:适用于存储卡、RAM盘等可真正随机访问、无法从请求队列逻辑中获益的块设备。

实例

  1. Ramdisk:利用内存(RAM)模拟磁盘,数据实际存储在内存中,以块设备的方式访问内存。
    /* Ram backed block device driver(drivers/block/brd.c) —不使用请求队列的块设备 */
/*
 * Allocate and set up RAM disk number i, abridged by the article ("..."
 * lines stand for omitted code, including the error paths).
 * Visible sequence: allocate the brd_device, allocate a queue and bind
 * brd_make_request to it, set the queue limits, then allocate and fill in
 * the gendisk. The disk is not added here; brd_init() calls add_disk().
 * Returns the new brd_device.
 */
static struct brd_device *brd_alloc(int i)
{
	struct brd_device *brd;
	struct gendisk *disk;

	brd = kzalloc(sizeof(*brd), GFP_KERNEL);
	...
	...

	brd->brd_queue = blk_alloc_queue(GFP_KERNEL);				/* allocate the "request queue" */
	...

	blk_queue_make_request(brd->brd_queue, brd_make_request);	/* bind the "make request" function */
	blk_queue_max_hw_sectors(brd->brd_queue, 1024);
	blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);

	blk_queue_physical_block_size(brd->brd_queue, PAGE_SIZE);

	/* discard settings, written straight into the queue limits */
	brd->brd_queue->limits.discard_granularity = PAGE_SIZE;
	blk_queue_max_discard_sectors(brd->brd_queue, UINT_MAX);
	brd->brd_queue->limits.discard_zeroes_data = 1;
	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue);
	...
	disk = brd->brd_disk = alloc_disk(max_part);
	...
	disk->major		= RAMDISK_MAJOR;
	disk->first_minor	= i * max_part;	/* each disk gets its own block of max_part minors */
	disk->fops		= &brd_fops;
	disk->private_data	= brd;
	disk->queue		= brd->brd_queue;
	disk->flags		= GENHD_FL_EXT_DEVT;
	sprintf(disk->disk_name, "ram%d", i);
	set_capacity(disk, rd_size * 2);	/* NOTE(review): the *2 presumably converts KiB to 512-byte sectors - confirm */

	return brd;
	...
}
/*
 * Module load (init) function for the RAM disk driver, abridged by the
 * article ("..." lines stand for omitted code).
 * Visible sequence: register the major (returning -EIO on failure),
 * allocate rd_nr devices with brd_alloc() and queue them on brd_devices,
 * add every disk, then register the device number region.
 */
static int __init brd_init(void)
{
	struct brd_device *brd, *next;
	int i;
	...
	...
	
	if (register_blkdev(RAMDISK_MAJOR, "ramdisk"))
		return -EIO;
	...

	for (i = 0; i < rd_nr; i++) {
		brd = brd_alloc(i);
		...
		list_add_tail(&brd->brd_list, &brd_devices);
	}

	/* point of no return */
	list_for_each_entry(brd, &brd_devices, brd_list)
		add_disk(brd->brd_disk);

	blk_register_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS,
				THIS_MODULE, brd_probe, NULL, NULL);

	pr_info("brd: module loaded\n");
	return 0;
}		
  2. IDE(Integrated Drive Electronics):集成驱动器电路。原名ATA接口,本意为将硬盘控制器与盘体集成在一起的硬盘驱动器。
    NOTE:关于IDE的代码均在内核目录drivers/ide/下。

小结

  1. 块设备和字符设备的I/O操作区别
    1)块设备只能以块为单位进行输入/输出;而字符设备则以字节为单位
    2)块设备对于I/O请求有缓冲区,可以选择以什么顺序进行响应;而字符设备则无需缓冲区,直接读写。
    —块设备的I/O操作中贯穿“请求”,会排队和组合。
    3)块设备可以随机访问(对于磁盘,组织顺序访问有助于提高访问效率);而字符设备只能顺序读写

  2. 块设备驱动程序
    1)驱动的任务:处理请求;请求的排队和整合由I/O调度算法解决。
    2)驱动的核心:请求处理函数或“制造请求”函数。
    3)驱动虽包含block_device_operations结构体(类似于字符设备中的file_operations结构体),但不再包含读写一类的成员函数。仅包含打开、释放及I/O 控制等与具体读写无关的函数。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值