块设备--块设备的架构

块设备

        字符设备通过 init_special_inode 为特殊 inode 设置文件操作函数指针。对于块设备,这部分架构是相同的,也是通过 init_special_inode 设置函数指针;不同之处在于:赋给字符设备的文件操作结构是 def_chr_fops,而赋给块设备的是 def_blk_fops。

/*
 * init_special_inode - set up the inode of a special file.
 * @inode: inode to initialise
 * @mode:  file mode; its type selects the file_operations table
 * @rdev:  device number, stored only for character and block devices
 *
 * Stores @mode in the inode and installs the default file_operations for
 * character devices, block devices, FIFOs and sockets.  Any other mode is
 * reported with a KERN_DEBUG message and i_fop is left untouched.
 */
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
	inode->i_mode = mode;

	if (S_ISCHR(mode)) {
		inode->i_fop = &def_chr_fops;
		inode->i_rdev = rdev;
		return;
	}

	if (S_ISBLK(mode)) {
		inode->i_fop = &def_blk_fops;
		inode->i_rdev = rdev;
		return;
	}

	if (S_ISFIFO(mode)) {
		inode->i_fop = &def_fifo_fops;
		return;
	}

	if (S_ISSOCK(mode)) {
		inode->i_fop = &bad_sock_fops;
		return;
	}

	/* Not a device, FIFO or socket: only log the unexpected mode. */
	printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n", mode);
}
EXPORT_SYMBOL(init_special_inode);

        init_special_inode 函数中用到的两个文件操作结构 def_chr_fops 和 def_blk_fops 定义如下:

/*
 * Placeholder file_operations for character special files.  It carries
 * only an open handler (chrdev_open), whose job is to fill in the correct
 * operations for the particular special file, plus a no-op llseek.
 */
const struct file_operations def_chr_fops = {
	.llseek	= noop_llseek,
	.open	= chrdev_open,
};

/*
 * Default file_operations for block special files; init_special_inode()
 * stores a pointer to this table in inode->i_fop when S_ISBLK(mode).
 */
const struct file_operations def_blk_fops = {
	/* open / close */
	.open		= blkdev_open,
	.release	= blkdev_close,

	/* positioning, mapping and syncing */
	.llseek		= block_llseek,
	.mmap		= generic_file_mmap,
	.fsync		= block_fsync,

	/* read side */
	.read		= generic_file_read,
	.aio_read	= generic_file_aio_read,
	.readv		= generic_file_readv,
	.sendfile	= generic_file_sendfile,
	.splice_read	= generic_file_splice_read,

	/* write side */
	.write		= blkdev_file_write,
	.aio_write	= blkdev_file_aio_write,
	.writev		= generic_file_write_nolock,
	.splice_write	= generic_file_splice_write,

	/* ioctl entry points */
	.unlocked_ioctl	= block_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= compat_blkdev_ioctl,
#endif
};

        与 def_chr_fops 只提供 open 和 llseek 不同,块设备的 def_blk_fops 直接提供了读、写、mmap、fsync、ioctl 等大量操作。


9.1 块设备的架构

        块设备常常和磁盘关联在一起,它的使用和管理比字符设备复杂。

9.1.1 块设备、磁盘对象 和队列

        块设备一般总和通用磁盘对象 gendisk 捆绑在一起。通用磁盘对象的结构定义如下所示,其中省略了当前不关心的内容:

/*
 * struct gendisk - generic disk object.
 *
 * Groups the device numbers and name of a disk with its partition table,
 * its block_device_operations table and its request queue.
 */
struct gendisk {
	/* major, first_minor and minors are input parameters only,
	 * don't use directly.  Use disk_devt() and disk_max_parts().
	 */
	int major;			/* major number of driver */
	int first_minor;		/* NOTE(review): presumably the first minor
					 * number assigned to this disk - confirm */
	int minors;                     /* maximum number of minors, =1 for
                                         * disks that can't be partitioned. */

	char disk_name[DISK_NAME_LEN];	/* name of major driver */
	/* Optional callback taking the disk and a mode pointer and returning
	 * a string.  NOTE(review): presumably yields the device node name -
	 * confirm against callers.
	 */
	char *(*devnode)(struct gendisk *gd, mode_t *mode);

	unsigned int events;		/* supported events */
	unsigned int async_events;	/* async events, subset of all */

	/* Array of pointers to partitions indexed by partno.
	 * Protected with matching bdev lock but stat and other
	 * non-critical accesses use RCU.  Always access through
	 * helpers.
	 */
	struct disk_part_tbl __rcu *part_tbl;
	/* Embedded partition object; dev_to_disk() maps its __dev member
	 * back to the enclosing gendisk.
	 */
	struct hd_struct part0;

	const struct block_device_operations *fops;	/* block device operations table */
	struct request_queue *queue;	/* request queue used by this disk */
	void *private_data;		/* opaque pointer; NOTE(review): presumably
					 * owned by the driver - confirm */

	int flags;
	struct device *driverfs_dev;  // FIXME: remove
	struct kobject *slave_dir;

	struct timer_rand_state *random;
	atomic_t sync_io;		/* RAID */
	struct disk_events *ev;
#ifdef  CONFIG_BLK_DEV_INTEGRITY
	struct blk_integrity *integrity;
#endif
	int node_id;
};

        通用磁盘对象是在计算机启动时扫描磁盘 或者 磁盘插入时,内核为物理磁盘创建的数据结构(光盘、磁带设备也用通用磁盘对象表示)。

        通用磁盘对象创建后,一般要在 /dev 目录下创建一个设备文件,这个设备文件被标明为块设备,并具有自己的磁盘名。所以通用磁盘对象创建在前,等用户打开块设备时,才会把块设备绑定到相关的通用磁盘对象。

        通用磁盘对象的结构定义即上文给出的 struct gendisk:


        其中省略了部分无关成员;结构成员首先是主、次设备号(major、first_minor、minors),然后是磁盘的名字(disk_name)。

         区别 块设备 和 字符设备 的最重要成员就是队列queue。所有对通用磁盘对象的I/O操作都要进入这个队列queue排队,然后再由内核处理。

         这里的块设备队列是笼统的说法,其实块设备使用的队列既包括块设备自身的队列,也包括块设备隐含的电梯对象的队列。

9.1.2 块设备 和 通用磁盘对象的绑定

        通用磁盘对象需要把自身注册到系统的管理链表中,这由 blk_register_region 函数实现:

/*
 * blk_register_region - add a range of device numbers to bdev_map.
 *
 * Registers device numbers devt..(devt+range-1); range must be nonzero.
 * The hash chain is sorted on range, so that subranges can override.
 * Every argument is handed to kobj_map() unchanged.
 */
void blk_register_region(dev_t devt, unsigned long range,
			 struct module *module,
			 struct kobject *(*probe)(dev_t, int *, void *),
			 int (*lock)(dev_t, void *),
			 void *data)
{
	kobj_map(bdev_map, devt, range, module, probe, lock, data);
}

        kobj_map 是一个熟悉的函数,在第5章中分析过,作用是把设备号注册到系统的管理链表。

        通过设备号获得通用磁盘对象,调用get_gendisk函数:

/*
 * get_gendisk - find the gendisk that owns a device number.
 * @devt:   device number to look up
 * @partno: output; receives the partition number
 *
 * Device numbers whose major is BLOCK_EXT_MAJOR are resolved through
 * ext_devt_idr while holding ext_devt_mutex; the disk is returned only if
 * a partition is found and get_disk() on its disk succeeds, in which case
 * *@partno is set from the partition.  All other majors are resolved with
 * kobj_lookup() on bdev_map, which is handed @partno directly.
 *
 * Returns the disk, or NULL when nothing matches.
 */
struct gendisk *get_gendisk(dev_t devt, int *partno)
{
	struct gendisk *found = NULL;
	struct hd_struct *ext_part;
	struct kobject *kobj;

	if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
		mutex_lock(&ext_devt_mutex);
		ext_part = idr_find(&ext_devt_idr,
				    blk_mangle_minor(MINOR(devt)));
		if (ext_part && get_disk(part_to_disk(ext_part))) {
			*partno = ext_part->partno;
			found = part_to_disk(ext_part);
		}
		mutex_unlock(&ext_devt_mutex);

		return found;
	}

	kobj = kobj_lookup(bdev_map, devt, partno);
	if (kobj)
		found = dev_to_disk(kobj_to_dev(kobj));

	return found;
}
EXPORT_SYMBOL(get_gendisk);



/*
 * dev_to_disk - recover the enclosing struct gendisk from a pointer to the
 * device stored at its part0.__dev member (via container_of).
 */
#define dev_to_disk(device)	container_of((device), struct gendisk, part0.__dev)




        kobj_lookup 也很熟悉:它根据设备号搜索 kobj 结构,然后通过 container_of 宏(即上面的 dev_to_disk)获得通用磁盘对象的结构指针。

        再次回顾init_special_inode函数对 块设备 的处理代码:

	} else if (S_ISBLK(mode)) {
		inode->i_fop = &def_blk_fops;
		inode->i_rdev = rdev;
	} else if (S_ISFIFO(mode))

        块设备的特殊inode的操作函数结构被设置为 def_blk_fops,而inode自身也保存了设备号,共同的设备号 把设备 和通用磁盘对象关联起来。


9.1.3 块设备的队列 和 队列处理函数

        通用磁盘对象包含一个请求队列指针 queue(struct request_queue *),块设备的队列其实是借用这个队列,队列结构定义:

/*
 * struct request_queue - request queue referenced by a gendisk.
 *
 * Holds the list of queued requests, a pointer to the attached elevator
 * object, a set of function pointers used while processing the queue, and
 * the queue's limits, locking and bookkeeping state.
 */
struct request_queue
{
	/*
	 * Together with queue_head for cacheline sharing
	 */
	struct list_head	queue_head;
	struct request		*last_merge;
	struct elevator_queue	*elevator;

	/*
	 * the queue request freelist, one for reads and one for writes
	 */
	struct request_list	rq;

	/*
	 * Queue processing callbacks.  The function types are declared
	 * outside this excerpt.
	 */
	request_fn_proc		*request_fn;
	make_request_fn		*make_request_fn;
	prep_rq_fn		*prep_rq_fn;
	unprep_rq_fn		*unprep_rq_fn;
	merge_bvec_fn		*merge_bvec_fn;
	softirq_done_fn		*softirq_done_fn;
	rq_timed_out_fn		*rq_timed_out_fn;
	dma_drain_needed_fn	*dma_drain_needed;
	lld_busy_fn		*lld_busy_fn;

	/*
	 * Dispatch queue sorting
	 */
	sector_t		end_sector;
	struct request		*boundary_rq;

	/*
	 * Delayed queue handling
	 */
	struct delayed_work	delay_work;

	struct backing_dev_info	backing_dev_info;

	/*
	 * The queue owner gets to use this for whatever they like.
	 * ll_rw_blk doesn't touch it.
	 */
	void			*queuedata;

	/*
	 * queue needs bounce pages for pages above this limit
	 */
	gfp_t			bounce_gfp;

	/*
	 * various queue flags, see QUEUE_* below
	 */
	unsigned long		queue_flags;

	/*
	 * protects queue structures from reentrancy. ->__queue_lock should
	 * _never_ be used directly, it is queue private. always use
	 * ->queue_lock.
	 */
	spinlock_t		__queue_lock;
	spinlock_t		*queue_lock;

	/*
	 * queue kobject
	 */
	struct kobject kobj;

	/*
	 * queue settings
	 */
	unsigned long		nr_requests;	/* Max # of requests */
	unsigned int		nr_congestion_on;
	unsigned int		nr_congestion_off;
	unsigned int		nr_batching;

	/* DMA drain buffer and DMA padding/alignment parameters */
	void			*dma_drain_buffer;
	unsigned int		dma_drain_size;
	unsigned int		dma_pad_mask;
	unsigned int		dma_alignment;

	/* Request tagging state */
	struct blk_queue_tag	*queue_tags;
	struct list_head	tag_busy_list;

	unsigned int		nr_sorted;
	unsigned int		in_flight[2];

	/* Request timeout value, timer and list */
	unsigned int		rq_timeout;
	struct timer_list	timeout;
	struct list_head	timeout_list;

	struct queue_limits	limits;

	/*
	 * sg stuff
	 */
	unsigned int		sg_timeout;
	unsigned int		sg_reserved_size;
	int			node;
#ifdef CONFIG_BLK_DEV_IO_TRACE
	struct blk_trace	*blk_trace;
#endif
	/*
	 * for flush operations
	 */
	unsigned int		flush_flags;
	unsigned int		flush_pending_idx:1;
	unsigned int		flush_running_idx:1;
	unsigned long		flush_pending_since;
	struct list_head	flush_queue[2];
	struct list_head	flush_data_in_flight;
	struct request		flush_rq;

	struct mutex		sysfs_lock;

#if defined(CONFIG_BLK_DEV_BSG)
	struct bsg_class_device bsg_dev;
#endif

#ifdef CONFIG_BLK_DEV_THROTTLING
	/* Throttle data */
	struct throtl_data *td;
#endif
};

        

这个结构包含了两点:

1)封装了电梯对象指针 elevator(类型为 struct elevator_queue *,即下文所说的 elevator_t)

2)队列中包含了众多的队列处理函数指针


        块设备一般具有连续读写快、随机读写慢的特征【硬盘这种机械装置的物理特性】。所有在块设备队列中排队的读写请求,需要经过 elevator_t 进行次序调整,然后才真正由块设备执行读写。

elevator_t结构提供一个框架,这个框架提供不同的调度算法------后文将分析块设备的调度算法;

        在内核的I/O处理流程中,需要调用队列中的处理函数,比如 make_request_fn 用来将I/O请求插入到队列,而 request_fn(类型为 request_fn_proc)则用来从队列中获得一个I/O请求。当完成I/O时,调用软中断处理函数 softirq_done_fn 处理。入队列的次序和出队列的次序可能不同,这是I/O调度算法提供的功能。

make_request_fn *make_request_fn;

request_fn_proc            *request_fn;
make_request_fn           *make_request_fn;
prep_rq_fn                     *prep_rq_fn;
unprep_rq_fn                 *unprep_rq_fn;
merge_bvec_fn              *merge_bvec_fn;
softirq_done_fn             *softirq_done_fn;
rq_timed_out_fn            *rq_timed_out_fn;
dma_drain_needed_fn  *dma_drain_needed;
lld_busy_fn                    *lld_busy_fn;

        






  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值