Linux IO Scheduler -- NOOP (no operation)

Each block device, and each partition of a block device, has its own request queue (request_queue),
and every request queue can select an I/O scheduler to coordinate the requests submitted to it.
The basic aim of an I/O scheduler is to order requests by the sector numbers they address on the block device, reducing disk-head movement and improving efficiency.

Every request_queue holds a queue of requests that are serviced in order.
In addition to this queue, each scheduler maintains its own internal queues (their number varies from scheduler to scheduler) in which submitted requests are processed;
at the appropriate time, the request at the front of an internal queue is moved onto the request_queue to await service.

The noop scheduler (no operation) is the simplest I/O scheduler: at heart it is a FIFO queue implemented as a linked list, performing only simple merging of requests.
This scheduler suits SSDs best, being intended for random-access devices where seek-order sorting gains nothing.

First, the data structures that describe an elevator.
Two structures are involved: elevator_type and elevator_queue.
The former represents a scheduler type, the latter a scheduler queue.
If the kernel has four scheduler types compiled in, there are exactly four elevator_type instances, while block devices (and their partitions) each get their own queue, so there can be many elevator_queue instances.
The key member of both structures is struct elevator_ops, which defines the set of operations
that implement the scheduling algorithm for a request queue.

./include/linux/elevator.h
typedef int (elevator_merge_fn) (struct request_queue *, struct request **,
                                 struct bio *);

typedef void (elevator_merge_req_fn) (struct request_queue *, struct request *, struct request *);

typedef void (elevator_merged_fn) (struct request_queue *, struct request *, int);

typedef int (elevator_allow_merge_fn) (struct request_queue *, struct request *, struct bio *);

typedef int (elevator_dispatch_fn) (struct request_queue *, int);

typedef void (elevator_add_req_fn) (struct request_queue *, struct request *);
typedef int (elevator_queue_empty_fn) (struct request_queue *);
typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct request *);
typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *);
typedef int (elevator_may_queue_fn) (struct request_queue *, int);

typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, gfp_t);
typedef void (elevator_put_req_fn) (struct request *);
typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *);
typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *);

typedef void *(elevator_init_fn) (struct request_queue *);
typedef void (elevator_exit_fn) (struct elevator_queue *);

struct elevator_ops
{
        elevator_merge_fn *elevator_merge_fn;
        elevator_merged_fn *elevator_merged_fn;
        elevator_merge_req_fn *elevator_merge_req_fn;
        elevator_allow_merge_fn *elevator_allow_merge_fn;

        elevator_dispatch_fn *elevator_dispatch_fn;
        elevator_add_req_fn *elevator_add_req_fn;
        elevator_activate_req_fn *elevator_activate_req_fn;
        elevator_deactivate_req_fn *elevator_deactivate_req_fn;

        elevator_queue_empty_fn *elevator_queue_empty_fn;
        elevator_completed_req_fn *elevator_completed_req_fn;

        elevator_request_list_fn *elevator_former_req_fn;
        elevator_request_list_fn *elevator_latter_req_fn;

        elevator_set_req_fn *elevator_set_req_fn;
        elevator_put_req_fn *elevator_put_req_fn;

        elevator_may_queue_fn *elevator_may_queue_fn;

        elevator_init_fn *elevator_init_fn;
        elevator_exit_fn *elevator_exit_fn;
        void (*trim)(struct io_context *);
};
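Of these hooks, a minimal scheduler really only needs elevator_add_req_fn (accept a request from the block layer), elevator_dispatch_fn (move a request to the dispatch queue), the merge callbacks, and elevator_init_fn/elevator_exit_fn for per-queue setup and teardown. As the noop code later in this article shows, noop implements little beyond exactly that set.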

/*
 * identifies an elevator type, such as AS or deadline
 */
struct elevator_type
{
        struct list_head list;
        struct elevator_ops ops;
        struct elv_fs_entry *elevator_attrs;
        char elevator_name[ELV_NAME_MAX];
        struct module *elevator_owner;
};

/*
 * each queue has an elevator_queue associated with it
 */
struct elevator_queue
{
        struct elevator_ops *ops;
        void *elevator_data;
        struct kobject kobj;
        struct elevator_type *elevator_type;
        struct mutex sysfs_lock;
        struct hlist_head *hash;
};

./block/elevator.c
int elevator_init(struct request_queue *q, char *name)
{
        struct elevator_type *e = NULL;
        struct elevator_queue *eq;
        int ret = 0;
        void *data;
        /* initialize the request-queue fields the elevator relies on */
        INIT_LIST_HEAD(&q->queue_head);
        q->last_merge = NULL;
        q->end_sector = 0;
        q->boundary_rq = NULL;
        /* if a scheduler name was given, look up the matching type */
        if (name) {
                e = elevator_get(name);
                if (!e)
                        return -EINVAL;
        }
        /* otherwise fall back to the scheduler chosen on the command line */
        if (!e && *chosen_elevator) {
                e = elevator_get(chosen_elevator);
                if (!e)
                        printk(KERN_ERR "I/O scheduler %s not found\n",
                                                        chosen_elevator);
        }
        /* otherwise use the compile-time default, with noop as a last resort */
        if (!e) {
                e = elevator_get(CONFIG_DEFAULT_IOSCHED);
                if (!e) {
                        printk(KERN_ERR
                                "Default I/O scheduler not found. " \
                                "Using noop.\n");
                        e = elevator_get("noop");
                }
        }
        /* allocate an elevator_queue for the chosen type */
        eq = elevator_alloc(q, e);
        if (!eq)
                return -ENOMEM;
        /* let the scheduler allocate its private data for this queue */
        data = elevator_init_queue(q, eq);
        if (!data) {
                kobject_put(&eq->kobj);
                return -ENOMEM;
        }
        /* tie the request_queue, the elevator_queue and the private data together */
        elevator_attach(q, eq, data);
        return ret;
}
EXPORT_SYMBOL(elevator_init);
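chosen_elevator is filled in from the elevator= kernel boot parameter. In this kernel generation the setup hook in block/elevator.c looks essentially like the following (quoted from memory, verify against your tree):

static char chosen_elevator[16];

static int __init elevator_setup(char *str)
{
        /*
         * Be backwards-compatible with previous kernels, so users
         * won't get the wrong elevator.
         */
        strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
        return 1;
}

__setup("elevator=", elevator_setup);

So booting with elevator=noop makes every queue that does not request a scheduler by name default to noop.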

void elevator_exit(struct elevator_queue *e)
{
        mutex_lock(&e->sysfs_lock);
        if (e->ops->elevator_exit_fn)
                e->ops->elevator_exit_fn(e);
        e->ops = NULL;
        mutex_unlock(&e->sysfs_lock);

        kobject_put(&e->kobj);
}
EXPORT_SYMBOL(elevator_exit);
 
static struct elevator_queue *elevator_alloc(struct request_queue *q,
                                  struct elevator_type *e)
{
        struct elevator_queue *eq;
        int i;
        /* allocate the elevator_queue itself, zeroed, on the queue's node */
        eq = kmalloc_node(sizeof(*eq), GFP_KERNEL | __GFP_ZERO, q->node);
        if (unlikely(!eq))
                goto err;
        /* tie the elevator_type to this elevator_queue */
        eq->ops = &e->ops;
        eq->elevator_type = e;
        kobject_init(&eq->kobj, &elv_ktype);
        mutex_init(&eq->sysfs_lock);
        /* allocate the hash table used to look up requests for merging */
        eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,
                                        GFP_KERNEL, q->node);
        if (!eq->hash)
                goto err;
        /* initialize every bucket of the hash table */
        for (i = 0; i < ELV_HASH_ENTRIES; i++)
                INIT_HLIST_HEAD(&eq->hash[i]);

        return eq;
err:
        kfree(eq);
        elevator_put(e);
        return NULL;
}
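ELV_HASH_ENTRIES comes from the top of the same file. In this kernel generation the table has 64 buckets, and requests are keyed by the sector just past their end so that a bio starting there can be back-merged quickly; roughly (again quoted from memory, verify against your tree):

static const int elv_hash_shift = 6;
#define ELV_HASH_ENTRIES        (1 << elv_hash_shift)
#define rq_hash_key(rq)         (blk_rq_pos(rq) + blk_rq_sectors(rq))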

static void *elevator_init_queue(struct request_queue *q,
                                 struct elevator_queue *eq)
{
        return eq->ops->elevator_init_fn(q);
}

static void elevator_attach(struct request_queue *q, struct elevator_queue *eq,
                           void *data)
{
        q->elevator = eq;
        eq->elevator_data = data;
}
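For noop, elevator_init_fn is noop_init_queue (shown below), so the pointer returned here is a struct noop_data; elevator_attach stores it in eq->elevator_data, from where every later noop callback retrieves it via q->elevator->elevator_data.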

./block/noop-iosched.c
static struct elevator_type elevator_noop = {
        .ops = {
                .elevator_merge_req_fn          = noop_merged_requests,
                .elevator_dispatch_fn           = noop_dispatch,
                .elevator_add_req_fn            = noop_add_request,
                .elevator_queue_empty_fn        = noop_queue_empty,
                .elevator_former_req_fn         = noop_former_request,
                .elevator_latter_req_fn         = noop_latter_request,
                .elevator_init_fn               = noop_init_queue,
                .elevator_exit_fn               = noop_exit_queue,
        },
        .elevator_name = "noop",
        .elevator_owner = THIS_MODULE,
};

static int __init noop_init(void)
{
        elv_register(&elevator_noop);

        return 0;
}

static void __exit noop_exit(void)
{
        elv_unregister(&elevator_noop);
}

module_init(noop_init);
module_exit(noop_exit);
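
The file ends with the usual module metadata (same kernel generation, quoted from memory):

MODULE_AUTHOR("Jens Axboe");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("No-op IO scheduler");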

static void *noop_init_queue(struct request_queue *q)
{
        struct noop_data *nd;
        /* allocate the noop_data on the queue's NUMA node (q->node) */
        nd = kmalloc_node(sizeof(*nd), GFP_KERNEL, q->node);
        if (!nd)
                return NULL;
        INIT_LIST_HEAD(&nd->queue);
        return nd;
}

struct noop_data {
        struct list_head queue;
};

static void noop_merged_requests(struct request_queue *q, struct request *rq,
                                 struct request *next)
{
        /* 'next' has been merged into 'rq'; drop it from the FIFO */
        list_del_init(&next->queuelist);
}

static int noop_dispatch(struct request_queue *q, int force)
{
        struct noop_data *nd = q->elevator->elevator_data;

        if (!list_empty(&nd->queue)) {
                struct request *rq;
                /* take the oldest request and move it to the dispatch queue */
                rq = list_entry(nd->queue.next, struct request, queuelist);
                list_del_init(&rq->queuelist);
                elv_dispatch_sort(q, rq);
                return 1;
        }
        return 0;
}

static void noop_add_request(struct request_queue *q, struct request *rq)
{
        struct noop_data *nd = q->elevator->elevator_data;

        /* new requests simply go to the tail of the FIFO */
        list_add_tail(&rq->queuelist, &nd->queue);
}
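
The remaining handlers wired into elevator_noop are just as small; they were omitted above, so here they are, quoted from the same kernel generation (verify against your tree):

static int noop_queue_empty(struct request_queue *q)
{
        struct noop_data *nd = q->elevator->elevator_data;

        return list_empty(&nd->queue);
}

static struct request *
noop_former_request(struct request_queue *q, struct request *rq)
{
        struct noop_data *nd = q->elevator->elevator_data;

        /* the request before rq in the FIFO, or NULL at the head */
        if (rq->queuelist.prev == &nd->queue)
                return NULL;
        return list_entry(rq->queuelist.prev, struct request, queuelist);
}

static struct request *
noop_latter_request(struct request_queue *q, struct request *rq)
{
        struct noop_data *nd = q->elevator->elevator_data;

        /* the request after rq in the FIFO, or NULL at the tail */
        if (rq->queuelist.next == &nd->queue)
                return NULL;
        return list_entry(rq->queuelist.next, struct request, queuelist);
}

static void noop_exit_queue(struct elevator_queue *e)
{
        struct noop_data *nd = e->elevator_data;

        BUG_ON(!list_empty(&nd->queue));
        kfree(nd);
}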
 
./include/linux/blkdev.h
/*
 * try to put the fields that are referenced together in the same cacheline.
 * if you modify this structure, be sure to check block/blk-core.c:rq_init()
 * as well!
 */
struct request {
        struct list_head queuelist;
        struct call_single_data csd;
        int cpu;

        struct request_queue *q;

        unsigned int cmd_flags;
        enum rq_cmd_type_bits cmd_type;
        unsigned long atomic_flags;

        /* the following two fields are internal, NEVER access directly */
        sector_t __sector;              /* sector cursor */
        unsigned int __data_len;        /* total data len */

        struct bio *bio;
        struct bio *biotail;

        struct hlist_node hash; /* merge hash */
        /*
         * The rb_node is only used inside the io scheduler, requests
         * are pruned when moved to the dispatch queue. So let the
         * completion_data share space with the rb_node.
         */
        union {
                struct rb_node rb_node; /* sort/lookup */
                void *completion_data;
        };

        /*
         * two pointers are available for the IO schedulers, if they need
         * more they have to dynamically allocate it.
         */
        void *elevator_private;
        void *elevator_private2;

        struct gendisk *rq_disk;
        unsigned long start_time;

        /* Number of scatter-gather DMA addr+len pairs after
         * physical address coalescing is performed.
         */
        unsigned short nr_phys_segments;

        unsigned short ioprio;

        void *special;          /* opaque pointer available for LLD use */
        char *buffer;           /* kaddr of the current segment if available */

        int tag;
        int errors;

        int ref_count;

        /*
         * when request is used as a packet command carrier
         */
        unsigned short cmd_len;
        unsigned char __cmd[BLK_MAX_CDB];
        unsigned char *cmd;

        unsigned int extra_len; /* length of alignment and padding */
        unsigned int sense_len;
        unsigned int resid_len; /* residual count */
        void *sense;

        unsigned long deadline;
        struct list_head timeout_list;
        unsigned int timeout;
        int retries;

        /*
         * completion callback.
         */
        rq_end_io_fn *end_io;
        void *end_io_data;

        /* for bidi */
        struct request *next_rq;
};
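
The two fields marked "NEVER access directly" are read through accessors defined in the same header; in this kernel generation they are simply (quoted from memory):

static inline sector_t blk_rq_pos(const struct request *rq)
{
        return rq->__sector;
}

static inline unsigned int blk_rq_bytes(const struct request *rq)
{
        return rq->__data_len;
}

static inline unsigned int blk_rq_sectors(const struct request *rq)
{
        return blk_rq_bytes(rq) >> 9;
}

elv_dispatch_sort, called by noop_dispatch above, orders requests by blk_rq_pos().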
 
struct request_queue
{
        /*
         * Together with queue_head for cacheline sharing
         */
        struct list_head        queue_head;
        struct request          *last_merge;
        struct elevator_queue   *elevator;

        /*
         * the queue request freelist, one for reads and one for writes
         */
        struct request_list     rq;

        request_fn_proc         *request_fn;
        make_request_fn         *make_request_fn;
        prep_rq_fn              *prep_rq_fn;
        unplug_fn               *unplug_fn;
        merge_bvec_fn           *merge_bvec_fn;
        prepare_flush_fn        *prepare_flush_fn;
        softirq_done_fn         *softirq_done_fn;
        rq_timed_out_fn         *rq_timed_out_fn;
        dma_drain_needed_fn     *dma_drain_needed;
        lld_busy_fn             *lld_busy_fn;

        /*
         * Dispatch queue sorting
         */
        sector_t                end_sector;
        struct request          *boundary_rq;

        /*
         * Auto-unplugging state
         */
        struct timer_list       unplug_timer;
        int                     unplug_thresh;  /* After this many requests */
        unsigned long           unplug_delay;   /* After this many jiffies */
        struct work_struct      unplug_work;

        struct backing_dev_info backing_dev_info;

        /*
         * The queue owner gets to use this for whatever they like.
         * ll_rw_blk doesn't touch it.
         */
        void                    *queuedata;

        /*
         * queue needs bounce pages for pages above this limit
         */
        gfp_t                   bounce_gfp;

        /*
         * various queue flags, see QUEUE_* below
         */
        unsigned long           queue_flags;

        /*
         * protects queue structures from reentrancy. ->__queue_lock should
         * _never_ be used directly, it is queue private. always use
         * ->queue_lock.
         */
        spinlock_t              __queue_lock;
        spinlock_t              *queue_lock;

        /*
         * queue kobject
         */
        struct kobject kobj;

        /*
         * queue settings
         */
        unsigned long           nr_requests;    /* Max # of requests */
        unsigned int            nr_congestion_on;
        unsigned int            nr_congestion_off;
        unsigned int            nr_batching;

        void                    *dma_drain_buffer;
        unsigned int            dma_drain_size;
        unsigned int            dma_pad_mask;
        unsigned int            dma_alignment;

        struct blk_queue_tag    *queue_tags;
        struct list_head        tag_busy_list;

        unsigned int            nr_sorted;
        unsigned int            in_flight[2];

        unsigned int            rq_timeout;
        struct timer_list       timeout;
        struct list_head        timeout_list;

        struct queue_limits     limits;

        /*
         * sg stuff
         */
        unsigned int            sg_timeout;
        unsigned int            sg_reserved_size;
        int                     node;
#ifdef CONFIG_BLK_DEV_IO_TRACE
        struct blk_trace        *blk_trace;
#endif
        /*
         * reserved for flush operations
         */
        unsigned int            ordered, next_ordered, ordseq;
        int                     orderr, ordcolor;
        struct request          pre_flush_rq, bar_rq, post_flush_rq;
        struct request          *orig_bar_rq;

        struct mutex            sysfs_lock;

#if defined(CONFIG_BLK_DEV_BSG)
        struct bsg_class_device bsg_dev;
#endif
};
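
noop_dispatch hands requests to the queue_head list above through elv_dispatch_sort, which inserts each one in sector order behind q->boundary_rq. A simplified sketch of the idea, not the kernel's exact code (the real function in block/elevator.c additionally respects the boundary request, barriers, and the merge hash):

/* Sketch: walk queue_head backwards and insert rq so that the
 * dispatch queue stays ordered by start sector. */
static void sketch_dispatch_sort(struct request_queue *q, struct request *rq)
{
        struct list_head *entry;

        list_for_each_prev(entry, &q->queue_head) {
                struct request *pos = list_entry(entry, struct request, queuelist);

                /* stop at the first request that starts at or before rq */
                if (blk_rq_pos(rq) >= blk_rq_pos(pos))
                        break;
        }
        /* insert rq right after that request (or at the head if none) */
        list_add(&rq->queuelist, entry);
}

This is why even noop ends up with a roughly sector-sorted dispatch queue: the FIFO only governs the order in which requests leave the scheduler, not their final position in queue_head.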

To see which I/O schedulers the running kernel has registered:
dmesg | grep -i scheduler
[    0.207639] io scheduler noop registered
[    0.207641] io scheduler deadline registered
[    0.207675] io scheduler cfq registered (default)

To see the scheduler a particular device is currently using:
cat /sys/block/xvda/queue/scheduler
noop deadline [cfq]
The bracketed entry shows that cfq is the scheduler in effect.
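
The same sysfs file is writable, so the scheduler can be switched per device at runtime (as root), for example:
echo noop > /sys/block/xvda/queue/scheduler
cat /sys/block/xvda/queue/scheduler
[noop] deadline cfq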

                                                  

