为了实现共享缓冲区的互斥访问,互斥锁是比较直观的方案。
在内核驱动的实现中,为了保证读进程和写进程不相互干扰,锁就成为了至关重要的环节。但是,在性能需求较高的情况下,锁往往成为了性能瓶颈,这也体现在锁的粒度的表现上。锁的粒度太小,则需要频繁地执行加锁和解锁的操作;另一方面,如果锁的粒度太大,往往容易导致某些进程在较长时间内占用了过多的资源,从而导致另一些进程饥饿。
为了满足不同情况下对于锁的需求,内核也给出了一些替代方案(alternative options)。(参考《Linux Device Drivers, 3rd Edition》)
(1)不加锁算法,也就是我们这里要介绍的环形缓冲区(Kfifo),这个缓冲区只支持单个读进程和单个写进程, 读进程读数据的时候,读指针前移;相应的,写进程写数据的时候,写指针前移。当读指针和写指针相重合的时候,表示缓冲区已经没有数据可读;当写指针刚好出现在读指针的前一位,则表示缓冲区已满。
(2)原子变量,顾名思义,是内核中提供的能够支持原子操作的变量类型atomic_t,定义在<asm/atomic.h>中,在系统中调用ATOMIC_INIT(value)来对它进行初始化。可以调用atomic_add(sub)(i, *v),或者atomic_inc(dec)(*v)来执行对于原子变量的操作。
(3)位操作,完成原子位操作,由set_bit,clear_bit, test_and_set_bit等函数来支持这些位操作,定义在<asm/bitops.h>。
(4)seqlocks,seqlocks通常用于保护较小的、较简单的并且被频繁访问的资源,而对于这些资源的写操作很少发生,且每次写操作都很快完成。seqlocks允许读进程自由地访问这些资源,然而这些读进程必须能够检测到和写进程之间的冲突,当冲突发生的时候,读进程退出并重新尝试访问。
(5)RCU(Read-Copy-Update),具体可以参考RCU白皮书(http://www.rdrop.com/users/paulmck/rclock/intro/rclock_intro.html),通常也是应用在读比写更频繁的场合。在RCU机制下,所有的资源只能通过指针来访问,并且这些资源的地址只能在定义为原子操作的代码中被访问。当数据结构需要发生改变时,写进程生成一份拷贝,对拷贝进行更新,然后将指向旧数据结构的指针改为指向新的拷贝。这个思想与fork时的写时复制(copy-on-write)有异曲同工之处!
扯了一堆闲话,现在我们来看下,基于kfifo的内核驱动的实现方式,事实上,对于内核驱动实现的一些框架代码,包括major和minor号等可以参考《Linux Device Driver, V3》的source code,这里不进行赘述。
另外,需要特别指出的是,对于2.6.36或更新版本的内核,kfifo有更为成熟的实现,提供了包括kfifo_to_user和kfifo_from_user这样一些实用函数,所以本文所有代码按照新的内核来编写。
(1)scull_kfifo结构
01 | struct scull_kfifo { |
02 | // unsigned int access_key; /* used by sculluid and scullpriv */ |
03 | struct semaphore sem; /* mutual exclusion semaphore */ |
04 | struct cdev cdev; /* Char device structure */ |
05 | spinlock_t lock; |
06 | struct kfifo* mykfifo; /*The kernel fifo buffer, lock free for one-reader-one-writer, fitting for our prototype*/ |
07 | wait_queue_head_t inq, outq; /* read and write queues */ |
08 | int numWriter; |
09 | int numReader; |
10 | }; |
(2)Open and Release操作
01 | int scull_kfifo_open( struct inode *inode, struct file *filp) |
02 | { |
03 | struct scull_kfifo *dev; /* device information */ |
04 |
05 | dev = container_of(inode->i_cdev, struct scull_kfifo, cdev); |
06 |
07 | // We only permit two different users, of which one is a reader, the other is a writer, |
08 | // to sychronizedly open the device! |
09 | // |
10 | |
11 | if (down_interruptible(&(dev->sem))) |
12 | return -ERESTARTSYS; |
13 | if ((filp->f_flags & O_ACCMODE) == O_WRONLY){ |
14 | if (dev->numWriter == 1){ |
15 | printk(KERN_DEBUG "Writer exist!\n" ); |
16 | up(&(dev->sem)); |
17 | return -EBUSY; |
18 | } |
19 | dev->numWriter++; |
20 | } |
21 |
22 | else if ((filp->f_flags & O_ACCMODE) == O_RDONLY){ |
23 | if (dev->numReader == 1){ |
24 | printk(KERN_DEBUG "Reader exist!\n" ); |
25 | up(&(dev->sem)); |
26 | return -EBUSY; |
27 | } |
28 |
29 | dev->numReader++; |
30 | } |
31 |
32 | |
33 | else if ((filp->f_flags & O_ACCMODE) == O_RDWR){ |
34 | if (dev->numReader == 1){ |
35 | printk(KERN_DEBUG "Reader exist!\n" ); |
36 | up(&(dev->sem)); |
37 | return -EBUSY; |
38 | } else if (dev->numWriter == 1){ |
39 | printk(KERN_DEBUG "Writer exist!\n" ); |
40 | up(&(dev->sem)); |
41 | return -EBUSY; |
42 | } else { |
43 | dev->numWriter++; |
44 | dev->numReader++; |
45 | } |
46 |
47 | } |
48 |
49 | else { |
50 | printk(KERN_DEBUG "Unsupported permission!\n" ); |
51 | up(&(dev->sem)); |
52 | return -ERESTARTSYS; |
53 | } |
54 |
55 | up(&(dev->sem)); |
56 |
57 | filp->private_data = dev; /* for other methods */ |
58 |
59 | return nonseekable_open(inode, filp); /* success */ |
60 | } |
01 | int scull_kfifo_release( struct inode *inode, struct file *filp) |
02 | { |
03 | struct scull_kfifo *dev; |
04 | dev = filp->private_data; |
05 |
06 | down(&dev->sem); |
07 | //return -ERESTARTSYS; |
08 | kfifo_reset(dev->mykfifo); |
09 | |
10 | if ((filp->f_flags & O_ACCMODE) == O_WRONLY){ |
11 | if (dev->numWriter == 0){ |
12 | printk(KERN_DEBUG "Something wrong with the device, there is writer quiting while no writer found!\n" ); |
13 | up(&(dev->sem)); |
14 | return -ERESTARTSYS; |
15 | } |
16 | dev->numWriter--; |
17 | } |
18 |
19 | else if ((filp->f_flags & O_ACCMODE) == O_RDONLY){ |
20 | if (dev->numReader == 0){ |
21 | printk(KERN_DEBUG "Something wrong with the device, there is reader quiting while no reader found!\n" ); |
22 | up(&(dev->sem)); |
23 | return -ERESTARTSYS; |
24 | } |
25 | dev->numReader--; |
26 | } |
27 |
28 | else if ((filp->f_flags & O_ACCMODE) == O_RDWR){ |
29 | if (dev->numReader == 0){ |
30 | printk(KERN_DEBUG "Something wrong with the device, there is reader quiting while no reader found!\n" ); |
31 | up(&(dev->sem)); |
32 | return -ERESTARTSYS; |
33 | } else if (dev->numWriter == 0){ |
34 | printk(KERN_DEBUG "Something wrong with the device, there is reader quiting while no reader found!\n" ); |
35 | up(&(dev->sem)); |
36 | return -ERESTARTSYS; |
37 | } else { |
38 | dev->numWriter--; |
39 | dev->numReader--; |
40 | } |
41 |
42 | } |
43 | |
44 |
45 | up(&dev->sem); |
46 | // printk(KERN_DEBUG"Devices released successfully!\n"); |
47 | return 0; |
48 | } |
(3)Read函数
01 | ssize_t scull_kfifo_read( struct file *filp, char __user *buf, size_t count, |
02 | loff_t *f_pos) |
03 | { |
04 | struct scull_kfifo *dev = filp->private_data; |
05 | ssize_t retval = 0; |
06 | ssize_t copied_count = 0; |
07 | //cycles_t just = 0, now = 0; |
08 |
09 | //if (down_interruptible(&dev->sem)) |
10 | // return -ERESTARTSYS; |
11 | |
12 | if ((filp->f_flags & O_ACCMODE) == O_WRONLY){ |
13 | return -ERESTARTSYS; |
14 | } |
15 |
16 | while (kfifo_is_empty(dev->mykfifo)) { /* nothing to read */ |
17 | // up(&dev->sem); /* release the lock */ |
18 | if (filp->f_flags & O_NONBLOCK) |
19 | return -EAGAIN; |
20 | //printk("\"%s\" reading: going to sleep\n", current->comm); |
21 | if (wait_event_interruptible(dev->inq, (!kfifo_is_empty(dev->mykfifo)))){ |
22 | // printk(KERN_DEBUG"\"%s\" reading: should be blocked because nothing to read\n", current->comm); |
23 | return -ERESTARTSYS; /* signal: tell the fs layer to handle it */ |
24 | } |
25 | /* otherwise loop, but first reacquire the lock */ |
26 | //if (down_interruptible(&dev->sem)) |
27 | //return -ERESTARTSYS; |
28 | } |
29 |
30 | if (count > kfifo_len(dev->mykfifo)) |
31 | count = kfifo_len(dev->mykfifo); |
32 | //count = kfifo_out(dev->tekkamankfifo,tekkamanbuffer, count); |
33 | |
34 | //rdtscll(just); |
35 | if (kfifo_to_user(dev->mykfifo, buf, count, &copied_count)) { |
36 | retval = -EFAULT; |
37 | goto out; |
38 | } |
39 | //rdtscll(now); |
40 | //printk(KERN_DEBUG"The Kfifo copy %d bytes to user causes %lld ticks.\n", count, (now - just)); |
41 | retval = copied_count; |
42 |
43 | out: |
44 | //up(&dev->sem); |
45 | wake_up_interruptible(&dev->outq); /*Await any writer*/ |
46 | // if (printk_ratelimit()) printk("\"%s\" did read %li bytes\n",current->comm, (long)retval); |
47 | return retval; |
48 | } |
(4)Write函数
01 | ssize_t scull_kfifo_write( struct file *filp, const char __user *buf, size_t count, loff_t *f_pos) |
02 | { |
03 | size_t copied_count = 0; |
04 | struct scull_kfifo *dev = filp->private_data; |
05 | ssize_t retval = -ENOMEM; /* value used in "goto out" statements */ |
06 |
07 | //if (down_interruptible(&dev->sem)) |
08 | // return -ERESTARTSYS; |
09 | |
10 | if ((filp->f_flags & O_ACCMODE) == O_RDONLY){ |
11 | return -ERESTARTSYS; |
12 | } |
13 |
14 | if (kfifo_is_full(dev->mykfifo)){ |
15 | if (filp->f_flags & O_NONBLOCK) |
16 | return -EAGAIN; |
17 | //printk("\"%s\" writing: going to sleep\n", current->comm); |
18 | if (wait_event_interruptible(dev->outq, (!kfifo_is_full(dev->mykfifo)))){ |
19 | |
20 | printk(KERN_DEBUG "\"%s\" writing: should be blocked because no space to write\n" , current->comm); |
21 | return -ERESTARTSYS; /* signal: tell the fs layer to handle it */ |
22 | } |
23 | } |
24 | |
25 | if (count > kfifo_avail(dev->mykfifo)) count = kfifo_avail(dev->mykfifo); |
26 | //rdtscll(just); |
27 | if (kfifo_from_user(dev->mykfifo, buf, count, &copied_count)) { |
28 | // up (&dev->sem); |
29 | return -EFAULT; |
30 | } |
31 | //rdtscll(now); |
32 |
33 | //printk(KERN_DEBUG"The Kfifo copy %d bytes from user causes %lld ticks.\n", count, (now - just)); |
34 | |
35 | //count = kfifo_in(dev->tekkamankfifo,tekkamanbuffer, count); |
36 | retval = copied_count; |
37 |
38 | // up(&dev->sem); |
39 | wake_up_interruptible(&dev->inq); /* blocked in read() and select(), Await any reader */ |
40 | // if (printk_ratelimit()) printk("\"%s\" did write %li bytes\n",current->comm, (long)count); |
41 | return retval; |
42 | } |
(5)Setup和Cleanup
01 | void scull_kfifo_cleanup_module( void ) |
02 | { |
03 | dev_t devno = MKDEV(scull_kfifo_major, scull_kfifo_minor); |
04 | int keke = 0xff; |
05 |
06 | /* Get rid of our char dev entries */ |
07 | if (scull_kfifo_devices) { |
08 | if (scull_kfifo_devices->mykfifo) { |
09 | kfifo_free(scull_kfifo_devices->mykfifo); |
10 | kfree(scull_kfifo_devices->mykfifo); |
11 | keke = 0; |
12 | } |
13 | cdev_del(&scull_kfifo_devices->cdev); |
14 | kfree(scull_kfifo_devices); |
15 | } |
16 | #if 1 |
17 | //if (kfifo_buffer && keke) kfree(kfifo_buffer); |
18 | #endif |
19 | //if (tekkaman) kfree(tekkaman); |
20 | /* cleanup_module is never called if registering failed */ |
21 | unregister_chrdev_region(devno, 1); |
22 | } |
01 | static void scull_kfifo_setup_cdev( struct scull_kfifo *dev) |
02 | { |
03 | int err, devno = MKDEV(scull_kfifo_major, scull_kfifo_minor ); |
04 | |
05 | cdev_init(&dev->cdev, &scull_kfifo_fops); |
06 | dev->cdev.owner = THIS_MODULE; |
07 | err = cdev_add (&dev->cdev, devno, 1); |
08 | /* Fail gracefully if need be */ |
09 | if (err) |
10 | printk(KERN_NOTICE "Error %d adding scull_kfifo" , err); |
11 | } |
12 |
13 |
14 | int scull_kfifo_init_module( void ) |
15 | { |
16 | int result; |
17 | dev_t dev = 0; |
18 |
19 | /* |
20 | * Get a range of minor numbers to work with, asking for a dynamic |
21 | * major unless directed otherwise at load time. |
22 | */ |
23 | if (scull_kfifo_major != 0) { |
24 | dev = MKDEV(scull_kfifo_major, scull_kfifo_minor); |
25 | result = register_chrdev_region(dev, 1, "scull_kfifo" ); |
26 | } else { |
27 | result = alloc_chrdev_region(&dev, scull_kfifo_minor, 1, |
28 | "scull_kfifo" ); |
29 | scull_kfifo_major = MAJOR(dev); |
30 | } |
31 | if (result < 0) { |
32 | printk(KERN_WARNING "scull: can't get major %d\n" , scull_kfifo_major); |
33 | return result; |
34 | } |
35 |
36 | /* |
37 | * allocate the devices -- we can't have them static, as the number |
38 | * can be specified at load time |
39 | */ |
40 | scull_kfifo_devices = kmalloc( sizeof ( struct scull_kfifo), GFP_KERNEL); |
41 | if (scull_kfifo_devices == NULL) { |
42 | result = -ENOMEM; |
43 | printk(KERN_ERR "Malloc the device error!" ); |
44 | goto fail; /* Make this more graceful */ |
45 | } |
46 | memset (scull_kfifo_devices, 0, sizeof ( struct scull_kfifo)); |
47 |
48 | /* Initialize each device. */ |
49 | sema_init(&scull_kfifo_devices->sem, 1); |
50 | spin_lock_init (&(scull_kfifo_devices->lock)); |
51 | |
52 | scull_kfifo_devices->mykfifo = kmalloc( sizeof ( struct kfifo), GFP_KERNEL); |
53 | if (scull_kfifo_devices->mykfifo == NULL) { |
54 | result = -ENOMEM; |
55 | printk(KERN_ERR "Malloc the mykfifo error!" ); |
56 | goto fail; /* Make this more graceful */ |
57 | } |
58 | memset (scull_kfifo_devices->mykfifo, 0, sizeof ( struct kfifo)); |
59 |
60 | if ((result = kfifo_alloc(scull_kfifo_devices->mykfifo, BUFSIZE, GFP_KERNEL)) != 0){ |
61 | //result = -ENOMEM; |
62 | printk(KERN_ERR "Mallock the kfifo error!" ); |
63 | goto fail; |
64 | } |
65 | init_waitqueue_head(&(scull_kfifo_devices->inq)); |
66 | init_waitqueue_head(&(scull_kfifo_devices->outq)); |
67 | scull_kfifo_devices->numWriter = 0; |
68 | scull_kfifo_devices->numReader = 0; |
69 | scull_kfifo_setup_cdev(scull_kfifo_devices); |
70 |
71 | return 0; /* succeed */ |
72 |
73 | fail: |
74 | scull_kfifo_cleanup_module(); |
75 | return result; |
76 | } |