inode决定主设备号和从设备号,保存的是静态的信息,从文件系统的角度出发.
file保存有各种标志,从用户的角度出发,保存的是动态的信息.
file结构
/*
 * struct file - kernel object describing one open file.
 * Per the notes in this document it carries the dynamic, user-facing state
 * (flags, mode, position), as opposed to the static data in struct inode.
 */
struct file {
union {
struct llist_node fu_llist;
struct rcu_head fu_rcuhead;
} f_u;
struct path f_path;
/* shorthand: f_dentry expands to f_path.dentry */
#define f_dentry f_path.dentry
struct inode *f_inode; /* cached value */
/* operations associated with this file (see struct file_operations) */
const struct file_operations *f_op;
/*
* Protects f_ep_links, f_flags
* Must not be taken from IRQ context.
*/
spinlock_t f_lock;
atomic_long_t f_count;
/* file opening flags (O_RDONLY, O_NONBLOCK, O_SYNC, O_APPEND, O_TRUNC, ...) */
unsigned int f_flags;
/* access mode: read (FMODE_READ) or write (FMODE_WRITE) */
fmode_t f_mode;
struct mutex f_pos_lock;
/* current offset within the file */
loff_t f_pos;
struct fown_struct f_owner;
const struct cred *f_cred;
struct file_ra_state f_ra;
u64 f_version;
#ifdef CONFIG_SECURITY
void *f_security;
#endif
/* needed for tty driver, and maybe others */
/* drivers in this document store their per-device data pointer here in open() */
void *private_data;
#ifdef CONFIG_EPOLL
/* Used by fs/eventpoll.c to link all the hooks to this file */
struct list_head f_ep_links;
struct list_head f_tfile_llink;
#endif /* #ifdef CONFIG_EPOLL */
struct address_space *f_mapping;
} __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
f_mode, which specifies read (FMODE_READ) or write (FMODE_WRITE);
f_flags, which specifies the file opening flags (O_RDONLY, O_NONBLOCK, O_SYNC, O_APPEND, O_TRUNC, etc.);
f_op, which specifies the operations associated with the file (pointer to the file_operations structure );
private_data, a pointer that can be used by the programmer to store device-specific data; The pointer will be initialized to a memory location assigned by the programmer.
f_pos, the offset within the file
inode结构
/*
* Keep mostly read-only and often accessed (especially for
* the RCU path lookup and 'stat' data) fields at the beginning
* of the 'struct inode'
*
* Per the notes in this document, the inode holds the static,
* filesystem-side information about a file, including the device number
* (i_rdev) of a device node.
*/
struct inode {
umode_t i_mode;
unsigned short i_opflags;
kuid_t i_uid;
kgid_t i_gid;
unsigned int i_flags;
#ifdef CONFIG_FS_POSIX_ACL
struct posix_acl *i_acl;
struct posix_acl *i_default_acl;
#endif
const struct inode_operations *i_op;
struct super_block *i_sb;
struct address_space *i_mapping;
#ifdef CONFIG_SECURITY
void *i_security;
#endif
/* Stat data, not accessed from path walking */
unsigned long i_ino;
/*
* Filesystems may only read i_nlink directly. They shall use the
* following functions for modification:
*
* (set|clear|inc|drop)_nlink
* inode_(inc|dec)_link_count
*/
union {
const unsigned int i_nlink;
unsigned int __i_nlink;
};
/* device number (dev_t packs the major and minor numbers, see MKDEV) */
dev_t i_rdev;
loff_t i_size;
struct timespec i_atime;
struct timespec i_mtime;
struct timespec i_ctime;
spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
unsigned short i_bytes;
unsigned int i_blkbits;
blkcnt_t i_blocks;
#ifdef __NEED_I_SIZE_ORDERED
seqcount_t i_size_seqcount;
#endif
/* Misc */
unsigned long i_state;
struct mutex i_mutex;
unsigned long dirtied_when; /* jiffies of first dirtying */
struct hlist_node i_hash;
struct list_head i_wb_list; /* backing dev IO list */
struct list_head i_lru; /* inode LRU list */
struct list_head i_sb_list;
union {
struct hlist_head i_dentry;
struct rcu_head i_rcu;
};
u64 i_version;
atomic_t i_count;
atomic_t i_dio_count;
atomic_t i_writecount;
#ifdef CONFIG_IMA
atomic_t i_readcount; /* struct files open RO */
#endif
const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
struct file_lock *i_flock;
struct address_space i_data;
#ifdef CONFIG_QUOTA
struct dquot *i_dquot[MAXQUOTAS];
#endif
struct list_head i_devices;
/*
* Only one of these is meaningful for a given inode; the character
* driver examples in this document read i_cdev in their open() handler
* to find the struct cdev registered for the device.
*/
union {
struct pipe_inode_info *i_pipe;
struct block_device *i_bdev;
struct cdev *i_cdev;
};
__u32 i_generation;
#ifdef CONFIG_FSNOTIFY
__u32 i_fsnotify_mask; /* all events this inode cares about */
struct hlist_head i_fsnotify_marks;
#endif
void *i_private; /* fs or device private pointer */
};
实现操作
operation结构
/*
 * struct file_operations - table of callbacks implementing the operations
 * on an open file. A driver fills in the entries it supports and hands the
 * table to cdev_init(); the examples in this document set owner, open,
 * release, read, write and unlocked_ioctl.
 */
struct file_operations {
struct module *owner;
loff_t (*llseek) (struct file *, loff_t, int);
/* read/write: user buffer, byte count, and a pointer to the file offset to update */
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
int (*iterate) (struct file *, struct dir_context *);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
/* ioctl entry points: command code and an argument word */
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
/* open/release receive both the inode and the file object */
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *, fl_owner_t id);
int (*release) (struct inode *, struct file *);
int (*fsync) (struct file *, loff_t, loff_t, int datasync);
int (*aio_fsync) (struct kiocb *, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
int (*check_flags)(int);
int (*flock) (struct file *, int, struct file_lock *);
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
int (*setlease)(struct file *, long, struct file_lock **);
long (*fallocate)(struct file *file, int mode, loff_t offset,
loff_t len);
int (*show_fdinfo)(struct seq_file *m, struct file *f);
};
为了实现设备驱动,创建的结构要包含设备的信息,在module中会使用这些信息.对于字符设备而言,这种结构将会包含cdev结构
#include <linux/fs.h>
#include <linux/cdev.h>
/*
 * Per-device driver state. The struct cdev is embedded (not pointed to) so
 * that open() can get back to this structure from inode->i_cdev with
 * container_of().
 */
struct my_device_data {
struct cdev cdev;
/* my data starts here */
//...
};
open的时候将数据保存到private_data中
/*
 * my_open - open handler: find the per-device data and remember it in the
 * file object so later operations (read, write, release, ...) can use it.
 * @inode: inode of the device node; inode->i_cdev is the cdev embedded in
 *         struct my_device_data
 * @file:  file object being opened; its private_data is set here
 *
 * Returns 0 on success.
 */
static int my_open(struct inode *inode, struct file *file)
{
	struct my_device_data *my_data;

	my_data = container_of(inode->i_cdev, struct my_device_data, cdev);
	file->private_data = my_data;
	//...
	/* a non-void handler must return a status; 0 means the open succeeded */
	return 0;
}
read的时候将private_data转换成my_device_data的类型
static int my_read(struct file *file, char __user *user_buffer, size_t size, loff_t *offset)
{
struct my_device_data *my_data;
my_data = (struct my_device_data *) file->private_data;
//...
}
my_device_data结构包含和设备相关的数据.cdev是字符设备,被用于在系统中对设备进行跟踪和定位.使用inode结构的i_cdev字段正是指向cdev的指针(通过container_of宏).在struct file的private_data字段中存储有信息,所以在read,write,release,等例程中会用到.
从linux源码中,找到container_of宏的定义如下:
/* given a pointer @ptr to the field @member embedded into type (usually
* struct) @type, return pointer to the embedding instance of @type.
*
* The expansion computes the address @member would have inside a @type
* object located at address 0 (i.e. the member's offset), subtracts that
* from @ptr as a byte count, and casts the result to a pointer to @type. */
#define container_of(ptr, type, member) \
((type *)((char *)(ptr)-(char *)(&((type *)0)->member)))
#endif
inode中i_cdev的定义如下
union {
struct pipe_inode_info *i_pipe;
struct block_device *i_bdev;
struct cdev *i_cdev;
};
注册和取消注册字符设备
注册和取消注册字符设备主要在于确定主设备号和从设备号.dev_t可以用来定位一个设备,因为它同时保存了该设备对应的主设备号和从设备号.dev_t的值可以通过MKDEV宏由主设备号和从设备号生成.major和minor分别对应主和从设备号.
/* Build a device number: major @ma shifted left by MINORBITS, OR-ed with minor @mi. */
#define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi))
对于静态地分配和取消分配设备标识,register_chrdev_region和unregister_chrdev_region方法会被使用.
/**
* register_chrdev_region() - register a range of device numbers
* @from: the first in the desired range of device numbers; must include
* the major number.
* @count: the number of consecutive device numbers required
* @name: the name of the device or driver.
*
* Return value is zero on success, a negative error code on failure.
*/
int register_chrdev_region(dev_t from, unsigned count, const char *name)
{
struct char_device_struct *cd;
/* one past the last requested device number */
dev_t to = from + count;
dev_t n, next;
/*
* Register the range in pieces: each piece ends at the first device
* number of the next major (MKDEV(MAJOR(n)+1, 0)) or at @to,
* whichever comes first.
*/
for (n = from; n < to; n = next) {
next = MKDEV(MAJOR(n)+1, 0);
if (next > to)
next = to;
cd = __register_chrdev_region(MAJOR(n), MINOR(n),
next - n, name);
if (IS_ERR(cd))
goto fail;
}
return 0;
fail:
/* roll back only the pieces registered before the one that failed at n */
to = n;
for (n = from; n < to; n = next) {
next = MKDEV(MAJOR(n)+1, 0);
kfree(__unregister_chrdev_region(MAJOR(n), MINOR(n), next - n));
}
/* cd still holds the error pointer from the failed registration */
return PTR_ERR(cd);
}
/**
 * unregister_chrdev_region() - give back a range of device numbers
 * @from: first device number of the range
 * @count: how many device numbers to unregister
 *
 * Unregisters @count device numbers starting at @from. The range is
 * processed in pieces, each ending at the first device number of the next
 * major or at the end of the range, and the structure returned for each
 * piece is freed. Normally called by whoever registered the range.
 */
void unregister_chrdev_region(dev_t from, unsigned count)
{
	dev_t end = from + count;
	dev_t cur = from;

	while (cur < end) {
		/* first device number of the following major, clamped to the range end */
		dev_t piece_end = MKDEV(MAJOR(cur)+1, 0);

		if (piece_end > end)
			piece_end = end;
		kfree(__unregister_chrdev_region(MAJOR(cur), MINOR(cur), piece_end - cur));
		cur = piece_end;
	}
}
人们建议通过alloc_chrdev_region方法对设备标识进行动态分配.
/**
 * alloc_chrdev_region() - register a range of char device numbers
 * @dev: receives the first assigned device number
 * @baseminor: first minor number of the requested range
 * @count: number of minor numbers wanted
 * @name: name of the associated device or driver
 *
 * Asks __register_chrdev_region() for a range with major 0, which selects
 * dynamic allocation of the major number. On success the chosen major and
 * the first minor are combined into *@dev and zero is returned; otherwise
 * the negative error code is returned and *@dev is left untouched.
 */
int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count,
		const char *name)
{
	struct char_device_struct *region;

	region = __register_chrdev_region(0, baseminor, count, name);
	if (!IS_ERR(region)) {
		*dev = MKDEV(region->major, region->baseminor);
		return 0;
	}
	return PTR_ERR(region);
}
示例:
#include <linux/fs.h>
...
err = register_chrdev_region(MKDEV(my_major, my_first_minor), my_minor_count,
"my_device_driver");
if (err != 0) {
/* report error */
return err;
}
...
其中,上面的调用保留了my_minor_count个设备号,起始于主设备号my_major和从设备号my_first_minor.(如果超过了从设备号的最大值,那么将会从下一个主设备号继续分配)
在主从设备号分配完毕之后,字符设备将要被初始化.初始化的时候使用cdev_init方法
/**
* cdev_init() - initialize a cdev structure
* @cdev: the structure to initialize
* @fops: the file_operations for this device
*
* Initializes @cdev, remembering @fops, making it ready to add to the
* system with cdev_add().
*/
void cdev_init(struct cdev *cdev, const struct file_operations *fops)
{
/* start from an all-zero structure; the fields below are set afterwards */
memset(cdev, 0, sizeof *cdev);
INIT_LIST_HEAD(&cdev->list);
/* set up the embedded kobject (reference counting) with the default cdev ktype */
kobject_init(&cdev->kobj, &ktype_cdev_default);
cdev->ops = fops;
}
/*
 * Node of a circular doubly-linked list; also used as the list head.
 * Defined before INIT_LIST_HEAD() so the function can dereference it.
 */
struct list_head {
	struct list_head *next, *prev;
};

/*
 * INIT_LIST_HEAD - make @list an empty list.
 * @list: list head to initialize
 *
 * An empty list is represented by a head whose next and prev both point
 * back at the head itself.
 */
static inline void INIT_LIST_HEAD(struct list_head *list)
{
	list->next = list;
	list->prev = list;
}
/**
* kobject_init - initialize a kobject structure
* @kobj: pointer to the kobject to initialize
* @ktype: pointer to the ktype for this kobject.
*
* This function will properly initialize a kobject such that it can then
* be passed to the kobject_add() call.
*
* After this function is called, the kobject MUST be cleaned up by a call
* to kobject_put(), not by a call to kfree directly to ensure that all of
* the memory is cleaned up properly.
*/
void kobject_init(struct kobject *kobj, struct kobj_type *ktype)
{
char *err_str;
/* both arguments are mandatory; a missing one is reported at the error label */
if (!kobj) {
err_str = "invalid kobject pointer!";
goto error;
}
if (!ktype) {
err_str = "must have a ktype to be initialized properly!\n";
goto error;
}
/* a repeated init is logged with a stack dump, but initialization still proceeds */
if (kobj->state_initialized) {
/* do not error out as sometimes we can recover */
printk(KERN_ERR "kobject (%p): tried to init an initialized "
"object, something is seriously wrong.\n", kobj);
dump_stack();
}
kobject_init_internal(kobj);
kobj->ktype = ktype;
return;
error:
/* the function returns void, so failures are only visible in the log */
printk(KERN_ERR "kobject (%p): %s\n", kobj, err_str);
dump_stack();
}
/*
 * kobject_init_internal - reset the bookkeeping fields of @kobj.
 * Does nothing for a NULL pointer. Otherwise initializes the reference
 * count and the list entry, clears the sysfs/uevent state bits and marks
 * the kobject as initialized.
 */
static void kobject_init_internal(struct kobject *kobj)
{
if (!kobj)
return;
/* kref_init() sets the reference count to 1 */
kref_init(&kobj->kref);
INIT_LIST_HEAD(&kobj->entry);
kobj->state_in_sysfs = 0;
kobj->state_add_uevent_sent = 0;
kobj->state_remove_uevent_sent = 0;
/* checked by kobject_init() and kobject_put() */
kobj->state_initialized = 1;
}
/**
* kref_init - initialize object.
* @kref: object in question.
*
* Sets the reference count to 1: the initializer holds the first reference.
*/
static inline void kref_init(struct kref *kref)
{
atomic_set(&kref->refcount, 1);
}
/* Store @i into the counter field of the atomic variable pointed to by @v (a plain assignment in this definition). */
#define atomic_set(v, i) ((v)->counter = (i))
cdev_init的执行,正是为了执行cdev_add做准备的.cdev_add方法如下.
/**
* cdev_add() - add a char device to the system
* @p: the cdev structure for the device
* @dev: the first device number for which this device is responsible
* @count: the number of consecutive minor numbers corresponding to this
* device
*
* cdev_add() adds the device represented by @p to the system, making it
* live immediately. A negative error code is returned on failure.
*/
int cdev_add(struct cdev *p, dev_t dev, unsigned count)
{
int error;
/* remember the device number range; cdev_del() uses it to unmap */
p->dev = dev;
p->count = count;
/* register the range [dev, dev + count) in cdev_map with @p as its data */
error = kobj_map(cdev_map, dev, count, NULL,
exact_match, exact_lock, p);
if (error)
return error;
/* take a reference on the parent kobject (kobject_get() accepts NULL) */
kobject_get(p->kobj.parent);
return 0;
}
/**
 * kobject_get - take an additional reference on a kobject.
 * @kobj: the kobject, may be NULL.
 *
 * Increments the reference count of @kobj unless it is NULL and returns
 * the pointer it was given.
 */
struct kobject *kobject_get(struct kobject *kobj)
{
	/* NULL is passed straight through without touching any counter */
	if (!kobj)
		return kobj;

	kref_get(&kobj->kref);
	return kobj;
}
/**
* kref_get - increment refcount for object.
* @kref: object.
*
* Warns (once) if the count after the increment is below 2, i.e. if the
* count was 0 or less before the call.
*/
static inline void kref_get(struct kref *kref)
{
/* If refcount was 0 before incrementing then we have a race
* condition when this kref is freeing by some other thread right now.
* In this case one should use kref_get_unless_zero()
*/
WARN_ON_ONCE(atomic_inc_return(&kref->refcount) < 2);
}
使用cdev_del方法可以删除一个设备
/**
* cdev_del() - remove a cdev from the system
* @p: the cdev structure to be removed
*
* cdev_del() removes @p from the system, possibly freeing the structure
* itself.
*/
void cdev_del(struct cdev *p)
{
/* undo the mapping for the range recorded by cdev_add() */
cdev_unmap(p->dev, p->count);
/* drop a reference on the embedded kobject */
kobject_put(&p->kobj);
}
/**
 * kobject_put - drop a reference on a kobject.
 * @kobj: the kobject, may be NULL.
 *
 * Does nothing for NULL. Otherwise warns if the kobject was never
 * initialized, then decrements the reference count; kobject_release is
 * passed as the function to run when the count reaches zero.
 */
void kobject_put(struct kobject *kobj)
{
	if (!kobj)
		return;

	if (!kobj->state_initialized)
		WARN(1, KERN_WARNING "kobject: '%s' (%p): is not "
			"initialized, yet kobject_put() is being "
			"called.\n", kobject_name(kobj), kobj);

	kref_put(&kobj->kref, kobject_release);
}
/**
* kref_put - decrement refcount for object.
* @kref: object.
* @release: pointer to the function that will clean up the object when the
* last reference to the object is released.
* This pointer is required, and it is not acceptable to pass kfree
* in as this function. If the caller does pass kfree to this
* function, you will be publicly mocked mercilessly by the kref
* maintainer, and anyone else who happens to notice it. You have
* been warned.
*
* Decrement the refcount, and if 0, call release().
* Return 1 if the object was removed, otherwise return 0. Beware, if this
* function returns 0, you still can not count on the kref from remaining in
* memory. Only use the return value if you want to see if the kref is now
* gone, not present.
*/
static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref))
{
/* dropping a single reference is kref_sub() with a count of 1 */
return kref_sub(kref, 1, release);
}
/*
 * kref_sub - subtract @count references from @kref.
 * @kref: the reference counter
 * @count: number of references to drop
 * @release: function called with @kref when the counter reaches zero;
 *           a NULL value triggers a warning
 *
 * Returns 1 if the counter reached zero and @release was called,
 * 0 otherwise.
 */
static inline int kref_sub(struct kref *kref, unsigned int count,
		void (*release)(struct kref *kref))
{
	WARN_ON(release == NULL);

	/* references remain: nothing to clean up */
	if (!atomic_sub_and_test((int) count, &kref->refcount))
		return 0;

	release(kref);
	return 1;
}
#include <linux/cdev.h>
/* initialize @cdev and remember @fops; @fops is const, matching the definition of cdev_init() */
void cdev_init(struct cdev *cdev, const struct file_operations *fops);
/* make @dev live for @count consecutive device numbers starting at @num */
int cdev_add(struct cdev *dev, dev_t num, unsigned int count);
/* remove @dev from the system */
void cdev_del(struct cdev *dev);
接下来的例子注册和初始化MY_MAX_MINORS个设备
#include <linux/fs.h>
#include <linux/cdev.h>
#define MY_MAJOR 42
#define MY_MAX_MINORS 5
/* Per-device state; the cdev is embedded so container_of() can recover the structure. */
struct my_device_data {
struct cdev cdev;
/* my data starts here */
//...
};
/* one entry per minor number handled by this driver */
struct my_device_data devs[MY_MAX_MINORS];
/*
 * Operations table passed to cdev_init() for every device of this driver.
 * Members not listed here are left zero-initialized.
 */
const struct file_operations my_fops = {
.owner = THIS_MODULE,
.open = my_open,
.read = my_read,
.write = my_write,
.release = my_release,
.unlocked_ioctl = my_ioctl
};
/*
 * init_module - register MY_MAX_MINORS character devices under MY_MAJOR.
 *
 * Reserves the device number range, then initializes and adds one cdev per
 * minor. If adding a cdev fails, the cdevs already added are removed and
 * the device number range is released, so a failed load leaves nothing
 * registered.
 *
 * Returns 0 on success or the negative error code of the failing call.
 */
int init_module(void)
{
	int i, err;

	err = register_chrdev_region(MKDEV(MY_MAJOR, 0), MY_MAX_MINORS,
			"my_device_driver");
	if (err != 0) {
		/* report error */
		return err;
	}

	for (i = 0; i < MY_MAX_MINORS; i++) {
		/* initialize devs[i] fields */
		cdev_init(&devs[i].cdev, &my_fops);
		err = cdev_add(&devs[i].cdev, MKDEV(MY_MAJOR, i), 1);
		if (err != 0)
			goto fail;
	}
	return 0;

fail:
	/* devs[i] was not added; undo devs[i-1] .. devs[0] */
	while (--i >= 0)
		cdev_del(&devs[i].cdev);
	unregister_chrdev_region(MKDEV(MY_MAJOR, 0), MY_MAX_MINORS);
	return err;
}
接下来是前面的逆操作
/*
 * cleanup_module - undo init_module(): remove every cdev, then give back
 * the device number range starting at MKDEV(MY_MAJOR, 0).
 */
void cleanup_module(void)
{
	int minor = 0;

	while (minor < MY_MAX_MINORS) {
		/* release devs[minor] fields */
		cdev_del(&devs[minor].cdev);
		minor++;
	}

	unregister_chrdev_region(MKDEV(MY_MAJOR, 0), MY_MAX_MINORS);
}
访问进程的地址空间
设备的驱动是应用程序和硬件之间的接口.所以,我们经常需要访问用户空间的数据.但是这种访问不能直接进行(即不能直接解引用用户空间的指针).如果直接访问用户空间指针,那么将会出现预想不到的情况(根据不同的架构,用户模式的指针可能是无效的,或者没有被映射到内核空间).正确的访问用户空间数据的方式是通过调用下面的宏/函数来完成.
#include <asm/uaccess.h>
/* Schematic signatures ("type" stands for the type of the value being transferred). */
/* single-value transfers between the kernel and the user address @address */
put_user(type val, type *address);
get_user(type val, type *address);
/* block transfers of @n bytes between kernel and user memory */
unsigned long copy_to_user(void __user *to, const void *from, unsigned long n);
unsigned long copy_from_user(void *to, const void __user *from, unsigned long n);
#define put_user __put_user
#define get_user __get_user
/*
* The "__xxx" versions do not do address space checking, useful when
* doing multiple accesses to the same area (the user has to do the
* checks by hand with "access_ok()")
*/
#define __put_user(x,ptr) __put_user_nocheck((x), (ptr), sizeof(*(ptr)))
#define __get_user(x,ptr) __get_user_nocheck((x), (ptr), sizeof(*(ptr)))
/*
 * __get_user_nocheck - fetch @size bytes from user pointer @ptr into @x.
 * The value is read into an unsigned long temporary by __get_user_size(),
 * then cast to the pointee type of @ptr and assigned to @x. The statement
 * expression evaluates to the error code filled in by __get_user_size().
 */
#define __get_user_nocheck(x,ptr,size) \
({ \
long __gu_err; \
unsigned long __gu_val; \
const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
__chk_user_ptr(ptr); \
__get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
(x) = (__typeof__(*(ptr)))__gu_val; \
__gu_err; \
})
/*
 * __put_user_nocheck - store @x (@size bytes) at user pointer @ptr.
 * @x is first converted to the pointee type of @ptr, then written by
 * __put_user_size(). The statement expression evaluates to the error code
 * filled in by __put_user_size().
 */
#define __put_user_nocheck(x,ptr,size) \
({ \
long __pu_err; \
__typeof__(*(ptr)) __user *__pu_addr = (ptr); \
__typeof__(*(ptr)) __pu_val = x; \
__chk_user_ptr(ptr); \
__put_user_size(__pu_val, __pu_addr, (size), __pu_err); \
__pu_err; \
})
copy_to_user
/*
 * copy_to_user - copy @n bytes from kernel buffer @from to user buffer @to.
 * The copy is attempted only if __access_ok() accepts the destination
 * range; in that case the result is whatever __copy_user() returns.
 * If the check fails nothing is copied and the result is @n. Callers in
 * this document treat a non-zero result as failure.
 */
#define copy_to_user(to, from, n) \
({ \
void __user *__cu_to = (to); \
const void *__cu_from = (from); \
long __cu_len = (n); \
\
if (__access_ok(__cu_to, __cu_len, get_fs())) \
__cu_len = __copy_user(__cu_to, (__force void __user *) __cu_from, __cu_len); \
__cu_len; \
})
copy_from_user
#define copy_from_user _copy_from_user
/*
 * _copy_from_user - copy @n bytes from user buffer @from to kernel buffer @to.
 * @to:   kernel destination
 * @from: user-space source
 * @n:    number of bytes to copy
 *
 * If the source range passes access_ok(), the result of __copy_from_user()
 * is returned. Otherwise the destination is zero-filled, so the caller
 * never sees stale kernel memory, and @n is returned.
 *
 * The return type is spelled out as unsigned long (the type of @n and of
 * the copy_from_user() prototype shown earlier) instead of relying on an
 * implicit int.
 */
unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n)
{
	if (access_ok(VERIFY_READ, from, n))
		n = __copy_from_user(to, from, n);
	else
		memset(to, 0, n);
	return n;
}
/*
* __copy_from_user/copy_to_user are based on ones in asm-generic/uaccess.h
*
* C6X supports unaligned 32 and 64 bit loads and stores.
*
* Always returns 0. When @n is a compile-time constant of 1, 4 or 8 the
* copy is done with a single byte assignment or one load/store pair in
* inline assembly; every other case falls through to memcpy().
*/
static inline __must_check long __copy_from_user(void *to,
const void __user *from, unsigned long n)
{
/* scratch registers for the 4-byte and 8-byte inline-asm copies */
u32 tmp32;
u64 tmp64;
if (__builtin_constant_p(n)) {
switch (n) {
case 1:
*(u8 *)to = *(u8 __force *)from;
return 0;
case 4:
/* load a word from *from, wait, store it to *to */
asm volatile ("ldnw .d1t1 *%2,%0\n"
"nop 4\n"
"stnw .d1t1 %0,*%1\n"
: "=&a"(tmp32)
: "A"(to), "a"(from)
: "memory");
return 0;
case 8:
/* same sequence with the double-word load/store instructions */
asm volatile ("ldndw .d1t1 *%2,%0\n"
"nop 4\n"
"stndw .d1t1 %0,*%1\n"
: "=&a"(tmp64)
: "a"(to), "a"(from)
: "memory");
return 0;
default:
break;
}
}
/* generic path: non-constant or other sizes */
memcpy(to, (const void __force *)from, n);
return 0;
}
下面的是一个例子
#include <asm/uaccess.h>
/*
* Copy at most size bytes to user space.
* Return ''0'' on success and some other value on error.
*/
if (copy_to_user(user_buffer, kernel_buffer, size))
return -EFAULT;
else
return 0;
打开和释放
open方法执行设备的初始化操作.在大多数的例子中,这些操作总是伴随着初始化设备和填充数据.释放函数用来释放和设备相关的资源,解锁相关的数据和在最后一个访问关闭了之后关闭设备.
在大多数例子中,open函数将会包含如下这样的结构:
/*
 * Typical shape of an open handler: recover the per-device structure from
 * inode->i_cdev, check whether the caller may open the device, publish the
 * structure in file->private_data, then initialize the device.
 */
static int my_open(struct inode *inode, struct file *file)
{
struct my_device_data *my_data =
container_of(inode->i_cdev, struct my_device_data, cdev);
/* validate access to device */
file->private_data = my_data;
/* initialize device */
...
return 0;
}
实现open函数时会遇到访问控制(access control)的问题.有时设备在同一时间只能被打开一次,也就是说,在释放(release)之前不允许进行第二次的打开.为了实现这个约束,需要选择一种方式来处理对已经打开的设备的open调用,例如返回错误(-EBUSY).
在用户空间的对设备进行open和close的函数,可以调用my_open
和my_release
int fd = open("/dev/my_device", O_RDONLY);
if (fd < 0) {
/* handle error */
}
/* do work */
//..
close(fd);
读和写
用户空间的程序调用read或者write系统调用的时候,read和write操作将会传到设备驱动那里.
if (read(fd, buffer, size) < 0) {
/* handle error */
}
if (write(fd, buffer, size) < 0) {
/* handle error */
}
read和write函数在设备和用户空间之间传送数据:read函数从设备中读取数据并且将数据传递到用户空间.write函数读取用户空间的数据并且将该数据写入到设备当中.buffer是一个参数,其作为用户空间的指针而存在,这就是需要使用copy_to_user或者copy_from_user函数的原因.
read或者write函数的返回值可以是:
传送的字节的数目;如果返回值小于参数size的值,那么意味着只是传送了一部分数据.大多数情况下,用户空间的应用程序调用系统调用write或者read函数直到传送数据的数目满足请求数据的数目.
0:对于read操作,返回0表示已经到达文件的末尾.如果write返回0,那么说明没有写入任何一个字节并且也没有发生错误,这种情况下,用户空间的应用程序应当重新执行write调用.
负数:表示错误码.
为了完成一次可能由若干次部分传输组成的数据传输,read和write函数需要执行以下操作:
- 在buffer和设备之间传输尽可能多字节数的数据(写入到设备当中或者从设备中读取数据到buffer中)
- 更新offset来决定下次传输的位置
- 返回传输的数据的字节数
下面的例子中展示了read函数的例子,这将内部缓冲区大小,用户缓冲区大小和offset考虑在内.
static int my_read(struct file *file, char __user *user_buffer,
size_t size, loff_t *offset)
{
struct my_device_data *my_data = (struct my_device_data *) file->private_data;
ssize_t len = min(my_data->size - *offset, size);//这里my_data->size - *offset表示设备中还需要传送多大的数据量,size表示用户区缓存的大小
if (len <= 0)
return 0;
/* read data from my_data->buffer to user buffer */
if (copy_to_user(user_buffer, my_data->buffer + *offset, len))//从设备缓存中传送len大小的数据到用户缓存中
return -EFAULT;
*offset += len;//更改偏移
return len;
}
下面的图片展示了读取操作,也展示了数据在用户空间和driver之间是如何进行传输的.
当然了,有两种情况需要我们考虑.
1.设备缓存中的数据量大于用户空间缓存的大小.那么将会从设备缓存的offset偏移处,传输size(用户空间的缓存大小)大小的数据量.
2.设备缓存中的数据量小于用户空间缓存的大小.那么会将设备缓存中全部的数据量传输到用户空间的缓存中,在完成传输之后,用户空间缓存有富余.
我们可以读取操作中看出设备驱动的读取操作根据用户空间的读取请求做出响应.在这个例子中,设备驱动在根据读取的数据量的大小对offset指向的内容进行维护,并且返回读取的数据量的大小.
write函数的操作是类似的
static int my_write(struct file *file, const char __user *user_buffer,
size_t size, loff_t * offset)
{
struct my_device_data *my_data = (struct my_device_data *) file->private_data;
ssize_t len = min(my_data->size - *offset, size);
if (len <= 0)
return 0;
/* read data from user buffer to my_data->buffer */
if (copy_from_user(my_data->buffer + *offset, user_buffer, len))
return -EFAULT;
*offset += len;
return len;
}
write操作也会对来自用户空间的write请求做出响应.在这个例子中,根据设备驱动容量的最大值(MAXSIZ),可能会写入多于或者小于请求数据的大小.
ioctl
除了read和write操作,设备驱动需要具备执行特定物理设备控制任务的能力.这些操作通过实现ioctl函数来完成.刚开始,ioctl系统调用使用Big Kernel Lock.这就是这种系统调用逐渐被unlock的版本(unlocked_ioctl)取代的原因.
/*
 * ioctl - example unlocked_ioctl handler dispatching LIRC commands.
 * @filep: open file; private_data holds the struct IR of the device
 * @cmd:   command code
 * @arg:   user-space address, used as a pointer to unsigned long
 *
 * "GET" commands write one value to user space with put_user(); "SET"
 * commands read one value with get_user() and validate it. Returns the
 * put_user()/get_user() result, -ENOSYS when the feature bits do not
 * include the requested capability, or -EINVAL for a rejected mode or an
 * unknown command.
 */
static long ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
struct IR *ir = filep->private_data;
/* arg carries a user-space address, never dereferenced directly */
unsigned long __user *uptr = (unsigned long __user *)arg;
int result;
unsigned long mode, features;
features = ir->l.features;
switch (cmd) {
case LIRC_GET_LENGTH:
result = put_user(13UL, uptr);
break;
case LIRC_GET_FEATURES:
result = put_user(features, uptr);
break;
case LIRC_GET_REC_MODE:
if (!(features&LIRC_CAN_REC_MASK))
return -ENOSYS;
result = put_user(LIRC_REC2MODE
(features&LIRC_CAN_REC_MASK),
uptr);
break;
case LIRC_SET_REC_MODE:
if (!(features&LIRC_CAN_REC_MASK))
return -ENOSYS;
result = get_user(mode, uptr);
/* the requested mode must be one the device advertises */
if (!result && !(LIRC_MODE2REC(mode) & features))
result = -EINVAL;
break;
case LIRC_GET_SEND_MODE:
if (!(features&LIRC_CAN_SEND_MASK))
return -ENOSYS;
result = put_user(LIRC_MODE_LIRCCODE, uptr);
break;
case LIRC_SET_SEND_MODE:
if (!(features&LIRC_CAN_SEND_MASK))
return -ENOSYS;
result = get_user(mode, uptr);
/* only LIRC_MODE_LIRCCODE is accepted as send mode */
if (!result && mode != LIRC_MODE_LIRCCODE)
return -EINVAL;
break;
default:
return -EINVAL;
}
return result;
}
/*
 * put_user - store @x at the user-space location @ptr.
 * The pointer is range-checked by __chk_user_ptr() and the value is then
 * written with a single ACCESS_ONCE() store. The statement expression
 * always evaluates to 0.
 *
 * This slot previously held a second, identical copy of get_user; the
 * store counterpart used by the ioctl example is shown here instead.
 */
#define put_user(x, ptr) \
({ \
typeof(ptr) __pu_ptr = (ptr); \
__chk_user_ptr(__pu_ptr, sizeof(*__pu_ptr)); \
ACCESS_ONCE(*(__pu_ptr)) = x; \
0; \
})
/*
 * get_user - load the value at the user-space location @ptr into @x.
 * The pointer is range-checked by __chk_user_ptr() and the value is then
 * read with a single ACCESS_ONCE() load. The statement expression always
 * evaluates to 0.
 */
#define get_user(x, ptr) \
({ \
typeof(ptr) __pu_ptr = (ptr); \
__chk_user_ptr(__pu_ptr, sizeof(*__pu_ptr)); \
x = ACCESS_ONCE(*(__pu_ptr)); \
0; \
})
/*
 * __chk_user_ptr - assert that the @size bytes starting at @p lie inside
 * the range [__user_addr_min, __user_addr_max]. A violation trips the
 * assert() instead of returning an error.
 */
static inline void __chk_user_ptr(const volatile void *p, size_t size)
{
assert(p >= __user_addr_min && p + size <= __user_addr_max);
}
cmd是来自用户空间的命令.如果用户空间的调用传入的是一个数值,那么可以直接通过arg访问该数值.如果传入的是一个buffer,那么arg是指向这个用户空间buffer的指针,必须通过copy_to_user或者copy_from_user进行访问.
在实现ioctl函数之前,和commands相关的数字代码需要被选择.一种方法是从0开始选择连续的numbers,但是建议使用_IOC(dir,type,nr,size)宏定义来生成ioctl代码.这个宏定义如下所示
#ifndef _SPARC_IOCTL_H
#define _SPARC_IOCTL_H
/*
* Our DIR and SIZE overlap in order to simulteneously provide
* a non-zero _IOC_NONE (for binary compatibility) and
* 14 bits of size as on i386. Here's the layout:
*
* 0xE0000000 DIR
* 0x80000000 DIR = WRITE
* 0x40000000 DIR = READ
* 0x20000000 DIR = NONE
* 0x3FFF0000 SIZE (overlaps NONE bit)
* 0x0000FF00 TYPE
* 0x000000FF NR (CMD)
*/
#define _IOC_NRBITS 8
#define _IOC_TYPEBITS 8
#define _IOC_SIZEBITS 13 /* Actually 14, see below. */
#define _IOC_DIRBITS 3
#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1)
#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1)
#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1)
#define _IOC_XSIZEMASK ((1 << (_IOC_SIZEBITS+1))-1)
#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1)
#define _IOC_NRSHIFT 0
#define _IOC_TYPESHIFT (_IOC_NRSHIFT + _IOC_NRBITS)
#define _IOC_SIZESHIFT (_IOC_TYPESHIFT + _IOC_TYPEBITS)
#define _IOC_DIRSHIFT (_IOC_SIZESHIFT + _IOC_SIZEBITS)
#define _IOC_NONE 1U
#define _IOC_READ 2U
#define _IOC_WRITE 4U
/*
 * _IOC - build an ioctl command code by shifting each field to its
 * position and OR-ing the results:
 *   @dir  transfer direction (_IOC_NONE, _IOC_READ, _IOC_WRITE)
 *   @type magic number identifying the driver
 *   @nr   command number within the driver
 *   @size size of the transferred data
 */
#define _IOC(dir,type,nr,size) \
(((dir) << _IOC_DIRSHIFT) | \
((type) << _IOC_TYPESHIFT) | \
((nr) << _IOC_NRSHIFT) | \
((size) << _IOC_SIZESHIFT))
- dir represents the data transfer (_IOC_NONE , _IOC_READ, _IOC_WRITE).
- type represents the magic number (Documentation/ioctl/ioctl-number.txt);
- nr is the ioctl code for the device;
- size is the size of the transferred data.
可以看出,dir代表数据传送的方向,type代表的是魔数,nr代表ioctl代码,size代表传输数据的大小.
下面的例子展示了ioctl函数的使用
#include <asm/ioctl.h>
#define MY_IOCTL_IN _IOC(_IOC_WRITE, 'k', 1, sizeof(my_ioctl_data))
static long my_ioctl (struct file *file, unsigned int cmd, unsigned long arg)
{
struct my_device_data *my_data =
(struct my_device_data*) file->private_data;
my_ioctl_data mid;
switch(cmd) {
case MY_IOCTL_IN:
if( copy_from_user(&mid, (my_ioctl_data *) arg,
sizeof(my_ioctl_data)) )
return -EFAULT;
/* process data and execute command */
break;
default:
return -ENOTTY;
}
return 0;
}
在用户空间调用ioctl函数,设备驱动的my_ioctl函数也将会被调用,例子如下.
if (ioctl(fd, MY_IOCTL_IN, buffer) < 0) {
/* handle error */
}
等待队列
线程有必要等待操作的结束,但是理想状态是这种等待不是忙等(因为忙等会占用cpu资源).使用等待队列,我们可以在一个事件到来之前将线程阻塞.当条件满足的时候,无论是在内核,在其他的进程中,还是在中断或者延迟工作中,我们可以对其进行唤醒.
等待队列是正在等待特定事件的进程的列表.通过wait_queue_head_t类型可以对这样的队列进行定义.可以通过函数/宏来进行使用.
#include <linux/wait.h>
DECLARE_WAIT_QUEUE_HEAD(wq_name);
void init_waitqueue_head(wait_queue_head_t *q);
int wait_event(wait_queue_head_t q, int condition);
int wait_event_interruptible(wait_queue_head_t q, int condition);
int wait_event_timeout(wait_queue_head_t q, int condition, int timeout);
int wait_event_interruptible_timeout(wait_queue_head_t q, int condition, int timeout);
void wake_up(wait_queue_head_t *q);
void wake_up_interruptible(wait_queue_head_t *q);
/**
* wait_event_interruptible - sleep until a condition gets true
* @wq: the waitqueue to wait on
* @condition: a C expression for the event to wait for
*
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
* @condition evaluates to true or a signal is received.
* The @condition is checked each time the waitqueue @wq is woken up.
*
* wake_up() has to be called after changing any variable that could
* change the result of the wait condition.
*
* The function will return -ERESTARTSYS if it was interrupted by a
* signal and 0 if @condition evaluated to true.
*
* @condition is tested once up front; __wait_event_interruptible() is
* only entered when that first test is false.
*/
#define wait_event_interruptible(wq, condition) \
({ \
int __ret = 0; \
if (!(condition)) \
__ret = __wait_event_interruptible(wq, condition); \
__ret; \
})
/*
 * Operations table made only of generic seek/read/mmap/splice-read
 * helpers; no write-side callback is set.
 */
const struct file_operations generic_ro_fops = {
.llseek = generic_file_llseek,
.read = new_sync_read,
.read_iter = generic_file_read_iter,
.mmap = generic_file_readonly_mmap,
.splice_read = generic_file_splice_read,
};
/*
 * vfs_read - validate a read request and dispatch it to the file's
 * operations table.
 * @file:  file to read from
 * @buf:   user-space destination buffer
 * @count: number of bytes requested
 * @pos:   file position to read at
 *
 * Returns the result of the selected read implementation, or -EBADF /
 * -EINVAL / -EFAULT / the rw_verify_area() error when a check fails.
 */
ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
{
ssize_t ret;
/* the file must have been opened for reading ... */
if (!(file->f_mode & FMODE_READ))
return -EBADF;
/* ... and must be flagged as able to read */
if (!(file->f_mode & FMODE_CAN_READ))
return -EINVAL;
/* a read stores into the user buffer, hence VERIFY_WRITE */
if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
return -EFAULT;
ret = rw_verify_area(READ, file, pos, count);
if (ret >= 0) {
/* a non-negative result replaces the requested byte count */
count = ret;
/* prefer ->read, then ->aio_read via do_sync_read(), else new_sync_read() */
if (file->f_op->read)
ret = file->f_op->read(file, buf, count, pos);
else if (file->f_op->aio_read)
ret = do_sync_read(file, buf, count, pos);
else
ret = new_sync_read(file, buf, count, pos);
/* notification and byte accounting only when data was actually read */
if (ret > 0) {
fsnotify_access(file);
add_rchar(current, ret);
}
inc_syscr(current);
}
return ret;
}
/* Head of a wait queue: a spinlock plus the list of waiting entries. */
struct __wait_queue_head {
spinlock_t lock;
struct list_head task_list;
};
/* the name drivers use, e.g. for the wq member of a device structure */
typedef struct __wait_queue_head wait_queue_head_t;
/* Compare-and-exchange on the counter of atomic variable @v: old value @o, new value @n. */
#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
/*
 * Type-generic wrapper: passes the pointer and both values to __cmpxchg()
 * as unsigned long and casts the returned previous value back to the
 * pointee type of @ptr.
 */
#define cmpxchg(ptr, o, n) \
((__typeof__(*(ptr))) __cmpxchg((unsigned long *)(ptr), \
(unsigned long)(o), \
(unsigned long)(n)))
/*
 * __cmpxchg - compare-and-exchange on the word at @ptr.
 * @ptr:      address of the word
 * @expected: value the word must currently hold for the exchange to happen
 * @new:      value to store when it does
 *
 * Returns the value found at @ptr; the exchange took place exactly when
 * that value equals @expected.
 *
 * The return type is spelled out (the excerpt had none, which would make
 * it an implicit int); static inline follows the upstream header this is
 * quoted from.
 */
static inline unsigned long
__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
{
	unsigned long prev;

	/*
	 * Explicit full memory barrier needed before/after as
	 * LLOCK/SCOND thmeselves don't provide any such semantics
	 */
	smp_mb();

	/*
	 * Loop: llock loads the current value; if it differs from @expected
	 * branch to 2 (no store); otherwise scond tries to store @new and the
	 * sequence is retried from 1 when the conditional store fails.
	 */
	__asm__ __volatile__(
	"1: llock %0, [%1] \n"
	" brne %0, %2, 2f \n"
	" scond %3, [%1] \n"
	" bnz 1b \n"
	"2: \n"
	: "=&r"(prev) /* Early clobber, to prevent reg reuse */
	: "r"(ptr), /* Not "m": llock only supports reg direct addr mode */
	"ir"(expected),
	"r"(new) /* can't be "ir". scond can't take LIMM for "b" */
	: "cc", "memory"); /* so that gcc knows memory is being written here */

	smp_mb();

	return prev;
}
完整的字符设备驱动代码
/*
* Character device drivers lab
*
* All tasks
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/uaccess.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include "../include/so2_cdev.h"
MODULE_DESCRIPTION("SO2 character device");
MODULE_AUTHOR("SO2");
MODULE_LICENSE("GPL");
#define LOG_LEVEL KERN_DEBUG
#define MY_MAJOR 42
#define MY_MINOR 0
#define NUM_MINORS 1
#define MODULE_NAME "so2_cdev"
#define MESSAGE "hello\n"
#define IOCTL_MESSAGE "Hello ioctl"
#ifndef BUFSIZ
#define BUFSIZ 4096
#endif
/* Per-device state of the so2 character device (one entry of devs[] per minor). */
struct so2_device_data {
/* TODO 2: add cdev member */
/* embedded so open() can recover this structure with container_of() */
struct cdev cdev;
/* TODO 4: add buffer with BUFSIZ elements */
/* device contents and the number of valid bytes in it */
char buffer[BUFSIZ];
size_t size;
/* TODO 7: extra members for home */
/* wait queue and flag used by the blocking read and the DOWN/UP ioctls */
wait_queue_head_t wq;
int flag;
/* TODO 3: add atomic_t access variable to keep track if file is opened */
/* 0 = free, 1 = currently open (set with atomic_cmpxchg in open) */
atomic_t access;
};
struct so2_device_data devs[NUM_MINORS];
/*
 * so2_cdev_open - open handler of the so2 character device.
 * @inode: inode of the device node; inode->i_cdev is the cdev embedded in
 *         struct so2_device_data
 * @file:  file being opened; private_data is set to the device data
 *
 * In builds without EXTRA the device is exclusive-open: the access flag is
 * claimed here and reset by so2_cdev_release(). With EXTRA defined,
 * release does not reset the flag, so the claim must not be taken either;
 * otherwise every open after the first would fail with -EBUSY, and a
 * second process could never issue MY_IOCTL_UP to wake one blocked in
 * MY_IOCTL_DOWN.
 *
 * Returns 0 on success or -EBUSY if the device is already open.
 */
static int so2_cdev_open(struct inode *inode, struct file *file)
{
	struct so2_device_data *data;

	/* TODO 3: inode->i_cdev contains our cdev struct, use container_of to obtain a pointer to so2_device_data */
	data = container_of(inode->i_cdev, struct so2_device_data, cdev);
	file->private_data = data;

#ifndef EXTRA
	/* TODO 3: return immediately if access is != 0, use atomic_cmpxchg */
	/* atomically switch access 0 -> 1; any other previous value means "in use" */
	if (atomic_cmpxchg(&data->access, 0, 1) != 0)
		return -EBUSY;
#endif

	/* sleep for 10 jiffies, presumably to make concurrent opens easy to observe */
	set_current_state(TASK_INTERRUPTIBLE);
	schedule_timeout(10);
	return 0;
}
/*
 * so2_cdev_release - release handler of the so2 character device.
 * In builds without EXTRA it clears the access flag so the device can be
 * opened again. Always returns 0.
 */
static int
so2_cdev_release(struct inode *inode, struct file *file)
{
#ifndef EXTRA
	struct so2_device_data *dev = file->private_data;

	/* TODO 3: reset access variable to 0, use atomic_set */
	atomic_set(&dev->access, 0);
#endif
	return 0;
}
/*
 * so2_cdev_read - copy device data to user space.
 * @file:        open file; private_data holds the struct so2_device_data
 * @user_buffer: destination buffer in user space
 * @size:        capacity of @user_buffer in bytes
 * @offset:      position in the device buffer; advanced by the bytes read
 *
 * With EXTRA defined, an empty device makes the call block until data is
 * available (or fail with -EAGAIN when the file is non-blocking).
 *
 * Returns the number of bytes copied, 0 at end of data, -EINVAL for a
 * negative offset, -EAGAIN / -ERESTARTSYS from the blocking path, or
 * -EFAULT if the user buffer cannot be written.
 */
static ssize_t
so2_cdev_read(struct file *file,
		char __user *user_buffer,
		size_t size, loff_t *offset)
{
	struct so2_device_data *data =
		(struct so2_device_data *) file->private_data;
	size_t to_read;

#ifdef EXTRA
	/* TODO 7: extra tasks for home */
	if (!data->size) {
		/* the open flags live in f_flags */
		if (file->f_flags & O_NONBLOCK)
			return -EAGAIN;
		/* returns 0 once the condition holds, non-zero if interrupted by a signal */
		if (wait_event_interruptible(data->wq, data->size != 0) != 0)
			return -ERESTARTSYS;
	}
#endif

	if (*offset < 0)
		return -EINVAL;
	/*
	 * At or past the end of the data: report end of file. Without this
	 * check data->size - *offset would wrap around and the copy below
	 * would read beyond data->buffer.
	 */
	if (*offset >= (loff_t)data->size)
		return 0;

	/* TODO 4: Copy data->buffer to user_buffer, use copy_to_user */
	to_read = data->size - *offset;
	if (to_read > size)
		to_read = size;
	if (copy_to_user(user_buffer, data->buffer + *offset, to_read) != 0)
		return -EFAULT;

	*offset += to_read;
	return to_read;
}
/*
 * so2_cdev_write - copy data from user space into the device buffer.
 * @file:        open file; private_data holds the struct so2_device_data
 * @user_buffer: source buffer in user space
 * @size:        number of bytes the caller wants to write
 * @offset:      position in the device buffer; advanced by the bytes written
 *
 * At most BUFSIZ - *offset bytes are stored. After a successful write the
 * device size becomes the new offset and readers sleeping on the wait
 * queue are woken.
 *
 * Returns the number of bytes stored, -EINVAL for a negative offset,
 * -ENOSPC when a non-empty write starts at or beyond the end of the
 * buffer, or -EFAULT if the user buffer cannot be read.
 */
static ssize_t
so2_cdev_write(struct file *file,
		const char __user *user_buffer,
		size_t size, loff_t *offset)
{
	struct so2_device_data *data =
		(struct so2_device_data *) file->private_data;
	size_t room;

	if (*offset < 0)
		return -EINVAL;
	/*
	 * No room at this position. Without this check BUFSIZ - *offset would
	 * go negative for offsets beyond the buffer and the copy below would
	 * overflow data->buffer.
	 */
	if (*offset >= BUFSIZ)
		return size ? -ENOSPC : 0;

	/* TODO 5: copy user_buffer to data->buffer, use copy_from_user */
	room = BUFSIZ - *offset;
	if (size > room)
		size = room;
	if (copy_from_user(data->buffer + *offset, user_buffer, size) != 0)
		return -EFAULT;

	*offset += size;
	data->size = *offset;

	/* TODO 7: extra tasks for home */
	/* let a reader blocked in so2_cdev_read() re-check data->size */
	wake_up_interruptible(&data->wq);

	return size;
}
static long
so2_cdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
struct so2_device_data *data =
(struct so2_device_data *) file->private_data;
int ret = 0;
int remains;
switch (cmd) {
/* TODO 6: if cmd = MY_IOCTL_PRINT, display IOCTL_MESSAGE */
case MY_IOCTL_PRINT:
printk(LOG_LEVEL "%s\n",IOCTL_MESSAGE);
break;
/* TODO 7: extra tasks, for home */
case MY_IOCTL_DOWN:
data->flag = 0;
ret = wait_event_interruptible(data->wq,data->flag!=0);
break;
case MY_IOCTL_UP:
data->flag = 1;
wake_up_interruptible(&data->wq);
break;
case MY_IOCTL_SET_BUFFER:
remains = copy_from_user(data->buffer,(char __user *)arg,BUFFER_SIZE);
if(remains)
return -EFAULT;
data->size = BUFFER_SIZE - remains;
break;
case MY_IOCTL_GET_BUFFER:
if(copy_to_user((char __user *)arg,data->buffer,data->size))
return -EFAULT;
break;
default:
ret = -EINVAL;
}
return ret;
}
/*
 * Operations table of the so2 character device, passed to cdev_init().
 * Each member is initialized exactly once (the table used to list
 * .unlocked_ioctl twice).
 */
static const struct file_operations so2_fops = {
	.owner = THIS_MODULE,
	/* TODO 2: add open, release, read, write functions */
	.open = so2_cdev_open,
	.release = so2_cdev_release,
	.read = so2_cdev_read,
	.write = so2_cdev_write,
	.unlocked_ioctl = so2_cdev_ioctl,
};
/*
 * so2_cdev_init - module entry point.
 *
 * Reserves NUM_MINORS device numbers starting at (MY_MAJOR, MY_MINOR),
 * initializes each device's buffer, wait queue and access flag, and adds
 * its cdev. If adding a cdev fails, the cdevs already added are removed
 * and the device number range is released.
 *
 * Returns 0 on success or the negative error code of the failing call.
 */
static int so2_cdev_init(void)
{
	int err;
	int i;

	/* TODO 1: register char device region for MY_MAJOR and NUM_MINORS starting at MY_MINOR */
	err = register_chrdev_region(MKDEV(MY_MAJOR, MY_MINOR), NUM_MINORS, MODULE_NAME);
	if (err != 0) {
		pr_err("register_chrdev_region failed: %d\n", err);
		return err;
	}

	for (i = 0; i < NUM_MINORS; i++) {
#ifdef EXTRA
		/* TODO 7: extra tasks, for home */
		devs[i].size = 0;
		memset(devs[i].buffer, 0, sizeof(devs[i].buffer));
#else
		/*TODO 4: initialize buffer with MESSAGE string */
		memcpy(devs[i].buffer, MESSAGE, sizeof(MESSAGE));
		/* the terminating NUL is copied but not counted as device data */
		devs[i].size = sizeof(MESSAGE) - 1;
#endif
		/* TODO 7: extra tasks for home */
		init_waitqueue_head(&devs[i].wq);
		/* TODO 3: set access variable to 0, use atomic_set */
		atomic_set(&devs[i].access, 0);
		/* TODO 2: init and add cdev to kernel core */
		cdev_init(&devs[i].cdev, &so2_fops);
		/* minors start at MY_MINOR, like the region registered above */
		err = cdev_add(&devs[i].cdev, MKDEV(MY_MAJOR, MY_MINOR + i), 1);
		if (err != 0) {
			pr_err("cdev_add failed for minor %d: %d\n", MY_MINOR + i, err);
			goto fail;
		}
	}
	return 0;

fail:
	/* devs[i] was not added; undo devs[i-1] .. devs[0] */
	while (--i >= 0)
		cdev_del(&devs[i].cdev);
	unregister_chrdev_region(MKDEV(MY_MAJOR, MY_MINOR), NUM_MINORS);
	return err;
}
/*
 * so2_cdev_exit - module exit point: remove every cdev, then release the
 * device number range starting at (MY_MAJOR, MY_MINOR).
 */
static void so2_cdev_exit(void)
{
	int minor = 0;

	/* TODO 2: delete cdev from kernel core */
	while (minor < NUM_MINORS) {
		cdev_del(&devs[minor].cdev);
		minor++;
	}

	/* TODO 1: unregister char device region, for MY_MAJOR and NUM_MINORS starting at MY_MINOR */
	unregister_chrdev_region(MKDEV(MY_MAJOR, MY_MINOR), NUM_MINORS);
}
module_init(so2_cdev_init);
module_exit(so2_cdev_exit);
在qemu虚拟机中进行简单地测试
root@qemux86:~/skels/device_drivers/kernel# lsmod
Tainted: G
root@qemux86:~/skels/device_drivers/kernel# ls
Kbuild so2_cdev.c so2_cdev.mod.c so2_cdev.o
modules.order so2_cdev.ko so2_cdev.mod.o
root@qemux86:~/skels/device_drivers/kernel# insmod so2_cdev.ko
root@qemux86:~/skels/device_drivers/kernel# lsmod
Tainted: G
so2_cdev 20480 0 - Live 0xc8853000 (O)
root@qemux86:~/skels/device_drivers/kernel# cat /proc/devices
Character devices:
1 mem
2 pty
3 ttyp
4 /dev/vc/0
4 tty
5 /dev/tty
5 /dev/console
5 /dev/ptmx
7 vcs
10 misc
13 input
42 so2_cdev
128 ptm
136 pts
229 hvc
253 virtio-portsdev
254 bsg
Block devices:
7 loop
254 virtblk
259 blkext
root@qemux86:~/skels/device_drivers/kernel# cat /dev/so2_cdev
hello
root@qemux86:~/skels/device_drivers/kernel# echo "sky" > /dev/so2_cdev
root@qemux86:~/skels/device_drivers/kernel# cat /dev/so2_cdev
sky