设备号
在fs/char_dev.c中,用一个含255个char_device_struct指针的数组chrdevs来管理设备号,主设备号对255取余就是这个数组的index;index相同的设备号区间通过char_device_struct的next指针连成链表,例如chrdevs[1]这个表头,链接了所有满足 m = 1 + 255*n (n >= 0) 的主设备号m
/* Number of buckets in the chrdevs[] table; see major_to_index(). */
#define CHRDEV_MAJOR_HASH_SIZE 255
/*
 * One registered range of character device numbers: a single major plus a
 * contiguous run of minors.  Entries whose major maps to the same bucket
 * are chained through ->next, starting at chrdevs[bucket].
 */
static struct char_device_struct {
struct char_device_struct *next; /* next entry in the same chrdevs[] bucket */
unsigned int major; /* major number of this range */
unsigned int baseminor; /* first minor number of this range */
int minorct; /* number of minors in this range */
char name[64]; /* copy of the name given at registration */
struct cdev *cdev; /* will die */
} *chrdevs[CHRDEV_MAJOR_HASH_SIZE];
/*
 * Map a major number to its bucket in chrdevs[]: the remainder of the
 * major divided by the number of buckets.
 */
static inline int major_to_index(unsigned major)
{
	unsigned int bucket = major % CHRDEV_MAJOR_HASH_SIZE;

	return bucket;
}
这个是动态申请设备号的接口,分配一次就会占用一个或多个设备号
**
* alloc_chrdev_region() - register a range of char device numbers
* @dev: output parameter for first assigned number
* @baseminor: first of the requested range of minor numbers
* @count: the number of minor numbers required
* @name: the name of the associated device or driver
*
* Allocates a range of char device numbers. The major number will be
* chosen dynamically, and returned (along with the first minor number)
* in @dev. Returns zero or a negative error code.
*/
int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count,
const char *name)
{
struct char_device_struct *cd;
cd = __register_chrdev_region(0, baseminor, count, name);
if (IS_ERR(cd))
return PTR_ERR(cd);
*dev = MKDEV(cd->major, cd->baseminor);
return 0;
}
这个是静态分配,可能需要多次分配;如果跨主设备号了,则需要多次执行__register_chrdev_region
/**
 * register_chrdev_region() - register a range of device numbers
 * @from: the first in the desired range of device numbers; must include
 *        the major number.
 * @count: the number of consecutive device numbers required
 * @name: the name of the device or driver.
 *
 * The range [@from, @from + @count) is cut at major-number boundaries and
 * every piece is registered on its own.  When a piece cannot be
 * registered, the pieces registered before it are unregistered and freed.
 *
 * Return value is zero on success, a negative error code on failure.
 */
int register_chrdev_region(dev_t from, unsigned count, const char *name)
{
	struct char_device_struct *cd;
	dev_t end = from + count;
	dev_t cur = from;
	dev_t chunk_end;

	while (cur < end) {
		/* A piece never crosses into the next major */
		chunk_end = MKDEV(MAJOR(cur) + 1, 0);
		if (chunk_end > end)
			chunk_end = end;

		cd = __register_chrdev_region(MAJOR(cur), MINOR(cur),
					      chunk_end - cur, name);
		if (IS_ERR(cd))
			goto fail;

		cur = chunk_end;
	}
	return 0;

fail:
	/* Undo everything registered before the piece that failed */
	end = cur;
	for (cur = from; cur < end; cur = chunk_end) {
		chunk_end = MKDEV(MAJOR(cur) + 1, 0);
		kfree(__unregister_chrdev_region(MAJOR(cur), MINOR(cur),
						 chunk_end - cur));
	}
	return PTR_ERR(cd);
}
申请一个char_device_struct,并将其插入到chrdevs全局数组对应的index元素的链表中的合适位置,这个"合适位置"见下面代码分析,其实就是判断要注册的设备号范围是否覆盖已有的已注册的设备号范围
/*
* Register a single major with a specified minor range.
*
* If major == 0 this function will dynamically allocate an unused major.
* If major > 0 this function will attempt to reserve the range of minors
* with given major.
*
* Returns the newly linked entry on success.  On failure returns an
* ERR_PTR(): -EINVAL for an out-of-range major or minor range, -ENOMEM
* when the entry cannot be allocated, the error from find_dynamic_major()
* when no dynamic major is available, or -EBUSY when the requested minor
* range overlaps one already registered for the same major.
*/
static struct char_device_struct *
__register_chrdev_region(unsigned int major, unsigned int baseminor,
int minorct, const char *name)
{
struct char_device_struct *cd, *curr, *prev = NULL;
int ret;
int i;
/* The major must be below CHRDEV_MAJOR_MAX */
if (major >= CHRDEV_MAJOR_MAX) {
pr_err("CHRDEV \"%s\" major requested (%u) is greater than the maximum (%u)\n",
name, major, CHRDEV_MAJOR_MAX-1);
return ERR_PTR(-EINVAL);
}
/* The whole minor range must fit inside one major (0..MINORMASK) */
if (minorct > MINORMASK + 1 - baseminor) {
pr_err("CHRDEV \"%s\" minor range requested (%u-%u) is out of range of maximum range (%u-%u) for a single major\n",
name, baseminor, baseminor + minorct - 1, 0, MINORMASK);
return ERR_PTR(-EINVAL);
}
/* Allocate the new entry before taking chrdevs_lock */
cd = kzalloc(sizeof(struct char_device_struct), GFP_KERNEL);
if (cd == NULL)
return ERR_PTR(-ENOMEM);
mutex_lock(&chrdevs_lock);
/* major == 0: pick a major dynamically */
if (major == 0) {
ret = find_dynamic_major();
if (ret < 0) {
pr_err("CHRDEV \"%s\" dynamic allocation region is full\n",
name);
goto out;
}
major = ret;
}
/* Error reported if an overlapping range is found in the scan below */
ret = -EBUSY;
i = major_to_index(major);
/*
* Walk the bucket to find the insertion point; @prev trails @curr so the
* new entry can be linked in after it.
*/
for (curr = chrdevs[i]; curr; prev = curr, curr = curr->next) {
if (curr->major < major) // 1. this entry's major is smaller than the requested one: look at the next entry
continue;
if (curr->major > major) // 2. this entry's major is larger: stop scanning, no overlap check is needed against it
break;
if (curr->baseminor + curr->minorct <= baseminor) // 3. same major, and this entry's minor range ends at or before the requested first minor: no overlap here, keep checking the following entries
continue;
if (curr->baseminor >= baseminor + minorct) // 4. same major, and this entry's minor range starts at or after the end of the requested range: no overlap, stop and insert before it
break;
/* Same major and the minor ranges overlap: fail with -EBUSY */
goto out;
}
cd->major = major;
cd->baseminor = baseminor;
cd->minorct = minorct;
strlcpy(cd->name, name, sizeof(cd->name));
/* Link the new entry at the bucket head, or right after @prev */
if (!prev) {
cd->next = curr;
chrdevs[i] = cd;
} else {
cd->next = prev->next;
prev->next = cd;
}
mutex_unlock(&chrdevs_lock);
return cd;
out:
/* Failure after allocation: drop the lock and free the unused entry */
mutex_unlock(&chrdevs_lock);
kfree(cd);
return ERR_PTR(ret);
}
移除设备号都是使用的这个接口,从chrdevs中找到对应的index元素;并从这个index的链表中,把这个设备移除
/*
 * Unlink the entry that matches @major, @baseminor and @minorct exactly
 * from its chrdevs[] bucket and hand it back to the caller.
 *
 * Returns the unlinked entry, or NULL when no entry matches.  The entry
 * is not freed here; the callers in this file pass the result to kfree().
 */
static struct char_device_struct *
__unregister_chrdev_region(unsigned major, unsigned baseminor, int minorct)
{
	struct char_device_struct *found = NULL;
	struct char_device_struct **link;
	int bucket = major_to_index(major);

	mutex_lock(&chrdevs_lock);

	/* @link always points at the pointer that leads to the current entry */
	link = &chrdevs[bucket];
	while (*link) {
		struct char_device_struct *entry = *link;

		if (entry->major == major &&
		    entry->baseminor == baseminor &&
		    entry->minorct == minorct) {
			found = entry;
			*link = entry->next;
			break;
		}
		link = &entry->next;
	}

	mutex_unlock(&chrdevs_lock);

	return found;
}
字符设备
两种方式初始化cdev,cdev_alloc只动态分配一个cdev,并初始化cdev的kobj,需在外面给ops赋值;静态的cdev_init直接初始化cdev的kobj和ops
struct cdev *cdev_alloc(void)
{
struct cdev *p = kzalloc(sizeof(struct cdev), GFP_KERNEL);
if (p) {
INIT_LIST_HEAD(&p->list);
kobject_init(&p->kobj, &ktype_cdev_dynamic);
}
return p;
}
/**
 * cdev_init() - initialize a cdev structure
 * @cdev: the structure to initialize
 * @fops: the file_operations for this device
 *
 * Zeroes @cdev, records @fops in it and sets up its list head and embedded
 * kobject (with ktype_cdev_default), making it ready to add to the system
 * with cdev_add().
 */
void cdev_init(struct cdev *cdev, const struct file_operations *fops)
{
	/* Start from a clean slate; everything below fills the zeroed struct */
	memset(cdev, 0, sizeof(*cdev));

	cdev->ops = fops;
	INIT_LIST_HEAD(&cdev->list);
	kobject_init(&cdev->kobj, &ktype_cdev_default);
}
添加cdev,跟设备号的管理机制基本一致,也是用一个255个元素的数组(kobj_map中的probes数组),来管理cdev跟dev_t之间的映射
/*
 * Probe callback handed to kobj_map() by cdev_add(): @data is the struct
 * cdev being mapped, and its embedded kobject is returned.  @dev and @part
 * are not used.
 */
static struct kobject *exact_match(dev_t dev, int *part, void *data)
{
	struct cdev *cdev = data;
	struct kobject *kobj = &cdev->kobj;

	return kobj;
}
/*
 * Lock callback handed to kobj_map() by cdev_add(): @data is the struct
 * cdev being mapped.  Returns 0 when cdev_get() succeeds on it and -1
 * otherwise.  @dev is not used.
 */
static int exact_lock(dev_t dev, void *data)
{
	struct cdev *cdev = data;

	if (!cdev_get(cdev))
		return -1;

	return 0;
}
/*
 * cdev_add() - add a char device to cdev_map
 * @p: the cdev structure for the device
 * @dev: the first device number for which this device is responsible
 * @count: the number of consecutive device numbers covered by @p
 *
 * Stores @dev and @count in @p, then maps the range to @p in cdev_map
 * with exact_match()/exact_lock() as callbacks.  On success a reference
 * is taken on the parent of @p's kobject.
 *
 * Returns zero on success, or the error returned by kobj_map().
 */
int cdev_add(struct cdev *p, dev_t dev, unsigned count)
{
	int ret;

	p->dev = dev;
	p->count = count;

	ret = kobj_map(cdev_map, dev, count, NULL,
		       exact_match, exact_lock, p);
	if (!ret)
		kobject_get(p->kobj.parent);

	return ret;
}
/*
* Map from device numbers to cdevs: cdev_add() inserts ranges into it and
* chrdev_open() looks devices up in it.
*/
static struct kobj_map *cdev_map;
/*
* Create cdev_map with kobj_map_init(), passing base_probe and
* chrdevs_lock.
*/
void __init chrdev_init(void)
{
cdev_map = kobj_map_init(base_probe, &chrdevs_lock);
}
/*
* Table used to map device numbers to kobjects: 255 chains of struct probe
* plus a pointer to a mutex.
* NOTE(review): the field meanings below are inferred from the argument
* order of the kobj_map() call in cdev_add(); confirm against the
* kobj_map() implementation.
*/
struct kobj_map {
struct probe {
struct probe *next; /* next probe in the same chain */
dev_t dev; /* presumably the first device number covered */
unsigned long range; /* presumably the number of device numbers covered */
struct module *owner; /* cdev_add() passes NULL here */
kobj_probe_t *get; /* cdev_add() passes exact_match */
int (*lock)(dev_t, void *); /* cdev_add() passes exact_lock */
void *data; /* cdev_add() passes the struct cdev */
} *probes[255];
struct mutex *lock; /* chrdev_init() passes &chrdevs_lock to kobj_map_init() */
};
注册字符设备
一个完整的注册字符设备的API,register_chrdev就是调用的这个,就是3步:1.申请设备号;2.初始化cdev的kobj及其操作函数;3.添加cdev映射
/*
 * __register_chrdev() - create and register a cdev occupying a range of minors
 * @major: major device number, or 0 for dynamic allocation
 * @baseminor: first of the requested range of minor numbers
 * @count: the number of minor numbers required
 * @name: name of this range of devices
 * @fops: file operations associated with these devices
 *
 * Three steps: reserve the device-number range, allocate a cdev and point
 * it at @fops, then add the cdev to cdev_map.  Each step is undone if a
 * later one fails.
 *
 * Returns a negative error code on failure.  On success returns zero when
 * @major was given, or the allocated major number when @major was 0.
 */
int __register_chrdev(unsigned int major, unsigned int baseminor,
		      unsigned int count, const char *name,
		      const struct file_operations *fops)
{
	struct char_device_struct *region;
	struct cdev *cdev;
	int err;

	region = __register_chrdev_region(major, baseminor, count, name);
	if (IS_ERR(region))
		return PTR_ERR(region);

	cdev = cdev_alloc();
	if (!cdev) {
		err = -ENOMEM;
		goto out_unregister;
	}

	cdev->owner = fops->owner;
	cdev->ops = fops;
	kobject_set_name(&cdev->kobj, "%s", name);

	err = cdev_add(cdev, MKDEV(region->major, baseminor), count);
	if (err)
		goto out_put;

	region->cdev = cdev;

	if (major)
		return 0;
	return region->major;

out_put:
	kobject_put(&cdev->kobj);
out_unregister:
	kfree(__unregister_chrdev_region(region->major, baseminor, count));
	return err;
}
创建字符设备文件节点
对于2.4的内核,只能在用户空间通过mknod创建;对于2.6版本的内核,使用__register_chrdev申请设备号并与cdev建立联系后,udev守护进程匹配到相应规则,由udev触发执行mknod系统调用,来创建设备文件/dev/xxx和所对应的inode,并将默认的def_chr_fops赋给inode的i_fop,将设备号保存在inode的i_rdev中
mknod
do_mknodat
vfs_mknod
shmem_mknod
shmem_get_inode
init_special_inode
/*
 * init_special_inode() - set up an inode for a special file
 * @inode: inode to initialize
 * @mode: file mode, stored in ->i_mode and used to select the file type
 * @rdev: device number, stored in ->i_rdev for char and block devices
 *
 * Char and block devices get their default file_operations and @rdev;
 * FIFOs get pipefifo_fops; sockets keep whatever ->i_fop they already
 * have.  Any other mode is reported with a debug message.
 */
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
	inode->i_mode = mode;

	if (S_ISCHR(mode)) {
		inode->i_fop = &def_chr_fops;
		inode->i_rdev = rdev;
		return;
	}

	if (S_ISBLK(mode)) {
		inode->i_fop = &def_blk_fops;
		inode->i_rdev = rdev;
		return;
	}

	if (S_ISFIFO(mode)) {
		inode->i_fop = &pipefifo_fops;
		return;
	}

	if (S_ISSOCK(mode))
		return;	/* leave it no_open_fops */

	printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
			  " inode %s:%lu\n", mode, inode->i_sb->s_id,
			  inode->i_ino);
}
EXPORT_SYMBOL(init_special_inode);
/*
* Default file operations installed by init_special_inode() on every
* character-device inode.  Only open and llseek are provided; chrdev_open()
* swaps in the driver's own file_operations when the file is opened.
*/
const struct file_operations def_chr_fops = {
.open = chrdev_open,
.llseek = noop_llseek,
};
高版本内核通过class_create/device_create,由devtmpfs直接创建设备节点;当然mknod也还是可以用的,只是udev不再主动调用mknod去创建和删除设备节点文件,而是做其他的工作
device_create
device_add
devtmpfs_create_node
wake_up_process(thread)
thread = kthread_run(devtmpfsd, &err, "kdevtmpfs");
devtmpfsd
handle_create
vfs_mknod
打开字符设备
open系统调用打开字符设备文件时,会调用do_dentry_open,里面的f->f_op = fops_get(inode->i_fop);取到的就是上面init_special_inode里设置的inode->i_fop = &def_chr_fops;随后调用的f->f_op->open就是其中的open,即chrdev_open
open
do_sys_open
do_sys_openat2
do_filp_open
path_openat
vfs_open
do_dentry_open
/*
* Finish opening file @f on @inode.
*
* @f: file being opened; f_path, f_flags and f_mode are read here
* @inode: inode the file refers to
* @open: open method to call, or NULL to use f->f_op->open
*
* Takes a reference on f->f_path, installs the inode's file_operations
* into f->f_op, calls the open method and sets the FMODE_* state bits.
* Returns 0 on success or a negative error code; on the cleanup paths the
* references taken here are dropped again and f_path/f_inode are cleared.
*/
static int do_dentry_open(struct file *f,
struct inode *inode,
int (*open)(struct inode *, struct file *))
{
static const struct file_operations empty_fops = {};
int error;
path_get(&f->f_path);
f->f_inode = inode;
f->f_mapping = inode->i_mapping;
/* Ensure that we skip any errors that predate opening of the file */
f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
/* O_PATH opens get empty file_operations and never call ->open() */
if (unlikely(f->f_flags & O_PATH)) {
f->f_mode = FMODE_PATH | FMODE_OPENED;
f->f_op = &empty_fops;
return 0;
}
/* Any file opened for execve()/uselib() has to be a regular file. */
if (unlikely(f->f_flags & FMODE_EXEC && !S_ISREG(inode->i_mode))) {
error = -EACCES;
goto cleanup_file;
}
/*
* Writers of non-special files take write access on the inode and the
* mount; FMODE_WRITER records that both must be dropped again.
*/
if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
error = get_write_access(inode);
if (unlikely(error))
goto cleanup_file;
error = __mnt_want_write(f->f_path.mnt);
if (unlikely(error)) {
put_write_access(inode);
goto cleanup_file;
}
f->f_mode |= FMODE_WRITER;
}
/* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */
if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))
f->f_mode |= FMODE_ATOMIC_POS;
/*
* Start with the inode's file_operations; for a character device this is
* def_chr_fops, set by init_special_inode().
*/
f->f_op = fops_get(inode->i_fop);
if (WARN_ON(!f->f_op)) {
error = -ENODEV;
goto cleanup_all;
}
error = security_file_open(f);
if (error)
goto cleanup_all;
error = break_lease(locks_inode(f), f->f_flags);
if (error)
goto cleanup_all;
/* normally all 3 are set; ->open() can clear them if needed */
f->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
/* Without an explicit @open, use the one from the file_operations */
if (!open)
open = f->f_op->open;
if (open) {
error = open(inode, f);
if (error)
goto cleanup_all;
}
f->f_mode |= FMODE_OPENED;
/* Read-only opens are counted on the inode */
if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
i_readcount_inc(inode);
/* CAN_READ/CAN_WRITE require the corresponding method in f_op */
if ((f->f_mode & FMODE_READ) &&
likely(f->f_op->read || f->f_op->read_iter))
f->f_mode |= FMODE_CAN_READ;
if ((f->f_mode & FMODE_WRITE) &&
likely(f->f_op->write || f->f_op->write_iter))
f->f_mode |= FMODE_CAN_WRITE;
f->f_write_hint = WRITE_LIFE_NOT_SET;
/* These flags only matter while opening; clear them from f_flags */
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
/* NB: we're sure to have correct a_ops only after f_op->open */
if (f->f_flags & O_DIRECT) {
/*
* NOTE(review): this returns without going through the cleanup labels
* while FMODE_OPENED is already set; presumably the caller releases the
* file in that case - confirm.
*/
if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
return -EINVAL;
}
/*
* XXX: Huge page cache doesn't support writing yet. Drop all page
* cache for this file before processing writes.
*/
if ((f->f_mode & FMODE_WRITE) && filemap_nr_thps(inode->i_mapping))
truncate_pagecache(inode, 0);
return 0;
/* Error after f_op was installed: also drop f_op and any write access */
cleanup_all:
if (WARN_ON_ONCE(error > 0))
error = -EINVAL;
fops_put(f->f_op);
if (f->f_mode & FMODE_WRITER) {
put_write_access(inode);
__mnt_drop_write(f->f_path.mnt);
}
/* Common tail: drop the path reference and detach the file from it */
cleanup_file:
path_put(&f->f_path);
f->f_path.mnt = NULL;
f->f_path.dentry = NULL;
f->f_inode = NULL;
return error;
}
打开字符设备的时候,从cdev_map根据inode->i_rdev,这个是设备号,来找到对应的kobj,以解析出对应设备的cdev和具体ops,并把驱动实现的具体ops(open,read,write等),替换filp原本通用的ops;spidev.c和i2c-dev.c都是字符设备驱动最好的例子
/*
* Open method of def_chr_fops, called when a character-device file is
* opened.
*
* Finds the cdev for @inode (cached in inode->i_cdev, otherwise looked up
* in cdev_map by inode->i_rdev), replaces @filp's file_operations with the
* cdev's ->ops and then calls that ->open, if there is one.
*
* Returns 0 on success, -ENXIO when no usable cdev or file_operations can
* be obtained, or the error returned by the driver's ->open.
*/
static int chrdev_open(struct inode *inode, struct file *filp)
{
const struct file_operations *fops;
struct cdev *p;
struct cdev *new = NULL;
int ret = 0;
spin_lock(&cdev_lock);
p = inode->i_cdev;
if (!p) {
struct kobject *kobj;
int idx;
/* No cached cdev: drop the spinlock around the cdev_map lookup */
spin_unlock(&cdev_lock);
kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
if (!kobj)
return -ENXIO;
new = container_of(kobj, struct cdev, kobj);
spin_lock(&cdev_lock);
/* Check i_cdev again in case somebody beat us to it while
we dropped the lock. */
p = inode->i_cdev;
if (!p) {
/*
* Cache the cdev in the inode and put the inode on the cdev's list;
* clearing @new keeps the cdev_put() below from touching it.
*/
inode->i_cdev = p = new;
list_add(&inode->i_devices, &p->list);
new = NULL;
} else if (!cdev_get(p))
ret = -ENXIO;
} else if (!cdev_get(p))
ret = -ENXIO;
spin_unlock(&cdev_lock);
/*
* @new is still set only when the lookup result was not installed above.
* NOTE(review): presumably this drops the reference obtained by
* kobj_lookup(), and cdev_put() accepts NULL - confirm.
*/
cdev_put(new);
if (ret)
return ret;
ret = -ENXIO;
fops = fops_get(p->ops);
if (!fops)
goto out_cdev_put;
/* From here on @filp uses the driver's file_operations */
replace_fops(filp, fops);
if (filp->f_op->open) {
ret = filp->f_op->open(inode, filp);
if (ret)
goto out_cdev_put;
}
return 0;
out_cdev_put:
cdev_put(p);
return ret;
}