linux 2.6.28 kernel之module源码解析

xx-xx-xxx-xxx

于 2024-09-17 09:00:45 发布

阅读量1.1k

点赞数 10

分类专栏： linuxKernel 文章标签： linux 运维服务器

本文链接：https://blog.csdn.net/wenfengliaoshuzhai/article/details/142310838

版权

linuxKernel 专栏收录该内容

30 篇文章

订阅专栏

linux/module.h

/* Life-cycle states a kernel module can be in. */
enum module_state {
	MODULE_STATE_LIVE = 0,		/* module is live and in normal use */
	MODULE_STATE_COMING = 1,	/* module is currently being loaded */
	MODULE_STATE_GOING = 2,		/* module is currently being unloaded */
};

struct module
{
enum module_state state;//模块状态
/* Member of list of modules */
struct list_head list;
//list是作为一个列表的成员，所有的内核模块都被维护在一个全局链表中，链表头是一个全局变量struct
// module modules。任何一个新创建的模块，都会被加入到这个链表的头部
/ Unique handle for this module /
char name[MODULE_NAME_LEN];//模块句柄模块的名字
/ Sysfs stuff. */
struct module_kobject mkobj;
struct module_attribute *modinfo_attrs;
const char *version;
const char *srcversion;
struct kobject holders_dir;
/ Exported symbols */
const struct kernel_symbol *syms;//导出的符号
const unsigned long crcs;
unsigned int num_syms; //符号数量
/ GPL-only exported symbols. */
unsigned int num_gpl_syms;
const struct kernel_symbol *gpl_syms;
const unsigned long gpl_crcs;
#ifdef CONFIG_UNUSED_SYMBOLS
/ unused exported symbols. */
const struct kernel_symbol *unused_syms;
const unsigned long unused_crcs;
unsigned int num_unused_syms;
/ GPL-only, unused exported symbols. */
unsigned int num_unused_gpl_syms;
const struct kernel_symbol *unused_gpl_syms;
const unsigned long unused_gpl_crcs;
#endif
/ symbols that will be GPL-only in the near future. */
const struct kernel_symbol *gpl_future_syms;
const unsigned long gpl_future_crcs;
unsigned int num_gpl_future_syms;
/ Exception table */
unsigned int num_exentries;
struct exception_table_entry extable;
/ Startup function. */
int (init)(void); //模块初始化函数模块入口函数
/ If this is non-NULL, vfree after init() returns */
void module_init;//???
/ Here is the actual code + data, vfree’d on unload. */
void module_core;
/ Here are the sizes of the init and core sections /
unsigned int init_size, core_size;
/ The size of the executable code in each section. /
unsigned int init_text_size, core_text_size;
/ The handle returned from unwind_add_table. */
void unwind_info;
/ Arch-specific module values /
struct mod_arch_specific arch;//体系结构
unsigned int taints; / same bits as kernel:tainted /
#ifdef CONFIG_GENERIC_BUG
/ Support for BUG */
unsigned num_bugs;
struct list_head bug_list;
struct bug_entry bug_table;
#endif
#ifdef CONFIG_KALLSYMS
/ We keep the symbol and string tables for kallsyms. */
Elf_Sym *symtab;
unsigned int num_symtab;
char strtab;
/ Section attributes */
struct module_sect_attrs sect_attrs;
/ Notes attributes */
struct module_notes_attrs notes_attrs;
#endif
/ Per-cpu data. */
void percpu;//针对每个CPU的数据
/ The command line arguments (may be mangled). People like
keeping pointers to this stuff */
char *args; //命令行参数
#ifdef CONFIG_MARKERS
struct marker *markers;
unsigned int num_markers;
#endif
#ifdef CONFIG_TRACEPOINTS
struct tracepoint tracepoints;
unsigned int num_tracepoints;
#endif
#ifdef CONFIG_MODULE_UNLOAD
/ What modules depend on me? /
struct list_head modules_which_use_me;//这个模块所依赖的模块链表
/ Who is waiting for us to be unloaded */
struct task_struct waiter; //正在等待这么模块被卸载的任务
/ Destruction function. */
void (exit)(void); //模块出口函数
/ Reference counts */
struct module_ref ref[NR_CPUS];//引用了一个计数
#endif
};

include/linux/init.h

/* initcalls are now grouped by functionality into separate
 * subsections. Ordering inside the subsections is determined
 * by link order.
 * For backwards compatibility, initcall() puts the call in
 * the device init subsection.
 *
 * The `id' arg to __define_initcall() is needed so that multiple initcalls
 * can point at the same handler without causing duplicate-symbol build errors.
 */

/*
 * Defines a static initcall_t variable named __initcall_<fn><id>, initialized
 * to fn and placed in the section ".initcall" level ".init".
 */
#define __define_initcall(level,fn,id) \
	static initcall_t __initcall_##fn##id __used \
	__attribute__((__section__(".initcall" level ".init"))) = fn
///
/* __initcall(fn) is an alias for device_initcall(fn). */
#define __initcall(fn) device_initcall(fn)
/**
 * module_init() - driver initialization entry point
 * @x: function to be run at kernel boot time or module insertion
 *
 * module_init() will either be called during do_initcalls() (if
 * builtin) or at module insertion time (if a module).  There can only
 * be one per module.
 */
/* include/linux/init.h */
#define module_init(x) __initcall(x);
/* device_initcall registers fn at level "6", i.e. in section .initcall6.init */
#define device_initcall(fn) __define_initcall("6",fn,6)
可以发现这些*_initcall(fn)最终都是通过__define_initcall(level,fn)宏定义生成的。（注：下面引用的这个版本的宏少了id参数。）
__define_initcall宏定义如下：
/*
 * Variant of __define_initcall without the id argument: defines a static
 * initcall_t variable named __initcall_<fn>, initialized to fn and placed
 * in the section ".initcall" level ".init".
 */
#define __define_initcall(level,fn) \
	static initcall_t __initcall_##fn __attribute_used__ \
	__attribute__((__section__(".initcall" level ".init"))) = fn

这句话的意思是：定义一个initcall_t类型的函数指针变量，并用fn对它进行初始化；这个函数指针被存放在名为 .initcall<level>.init 的section内（level是宏的参数，例如device_initcall对应的就是 .initcall6.init）。这些 .initcall<level>.init section定义在vmlinux.lds内。
/* arch/arm/kernel/vmlinux.lds */
__initcall_start = .;
  /* early initcalls; do_initcalls() starts iterating after this group */
  *(.initcallearly.init)
__early_initcall_end = .;
  /* levels 0-7, each followed by its "s" variant, in ascending order */
  *(.initcall0.init)
  *(.initcall0s.init)
  *(.initcall1.init)
  *(.initcall1s.init)
  *(.initcall2.init)
  *(.initcall2s.init)
  *(.initcall3.init)
  *(.initcall3s.init)
  *(.initcall4.init)
  *(.initcall4s.init)
  *(.initcall5.init)
  *(.initcall5s.init)
  *(.initcallrootfs.init)
  *(.initcall6.init)
  *(.initcall6s.init)
  *(.initcall7.init)
  *(.initcall7s.init)
__initcall_end = .;

正好包括了上面init.h里定义的从core_initcall到late_initcall等7个level等级的 .initcall<level>.init section。因此，通过不同的*_initcall声明的函数指针，最终都会存放在不同level等级的 .initcall<level>.init section内。这些不同level的section按level等级由低到高依次存放。

下面我们再来看看，内核是什么时候调用存储在.initcall”level”.init section内的函数的。

内核是通过do_initcalls函数循环调用执行initcall.init section内的函数的，流程如下：
main.c
start_kernel -> rest_init -> kernel_thread -> kernel_init -> do_basic_setup -> do_initcalls

init/main.c
extern initcall_t __initcall_start[], __initcall_end[], __early_initcall_end[];

/*
 * Run every initcall stored between __early_initcall_end and
 * __initcall_end, in storage order, then flush the scheduled work.
 */
static void __init do_initcalls(void)
{
	initcall_t *entry = __early_initcall_end;

	/*
	 * Each entry is a function pointer that is handed to
	 * do_one_initcall().  The init_module system call in
	 * kernel/module.c also goes through do_one_initcall().
	 */
	while (entry < __initcall_end) {
		do_one_initcall(*entry);
		entry++;
	}

	/* Make sure there is no pending stuff from the initcall sequence */
	flush_scheduled_work();
}
/
/* Search for module by name: must hold module_mutex. */
static struct module *find_module(const char *name)
{
struct module *mod;

list_for_each_entry(mod, &modules, list) {
    if (strcmp(mod->name, name) == 0)
        return mod;
}
return NULL;

}
///
static int percpu_modinit(void)
{
pcpu_num_used = 2;
pcpu_num_allocated = 2;
pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated,
GFP_KERNEL);
/* Static in-kernel percpu data (used). /
pcpu_size[0] = -(__per_cpu_end-__per_cpu_start);
/ Free room. */
pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0];
if (pcpu_size[1] < 0) {
printk(KERN_ERR “No per-cpu room for modules.\n”);
pcpu_num_used = 1;
}

return 0;

}
__initcall(percpu_modinit);

模块加载由内核的系统调用init_module完成。

linux3.5.2/kernel/module.c:3009

/* This is where the real work happens */

/*
 * Abridged excerpt of the init_module system call; the "……" lines stand
 * for code left out of this excerpt.  The two steps shown are loading the
 * module with load_module() and running its init function through
 * do_one_initcall().
 */
SYSCALL_DEFINE3(init_module, void __user *, umod,

   unsigned long, len, const char __user *, uargs)

{

struct module *mod;

int ret = 0;

……

/* Do all the hard work */

mod = load_module(umod, len, uargs);// load the module

……

/* Start the module */

if (mod->init != NULL)

   ret = do_one_initcall(mod->init);// call the module's init function

……

return 0;

}
模块加载

系统调用init_module由SYSCALL_DEFINE3(init_module…)实现，其中有两个关键的函数调用。load_module用于模块加载，do_one_initcall用于回调模块的init函数。

/*
 * This is where the real work happens.
 *
 * init_module system call: loads the module image passed in from user
 * space, runs the module's init function and, when that succeeds, marks the
 * module live and frees its init-section memory.
 * Returns 0 on success, -EPERM without CAP_SYS_MODULE, -EINTR when the wait
 * for module_mutex is interrupted, the error from load_module(), or the
 * negative value returned by the module's init function.
 */
SYSCALL_DEFINE3(init_module, void __user *, umod,
		unsigned long, len, const char __user *, uargs)
{
	struct module *mod;
	int ret = 0;

	/* Must have permission */
	if (!capable(CAP_SYS_MODULE))
		return -EPERM;

	/* Only one module load at a time, please */
	if (mutex_lock_interruptible(&module_mutex) != 0)
		return -EINTR;

	/* Do all the hard work */
	mod = load_module(umod, len, uargs);
	if (IS_ERR(mod)) {
		mutex_unlock(&module_mutex);
		return PTR_ERR(mod);
	}

	/* Drop lock so they can recurse */
	mutex_unlock(&module_mutex);

	blocking_notifier_call_chain(&module_notify_list,
			MODULE_STATE_COMING, mod);

	/*
	 * Start the module.  do_initcalls() calls do_one_initcall() too.
	 * NOTE(review): this looks like the path for code loaded as a module
	 * at run time, while do_initcalls() runs the initcalls of code built
	 * into the kernel image - confirm.
	 */
	if (mod->init != NULL)
		ret = do_one_initcall(mod->init);
	if (ret < 0) {
		/* Init routine failed: abort.  Try to protect us from
		   buggy refcounters. */
		mod->state = MODULE_STATE_GOING;
		synchronize_sched();
		module_put(mod);
		blocking_notifier_call_chain(&module_notify_list,
					     MODULE_STATE_GOING, mod);
		mutex_lock(&module_mutex);
		free_module(mod);
		mutex_unlock(&module_mutex);
		wake_up(&module_wq);
		return ret;
	}
	if (ret > 0) {
		/* A positive return is only warned about; loading continues. */
		printk(KERN_WARNING "%s: '%s'->init suspiciously returned %d, "
				    "it should follow 0/-E convention\n"
		       KERN_WARNING "%s: loading module anyway...\n",
		       __func__, mod->name, ret,
		       __func__);
		dump_stack();
	}

	/* Now it's a first class citizen!  Wake up anyone waiting for it. */
	mod->state = MODULE_STATE_LIVE;
	wake_up(&module_wq);

	mutex_lock(&module_mutex);
	/* Drop initial reference. */
	module_put(mod);
	unwind_remove_table(mod->unwind_info, 1);
	/* The init section is no longer needed once init has run. */
	module_free(mod, mod->module_init);
	mod->module_init = NULL;
	mod->init_size = 0;
	mod->init_text_size = 0;
	mutex_unlock(&module_mutex);

	return 0;
}
//

模块卸载由内核的系统调用delete_module完成。
/*
 * delete_module system call: unloads the module whose name is passed in
 * from user space.
 * Returns -EPERM without CAP_SYS_MODULE, -EFAULT when the name cannot be
 * copied from user space, -EINTR when the wait for module_mutex is
 * interrupted, -ENOENT when no such module is found, -EWOULDBLOCK when other
 * modules still use it, -EBUSY when it is not live or cannot be removed,
 * or the result of try_stop_module() (0 on the success path).
 */
SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
unsigned int, flags)
{
struct module *mod;
char name[MODULE_NAME_LEN];
int ret, forced = 0;

/* Must have permission */
if (!capable(CAP_SYS_MODULE))
    return -EPERM;

/* Copy the name in from user space and force NUL termination. */
if (strncpy_from_user(name, name_user, MODULE_NAME_LEN-1) < 0)
    return -EFAULT;
name[MODULE_NAME_LEN-1] = '\0';

if (mutex_lock_interruptible(&module_mutex) != 0)
    return -EINTR;

/* From here on every exit goes through "out", which drops module_mutex. */
mod = find_module(name);
if (!mod) {
    ret = -ENOENT;
    goto out;
}

if (!list_empty(&mod->modules_which_use_me)) {
    /* Other modules depend on us: get rid of them first. */
    ret = -EWOULDBLOCK;
    goto out;
}

/* Doing init or already dying? */
if (mod->state != MODULE_STATE_LIVE) {
    /* FIXME: if (force), slam module count and wake up
               waiter --RR */
    DEBUGP("%s already dying\n", mod->name);
    ret = -EBUSY;
    goto out;
}

/* If it has an init func, it must have an exit func to unload */
if (mod->init && !mod->exit) {
    forced = try_force_unload(flags);
    if (!forced) {
        /* This module can't be removed */
        ret = -EBUSY;
        goto out;
    }
}

/* Set this up before setting mod->state */
mod->waiter = current;

/* Stop the machine so refcounts can't move and disable module. */
ret = try_stop_module(mod, flags, &forced);
if (ret != 0)
    goto out;

/* Never wait if forced. */
if (!forced && module_refcount(mod) != 0)
    wait_for_zero_refcount(mod);

/* module_mutex is released around the exit call and the notifier chain. */
mutex_unlock(&module_mutex);
/* Final destruction now noone is using it. */
if (mod->exit != NULL)
    mod->exit();// call the module's exit function
blocking_notifier_call_chain(&module_notify_list,
                 MODULE_STATE_GOING, mod);
mutex_lock(&module_mutex);
/* Store the name of the last unloaded module for diagnostic purposes */
strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module));
unregister_dynamic_debug_module(mod->name);
free_module(mod);// unload (free) the module

out:
mutex_unlock(&module_mutex);
return ret;
}

函数在内核中的位置：linux-2.6.30/kernel/module.c

该函数的功能是将一个特定模块module的引用计数减1 ，这样当一个模块的引用计数因为不为0而不能从内核中卸载时，可以调用此函数一次或多次，实现对模块计数的清零，从而实现模块卸载。
void module_put(struct module module)
{
if (module) {
unsigned int cpu = get_cpu();
local_dec(&module->ref[cpu].count);
/ Maybe they’re waiting for us to drop reference? /
if (unlikely(!module_is_live(module)))
wake_up_process(module->waiter);
put_cpu();
}
}
//
/
modprobe_path is set via /proc/sys.
*/
char modprobe_path[KMOD_PATH_LEN] = “/sbin/modprobe”;

/**
 * request_module - try to load a kernel module
 * @fmt: printf style format string for the name of the module
 * @varargs: arguments as specified in the format string
 *
 * Load a module using the user mode module loader. The function returns
 * zero on success or a negative errno code on failure. Note that a
 * successful module load does not mean the module did not then unload
 * and exit on an error of its own. Callers must check that the service
 * they requested is now available not blindly invoke it.
 *
 * If module auto-loading support is disabled then this function
 * becomes a no-operation.
 */
kernel/kmod.c
int request_module(const char *fmt, …)
{
va_list args;
char module_name[MODULE_NAME_LEN];
unsigned int max_modprobes;
int ret;
char *argv[] = { modprobe_path, “-q”, “–”, module_name, NULL };
static char envp[] = { “HOME=/”,
“TERM=linux”,
“PATH=/sbin:/usr/sbin:/bin:/usr/bin”,
NULL };
static atomic_t kmod_concurrent = ATOMIC_INIT(0);
#define MAX_KMOD_CONCURRENT 50 / Completely arbitrary value - KAO */
static int kmod_loop_msg;

va_start(args, fmt);
ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
va_end(args);
if (ret >= MODULE_NAME_LEN)
return -ENAMETOOLONG;

/* If modprobe needs a service that is in a module, we get a recursive

loop. Limit the number of running kmod threads to max_threads/2 or
MAX_KMOD_CONCURRENT, whichever is the smaller. A cleaner method
would be to run the parents of this process, counting how many times
kmod was invoked. That would mean accessing the internals of the
process tables to get the command line, proc_pid_cmdline is static
and it is not worth changing the proc code just to handle this case.
KAO.
“trace the ppid” is simple, but will fail if someone’s
parent exits. I think this is as good as it gets. --RR
/
max_modprobes = min(max_threads/2, MAX_KMOD_CONCURRENT);
atomic_inc(&kmod_concurrent);
if (atomic_read(&kmod_concurrent) > max_modprobes) {
/ We may be blaming an innocent here, but unlikely */
if (kmod_loop_msg++ < 5)
printk(KERN_ERR
“request_module: runaway loop modprobe %s\n”,
module_name);
atomic_dec(&kmod_concurrent);
return -ENOMEM;
}

ret = call_usermodehelper(modprobe_path, argv, envp, 1);
atomic_dec(&kmod_concurrent);
return ret;
}

Linux把内核也看作一个模块。那么模块与模块之间如何进行交互呢？一种常用的方法就是共享变量和函数。但并不是模块中的每个变量和函数都能被共享，内核只把各个模块中主要的变量和函数放在一个特定的区段，这些变量和函数就统称为符号。到底哪些符号可以被共享？Linux内核有自己的规定。例如 fs/sysfs/symlink.c 中的 EXPORT_SYMBOL_GPL(sysfs_create_link);
其中宏定义EXPORT_SYMBOL（）本身的含义是“移出符号”。为什么说是“移出”呢？因为这些符号本来是内核内部的符号，通过这个宏放在一个公开的地方，使得装入到内核中的其他模块可以引用它们。
实际上，仅仅知道这些符号的名字是不够的，还得知道它们在内核映像中的地址才有意义。因此，内核中定义了如下结构来描述模块的符号：
/* Describes one exported symbol: its address and its name. */
struct module_symbol
{
	unsigned long value;	/* address of the symbol in the kernel image */
	const char *name;	/* pointer to the symbol's name string */
};

从后面对EXPORT_SYMBOL宏的定义可以看出，连接程序（ld）在连接内核映像时将这个结构存放在一个叫做“__ksymtab”的区段中，而这个区段中所有的符号就组成了模块对外“移出”的符号表，这些符号可供内核及已安装的模块来引用。而其他“对内”的符号则由连接程序自行生成，并仅供内部使用。
与EXPORT_SYMBOL相关的定义在include/linux/module.h中：
/* Two-step stringification, so that a macro argument is expanded before being quoted. */
#define __MODULE_STRING_1(x) #x
#define __MODULE_STRING(x) __MODULE_STRING_1(x)

/*
 * Emits two definitions for sym:
 *   - __kstrtab_<sym>: the name string str, placed in section ".kstrtab";
 *   - __ksymtab_<sym>: a struct module_symbol holding the address of sym
 *     and a pointer to that string, placed in section "__ksymtab".
 */
#define __EXPORT_SYMBOL(sym, str)			\
const char __kstrtab_##sym[]				\
__attribute__((section(".kstrtab"))) = str;		\
const struct module_symbol __ksymtab_##sym		\
__attribute__((section("__ksymtab"))) =			\
{ (unsigned long)&sym, __kstrtab_##sym }

#if defined(MODVERSIONS) || !defined(CONFIG_MODVERSIONS)

#define EXPORT_SYMBOL(var) __EXPORT_SYMBOL(var, __MODULE_STRING(var))

/* The excerpt ends here; any further branches of this #if are not shown. */
#endif

下面我们以EXPORT_SYMBOL(schedule)为例，来看一下这个宏展开的结果是什么。首先，EXPORT_SYMBOL(schedule)被展开成__EXPORT_SYMBOL(schedule, "schedule")。而__EXPORT_SYMBOL()定义了两个语句：第一个语句定义了一个名为__kstrtab_schedule的字符串，将字符串的内容初始化为"schedule"，并将其置于内核映像中的.kstrtab区段，注意这是一个专门存放符号名字符串的区段。第二个语句则定义了一个名为__ksymtab_schedule的module_symbol结构，将其初始化为{ &schedule, __kstrtab_schedule }，并将其置于内核映像中的__ksymtab区段。这样，module_symbol结构中的域value的值就是schedule在内核映像中的地址，而指针name则指向字符串"schedule"。