linux 2.6.28 kernel之module源码解析

linux/module.h

/* Lifecycle states a kernel module can be in. */
enum module_state {
	MODULE_STATE_LIVE   = 0,	/* loaded and in normal use */
	MODULE_STATE_COMING = 1,	/* currently being loaded */
	MODULE_STATE_GOING  = 2,	/* currently being unloaded */
};

struct module
{
enum module_state state;//模块状态
/* Member of list of modules */
struct list_head list;
//list是作为一个列表的成员,所有的内核模块都被维护在一个全局链表中,链表头是一个全局变量struct
// module modules。任何一个新创建的模块,都会被加入到这个链表的头部
/
Unique handle for this module /
char name[MODULE_NAME_LEN];//模块句柄 模块的名字
/
Sysfs stuff. */
struct module_kobject mkobj;
struct module_attribute *modinfo_attrs;
const char *version;
const char *srcversion;
struct kobject holders_dir;
/
Exported symbols */
const struct kernel_symbol *syms;//导出的符号
const unsigned long crcs;
unsigned int num_syms; //符号数量
/
GPL-only exported symbols. */
unsigned int num_gpl_syms;
const struct kernel_symbol *gpl_syms;
const unsigned long gpl_crcs;
#ifdef CONFIG_UNUSED_SYMBOLS
/
unused exported symbols. */
const struct kernel_symbol *unused_syms;
const unsigned long unused_crcs;
unsigned int num_unused_syms;
/
GPL-only, unused exported symbols. */
unsigned int num_unused_gpl_syms;
const struct kernel_symbol *unused_gpl_syms;
const unsigned long unused_gpl_crcs;
#endif
/
symbols that will be GPL-only in the near future. */
const struct kernel_symbol *gpl_future_syms;
const unsigned long gpl_future_crcs;
unsigned int num_gpl_future_syms;
/
Exception table */
unsigned int num_exentries;
struct exception_table_entry extable;
/
Startup function. */
int (init)(void); //模块初始化函数 模块入口函数
/
If this is non-NULL, vfree after init() returns */
void module_init;//???
/
Here is the actual code + data, vfree’d on unload. */
void module_core;
/
Here are the sizes of the init and core sections /
unsigned int init_size, core_size;
/
The size of the executable code in each section. /
unsigned int init_text_size, core_text_size;
/
The handle returned from unwind_add_table. */
void unwind_info;
/
Arch-specific module values /
struct mod_arch_specific arch;//体系结构
unsigned int taints; /
same bits as kernel:tainted /
#ifdef CONFIG_GENERIC_BUG
/
Support for BUG */
unsigned num_bugs;
struct list_head bug_list;
struct bug_entry bug_table;
#endif
#ifdef CONFIG_KALLSYMS
/
We keep the symbol and string tables for kallsyms. */
Elf_Sym *symtab;
unsigned int num_symtab;
char strtab;
/
Section attributes */
struct module_sect_attrs sect_attrs;
/
Notes attributes */
struct module_notes_attrs notes_attrs;
#endif
/
Per-cpu data. */
void percpu;//针对每个CPU的数据
/
The command line arguments (may be mangled). People like
keeping pointers to this stuff */
char *args; //命令行参数
#ifdef CONFIG_MARKERS
struct marker *markers;
unsigned int num_markers;
#endif
#ifdef CONFIG_TRACEPOINTS
struct tracepoint tracepoints;
unsigned int num_tracepoints;
#endif
#ifdef CONFIG_MODULE_UNLOAD
/
What modules depend on me? /
struct list_head modules_which_use_me;//这个模块所依赖的模块链表
/
Who is waiting for us to be unloaded */
struct task_struct waiter; //正在等待这么模块被卸载的任务
/
Destruction function. */
void (exit)(void); //模块出口函数
/
Reference counts */
struct module_ref ref[NR_CPUS];//引用了一个计数
#endif
};

include/linux/init.h

/* initcalls are now grouped by functionality into separate
 * subsections. Ordering inside the subsections is determined
 * by link order.
 * For backwards compatibility, initcall() puts the call in
 * the device init subsection.
 *
 * The `id' arg to __define_initcall() is needed so that multiple initcalls
 * can point at the same handler without causing duplicate-symbol build errors.
 */

/*
 * Define a static initcall_t pointer to `fn' and place it in the
 * ".initcall<level>.init" section; `fn' and `id' are pasted into the
 * variable name.
 */
#define __define_initcall(level,fn,id) \
	static initcall_t __initcall_##fn##id __used \
	__attribute__((__section__(".initcall" level ".init"))) = fn

/* Plain __initcall() is an alias for the device-level initcall. */
#define __initcall(fn) device_initcall(fn)
/**
 * module_init() - driver initialization entry point
 * @x: function to be run at kernel boot time or module insertion
 *
 * module_init() will either be called during do_initcalls() (if
 * builtin) or at module insertion time (if a module).  There can only
 * be one per module.
 */
/* include/linux/init.h */
#define module_init(x)	__initcall(x);
/* Device-level initcalls use level "6" and id 6. */
#define device_initcall(fn)	__define_initcall("6",fn,6)
    可以发现这些 *_initcall(fn) 最终都是通过 __define_initcall(level,fn) 宏定义生成的。(注:下面引用的是较早版本内核中的定义,比前面的版本少了 id 参数。)
    __define_initcall宏定义如下:
    /* Older two-argument form: defines a static initcall_t pointer to `fn'
     * and places it in the ".initcall<level>.init" section. */
    #define __define_initcall(level,fn) \
        static initcall_t __initcall_##fn __attribute_used__ \
        __attribute__((__section__(".initcall" level ".init"))) = fn

这句话的意思为定义一个initcall_t型的函数指针变量并将其初始化为fn,该指针存放在 .initcall<level>.init section 内(<level> 即宏参数 level)。.initcall<level>.init section 定义在vmlinux.lds内。
/* arch/arm/kernel/vmlinux.lds */
/* Start of the initcall pointer table. */
__initcall_start = .;
	/* Early initcalls come first; do_initcalls() starts after them. */
	*(.initcallearly.init)
	__early_initcall_end = .;
	/* Levels 0..7 in order, each followed by its "s" variant. */
	*(.initcall0.init)
	*(.initcall0s.init)
	*(.initcall1.init)
	*(.initcall1s.init)
	*(.initcall2.init)
	*(.initcall2s.init)
	*(.initcall3.init)
	*(.initcall3s.init)
	*(.initcall4.init)
	*(.initcall4s.init)
	*(.initcall5.init)
	*(.initcall5s.init)
	*(.initcallrootfs.init)
	*(.initcall6.init)
	*(.initcall6s.init)
	*(.initcall7.init)
	*(.initcall7s.init)
/* End of the initcall pointer table. */
__initcall_end = .;

正好包括了上面init.h里定义的从core_initcall到late_initcall等7个level等级的 .initcall<level>.init section。因此通过不同的*_initcall声明的函数指针最终都会存放在不同level等级的 .initcall<level>.init section 内。这些不同level的section按level等级高低依次存放。

下面我们再来看看,内核是什么时候调用存储在.initcall”level”.init section内的函数的。

内核是通过do_initcalls函数循环调用执行initcall.init section内的函数的,流程如下:
main.c
start_kernel -> rest_init -> kernel_thread -> kernel_init -> do_basic_setup -> do_initcalls

init/main.c
/* Boundaries of the initcall pointer table. */
extern initcall_t __initcall_start[], __initcall_end[], __early_initcall_end[];

/*
 * Invoke, in table order, every initcall stored from
 * __early_initcall_end up to (but not including) __initcall_end.
 *
 * Each entry is run through do_one_initcall(), the same helper the
 * init_module system call in kernel/module.c uses for a module's init
 * function.
 */
static void __init do_initcalls(void)
{
	initcall_t *entry = __early_initcall_end;

	while (entry < __initcall_end) {
		do_one_initcall(*entry);
		entry++;
	}

	/* Make sure there is no pending stuff from the initcall sequence */
	flush_scheduled_work();
}
/
/*
 * Look up a module by name in the global `modules' list.
 * The caller must hold module_mutex.
 *
 * Returns the first module whose name compares equal to @name,
 * or NULL when no such module is on the list.
 */
static struct module *find_module(const char *name)
{
	struct module *cur;

	list_for_each_entry(cur, &modules, list) {
		if (strcmp(cur->name, name) != 0)
			continue;
		return cur;
	}

	return NULL;
}
///
static int percpu_modinit(void)
{
pcpu_num_used = 2;
pcpu_num_allocated = 2;
pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated,
GFP_KERNEL);
/* Static in-kernel percpu data (used). /
pcpu_size[0] = -(__per_cpu_end-__per_cpu_start);
/
Free room. */
pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0];
if (pcpu_size[1] < 0) {
printk(KERN_ERR “No per-cpu room for modules.\n”);
pcpu_num_used = 1;
}

return 0;

}
__initcall(percpu_modinit);

模块加载由内核的系统调用init_module完成。

linux3.5.2/kernel/module.c:3009

/* This is where the real work happens */
/*
 * Abridged excerpt of the init_module system call: each "……" line marks
 * code the article left out.  Only the two key calls are shown:
 * load_module() and do_one_initcall() on the module's init function.
 */

SYSCALL_DEFINE3(init_module, void __user *, umod,

   unsigned long, len, const char __user *, uargs)

{

struct module *mod;

int ret = 0;

……

/* Do all the hard work */

mod = load_module(umod, len, uargs);// load the module

……

/* Start the module */

if (mod->init != NULL)

   ret = do_one_initcall(mod->init);// call the module's init function

……

return 0;

}
模块加载

系统调用init_module由SYSCALL_DEFINE3(init_module…)实现,其中有两个关键的函数调用。load_module用于模块加载,do_one_initcall用于回调模块的init函数。

/*
 * This is where the real work happens.
 *
 * init_module system call: load the module image passed from user space
 * (@umod, @len) with arguments @uargs, then run the module's init function.
 *
 * Returns 0 on success, -EPERM without CAP_SYS_MODULE, -EINTR if taking
 * module_mutex was interrupted, the error encoded by load_module(), or the
 * negative value returned by the module's init function.
 */
SYSCALL_DEFINE3(init_module, void __user *, umod,
		unsigned long, len, const char __user *, uargs)
{
	struct module *mod;
	int ret = 0;

	/* Must have permission */
	if (!capable(CAP_SYS_MODULE))
		return -EPERM;

	/* Only one module load at a time, please */
	if (mutex_lock_interruptible(&module_mutex) != 0)
		return -EINTR;

	/* Do all the hard work */
	mod = load_module(umod, len, uargs);
	if (IS_ERR(mod)) {
		mutex_unlock(&module_mutex);
		return PTR_ERR(mod);
	}

	/* Drop lock so they can recurse */
	mutex_unlock(&module_mutex);

	blocking_notifier_call_chain(&module_notify_list,
			MODULE_STATE_COMING, mod);

	/*
	 * Start the module.  This is the insertion-time path for a loadable
	 * module; built-in code gets its initcall run from do_initcalls()
	 * instead.  Both paths go through do_one_initcall().
	 */
	if (mod->init != NULL)
		ret = do_one_initcall(mod->init);
	if (ret < 0) {
		/* Init routine failed: abort.  Try to protect us from
		   buggy refcounters. */
		mod->state = MODULE_STATE_GOING;
		synchronize_sched();
		module_put(mod);
		blocking_notifier_call_chain(&module_notify_list,
					     MODULE_STATE_GOING, mod);
		mutex_lock(&module_mutex);
		free_module(mod);
		mutex_unlock(&module_mutex);
		wake_up(&module_wq);
		return ret;
	}
	if (ret > 0) {
		/* Positive return values are only warned about. */
		printk(KERN_WARNING "%s: '%s'->init suspiciously returned %d, "
				    "it should follow 0/-E convention\n"
		       KERN_WARNING "%s: loading module anyway...\n",
		       __func__, mod->name, ret,
		       __func__);
		dump_stack();
	}

	/* Now it's a first class citizen!  Wake up anyone waiting for it. */
	mod->state = MODULE_STATE_LIVE;
	wake_up(&module_wq);

	mutex_lock(&module_mutex);
	/* Drop initial reference. */
	module_put(mod);
	unwind_remove_table(mod->unwind_info, 1);
	/* The init region is no longer needed once init has run. */
	module_free(mod, mod->module_init);
	mod->module_init = NULL;
	mod->init_size = 0;
	mod->init_text_size = 0;
	mutex_unlock(&module_mutex);

	return 0;
}
//

模块卸载由内核的系统调用delete_module完成。
/*
 * delete_module system call: unload the module named by @name_user.
 *
 * Returns 0 on success; -EPERM without CAP_SYS_MODULE; -EFAULT if the name
 * cannot be copied from user space; -EINTR if taking module_mutex was
 * interrupted; -ENOENT if no such module is loaded; -EWOULDBLOCK if other
 * modules still use it; -EBUSY if it is not in the LIVE state or has an
 * init function but no exit function and the unload is not forced; or the
 * non-zero result of try_stop_module().
 */
SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
unsigned int, flags)
{
struct module *mod;
char name[MODULE_NAME_LEN];
int ret, forced = 0;

/* Unloading requires the CAP_SYS_MODULE capability. */
if (!capable(CAP_SYS_MODULE))
    return -EPERM;

/* Copy at most MODULE_NAME_LEN-1 bytes and terminate explicitly. */
if (strncpy_from_user(name, name_user, MODULE_NAME_LEN-1) < 0)
    return -EFAULT;
name[MODULE_NAME_LEN-1] = '\0';

/* From here on every exit path goes through `out', which unlocks. */
if (mutex_lock_interruptible(&module_mutex) != 0)
    return -EINTR;

mod = find_module(name);
if (!mod) {
    ret = -ENOENT;
    goto out;
}

if (!list_empty(&mod->modules_which_use_me)) {
    /* Other modules depend on us: get rid of them first. */
    ret = -EWOULDBLOCK;
    goto out;
}

/* Doing init or already dying? */
if (mod->state != MODULE_STATE_LIVE) {
    /* FIXME: if (force), slam module count and wake up
               waiter --RR */
    DEBUGP("%s already dying\n", mod->name);
    ret = -EBUSY;
    goto out;
}

/* If it has an init func, it must have an exit func to unload */
if (mod->init && !mod->exit) {
    forced = try_force_unload(flags);
    if (!forced) {
        /* This module can't be removed */
        ret = -EBUSY;
        goto out;
    }
}

/* Set this up before setting mod->state */
mod->waiter = current;

/* Stop the machine so refcounts can't move and disable module. */
ret = try_stop_module(mod, flags, &forced);
if (ret != 0)
    goto out;

/* Never wait if forced. */
if (!forced && module_refcount(mod) != 0)
    wait_for_zero_refcount(mod);

/* The exit function and the notifier chain run without module_mutex held. */
mutex_unlock(&module_mutex);
/* Final destruction now noone is using it. */
if (mod->exit != NULL)
    mod->exit();// call the module's exit function
blocking_notifier_call_chain(&module_notify_list,
                 MODULE_STATE_GOING, mod);
mutex_lock(&module_mutex);
/* Store the name of the last unloaded module for diagnostic purposes */
strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module));
unregister_dynamic_debug_module(mod->name);
free_module(mod);// unload the module

/* Common exit: module_mutex is held on every path that reaches here. */
out:
mutex_unlock(&module_mutex);
return ret;
}

函数在内核中的位置:linux-2.6.30/kernel/module.c

该函数的功能是将一个特定模块module的引用计数减1 ,这样当一个模块的引用计数因为不为0而不能从内核中卸载时,可以调用此函数一次或多次,实现对模块计数的清零,从而实现模块卸载。
void module_put(struct module module)
{
if (module) {
unsigned int cpu = get_cpu();
local_dec(&module->ref[cpu].count);
/
Maybe they’re waiting for us to drop reference? /
if (unlikely(!module_is_live(module)))
wake_up_process(module->waiter);
put_cpu();
}
}
//
/

modprobe_path is set via /proc/sys.
*/
char modprobe_path[KMOD_PATH_LEN] = “/sbin/modprobe”;

/**

  • request_module - try to load a kernel module
  • @fmt: printf style format string for the name of the module
  • @varargs: arguements as specified in the format string
  • Load a module using the user mode module loader. The function returns
  • zero on success or a negative errno code on failure. Note that a
  • successful module load does not mean the module did not then unload
  • and exit on an error of its own. Callers must check that the service
  • they requested is now available not blindly invoke it.
  • If module auto-loading support is disabled then this function
  • becomes a no-operation.
    */
    kernel/kmod.c
    int request_module(const char *fmt, …)
    {
    va_list args;
    char module_name[MODULE_NAME_LEN];
    unsigned int max_modprobes;
    int ret;
    char *argv[] = { modprobe_path, “-q”, “–”, module_name, NULL };
    static char envp[] = { “HOME=/”,
    “TERM=linux”,
    “PATH=/sbin:/usr/sbin:/bin:/usr/bin”,
    NULL };
    static atomic_t kmod_concurrent = ATOMIC_INIT(0);
    #define MAX_KMOD_CONCURRENT 50 /
    Completely arbitrary value - KAO */
    static int kmod_loop_msg;

va_start(args, fmt);
ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
va_end(args);
if (ret >= MODULE_NAME_LEN)
return -ENAMETOOLONG;

/* If modprobe needs a service that is in a module, we get a recursive

  • loop. Limit the number of running kmod threads to max_threads/2 or
  • MAX_KMOD_CONCURRENT, whichever is the smaller. A cleaner method
  • would be to run the parents of this process, counting how many times
  • kmod was invoked. That would mean accessing the internals of the
  • process tables to get the command line, proc_pid_cmdline is static
  • and it is not worth changing the proc code just to handle this case.
  • KAO.
  • “trace the ppid” is simple, but will fail if someone’s
  • parent exits. I think this is as good as it gets. --RR
    /
    max_modprobes = min(max_threads/2, MAX_KMOD_CONCURRENT);
    atomic_inc(&kmod_concurrent);
    if (atomic_read(&kmod_concurrent) > max_modprobes) {
    /
    We may be blaming an innocent here, but unlikely */
    if (kmod_loop_msg++ < 5)
    printk(KERN_ERR
    “request_module: runaway loop modprobe %s\n”,
    module_name);
    atomic_dec(&kmod_concurrent);
    return -ENOMEM;
    }

ret = call_usermodehelper(modprobe_path, argv, envp, 1);
atomic_dec(&kmod_concurrent);
return ret;
}

Linux把内核也看作一个模块。那么模块与模块之间如何进行交互呢,一种常用的方法就是共享变量和函数。但并不是模块中的每个变量和函数都能被共享,内核只把各个模块中主要的变量和函数放在一个特定的区段,这些变量和函数就统称为符号。到底哪些符号可以被共享?Linux内核有自己的规定。例如 fs/sysfs/symlink.c 中的 EXPORT_SYMBOL_GPL(sysfs_create_link);
其中宏定义EXPORT_SYMBOL()本身的含义是“移出符号”。为什么说是“移出”呢?因为这些符号本来是内核内部的符号,通过这个宏放在一个公开的地方,使得装入到内核中的其他模块可以引用它们。
实际上,仅仅知道这些符号的名字是不够的,还得知道它们在内核映像中的地址才有意义。因此,内核中定义了如下结构来描述模块的符号:
/* Describes one exported symbol: its address and its name. */
struct module_symbol {
	unsigned long value;	/* address of the symbol in the kernel image */
	const char *name;	/* pointer to the symbol's name string */
};

从后面对EXPORT_SYMBOL宏的定义可以看出,连接程序(ld)在连接内核映像时将这个结构存放在一个叫做“__ksymtab”的区段中,而这个区段中所有的符号就组成了模块对外“移出”的符号表,这些符号可供内核及已安装的模块来引用。而其他“对内”的符号则由连接程序自行生成,并仅供内部使用。
与EXPORT_SYMBOL相关的定义在include/linux/module.h中:
/*
 * Stringify a token.  The two-level form lets a macro argument be
 * expanded before it is turned into a string literal.
 */
#define __MODULE_STRING_1(x)	#x
#define __MODULE_STRING(x)	__MODULE_STRING_1(x)

/*
 * Export `sym' under the name `str' by emitting two objects:
 *   __kstrtab_<sym>  - the name string, placed in section ".kstrtab"
 *   __ksymtab_<sym>  - a struct module_symbol { address, name }, placed
 *                      in section "__ksymtab"
 */
#define __EXPORT_SYMBOL(sym, str)			\
const char __kstrtab_##sym[]				\
__attribute__((section(".kstrtab"))) = str;		\
const struct module_symbol __ksymtab_##sym		\
__attribute__((section("__ksymtab"))) =			\
{ (unsigned long)&sym, __kstrtab_##sym }

#if defined(MODVERSIONS) || !defined(CONFIG_MODVERSIONS)
/* Export `var' under its own identifier as the symbol name. */
#define EXPORT_SYMBOL(var)  __EXPORT_SYMBOL(var, __MODULE_STRING(var))
#endif /* the excerpt ends here; any #else branch of the original header is not shown */

下面我们以EXPORT_SYMBOL(schedule)为例,来看一下这个宏的结果是什么。首先EXPORT_SYMBOL(schedule)被展开成__EXPORT_SYMBOL(schedule, "schedule")。而__EXPORT_SYMBOL()定义了两个语句,第一个语句定义了一个名为__kstrtab_schedule的字符串,将字符串的内容初始化为"schedule",并将其置于内核映像中的.kstrtab区段,注意这是一个专门存放符号名字符串的区段。第二个语句则定义了一个名为__ksymtab_schedule的module_symbol结构,将其初始化为{ (unsigned long)&schedule, __kstrtab_schedule },并将其置于内核映像中的__ksymtab区段。这样,module_symbol结构中的域value的值就为schedule在内核映像中的地址,而指针name则指向字符串"schedule"。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

xx-xx-xxx-xxx

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值