回到INSMOD_MAIN。
1826 /**** No symbols or sections to be changed after kallsyms above ***/
之前所有的操作都没动内核。下面就要来真的了。
1828 if (errors)
1829 goto out;
1830
1831 /* If we were just checking, we made it. */
1832 if (flag_silent_probe) {
1833 exit_status = 0;
1834 goto out;
1835 }
1836 /* Module has now finished growing; find its size and install it. */
1837 m_size = obj_load_size(f); /* DEPMOD */
1838
1839 if (noload) {
1840 /* Don't bother actually touching the kernel. */
1841 m_addr = 0x12340000;
1842 } else {
1843 errno = 0;
1844 m_addr = create_module(m_name, m_size);
1845 switch (errno) {
1846 case 0:
1847 break;
1848 case EEXIST:
1849 if (dolock) {
1850 /*
1851 * Assume that we were just invoked
1852 * simultaneous with another insmod
1853 * and return success.
1854 */
1855 exit_status = 0;
1856 goto out;
1857 }
1858 error("a module named %s already exists", m_name);
1859 goto out;
1860 case ENOMEM:
1861 error("can't allocate kernel memory for module; needed %lu bytes",
1862 m_size);
1863 goto out;
1864 default:
1865 error("create_module: %m");
1866 goto out;
1867 }
1868 }
首先,如果flag_silent_probe已经设置,说明我们不想真正安装模块,只是想测试一下,那么到这里测试已经完成了,模块一切正常。否则,就要开始动手了,先计算载入模块所需的大小。函数obj_load_size在./modutils-2.4.0/obj/obj_reloc.c里。
Insmod——obj_load_size函数
246 unsigned long
247 obj_load_size (struct obj_file *f)
248 {
249 unsigned long dot = 0;
250 struct obj_section *sec;
251
252 /* Finalize the positions of the sections relative to one another. */
253
254 for (sec = f->load_order; sec ; sec = sec->load_next)
255 {
256 ElfW(Addr) align;
257
258 align = sec->header.sh_addralign;
259 if (align && (dot & (align - 1)))
260 dot = (dot | (align - 1)) + 1;
261
262 sec->header.sh_addr = dot;
263 dot += sec->header.sh_size;
264 }
265
266 return dot;
267 }
前面提到段按对齐边界大小排序,可以减少空间占用,就是体现在这里:每个段的起始地址dot都要先向上取整到该段的对齐边界(259-260行),排序之后因对齐而浪费的填充空间就更少。
如果noload设置了,那么我们选择不真正加载模块。随便给加载地址就完了。但是如果要真正加载,就不能那么儿戏了。首先要通过create_module创建内核里的模块对象。这个函数在./modutils-2.4.0/util/sys_cm.c中。
Insmod——create_module函数
39 #define __NR__create_module __NR_create_module
40 static inline _syscall2(long, _create_module, const char *, name, size_t, size)
41
42 unsigned long create_module(const char *name, size_t size)
43 {
44 /* Why all this fuss?
45
46 In linux 2.1, the address returned by create module point in
47 kernel space which is now mapped at the top of user space (at
48 0xc0000000 on i386). This looks like a negative number for a
49 long. The normal syscall macro of linux 2.0 (and all libc compile
50 with linux 2.0 or below) consider that the return value is a
51 negative number and consider it is an error number (A kernel
52 convention, return value are positive or negative, indicating the
53 error number).
54
55 By checking the value of errno, we know if we have been fooled by
56 the syscall2 macro and we fix it. */
57
58 long ret = _create_module(name, size);
59 if (ret == -1 && errno > 125)
60 {
61 ret = -errno;
62 errno = 0;
63 }
64 return ret;
63 }
这个函数里有一个比较有意思的注释,谈到内核创建了module对象后,因为其地址在内核区,在0xc0000000以上,作为long来看是一个负数;linux 2.0的syscall宏(以及所有用linux 2.0或更早版本编译的libc)在系统调用返回时会把它当成一个错误码,因此,在此要做些判断。
_syscall2是一个宏,用来发起系统调用__NR_create_module。这个宏的定义在linux/include/asm-i386/unistd.h中。
263 #define _syscall2(type,name,type1,arg1,type2,arg2) /
264 type name(type1 arg1,type2 arg2) /
265 { /
266 long __res; /
267 __asm__ volatile ("int $0x80" /
268 : "=a" (__res) /
269 : "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2))); /
270 __syscall_return(type,__res); /
271 }
在这个文件里,刚才注释里提到的错误已经修正了,见__syscall_return,这个宏也在这个文件里。
231 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
232
233 #define __syscall_return(type, res) /
234 do { /
235 if ((unsigned long)(res) >= (unsigned long)(-125)) { /
236 errno = -(res); /
237 res = -1; /
238 } /
239 return (type) (res); /
240 } while (0)
函数最终会调用系统调用sys_create_module。这个调用在内核空间生成一个模块对象,并将它链入内核的模块链表(详见linux源代码情景分析)。创建内核模块对象时,有可能出错,INSMOD_MAIN的1845行的switch检查模块对象是否成功创建。
1870 /* module is already built, complete with ksymoops symbols for the
1871 * persistent filename. If the kernel does not support persistent data
1872 * then give an error but continue. It is too difficult to clean up at
1873 * this stage and this error will only occur on backported modules.
1874 * rmmod will also get an error so warn the user now.
1875 */
1876 if (f->persist && !noload) {
1877 struct {
1878 struct module m;
1879 int data;
1880 } test_read;
1881 memset(&test_read, 0, sizeof(test_read));
1882 test_read.m.size_of_struct = -sizeof(test_read.m); /* -ve size => read, not write */
1883 test_read.m.read_start = m_addr + sizeof(struct module);
1884 test_read.m.read_end = test_read.m.read_start + sizeof(test_read.data);
1885 if (sys_init_module(m_name, (struct module *) &test_read)) {
1886 int old_errors = errors;
1887 error("has persistent data but the kernel is too old to support it."
1888 " Expect errors during rmmod as well");
1889 errors = old_errors;
1890 }
1891 }
如果模块运行时参数使用了文件,而且需要真正加载,那么要执行1876行的语句块,检查内核是否支持persist data(modutils文档里这样解释persist data:当模块初始化时从文件读入,模块退出时将相关内容写入文件)。这里的module结构不是内核使用的那个,它定义在./modutils-2.4.0/include/module.h中。
136 struct module
137 {
138 unsigned tgt_long size_of_struct; /* == sizeof(module) */
139 unsigned tgt_long next;
140 unsigned tgt_long name;
141 unsigned tgt_long size;
142
143 tgt_long usecount;
144 unsigned tgt_long flags; /* AUTOCLEAN et al */
145
146 unsigned nsyms;
147 unsigned ndeps;
148
149 unsigned tgt_long syms;
150 unsigned tgt_long deps;
151 unsigned tgt_long refs;
152 unsigned tgt_long init;
153 unsigned tgt_long cleanup;
154 unsigned tgt_long ex_table_start;
155 unsigned tgt_long ex_table_end;
156 #ifdef __alpha__
157 unsigned tgt_long gp;
158 #endif
159 /* Everything after here is extension. */
160 unsigned tgt_long read_start; /* Read data from existing module */
161 unsigned tgt_long read_end;
162 unsigned tgt_long can_unload;
163 unsigned tgt_long runsize;
164 unsigned tgt_long kallsyms_start;
165 unsigned tgt_long kallsyms_end;
166 unsigned tgt_long archdata_start;
167 unsigned tgt_long archdata_end;
168 unsigned tgt_long kernel_data;
169 };
与此对比的是内核里对模块的定义。
53 struct module
54 {
55 unsigned long size_of_struct; /* == sizeof(module) */
56 struct module *next;
57 const char *name;
58 unsigned long size;
59
60 union
61 {
62 atomic_t usecount;
63 long pad;
64 } uc; /* Needs to keep its size - so says rth */
65
66 unsigned long flags; /* AUTOCLEAN et al */
67
68 unsigned nsyms;
69 unsigned ndeps;
70
71 struct module_symbol *syms;
72 struct module_ref *deps;
73 struct module_ref *refs;
74 int (*init)(void);
75 void (*cleanup)(void);
76 const struct exception_table_entry *ex_table_start;
77 const struct exception_table_entry *ex_table_end;
78 #ifdef __alpha__
79 unsigned long gp;
80 #endif
81 /* Members past this point are extensions to the basic
82 module support and are optional. Use mod_member_present()
83 to examine them. */
84 const struct module_persist *persist_start;
85 const struct module_persist *persist_end;
86 int (*can_unload)(void);
87 int runsize; /* In modutils, not currently used */
88 const char *kallsyms_start; /* All symbols for kernel debugging */
89 const char *kallsyms_end;
90 const char *archdata_start; /* arch specific data for module */
91 const char *archdata_end;
92 const char *kernel_data; /* Reserved for kernel internal use */
93 };
这2个结构基本上是对应的,只是这里定义的结构,为了避免麻烦,使用unsigned tgt_long这样的整数类型代替各种指针,但是如果insmod和内核的版本相差太大,这2个结构还是会有不对应的地方。
这里的主体是sys_init_module,这个函数在./modutils-2.4.0/util/sys_nim.c中。
37 #ifndef CONFIG_USE_SYSCALL
38
39 extern int init_module(const char *name, const struct module *info);
40
41 int
42 sys_init_module(const char *name, const struct module *info)
43 {
44 return init_module(name, info);
45 }
46
47 #else
48
49 #define __NR_sys_init_module __NR_init_module
50 _syscall2(int, sys_init_module, const char *, name,
51 const struct module *, info)
52
53 #endif
这里调用哪个函数取决于宏CONFIG_USE_SYSCALL是否定义。在./modutils-2.4.0/INSTALL文件中提到这些选项在默认情况下是false。使用这个宏的原因,是有些库不能做系统调用,所以,必须通过_syscall2这样的宏做系统调用。
回到INSMOD_MAIN,在1882行中,size_of_struct是无符号类型(unsigned tgt_long),-sizeof(test_read.m)的结果将会是一个很大的值,为什么要这样做呢?文档patch-2.4.0-test13-pre2给出了答案。在需要使用这个功能的时候,需要给内核打补丁,扩展sys_init_module系统调用的语义,使其在size为负数时,从现有module结构读出数据,而不是设置新的模块。
以下就是这个文档给出的蛛丝马迹。
72 if ((error = get_user(mod_user_size, &mod_user->size_of_struct)) != 0)
73 goto err1;
74 -if (mod_user_size < (unsigned long)&((struct module *)0L)->persist_start
75 - || mod_user_size > sizeof(struct module) + 16*sizeof(void*)) {
上面以“-”开头的两行是补丁删除的内核原有判断:如果size是负的(作为无符号数是一个很大的值),这个条件一定满足。补丁没有新增系统调用,也没有改动glibc,而是扩展了sys_init_module本身的语义,当size为负时,执行下面的代码。
76 /* A negative mod_user_size indicates reading data from an
77 * existing module.
78 */
79 for (i = 0; i < 2; ++i) {
80 if (mod_user_size >= (unsigned long)&((struct module *)0L)->read_start
81 && mod_user_size <= sizeof(struct module) + 16*sizeof(void*))
82 break;
83 mod_user_size = -mod_user_size; /* Try with negated size */
84 }
85 if (i == 1) {
86 /* Negative size, read from existing module */
87 error = read_module_data(mod_user_size, mod_user, mod);
88 goto err1;
89 }
90 if (i == 2) {
91 printk(KERN_ERR "init_module: Invalid module header size./n"
92 KERN_ERR "A new version of the modutils is likely "
93 "needed./n");
94 KERN_ERR "A new version of modutils may be needed./n");
95 error = -EINVAL;
96 goto err1;
97 }
16 /* A negative mod_user_size to sys_init_module indicates that the caller wants
17 * to read data out of an existing module instead of initializing a new module.
18 * This usage overloads the meaning of sys_init_module, but the alternative was
19 * yet another system call and changes to glibc. sys_init_module already does
20 * much of the work needed to read from an existing module so it was easier to
21 * extend that syscall. Keith Owens <kaos@ocs.com.au> November 2000
22 */
23
24 static int
25 read_module_data(unsigned long mod_user_size, struct module *mod_user, struct module *mod_exist)
26 {
27 struct module mod;
28 int error;
29 if (!try_inc_mod_count(mod_exist))
30 return(-ENOENT);
31 error = copy_from_user(&mod, mod_user, mod_user_size);
32 if (error) {
33 error = -EFAULT;
34 goto err1;
35 }
36 mod.size_of_struct = mod_user_size;
37 error = -EINVAL;
38 /* read_start and read_end must be present and must point inside the
39 * existing module. The module data from read_start to read_end-1 is
40 * copied back to the user, immediately after the user's struct module.
41 */
42 if (!mod_member_present(&mod, read_end) ||
43 !mod_bound(mod.read_start, 0, mod_exist) ||
44 !mod_bound(mod.read_end, -1, mod_exist) ||
45 mod.read_start >= mod.read_end) {
46 printk(KERN_ERR "init_module: mod->read_xxx data out of bounds./n");
47 goto err1;
48 }
49 error = copy_to_user(((char *)mod_user)+mod_user_size,
50 mod.read_start,
51 mod.read_end - mod.read_start);
52 if (error) {
53 error = -EFAULT;
54 goto err1;
55 }
56 error = 0;
57 err1:
58 __MOD_DEC_USE_COUNT(mod_exist);
59 return(error);
60 }
以上给出的只是原理性代码。这些代码在内核里,相应的read_start,read_end都要换为内核里的persist_start和persist_end。在read_module_data函数里mod_exist是内核生成的模块,注意在INSMOD_MAIN的1844行,m_addr保存了由内核生成的模块的地址,而1883行设置test_read.m.read_start = m_addr + sizeof(struct module),这样在49行的copy_to_user语句中,拷贝的正是这个位置,大小是sizeof(int)。在实际运行时,read_start的位置是模块名字符串的开始。因此,read_module_data在这里没做实际的事情,只要是打过补丁的内核,sys_init_module的操作是不会出错的了。所以,这里主要是测试内核是否已经打了补丁。