回到INSMOD_MAIN。
1819 /* archdata based on relocatable addresses */
1820 if (add_archdata(f, &archdata))
1821 goto out;
1822
1823 /* kallsyms based on relocatable addresses */
1824 if (add_kallsyms(f, &kallsyms, force_kallsyms))
1825 goto out;
add_archdata函数也在insmod.c里。
Insmod——add_archdata函数
1034 /* Add an arch data section if the arch wants it. */
1035 static int add_archdata(struct obj_file *f,
1036 struct obj_section **sec)
1037 {
1038 size_t i;
1039
1040 *sec = NULL;
1041 /* Add an empty archdata section to the module if necessary */
1042 for (i = 0; i < f->header.e_shnum; ++i) {
1043 if (strcmp(f->sections[i]->name, ARCHDATA_SEC_NAME) == 0) {
1044 *sec = f->sections[i];
1045 break;
1046 }
1047 }
1048 if (!*sec)
1049 *sec = obj_create_alloced_section(f, ARCHDATA_SEC_NAME, 16, 0);
1050
1051 /* Size and populate archdata */
1052 if (arch_archdata(f, *sec))
1053 return(1);
1054 return 0;
1055 }
函数先在模块中查找名为“__archdata”(宏ARCHDATA_SEC_NAME的定义)的段,如果不存在就创建一个。在x86体系下arch_archdata函数是空函数。
接下来是对add_kallsyms的调用,这个函数也在同一文件下。
Insmod——add_kallsyms函数
979 /* Add a kallsyms section if the kernel supports all symbols. */
980 static int add_kallsyms(struct obj_file *f,
981 struct obj_section **module_kallsyms, int force_kallsyms)
982 {
983 struct module_symbol *s;
984 struct obj_file *f_kallsyms;
985 struct obj_section *sec_kallsyms;
986 size_t i;
987 int l;
988 const char *p, *pt_R;
989 unsigned long start = 0, stop = 0;
990
991 for (i = 0, s = ksyms; i < nksyms; ++i, ++s) {
992 p = (char *)s->name;
993 pt_R = strstr(p, "_R");
994 if (pt_R)
995 l = pt_R - p;
996 else
997 l = strlen(p);
998 if (strncmp(p, "__start_" KALLSYMS_SEC_NAME, l) == 0)
999 start = s->value;
1000 else if (strncmp(p, "__stop_" KALLSYMS_SEC_NAME, l) == 0)
1001 stop = s->value;
1002 }
1003
1004 if (start >= stop && !force_kallsyms)
1005 return(0);
1006
1007 /* The kernel contains all symbols, do the same for this module. */
1008
1009 /* Add an empty kallsyms section to the module if necessary */
1010 for (i = 0; i < f->header.e_shnum; ++i) {
1011 if (strcmp(f->sections[i]->name, KALLSYMS_SEC_NAME) == 0) {
1012 *module_kallsyms = f->sections[i];
1013 break;
1014 }
1015 }
1016 if (!*module_kallsyms)
1017 *module_kallsyms = obj_create_alloced_section(f, KALLSYMS_SEC_NAME, 0, 0);
1018
1019 /* Size and populate kallsyms */
1020 if (obj_kallsyms(f, &f_kallsyms))
1021 return(1);
1022 sec_kallsyms = f_kallsyms->sections[KALLSYMS_IDX];
1023 (*module_kallsyms)->header.sh_addralign = sec_kallsyms->header.sh_addralign;
1024 (*module_kallsyms)->header.sh_size = sec_kallsyms->header.sh_size;
1025 free((*module_kallsyms)->contents);
1026 (*module_kallsyms)->contents = sec_kallsyms->contents;
1027 sec_kallsyms->contents = NULL;
1028 obj_free(f_kallsyms);
1029
1030 return 0;
1031 }
这个函数主要是处理内核导出符号。从代码中可以知道,内核的符号信息位于“__start___kallsyms”和“__stop___kallsyms”符号(即"__start_"、"__stop_"后接KALLSYMS_SEC_NAME)所指向的地址之间;如果start >= stop且没有指定force_kallsyms,函数直接返回0。这个函数的主体是obj_kallsyms函数,这个函数在./modutils-2.4.0/obj/obj_kallsyms.c里。
Insmod——obj_kallsyms函数
84 /* Extract all symbols from the input obj_file, ignore ones that are
85 * no use for debugging, build an output obj_file containing only the
86 * kallsyms section.
87 *
88 * The kallsyms section is a bit unusual. It deliberately has no
89 * relocatable data, all "pointers" are represented as byte offsets
90 * into the the section. This means it can be stored anywhere without
91 * relocation problems. In particular it can be stored within a kernel
92 * image, it can be stored separately from the kernel image, it can be
93 * appended to a module just before loading, it can be stored in a
94 * separate area etc.
95 *
96 * Format of the kallsyms section.
97 *
98 * Header:
99 * Size of header.
100 * Total size of kallsyms data, including strings.
101 * Number of loaded sections.
102 * Offset to first section entry from start of header.
103 * Size of each section entry, excluding the name string.
104 * Number of symbols.
105 * Offset to first symbol entry from start of header.
106 * Size of each symbol entry, excluding the name string.
107 *
108 * Section entry - one per loaded section.
109 * Start of section[1].
110 * Size of section.
111 * Offset to name of section, from start of strings.
112 * Section flags.
113 *
114 * Symbol entry - one per symbol in the input file[2].
115 * Offset of section that owns this symbol, from start of section data.
116 * Address of symbol within the real section[1].
117 * Offset to name of symbol, from start of strings.
118 *
119 * Notes: [1] This is an exception to the "represent pointers as
120 * offsets" rule, it is a value, not an offset. The start
121 * address of a section or a symbol is extracted from the
122 * obj_file data which may contain absolute or relocatable
123 * addresses. If the addresses are relocatable then the
124 * caller must adjust the section and/or symbol entries in
125 * kallsyms after relocation.
126 * [2] Only symbols that fall within loaded sections are stored.
127 */
128
129 int
130 obj_kallsyms (struct obj_file *fin, struct obj_file **fout_result)
131 {
132 struct obj_file *fout;
133 int i, loaded = 0, *fin_to_allsym_map;
134 struct obj_section *isec, *osec;
135 struct kallsyms_header *a_hdr;
136 struct kallsyms_section *a_sec;
137 ElfW(Off) sec_off;
138 struct kallsyms_symbol *symbols = NULL, a_sym;
139 ElfW(Word) symbols_size = 0, symbols_left = 0;
140 char *strings = NULL, *p;
141 ElfW(Word) strings_size = 0, strings_left = 0;
142 ElfW(Off) file_offset;
143 static char strtab[] = "\000" KALLSYMS_SEC_NAME;
144
145 /* Create the kallsyms section. */
146 fout = arch_new_file();
147 memset(fout, 0, sizeof(*fout));
148 fout->symbol_cmp = strcmp;
149 fout->symbol_hash = obj_elf_hash;
150 fout->load_order_search_start = &fout->load_order;
151
152 /* Copy file characteristics from input file and modify to suit */
153 memcpy(&fout->header, &fin->header, sizeof(fout->header));
154 fout->header.e_type = ET_REL; /* Output is relocatable */
155 fout->header.e_entry = 0; /* No entry point */
156 fout->header.e_phoff = 0; /* No program header */
157 file_offset = sizeof(fout->header); /* Step over Elf header */
158 fout->header.e_shoff = file_offset; /* Section headers next */
159 fout->header.e_phentsize = 0; /* No program header */
160 fout->header.e_phnum = 0; /* No program header */
161 fout->header.e_shnum = KALLSYMS_IDX+1; /* Initial, strtab, kallsyms */
162 fout->header.e_shstrndx = KALLSYMS_IDX-1; /* strtab */
163 file_offset += fout->header.e_shentsize * fout->header.e_shnum;
164
165 /* Populate the section data for kallsyms itself */
166 fout->sections = xmalloc(sizeof(*(fout->sections))*fout->header.e_shnum);
167 memset(fout->sections, 0, sizeof(*(fout->sections))*fout->header.e_shnum);
168
169 fout->sections[0] = osec = arch_new_section();
170 memset(osec, 0, sizeof(*osec));
171 osec->header.sh_type = SHT_NULL;
172 osec->header.sh_link = SHN_UNDEF;
173
174 fout->sections[KALLSYMS_IDX-1] = osec = arch_new_section();
175 memset(osec, 0, sizeof(*osec));
176 osec->name = ".strtab";
177 osec->header.sh_type = SHT_STRTAB;
178 osec->header.sh_link = SHN_UNDEF;
179 osec->header.sh_offset = file_offset;
180 osec->header.sh_size = sizeof(strtab);
181 osec->contents = xmalloc(sizeof(strtab));
182 memcpy(osec->contents, strtab, sizeof(strtab));
183 file_offset += osec->header.sh_size;
184
185 fout->sections[KALLSYMS_IDX] = osec = arch_new_section();
186 memset(osec, 0, sizeof(*osec));
187 osec->name = KALLSYMS_SEC_NAME;
188 osec->header.sh_name = 1; /* Offset in strtab */
189 osec->header.sh_type = SHT_PROGBITS; /* Load it */
190 osec->header.sh_flags = SHF_ALLOC; /* Read only data */
191 osec->header.sh_link = SHN_UNDEF;
192 osec->header.sh_addralign = sizeof(ElfW(Word));
193 file_offset = (file_offset + osec->header.sh_addralign - 1)
194 & -(osec->header.sh_addralign);
195 osec->header.sh_offset = file_offset;
196
197 /* How many loaded sections are there? */
198 for (i = 0; i < fin->header.e_shnum; ++i) {
199 if (fin->sections[i]->header.sh_flags & SHF_ALLOC)
200 ++loaded;
201 }
202
203 /* Initial contents, header + one entry per input section. No strings. */
204 osec->header.sh_size = sizeof(*a_hdr) + loaded*sizeof(*a_sec);
205 a_hdr = (struct kallsyms_header *) osec->contents =
206 xmalloc(osec->header.sh_size);
207 memset(osec->contents, 0, osec->header.sh_size);
208 a_hdr->size = sizeof(*a_hdr);
209 a_hdr->sections = loaded;
210 a_hdr->section_off = a_hdr->size;
211 a_hdr->section_size = sizeof(*a_sec);
212 a_hdr->symbol_off = osec->header.sh_size;
213 a_hdr->symbol_size = sizeof(a_sym);
214 a_hdr->start = (ElfW(Addr))(~0);
215
216 /* Map input section numbers to kallsyms section offsets. */
217 sec_off = 0; /* Offset to first kallsyms section entry */
218 fin_to_allsym_map = xmalloc(sizeof(*fin_to_allsym_map)*fin->header.e_shnum);
219 for (i = 0; i < fin->header.e_shnum; ++i) {
220 isec = fin->sections[i];
221 if (isec->header.sh_flags & SHF_ALLOC) {
222 fin_to_allsym_map[isec->idx] = sec_off;
223 sec_off += a_hdr->section_size;
224 }
225 else
226 fin_to_allsym_map[isec->idx] = -1; /* Ignore this section */
227 }
228
229 /* Copy the loaded section data. */
230 a_sec = (struct kallsyms_section *) ((char *) a_hdr + a_hdr->section_off);
231 for (i = 0; i < fin->header.e_shnum; ++i) {
232 isec = fin->sections[i];
233 if (!(isec->header.sh_flags & SHF_ALLOC))
234 continue;
235 a_sec->start = isec->header.sh_addr;
236 a_sec->size = isec->header.sh_size;
237 a_sec->flags = isec->header.sh_flags;
238 a_sec->name_off = strings_size - strings_left;
239 append_string(isec->name, &strings, &strings_size, &strings_left);
240 if (a_sec->start < a_hdr->start)
241 a_hdr->start = a_sec->start;
242 if (a_sec->start+a_sec->size > a_hdr->end)
243 a_hdr->end = a_sec->start+a_sec->size;
244 ++a_sec;
245 }
246
247 /* Build the kallsyms symbol table from the symbol hashes. */
248 for (i = 0; i < HASH_BUCKETS; ++i) {
249 struct obj_symbol *sym = fin->symtab[i];
250 for (sym = fin->symtab[i]; sym ; sym = sym->next) {
251 if (!sym || sym->secidx >= fin->header.e_shnum)
252 continue;
253 if ((a_sym.section_off = fin_to_allsym_map[sym->secidx]) == -1)
254 continue;
255 if (strcmp(sym->name, "gcc2_compiled.") == 0 ||
256 strncmp(sym->name, "__insmod_", 9) == 0)
257 continue;
258 a_sym.symbol_addr = sym->value;
259 if (fin->header.e_type == ET_REL)
260 a_sym.symbol_addr += fin->sections[sym->secidx]->header.sh_addr;
261 a_sym.name_off = strings_size - strings_left;
262 append_symbol(&a_sym, &symbols, &symbols_size, &symbols_left);
263 append_string(sym->name, &strings, &strings_size, &strings_left);
264 ++a_hdr->symbols;
265 }
266 }
267 free(fin_to_allsym_map);
268
269 /* Sort the symbols into ascending order by address and name */
270 sym_strings = strings; /* For symbol_compare */
271 qsort((char *) symbols, (unsigned) a_hdr->symbols,
272 sizeof(* symbols), symbol_compare);
273 sym_strings = NULL;
274
275 /* Put the lot together */
276 osec->header.sh_size = a_hdr->total_size = a_hdr->symbol_off +
277 a_hdr->symbols*a_hdr->symbol_size + strings_size - strings_left;
278 a_hdr = (struct kallsyms_header *) osec->contents = xrealloc(a_hdr, a_hdr->total_size);
279 p = (char *)a_hdr + a_hdr->symbol_off;
280 memcpy(p, symbols, a_hdr->symbols*a_hdr->symbol_size);
281 free(symbols);
282 p += a_hdr->symbols*a_hdr->symbol_size;
283 a_hdr->string_off = p - (char *)a_hdr;
284 memcpy(p, strings, strings_size - strings_left);
285 free(strings);
286
287 *fout_result = fout;
288 return 0;
289 }
先看注释,这个函数的作用是将输入obj_file里的符号提取出来,忽略不用于调试的符号。构建出的obj_file只包含kallsyms段。这个段经过特别设计:段内所有“指针”都用相对于段起始处的字节偏移表示,因此可以存放在任意位置而不存在重定位问题(否则,debugger的设计将是噩梦!)。它的格式在注释中已经讲得非常清楚,在./modutils-2.4.0/include/obj_kallsyms.h中也有明确的定义。
Insmod——kallsyms_header结构
60 /* Format of data in the kallsyms section.
61 * Most of the fields are small numbers but the total size and all
62 * offsets can be large so use the 32/64 bit types for these fields.
63 *
64 * Do not use sizeof() on these structures, modutils may be using extra
65 * fields. Instead use the size fields in the header to access the
66 * other bits of data.
67 */
68
69 struct kallsyms_header {
70 int size; /* Size of this header */
71 ElfW(Word) total_size; /* Total size of kallsyms data */
72 int sections; /* Number of section entries */
73 ElfW(Off) section_off; /* Offset to first section entry */
74 int section_size; /* Size of one section entry */
75 int symbols; /* Number of symbol entries */
76 ElfW(Off) symbol_off; /* Offset to first symbol entry */
77 int symbol_size; /* Size of one symbol entry */
78 ElfW(Off) string_off; /* Offset to first string */
79 ElfW(Addr) start; /* Start address of first section */
80 ElfW(Addr) end; /* End address of last section */
81 };
82
83 struct kallsyms_section {
84 ElfW(Addr) start; /* Start address of section */
85 ElfW(Word) size; /* Size of this section */
86 ElfW(Off) name_off; /* Offset to section name */
87 ElfW(Word) flags; /* Flags from section */
88 };
88
89 struct kallsyms_symbol {
90 ElfW(Off) section_off; /* Offset to section that owns this symbol */
91 ElfW(Addr) symbol_addr; /* Address of symbol */
92 ElfW(Off) name_off; /* Offset to symbol name */
93 };
回到obj_kallsyms函数。到163行是对elf文件头的设置,具体请参照elf格式文档;165~167行分配并清零sections数组。169~172行是对section(段头)第一项的设置,在elf文件里这是保留不用的。174~183行设置第2项section,这是个string section,其内容初始化为“\000__kallsyms”(以一个NUL字节开头,所以段名从偏移1开始,对应188行的sh_name = 1),实际上是不可见的。185行开始设置最后一项section,这是这个obj_file的主要部分。
198行计算已加载段的数目。203~214行设置kallsyms段头。216~227行根据段的序号计算各个已加载段头相对第一个段头的偏移。230~245行依据已加载的段,设置已加载段参数,并把各个段的名字保存到strings指向的缓存。函数append_string也在同一文件里。
Insmod——append_string函数
34 /* Append a string to the big list of strings */
35
36 static void
37 append_string (const char *s, char **strings,
38 ElfW(Word) *strings_size, ElfW(Word) *strings_left)
39 {
40 int l = strlen(s) + 1;
41 while (l > *strings_left) {
42 *strings = xrealloc(*strings, *strings_size += EXPAND_BY);
43 *strings_left += EXPAND_BY;
44 }
45 memcpy((char *)*strings+*strings_size-*strings_left, s, l);
46 *strings_left -= l;
45 }
248~266行将合适的符号保存入kallsyms段里。从代码里可以看到,这里忽略符号名为gcc2_compiled.或以__insmod_开头的符号。259行判断的是输入文件fin的类型;模块目标文件的类型是ET_REL,所以执行260行,计算出符号在内存中的地址(注意现在得到的还是理论上的地址,也就是文件在内存的起始位置为0,段头的sh_addr表示段在内存的起始位置,就是以这个为假设的)。然后通过append_symbol将符号的内容存入缓存,接着保存符号名。append_symbol在同一文件里。
Insmod——append_symbol函数
50 /* Append a symbol to the big list of symbols */
51
52 static void
53 append_symbol (const struct kallsyms_symbol *s,
54 struct kallsyms_symbol **symbols,
55 ElfW(Word) *symbols_size, ElfW(Word) *symbols_left)
56 {
57 int l = sizeof(*s);
58 while (l > *symbols_left) {
59 *symbols = xrealloc(*symbols, *symbols_size += EXPAND_BY);
60 *symbols_left += EXPAND_BY;
61 }
62 memcpy((char *)*symbols+*symbols_size-*symbols_left, s, l);
63 *symbols_left -= l;
63 }
接着在270~273行,对符号进行排序。symbol_compare函数也还是在同一文件里。
Insmod——symbol_compare函数
66 /* qsort compare routine to sort symbols */
67
68 static const char *sym_strings;
69
70 static int
71 symbol_compare (const void *a, const void *b)
72 {
73 struct kallsyms_symbol *c = (struct kallsyms_symbol *) a;
74 struct kallsyms_symbol *d = (struct kallsyms_symbol *) b;
75
76 if (c->symbol_addr > d->symbol_addr)
77 return(1);
78 if (c->symbol_addr < d->symbol_addr)
79 return(-1);
80 return(strcmp(sym_strings+c->name_off, sym_strings+d->name_off));
79 }
可以看到符号按它们在内存地址的高低排序,地址相同,再使用名字排序。最后,276~285行,将这些经过处理的内容拷贝到输出文件里。
到这里为止,输出文件的内容如下图。
实际上,这个文件就是将输入文件里的信息整理整理,更方便使用(记住__kallsyms段是用作辅助内核调试),整个输出文件只是临时的仓库。add_kallsyms函数的1022~1030行,正是将输出文件里的最后一个段(即sections[KALLSYMS_IDX])的内容(图中灰色部分)保存为__kallsyms段的内容。