一般情况下都是在user space通过insmod的方式来装载ko,所以一般insmod ko
的行为是user space 主动的,但是如果想在kernel space insmod ko是否可以呢?
答案是肯定的。
如下面这个例子就是在kernel space 主动要求安装rtc-ds1685
http://lxr.free-electrons.com/source/arch/mips/sgi-ip32/ip32-reset.c#L50
request_module("rtc-ds1685");
下面我们看看request_module 是如何实现主动安装ko的
request_module源码如下:
http://lxr.free-electrons.com/source/include/linux/kmod.h#L37
37 #define request_module(mod...) __request_module(true, mod)
38 #define request_module_nowait(mod...) __request_module(false, mod)
继续看__request_module的实现
http://lxr.free-electrons.com/source/kernel/kmod.c#L124
124 int __request_module(bool wait, const char *fmt, ...)
125 {
126 va_list args;
127 char module_name[MODULE_NAME_LEN];
128 unsigned int max_modprobes;
129 int ret;
130 static atomic_t kmod_concurrent = ATOMIC_INIT(0);
131 #define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */
132 static int kmod_loop_msg;
133
134 /*
135 * We don't allow synchronous module loading from async. Module
136 * init may invoke async_synchronize_full() which will end up
137 * waiting for this task which already is waiting for the module
138 * loading to complete, leading to a deadlock.
139 */
140 WARN_ON_ONCE(wait && current_is_async());
141
142 if (!modprobe_path[0])
143 return 0;
144
145 va_start(args, fmt);
146 ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
147 va_end(args);
148 if (ret >= MODULE_NAME_LEN)
149 return -ENAMETOOLONG;
150
151 ret = security_kernel_module_request(module_name);
152 if (ret)
153 return ret;
154
155 /* If modprobe needs a service that is in a module, we get a recursive
156 * loop. Limit the number of running kmod threads to max_threads/2 or
157 * MAX_KMOD_CONCURRENT, whichever is the smaller. A cleaner method
158 * would be to run the parents of this process, counting how many times
159 * kmod was invoked. That would mean accessing the internals of the
160 * process tables to get the command line, proc_pid_cmdline is static
161 * and it is not worth changing the proc code just to handle this case.
162 * KAO.
163 *
164 * "trace the ppid" is simple, but will fail if someone's
165 * parent exits. I think this is as good as it gets. --RR
166 */
167 max_modprobes = min(max_threads/2, MAX_KMOD_CONCURRENT);
168 atomic_inc(&kmod_concurrent);
169 if (atomic_read(&kmod_concurrent) > max_modprobes) {
170 /* We may be blaming an innocent here, but unlikely */
171 if (kmod_loop_msg < 5) {
172 printk(KERN_ERR
173 "request_module: runaway loop modprobe %s\n",
174 module_name);
175 kmod_loop_msg++;
176 }
177 atomic_dec(&kmod_concurrent);
178 return -ENOMEM;
179 }
180
181 trace_module_request(module_name, wait, _RET_IP_);
182
183 ret = call_modprobe(module_name, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC);
184
185 atomic_dec(&kmod_concurrent);
186 return ret;
187 }
188 EXPORT_SYMBOL(__request_module);
在146行通过vsnprintf来格式化字符串,例如本例就是rtc-ds1685,注意ko 名字的长度不能超过MODULE_NAME_LEN
151行 kernel 做security检查,在android系统中对应的就是selinux
167~179行统计当前有多少个由kernel 主动发起的modprobe请求正在执行,并发个数不能超过max_threads的一半和MAX_KMOD_CONCURRENT(50)两者中的较小值,否则返回-ENOMEM。
最重要的是183行调用call_modprobe来让user space insmod这个ko
69 static int call_modprobe(char *module_name, int wait)
70 {
71 struct subprocess_info *info;
72 static char *envp[] = {
73 "HOME=/",
74 "TERM=linux",
75 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
76 NULL
77 };
78
79 char **argv = kmalloc(sizeof(char *[5]), GFP_KERNEL);
80 if (!argv)
81 goto out;
82
83 module_name = kstrdup(module_name, GFP_KERNEL);
84 if (!module_name)
85 goto free_argv;
86
87 argv[0] = modprobe_path;
88 argv[1] = "-q";
89 argv[2] = "--";
90 argv[3] = module_name; /* check free_modprobe_argv() */
91 argv[4] = NULL;
92
93 info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL,
94 NULL, free_modprobe_argv, NULL);
95 if (!info)
96 goto free_module_name;
97
98 return call_usermodehelper_exec(info, wait | UMH_KILLABLE);
99
100 free_module_name:
101 kfree(module_name);
102 free_argv:
103 kfree(argv);
104 out:
105 return -ENOMEM;
106 }
87~91行建立argv(envp在72~77行建立),注意其中的 char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe";
93行调用call_usermodehelper_setup 来初始化一个sub_info->work,如530行所示
struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
520 char **envp, gfp_t gfp_mask,
521 int (*init)(struct subprocess_info *info, struct cred *new),
522 void (*cleanup)(struct subprocess_info *info),
523 void *data)
524 {
525 struct subprocess_info *sub_info;
526 sub_info = kzalloc(sizeof(struct subprocess_info), gfp_mask);
527 if (!sub_info)
528 goto out;
529
530 INIT_WORK(&sub_info->work, call_usermodehelper_exec_work);
531 sub_info->path = path;
532 sub_info->argv = argv;
533 sub_info->envp = envp;
534
535 sub_info->cleanup = cleanup;
536 sub_info->init = init;
537 sub_info->data = data;
538 out:
539 return sub_info;
540 }
541 EXPORT_SYMBOL(call_usermodehelper_setup);
98行调用 call_usermodehelper_exec 来让user space 通过modprobe这个命令来加载这个ko。
根据87~91行建立的argv,我们这个例子中user space要执行的命令为 /sbin/modprobe -q -- rtc-ds1685
(注意传给modprobe的是模块名,不带.ko后缀)。
我们看call_usermodehelper_exec 主要就是通过system_unbound_wq执行call_usermodehelper_setup
中建立的work,即call_usermodehelper_exec_work
555 int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
556 {
557 DECLARE_COMPLETION_ONSTACK(done);
558 int retval = 0;
559
560 if (!sub_info->path) {
561 call_usermodehelper_freeinfo(sub_info);
562 return -EINVAL;
563 }
564 helper_lock();
565 if (usermodehelper_disabled) {
566 retval = -EBUSY;
567 goto out;
568 }
569 /*
570 * Set the completion pointer only if there is a waiter.
571 * This makes it possible to use umh_complete to free
572 * the data structure in case of UMH_NO_WAIT.
573 */
574 sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &done;
575 sub_info->wait = wait;
576
577 queue_work(system_unbound_wq, &sub_info->work);
}
所以我们重点看看call_usermodehelper_exec_work的实现
321 static void call_usermodehelper_exec_work(struct work_struct *work)
322 {
323 struct subprocess_info *sub_info =
324 container_of(work, struct subprocess_info, work);
325
326 if (sub_info->wait & UMH_WAIT_PROC) {
327 call_usermodehelper_exec_sync(sub_info);
328 } else {
329 pid_t pid;
330 /*
331 * Use CLONE_PARENT to reparent it to kthreadd; we do not
332 * want to pollute current->children, and we need a parent
333 * that always ignores SIGCHLD to ensure auto-reaping.
334 */
335 pid = kernel_thread(call_usermodehelper_exec_async, sub_info,
336 CLONE_PARENT | SIGCHLD);
337 if (pid < 0) {
338 sub_info->retval = pid;
339 umh_complete(sub_info);
340 }
341 }
342 }
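这个函数分两种case:设置了UMH_WAIT_PROC时调用call_usermodehelper_exec_sync,否则直接通过kernel_thread创建线程;两种case最终都会执行call_usermodehelper_exec_async。下面先看call_usermodehelper_exec_sync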
269 static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info)
270 {
271 pid_t pid;
272
273 /* If SIGCLD is ignored sys_wait4 won't populate the status. */
274 kernel_sigaction(SIGCHLD, SIG_DFL);
275 pid = kernel_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD);
276 if (pid < 0) {
277 sub_info->retval = pid;
278 } else {
279 int ret = -ECHILD;
280 /*
281 * Normally it is bogus to call wait4() from in-kernel because
282 * wait4() wants to write the exit code to a userspace address.
283 * But call_usermodehelper_exec_sync() always runs as kernel
284 * thread (workqueue) and put_user() to a kernel address works
285 * OK for kernel threads, due to their having an mm_segment_t
286 * which spans the entire address space.
287 *
288 * Thus the __user pointer cast is valid here.
289 */
290 sys_wait4(pid, (int __user *)&ret, 0, NULL);
291
292 /*
293 * If ret is 0, either call_usermodehelper_exec_async failed and
294 * the real error code is already in sub_info->retval or
295 * sub_info->retval is 0 anyway, so don't mess with it then.
296 */
297 if (ret)
298 sub_info->retval = ret;
299 }
300
301 /* Restore default kernel sig handler */
302 kernel_sigaction(SIGCHLD, SIG_IGN);
303
304 umh_complete(sub_info);
305 }
主要是建立一个 call_usermodehelper_exec_async thread
215 static int call_usermodehelper_exec_async(void *data)
216 {
217 struct subprocess_info *sub_info = data;
218 struct cred *new;
219 int retval;
220
221 spin_lock_irq(&current->sighand->siglock);
222 flush_signal_handlers(current, 1);
223 spin_unlock_irq(&current->sighand->siglock);
224
225 /*
226 * Our parent (unbound workqueue) runs with elevated scheduling
227 * priority. Avoid propagating that into the userspace child.
228 */
229 set_user_nice(current, 0);
230
231 retval = -ENOMEM;
232 new = prepare_kernel_cred(current);
233 if (!new)
234 goto out;
235
236 spin_lock(&umh_sysctl_lock);
237 new->cap_bset = cap_intersect(usermodehelper_bset, new->cap_bset);
238 new->cap_inheritable = cap_intersect(usermodehelper_inheritable,
239 new->cap_inheritable);
240 spin_unlock(&umh_sysctl_lock);
241
242 if (sub_info->init) {
243 retval = sub_info->init(sub_info, new);
244 if (retval) {
245 abort_creds(new);
246 goto out;
247 }
248 }
249
250 commit_creds(new);
251
252 retval = do_execve(getname_kernel(sub_info->path),
253 (const char __user *const __user *)sub_info->argv,
254 (const char __user *const __user *)sub_info->envp);
255 out:
256 sub_info->retval = retval;
257 /*
258 * call_usermodehelper_exec_sync() will call umh_complete
259 * if UHM_WAIT_PROC.
260 */
261 if (!(sub_info->wait & UMH_WAIT_PROC))
262 umh_complete(sub_info);
263 if (!retval)
264 return 0;
265 do_exit(0);
266 }
看到这里就明白了,原来也是通过do_execve来让user space执行modprobe命令(/sbin/modprobe),由modprobe完成ko的加载.
的行为是user space 主动的,但是如果想在kernel space insmod ko是否可以呢?
答案是肯定的。
如下面这个例子就是在kernel space 主动要求安装rtc-ds1685
http://lxr.free-electrons.com/source/arch/mips/sgi-ip32/ip32-reset.c#L50
request_module("rtc-ds1685");
下面我们看看request_module 是如何实现主动安装ko的
request_module源码如下:
http://lxr.free-electrons.com/source/include/linux/kmod.h#L37
37 #define request_module(mod...) __request_module(true, mod)
38 #define request_module_nowait(mod...) __request_module(false, mod)
继续看__request_module的实现
http://lxr.free-electrons.com/source/kernel/kmod.c#L124
124 int __request_module(bool wait, const char *fmt, ...)
125 {
126 va_list args;
127 char module_name[MODULE_NAME_LEN];
128 unsigned int max_modprobes;
129 int ret;
130 static atomic_t kmod_concurrent = ATOMIC_INIT(0);
131 #define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */
132 static int kmod_loop_msg;
133
134 /*
135 * We don't allow synchronous module loading from async. Module
136 * init may invoke async_synchronize_full() which will end up
137 * waiting for this task which already is waiting for the module
138 * loading to complete, leading to a deadlock.
139 */
140 WARN_ON_ONCE(wait && current_is_async());
141
142 if (!modprobe_path[0])
143 return 0;
144
145 va_start(args, fmt);
146 ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
147 va_end(args);
148 if (ret >= MODULE_NAME_LEN)
149 return -ENAMETOOLONG;
150
151 ret = security_kernel_module_request(module_name);
152 if (ret)
153 return ret;
154
155 /* If modprobe needs a service that is in a module, we get a recursive
156 * loop. Limit the number of running kmod threads to max_threads/2 or
157 * MAX_KMOD_CONCURRENT, whichever is the smaller. A cleaner method
158 * would be to run the parents of this process, counting how many times
159 * kmod was invoked. That would mean accessing the internals of the
160 * process tables to get the command line, proc_pid_cmdline is static
161 * and it is not worth changing the proc code just to handle this case.
162 * KAO.
163 *
164 * "trace the ppid" is simple, but will fail if someone's
165 * parent exits. I think this is as good as it gets. --RR
166 */
167 max_modprobes = min(max_threads/2, MAX_KMOD_CONCURRENT);
168 atomic_inc(&kmod_concurrent);
169 if (atomic_read(&kmod_concurrent) > max_modprobes) {
170 /* We may be blaming an innocent here, but unlikely */
171 if (kmod_loop_msg < 5) {
172 printk(KERN_ERR
173 "request_module: runaway loop modprobe %s\n",
174 module_name);
175 kmod_loop_msg++;
176 }
177 atomic_dec(&kmod_concurrent);
178 return -ENOMEM;
179 }
180
181 trace_module_request(module_name, wait, _RET_IP_);
182
183 ret = call_modprobe(module_name, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC);
184
185 atomic_dec(&kmod_concurrent);
186 return ret;
187 }
188 EXPORT_SYMBOL(__request_module);
在146行通过vsnprintf来格式化字符串,例如本例就是rtc-ds1685,注意ko 名字的长度不能超过MODULE_NAME_LEN
151行 kernel 做security检查,在android系统中对应的就是selinux
167~179行统计当前有多少个由kernel 主动发起的modprobe请求正在执行,并发个数不能超过max_threads的一半和MAX_KMOD_CONCURRENT(50)两者中的较小值,否则返回-ENOMEM。
最重要的是183行调用call_modprobe来让user space insmod这个ko
69 static int call_modprobe(char *module_name, int wait)
70 {
71 struct subprocess_info *info;
72 static char *envp[] = {
73 "HOME=/",
74 "TERM=linux",
75 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
76 NULL
77 };
78
79 char **argv = kmalloc(sizeof(char *[5]), GFP_KERNEL);
80 if (!argv)
81 goto out;
82
83 module_name = kstrdup(module_name, GFP_KERNEL);
84 if (!module_name)
85 goto free_argv;
86
87 argv[0] = modprobe_path;
88 argv[1] = "-q";
89 argv[2] = "--";
90 argv[3] = module_name; /* check free_modprobe_argv() */
91 argv[4] = NULL;
92
93 info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL,
94 NULL, free_modprobe_argv, NULL);
95 if (!info)
96 goto free_module_name;
97
98 return call_usermodehelper_exec(info, wait | UMH_KILLABLE);
99
100 free_module_name:
101 kfree(module_name);
102 free_argv:
103 kfree(argv);
104 out:
105 return -ENOMEM;
106 }
87~91行建立argv(envp在72~77行建立),注意其中的 char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe";
93行调用call_usermodehelper_setup 来初始化一个sub_info->work,如530行所示
struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
520 char **envp, gfp_t gfp_mask,
521 int (*init)(struct subprocess_info *info, struct cred *new),
522 void (*cleanup)(struct subprocess_info *info),
523 void *data)
524 {
525 struct subprocess_info *sub_info;
526 sub_info = kzalloc(sizeof(struct subprocess_info), gfp_mask);
527 if (!sub_info)
528 goto out;
529
530 INIT_WORK(&sub_info->work, call_usermodehelper_exec_work);
531 sub_info->path = path;
532 sub_info->argv = argv;
533 sub_info->envp = envp;
534
535 sub_info->cleanup = cleanup;
536 sub_info->init = init;
537 sub_info->data = data;
538 out:
539 return sub_info;
540 }
541 EXPORT_SYMBOL(call_usermodehelper_setup);
98行调用 call_usermodehelper_exec 来让user space 通过modprobe这个命令来加载这个ko。
根据87~91行建立的argv,我们这个例子中user space要执行的命令为 /sbin/modprobe -q -- rtc-ds1685
(注意传给modprobe的是模块名,不带.ko后缀)。
我们看call_usermodehelper_exec 主要就是通过system_unbound_wq执行call_usermodehelper_setup
中建立的work,即call_usermodehelper_exec_work
555 int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
556 {
557 DECLARE_COMPLETION_ONSTACK(done);
558 int retval = 0;
559
560 if (!sub_info->path) {
561 call_usermodehelper_freeinfo(sub_info);
562 return -EINVAL;
563 }
564 helper_lock();
565 if (usermodehelper_disabled) {
566 retval = -EBUSY;
567 goto out;
568 }
569 /*
570 * Set the completion pointer only if there is a waiter.
571 * This makes it possible to use umh_complete to free
572 * the data structure in case of UMH_NO_WAIT.
573 */
574 sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &done;
575 sub_info->wait = wait;
576
577 queue_work(system_unbound_wq, &sub_info->work);
}
所以我们重点看看call_usermodehelper_exec_work的实现
321 static void call_usermodehelper_exec_work(struct work_struct *work)
322 {
323 struct subprocess_info *sub_info =
324 container_of(work, struct subprocess_info, work);
325
326 if (sub_info->wait & UMH_WAIT_PROC) {
327 call_usermodehelper_exec_sync(sub_info);
328 } else {
329 pid_t pid;
330 /*
331 * Use CLONE_PARENT to reparent it to kthreadd; we do not
332 * want to pollute current->children, and we need a parent
333 * that always ignores SIGCHLD to ensure auto-reaping.
334 */
335 pid = kernel_thread(call_usermodehelper_exec_async, sub_info,
336 CLONE_PARENT | SIGCHLD);
337 if (pid < 0) {
338 sub_info->retval = pid;
339 umh_complete(sub_info);
340 }
341 }
342 }
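这个函数分两种case:设置了UMH_WAIT_PROC时调用call_usermodehelper_exec_sync,否则直接通过kernel_thread创建线程;两种case最终都会执行call_usermodehelper_exec_async。下面先看call_usermodehelper_exec_sync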
269 static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info)
270 {
271 pid_t pid;
272
273 /* If SIGCLD is ignored sys_wait4 won't populate the status. */
274 kernel_sigaction(SIGCHLD, SIG_DFL);
275 pid = kernel_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD);
276 if (pid < 0) {
277 sub_info->retval = pid;
278 } else {
279 int ret = -ECHILD;
280 /*
281 * Normally it is bogus to call wait4() from in-kernel because
282 * wait4() wants to write the exit code to a userspace address.
283 * But call_usermodehelper_exec_sync() always runs as kernel
284 * thread (workqueue) and put_user() to a kernel address works
285 * OK for kernel threads, due to their having an mm_segment_t
286 * which spans the entire address space.
287 *
288 * Thus the __user pointer cast is valid here.
289 */
290 sys_wait4(pid, (int __user *)&ret, 0, NULL);
291
292 /*
293 * If ret is 0, either call_usermodehelper_exec_async failed and
294 * the real error code is already in sub_info->retval or
295 * sub_info->retval is 0 anyway, so don't mess with it then.
296 */
297 if (ret)
298 sub_info->retval = ret;
299 }
300
301 /* Restore default kernel sig handler */
302 kernel_sigaction(SIGCHLD, SIG_IGN);
303
304 umh_complete(sub_info);
305 }
主要是建立一个 call_usermodehelper_exec_async thread
215 static int call_usermodehelper_exec_async(void *data)
216 {
217 struct subprocess_info *sub_info = data;
218 struct cred *new;
219 int retval;
220
221 spin_lock_irq(&current->sighand->siglock);
222 flush_signal_handlers(current, 1);
223 spin_unlock_irq(&current->sighand->siglock);
224
225 /*
226 * Our parent (unbound workqueue) runs with elevated scheduling
227 * priority. Avoid propagating that into the userspace child.
228 */
229 set_user_nice(current, 0);
230
231 retval = -ENOMEM;
232 new = prepare_kernel_cred(current);
233 if (!new)
234 goto out;
235
236 spin_lock(&umh_sysctl_lock);
237 new->cap_bset = cap_intersect(usermodehelper_bset, new->cap_bset);
238 new->cap_inheritable = cap_intersect(usermodehelper_inheritable,
239 new->cap_inheritable);
240 spin_unlock(&umh_sysctl_lock);
241
242 if (sub_info->init) {
243 retval = sub_info->init(sub_info, new);
244 if (retval) {
245 abort_creds(new);
246 goto out;
247 }
248 }
249
250 commit_creds(new);
251
252 retval = do_execve(getname_kernel(sub_info->path),
253 (const char __user *const __user *)sub_info->argv,
254 (const char __user *const __user *)sub_info->envp);
255 out:
256 sub_info->retval = retval;
257 /*
258 * call_usermodehelper_exec_sync() will call umh_complete
259 * if UHM_WAIT_PROC.
260 */
261 if (!(sub_info->wait & UMH_WAIT_PROC))
262 umh_complete(sub_info);
263 if (!retval)
264 return 0;
265 do_exit(0);
266 }
看到这里就明白了,原来也是通过do_execve来让user space执行modprobe命令(/sbin/modprobe),由modprobe完成ko的加载.