reboot 涉及到了
console : 读取"reboot"字符串
busybox : busybox app 架构
busybox : reboot进程与 init 进程的交互
glibc : glibc中 reboot系统调用的实现
glibc : kill实现
glibc : reboot函数实现
armv6架构:swi异常
linux : swi异常的处理方法
从reboot 的执行流程来看 reboot过程
1./bin/bash shell下键入 reboot,console对字符的处理
# cat /proc/cmdline
root=/dev/nfs rw nfsroot=10.10.11.59:/home/suws/ok6410/system-new/buildroot/output/images/rootfs console=tty0 console=ttySAC0,115200 init=/linuxrc ip=10.10.11.120
# echo $SHELL
/bin/bash
# ls -l /bin/bash
-rwxr-xr-x 1 root root 640080 Apr 20 2021 /bin/bash
2. 回车按键后,bash 对 reboot 字符串的处理,并fork子进程,填充reboot命令到子进程,并执行
# which reboot
/sbin/reboot
# ls -l /sbin/reboot
lrwxrwxrwx 1 root root 14 Apr 15 2021 /sbin/reboot -> ../bin/busybox
3.reboot开始执行,并发信号给init进程(linuxrc)
reboot 进程
main // libbb/appletlib.c int main(int argc UNUSED_PARAM, char **argv)
run_applet_and_exit(applet_name, argv);
int applet = find_applet_by_name(name);
run_applet_no_and_exit(applet, name, argv);
xfunc_error_retval = applet_main[applet_no](argc, argv); // 调用 halt_main
xfunc_die
exit(xfunc_error_retval);
/*
int (*const applet_main[])(int argc, char **argv) = { // (include/applet_tables.h)
halt_main
*/
halt_main(init/halt.c)
pid_t *pidlist = find_pid_by_name("linuxrc");
rc = kill(pidlist[0], signals[which]);
return rc;
-----------------------------------------
// kill 在 glibc 中实现,busybox 中只是做了引用
/* Send signal SIG to process number PID. If PID is zero,
send SIG to all processes in the current process's process group.
If PID is < -1, send SIG to all processes in process group - PID. */
#ifdef __USE_POSIX
extern int kill (__pid_t __pid, int __sig) __THROW;
#endif /* Use POSIX. */
4. linuxrc 接收到信号,并开始 做 重启
check_delayed_sigs
sigtimedwait // 阻塞在这里, 被 reboot 进程 kill 发信号后返回
halt_reboot_pwoff
run_shutdown_and_kill_processes
run_actions(SHUTDOWN);
Stopping Xorg: OK
...
kill(-1, SIGTERM);
kill(-1, SIGKILL);
pause_and_low_level_reboot(rb);
pid = vfork()
if (pid == 0) reboot(magic); // son
waitpid(pid, NULL, 0); //father
-----------------------------------------
vfork reboot waitpid 在 glibc 中实现,在busybox中被引用
vfork reboot waitpid 这些函数 是 glibc 对 系统调用 的封装(wrapper), 习惯上也直接被称为 系统调用 .
系统调用一般表现形式为 函数, 而这些函数的实现只是通过内嵌汇编的形式封装了 一些 系统调用相关的 指令(例如arm的swi指令,x86的int指令)
每一个系统调用 都对应一个 syscall number,即对应 一个内核的 sys_xxx
arm的swi指令并不是系统调用,而是被用作实现系统调用的指令.
output/host/arm-buildroot-linux-gnueabi/sysroot/usr/include/sys/reboot.h
__BEGIN_DECLS
/* Reboot or halt the system. */
extern int reboot (int __howto) __THROW;
__END_DECLS
5. glibc 中的reboot
根据 rootfs中的动态链接库来源 可以看到 glibc 来自于 交叉编译工具链,且glibc版本为 glibc-2.18
我们探究一下 glibc-2.18 中 reboot函数的定义以及实现
glibc-2.18/sysdeps/unix/sysv/linux/reboot.c
24 /* Call kernel with additional two arguments the syscall requires. */
25 int
26 reboot (int howto)
27 {
28 return INLINE_SYSCALL (reboot, 3, (int) 0xfee1dead, 672274793, howto);
29 }
glibc-2.18/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
320 /* Define a macro which expands into the inline wrapper code for a system
321 call. */
322 #undef INLINE_SYSCALL
323 #define INLINE_SYSCALL(name, nr, args...) \
324 ({ unsigned int _sys_result = INTERNAL_SYSCALL (name, , nr, args); \
325 if (__builtin_expect (INTERNAL_SYSCALL_ERROR_P (_sys_result, ), 0)) \
326 { \
327 __set_errno (INTERNAL_SYSCALL_ERRNO (_sys_result, )); \
328 _sys_result = (unsigned int) -1; \
329 } \
330 (int) _sys_result; })
glibc-2.18/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
376 #undef INTERNAL_SYSCALL
377 #define INTERNAL_SYSCALL(name, err, nr, args...) \
378 INTERNAL_SYSCALL_RAW(SYS_ify(name), err, nr, args)
glibc-2.18/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
363 # undef INTERNAL_SYSCALL_RAW
364 # define INTERNAL_SYSCALL_RAW(name, err, nr, args...) \
365 ({ \
366 register int _a1 asm ("r0"), _nr asm ("r7"); \
367 LOAD_ARGS_##nr (args) \
368 _nr = name; \
369 asm volatile ("swi 0x0 @ syscall " #name \
370 : "=r" (_a1) \
371 : "r" (_nr) ASM_ARGS_##nr \
372 : "memory"); \
373 _a1; })
// 这里的处理手段,将 syscall number 放到了 寄存器r7 中 , 符合 EABI 标准
// OABI(已经被废弃了) 是 将 syscall number 放到了 swi 指令中
// linux-5.11 肯定要 遵循 EABI 标准 去 r7 找 syscall number
// 这里 r0 充当输出变量
// 输入参数 依次存放在 r0-r6
// syscall number 放在了 r7
glibc-2.18/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
40 #undef SYS_ify
41 #define SYS_ify(syscall_name) (__NR_##syscall_name)
// 在 linux代码中的 arch/arm/include/uapi/asm/unistd.h -> arch/arm/include/generated/uapi/asm/unistd-common.h 中定义
// #define __NR_reboot (__NR_SYSCALL_BASE + 88)
glibc-2.18/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
#define LOAD_ARGS_0()
#define ASM_ARGS_0
#define LOAD_ARGS_1(a1) \
int _a1tmp = (int) (a1); \
LOAD_ARGS_0 () \
_a1 = _a1tmp; // _a1 为 r0 寄存器变量
#define ASM_ARGS_1 ASM_ARGS_0, "r" (_a1)
#define LOAD_ARGS_2(a1, a2) \
int _a2tmp = (int) (a2); \
LOAD_ARGS_1 (a1) \
register int _a2 asm ("a2") = _a2tmp; // 这里的 "a2" 是 ARM 过程调用标准(APCS/AAPCS)中 r1 的别名: a1-a4 对应 r0-r3, v1-v8 对应 r4-r11, GCC 的寄存器变量声明接受这些别名
#define ASM_ARGS_2 ASM_ARGS_1, "r" (_a2)
#define LOAD_ARGS_3(a1, a2, a3) \
int _a3tmp = (int) (a3); \
LOAD_ARGS_2 (a1, a2) \
register int _a3 asm ("a3") = _a3tmp;
#define ASM_ARGS_3 ASM_ARGS_2, "r" (_a3)
#define LOAD_ARGS_4(a1, a2, a3, a4) \
int _a4tmp = (int) (a4); \
LOAD_ARGS_3 (a1, a2, a3) \
register int _a4 asm ("a4") = _a4tmp;
#define ASM_ARGS_4 ASM_ARGS_3, "r" (_a4)
#define LOAD_ARGS_5(a1, a2, a3, a4, a5) \
int _v1tmp = (int) (a5); \
LOAD_ARGS_4 (a1, a2, a3, a4) \
register int _v1 asm ("v1") = _v1tmp;
#define ASM_ARGS_5 ASM_ARGS_4, "r" (_v1)
#define LOAD_ARGS_6(a1, a2, a3, a4, a5, a6) \
int _v2tmp = (int) (a6); \
LOAD_ARGS_5 (a1, a2, a3, a4, a5) \
register int _v2 asm ("v2") = _v2tmp;
#define ASM_ARGS_6 ASM_ARGS_5, "r" (_v2)
#ifndef __thumb__
# define LOAD_ARGS_7(a1, a2, a3, a4, a5, a6, a7) \
int _v3tmp = (int) (a7); \
LOAD_ARGS_6 (a1, a2, a3, a4, a5, a6) \
register int _v3 asm ("v3") = _v3tmp;
# define ASM_ARGS_7 ASM_ARGS_6, "r" (_v3)
#endif
6. swi指令产生后硬件的动作
A2.6.4 Software Interrupt exception
The Software Interrupt instruction ( SWI ) enters Supervisor mode to request a particular supervisor (operating system) function.
When a SWI is executed, the following actions are performed:
R14_svc = address of next instruction after the SWI instruction
SPSR_svc = CPSR
CPSR[4:0] = 0b10011 /* Enter Supervisor mode */
CPSR[5] = 0 /* Execute in ARM state */
/* CPSR[6] is unchanged */
CPSR[7] = 1 /* Disable normal interrupts */
/* CPSR[8] is unchanged */
CPSR[9] = CP15_reg1_EEbit /* Endianness on exception entry */
if high vectors configured then
PC = 0xFFFF0008
else
PC = 0x00000008
To return after performing the SWI operation,
use the following instruction to restore the PC (from R14_svc) and CPSR (from SPSR_svc)
and return to the instruction following the SWI:
MOVS PC,R14
7. linux-5.11中 对 swi异常的处理
- linux-5.11 ok6410a 是怎么放置 异常向量表的
放到了哪里? 放置的代码在哪里?
high vectors 有没有配置
异常向量表 是 一段地址
0x00000000 - 0x0000001C
或者
0xFFFF0000 - 0xFFFF001C
地址中的内容 一般为 跳转指令
arch/arm/kernel/entry-armv.S
.section .vectors, "ax", %progbits
1182 .section .vectors, "ax", %progbits
1183 .L__vectors_start:
1184 W(b) vector_rst
1185 W(b) vector_und
1186 W(ldr) pc, .L__vectors_start + 0x1000
1187 W(b) vector_pabt
1188 W(b) vector_dabt
1189 W(b) vector_addrexcptn
1190 W(b) vector_irq
1191 W(b) vector_fiq
arch/arm/kernel/vmlinux.lds
__init_begin = .;
__vectors_start = .; .vectors 0xffff0000 : AT(__vectors_start) { *(.vectors) } . = __vectors_start + SIZEOF(.vectors); __vectors_end = .; __stubs_start = .; .stubs ADDR(.vectors) + 0x1000 : AT(__stubs_start) { *(.stubs) } . = __stubs_start + SIZEOF(.stubs); __stubs_end = .; PROVIDE(vector_fiq_offset = vector_fiq - ADDR(.vectors));
arch/arm/kernel/entry-armv.S
1042 .section .stubs, "ax", %progbits
1043 @ This must be the first word
1044 .word vector_swi
// .stubs段的起始位置存放的就是swi的 异常处理程序入口地址
// .stubs段中的其他部分 是 其他 各种异常处理程序 实现 vector_xxx
// armv6 异常 一共有 7 种
arch/arm/kernel/entry-common.S
163 /*=============================================================================
164 * SWI handler
165 *-----------------------------------------------------------------------------
166 */
167
168 .align 5
169 ENTRY(vector_swi)
170 #ifdef CONFIG_CPU_V7M // 不进入
171 v7m_exception_entry
172 #else
173 sub sp, sp, #PT_REGS_SIZE
174 stmia sp, {r0 - r12} @ Calling r0 - r12
...
...
...
260 invoke_syscall tbl, scno, r10, __ret_fast_syscall // __ret_fast_syscall 为返回地址
261
262 add r1, sp, #S_OFF
263 2: cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE)
264 eor r0, scno, #__NR_SYSCALL_BASE @ put OS number back
265 bcs arm_syscall
266 mov why, #0 @ no longer a real syscall
267 b sys_ni_syscall @ not private func
...
...
...
284 ENDPROC(vector_swi)
arch/arm/kernel/entry-header.S
379 .macro invoke_syscall, table, nr, tmp, ret, reload=0
380 #ifdef CONFIG_CPU_SPECTRE
381 mov \tmp, \nr
382 cmp \tmp, #NR_syscalls @ check upper syscall limit
383 movcs \tmp, #0
384 csdb
385 badr lr, \ret @ return address // 设置返回地址
386 .if \reload
387 add r1, sp, #S_R0 + S_OFF @ pointer to regs
388 ldmiacc r1, {r0 - r6} @ reload r0-r6
389 stmiacc sp, {r4, r5} @ update stack arguments
390 .endif
391 ldrcc pc, [\table, \tmp, lsl #2] @ call sys_* routine // 调用了 sys_xxx
392 #else
...
402 .endm
arch/arm/kernel/entry-common.S
47 ret_fast_syscall: // 在内核进程上下文打印stack 的时候 ret_fast_syscall 是 第一个 函数,下面就是SyS_xxx
48 __ret_fast_syscall:
49 UNWIND(.fnstart )
50 UNWIND(.cantunwind )
51 disable_irq_notrace @ disable interrupts
52 ldr r2, [tsk, #TI_ADDR_LIMIT]
53 ldr r1, =TASK_SIZE
54 cmp r2, r1
55 blne addr_limit_check_failed
56 ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
57 movs r1, r1, lsl #16
58 bne fast_work_pending
59
60
61 /* perform architecture specific actions before user return */
62 arch_ret_to_user r1, lr // 为空
63
64 restore_user_regs fast = 1, offset = S_OFF
65 UNWIND(.fnend )
66 ENDPROC(ret_fast_syscall)
arch/arm/kernel/entry-header.S
294 .macro restore_user_regs, fast = 0, offset = 0
295 uaccess_enable r1, isb=0
...
...
...
345 add sp, sp, #PT_REGS_SIZE - S_SP
346 movs pc, lr @ return & move spsr_svc into cpsr // 返回用户空间前的最后一句特权级别指令
347 1: bug "Returning to usermode but unexpected PSR bits set?", \@
348 #endif /* !CONFIG_THUMB2_KERNEL */
349 .endm
- SYSCALL_DEFINE4(reboot 的展开 sys_reboot
kernel/reboot.c:312:SYSCALL_DEFINE4(reboot
展开 为
# 312 "kernel/reboot.c"
; ; long sys_reboot(int magic1, int magic2, unsigned int cmd, void * arg) __attribute__((alias("__se_sys_reboot"))); ;
static inline __attribute__((__gnu_inline__)) __attribute__((__unused__)) __attribute__((__no_instrument_function__)) long __do_sys_reboot(int magic1, int magic2, unsigned int cmd, void * arg);
long __se_sys_reboot(
__typeof(__builtin_choose_expr((__builtin_types_compatible_p(typeof(( int)0), typeof(0LL)) || __builtin_types_compatible_p(typeof(( int)0), typeof(0ULL))), 0LL, 0L))
magic1, __typeof(__builtin_choose_expr((__builtin_types_compatible_p(typeof(( int)0), typeof(0LL)) || __builtin_types_compatible_p(typeof(( int)0), typeof(0ULL))), 0LL, 0L))
magic2, __typeof(__builtin_choose_expr((__builtin_types_compatible_p(typeof(( unsigned int)0), typeof(0LL)) || __builtin_types_compatible_p(typeof(( unsigned int)0), typeof(0ULL))), 0LL, 0L))
cmd, __typeof(__builtin_choose_expr((__builtin_types_compatible_p(typeof(( void *)0), typeof(0LL)) || __builtin_types_compatible_p(typeof(( void *)0), typeof(0ULL))), 0LL, 0L))
arg);
long __se_sys_reboot(
__typeof(__builtin_choose_expr((__builtin_types_compatible_p(typeof(( int)0), typeof(0LL)) || __builtin_types_compatible_p(typeof(( int)0), typeof(0ULL))), 0LL, 0L))
magic1, __typeof(__builtin_choose_expr((__builtin_types_compatible_p(typeof(( int)0), typeof(0LL)) || __builtin_types_compatible_p(typeof(( int)0), typeof(0ULL))), 0LL, 0L))
magic2, __typeof(__builtin_choose_expr((__builtin_types_compatible_p(typeof(( unsigned int)0), typeof(0LL)) || __builtin_types_compatible_p(typeof(( unsigned int)0), typeof(0ULL))), 0LL, 0L))
cmd, __typeof(__builtin_choose_expr((__builtin_types_compatible_p(typeof(( void *)0), typeof(0LL)) || __builtin_types_compatible_p(typeof(( void *)0), typeof(0ULL))), 0LL, 0L))
arg)
{
long ret = __do_sys_reboot(( int) magic1, ( int) magic2, ( unsigned int) cmd, ( void *) arg);
(void)((int)(sizeof(struct { int:(-!!(!(__builtin_types_compatible_p(typeof(( int)0), typeof(0LL)) || __builtin_types_compatible_p(typeof(( int)0), typeof(0ULL))) && sizeof(int) > sizeof(long))); }))),
(void)((int)(sizeof(struct { int:(-!!(!(__builtin_types_compatible_p(typeof(( int)0), typeof(0LL)) || __builtin_types_compatible_p(typeof(( int)0), typeof(0ULL))) && sizeof(int) > sizeof(long))); }))),
(void)((int)(sizeof(struct { int:(-!!(!(__builtin_types_compatible_p(typeof(( unsigned int)0), typeof(0LL)) || __builtin_types_compatible_p(typeof(( unsigned int)0), typeof(0ULL))) && sizeof(unsigned int) > sizeof(long))); }))),
(void)((int)(sizeof(struct { int:(-!!(!(__builtin_types_compatible_p(typeof(( void *)0), typeof(0LL)) || __builtin_types_compatible_p(typeof(( void *)0), typeof(0ULL))) && sizeof(void *) > sizeof(long))); })));
do { } while (0);
return ret;
}
#pragma GCC diagnostic pop
# 312 "kernel/reboot.c"
; static inline __attribute__((__gnu_inline__)) __attribute__((__unused__)) __attribute__((__no_instrument_function__)) long __do_sys_reboot(int magic1, int magic2, unsigned int cmd, void * arg)
{
struct pid_namespace *pid_ns = task_active_pid_ns((current_thread_info()->task));
char buffer[256];
int ret = 0;
...
...
...
}
- sys_reboot的实现
sys_reboot/__se_sys_reboot kernel/reboot.c
__do_sys_reboot kernel/reboot.c
kernel_restart kernel/reboot.c
machine_restart arch/arm/kernel/reboot.c
do_kernel_restart kernel/reboot.c
atomic_notifier_call_chain(&restart_handler_list, reboot_mode, cmd); kernel/notifier.c
watchdog_restart_notifier drivers/watchdog/watchdog_core.c
wdd->ops->restart(wdd, action, data); // 即 s3c2410wdt_restart
s3c2410wdt_restart drivers/watchdog/s3c2410_wdt.c
353 struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd);
354 void __iomem *wdt_base = wdt->reg_base;
355
356 /* disable watchdog, to be safe */
357 writel(0, wdt_base + S3C2410_WTCON);
358
359 /* put initial values into count and data */
360 writel(0x80, wdt_base + S3C2410_WTCNT);
361 writel(0x80, wdt_base + S3C2410_WTDAT);
362
363 /* set the watchdog to go and reset... */
364 writel(S3C2410_WTCON_ENABLE | S3C2410_WTCON_DIV16 | S3C2410_WTCON_RSTEN | S3C2410_WTCON_PRESCALE(0x20), wdt_base +
S3C2410_WTCON);
365
366 /* wait for reset to assert... */
367 mdelay(500);
看起来 linux-5.11 ok6410a 是 用 看门狗 来实现 reboot 的
然而 u-boot-2021.01 ok6410a 是利用 do_reset -> reset_cpu -> SW_RST_REG = 0x6410; 来实现reboot的
#define SW_RST_REG __REG(ELFIN_CLOCK_POWER_BASE+SW_RST_OFFSET)
#define ELFIN_CLOCK_POWER_BASE 0x7e00f000
#define SW_RST_OFFSET 0x114
但是这个 寄存器 在 s3c6410 数据手册P139中查到 是 RESERVED 的,并没有 reboot 的作用
参考 vector_swi -> ret_fast_syscall -> restore_user_regs -> movs pc, lr
返回到用户空间执行的第一句指令 为
if (__builtin_expect (INTERNAL_SYSCALL_ERROR_P (_sys_result, ), 0)) // 全局搜索