先看如何增加系统调用
diff --git a/kernel4.4/arch/arm/include/uapi/asm/unistd.h b/kernel4.4/arch/arm/include/uapi/asm/unistd.h
index ede692f..081ab28 100644
--- a/kernel4.4/arch/arm/include/uapi/asm/unistd.h
+++ b/kernel4.4/arch/arm/include/uapi/asm/unistd.h
@@ -417,6 +417,7 @@
#define __NR_userfaultfd (__NR_SYSCALL_BASE+388)
#define __NR_membarrier (__NR_SYSCALL_BASE+389)
#define __NR_mlock2 (__NR_SYSCALL_BASE+390)
+#define __NR_dump (__NR_SYSCALL_BASE+391)
/*
* The following SWIs are ARM private.
diff --git a/kernel4.4/arch/arm/kernel/Makefile b/kernel4.4/arch/arm/kernel/Makefile
index 28b4e38..9b768ca 100644
--- a/kernel4.4/arch/arm/kernel/Makefile
+++ b/kernel4.4/arch/arm/kernel/Makefile
@@ -18,7 +18,7 @@ CFLAGS_REMOVE_return_address.o = -pg
obj-y := elf.o entry-common.o irq.o opcodes.o \
process.o ptrace.o reboot.o return_address.o \
setup.o signal.o sigreturn_codes.o \
- stacktrace.o sys_arm.o time.o traps.o
+ stacktrace.o sys_arm.o time.o traps.o mysyscall.o
obj-$(CONFIG_ATAGS) += atags_parse.o
obj-$(CONFIG_ATAGS_PROC) += atags_proc.o
diff --git a/kernel4.4/arch/arm/kernel/calls.S b/kernel4.4/arch/arm/kernel/calls.S
index ac368bb..dc1bf8a 100644
--- a/kernel4.4/arch/arm/kernel/calls.S
+++ b/kernel4.4/arch/arm/kernel/calls.S
@@ -400,6 +400,7 @@
CALL(sys_userfaultfd)
CALL(sys_membarrier)
CALL(sys_mlock2)
+ CALL(sys_dump)
#ifndef syscalls_counted
.equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
#define syscalls_counted
diff --git a/kernel4.4/include/linux/syscalls.h b/kernel4.4/include/linux/syscalls.h
index c2b66a2..02ee903 100644
--- a/kernel4.4/include/linux/syscalls.h
+++ b/kernel4.4/include/linux/syscalls.h
@@ -888,5 +888,5 @@ asmlinkage long sys_execveat(int dfd, const char __user *filename,
asmlinkage long sys_membarrier(int cmd, int flags);
asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags);
-
+asmlinkage long sys_dump(void);
#endif
diff --git a/kernel4.4/kernel/sys_ni.c b/kernel4.4/kernel/sys_ni.c
index 0623787..44d04a7 100644
--- a/kernel4.4/kernel/sys_ni.c
+++ b/kernel4.4/kernel/sys_ni.c
@@ -249,3 +249,4 @@ cond_syscall(sys_execveat);
/* membarrier */
cond_syscall(sys_membarrier);
+cond_syscall(sys_dump);
kernel4.4/arch/arm/kernel/mysyscall.c
#include <linux/syscalls.h>
#include <asm-generic/bug.h>
SYSCALL_DEFINE0(dump)
{
long ret=0;
WARN(1,"my syscall");
return ret;
}
应用层的测试程序syscall.c
#include <errno.h>
#include <string.h>
#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/ioctl.h>
#define sys_dump() __asm__ __volatile__ ("mov r7,#391\n\t" "swi 0\n\t" )
int main()
{
int ret;
ret=syscall(391);
sys_dump();
printf("sys_dump\n");
return ret;
}
sys_dump或者syscall两种方式都可以定义
Android.mk
LOCAL_PATH:= $(call my-dir)
include $(CLEAR_VARS)
LOCAL_MODULE_TAGS := eng
LOCAL_SRC_FILES:=syscall.c
LOCAL_MODULE:=syscall
LOCAL_CPPFLAGS += -DANDROID
LOCAL_SHARED_LIBRARIES:=libc
LOCAL_STATIC_LIBRARIES :=
include $(BUILD_EXECUTABLE)
http://androidxref.com/9.0.0_r3/xref/bionic/libc/arch-arm/bionic/syscall.S#31
ENTRY(syscall)
mov ip, sp
stmfd sp!, {r4, r5, r6, r7}
.cfi_def_cfa_offset 16
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
.cfi_rel_offset r6, 8
.cfi_rel_offset r7, 12
mov r7, r0
mov r0, r1
mov r1, r2
mov r2, r3
ldmfd ip, {r3, r4, r5, r6}
swi #0
ldmfd sp!, {r4, r5, r6, r7}
.cfi_def_cfa_offset 0
cmn r0, #(MAX_ERRNO + 1)
bxls lr
neg r0, r0
b __set_errno_internal
END(syscall)
kernel4.4/arch/arm/include/asm/unistd.h
#define __NR_syscalls (392)
同时这个值按需要进行修改
arch/arm/kernel/entry-common.S
.equ NR_syscalls,0
#define CALL(x) .equ NR_syscalls,NR_syscalls+1
#include "calls.S"
/*
* Ensure that the system call table is equal to __NR_syscalls,
* which is the value the rest of the system sees
*/
.ifne NR_syscalls - __NR_syscalls
.error "__NR_syscalls is not equal to the size of the syscall table"
.endif
.equ NR_syscall,0 相当与NR_syscall=0
#define CALL(x) .equ NR_syscalls,NR_syscalls+1每一个syscall定义,变量NR_syscall自加1
kernel4.4/arch/arm/kernel/calls.S
/* 385 */ CALL(sys_memfd_create)
CALL(sys_bpf)
CALL(sys_execveat)
CALL(sys_userfaultfd)
CALL(sys_membarrier)
CALL(sys_mlock2)
CALL(sys_dump)
#ifndef syscalls_counted
.equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
#define syscalls_counted
#endif
.rept syscalls_padding
CALL(sys_ni_syscall)
.endr
.rept重复次数
添加CALL(sys_ni_syscall)
比如CALL(sys_mlock2)已经是390,NR_syscalls就是391,((NR_syscalls + 3) & ~3) - NR_syscalls=0
添加CALL(sys_dump),NR_syscalls就是392,((NR_syscalls + 3) & ~3) - NR_syscalls=0
假如再添加一个CALL(sys_dump1),NR_syscalls就是393,((NR_syscalls + 3) & ~3) - NR_syscalls=3,__NR_syscalls就需要定义为396
看下系统调用过程
CONFIG_CPU_V7M/CONFIG_OABI_COMPAT/CONFIG_ARM_THUMB都是没定义的,只定义CONFIG_AEABI
#undef CALL
#define CALL(x) .long x
/*=============================================================================
* SWI handler
*-----------------------------------------------------------------------------
*/
ENTRY(vector_swi)
#ifdef CONFIG_CPU_V7M
v7m_exception_entry
#else
sub sp, sp, #S_FRAME_SIZE
stmia sp, {r0 - r12} @ Calling r0 - r12
ARM( add r8, sp, #S_PC )
ARM( stmdb r8, {sp, lr}^ ) @ Calling sp, lr
THUMB( mov r8, sp )
THUMB( store_user_sp_lr r8, r10, S_SP ) @ calling sp, lr
mrs r8, spsr @ called from non-FIQ mode, so ok.
str lr, [sp, #S_PC] @ Save calling PC
str r8, [sp, #S_PSR] @ Save CPSR
str r0, [sp, #S_OLD_R0] @ Save OLD_R0
#endif
zero_fp
alignment_trap r10, ip, __cr_alignment
enable_irq
ct_user_exit
get_thread_info tsk
/*
* Get the system call number.
*/
#if defined(CONFIG_OABI_COMPAT)
/*
* If we have CONFIG_OABI_COMPAT then we need to look at the swi
* value to determine if it is an EABI or an old ABI call.
*/
#ifdef CONFIG_ARM_THUMB
tst r8, #PSR_T_BIT
movne r10, #0 @ no thumb OABI emulation
USER( ldreq r10, [lr, #-4] ) @ get SWI instruction
#else
USER( ldr r10, [lr, #-4] ) @ get SWI instruction
#endif
ARM_BE8(rev r10, r10) @ little endian instruction
#elif defined(CONFIG_AEABI)
/*
* Pure EABI user space always put syscall number into scno (r7).
*/
#elif defined(CONFIG_ARM_THUMB)
/* Legacy ABI only, possibly thumb mode. */
tst r8, #PSR_T_BIT @ this is SPSR from save_user_regs
addne scno, r7, #__NR_SYSCALL_BASE @ put OS number in
USER( ldreq scno, [lr, #-4] )
#else
/* Legacy ABI only. */
USER( ldr scno, [lr, #-4] ) @ get SWI instruction
#endif
uaccess_disable tbl
adr tbl, sys_call_table @ load syscall table pointer
#if defined(CONFIG_OABI_COMPAT)
/*
* If the swi argument is zero, this is an EABI call and we do nothing.
*
* If this is an old ABI call, get the syscall number into scno and
* get the old ABI syscall table address.
*/
bics r10, r10, #0xff000000
eorne scno, r10, #__NR_OABI_SYSCALL_BASE
ldrne tbl, =sys_oabi_call_table
#elif !defined(CONFIG_AEABI)
bic scno, scno, #0xff000000 @ mask off SWI op-code
eor scno, scno, #__NR_SYSCALL_BASE @ check OS number
#endif
local_restart:
ldr r10, [tsk, #TI_FLAGS] @ check for syscall tracing
stmdb sp!, {r4, r5} @ push fifth and sixth args
tst r10, #_TIF_SYSCALL_WORK @ are we tracing syscalls?
bne __sys_trace
cmp scno, #NR_syscalls @ check upper syscall limit
badr lr, ret_fast_syscall @ return address
ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine
add r1, sp, #S_OFF
2: cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE)
eor r0, scno, #__NR_SYSCALL_BASE @ put OS number back
bcs arm_syscall
mov why, #0 @ no longer a real syscall
b sys_ni_syscall @ not private func
#if defined(CONFIG_OABI_COMPAT) || !defined(CONFIG_AEABI)
/*
* We failed to handle a fault trying to access the page
* containing the swi instruction, but we're not really in a
* position to return -EFAULT. Instead, return back to the
* instruction and re-enter the user fault handling path trying
* to page it in. This will likely result in sending SEGV to the
* current task.
*/
9001:
sub lr, lr, #4
str lr, [sp, #S_PC]
b ret_fast_syscall
#endif
ENDPROC(vector_swi)
.type sys_call_table, #object
ENTRY(sys_call_table)
#include "calls.S"
#undef ABI
#undef OBSOLETE
adr tbl, sys_call_table @ load syscall table pointer
badr lr, ret_fast_syscall @ return address
ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine调到系统调用
系统调用号存于r7,而r7等同于scno
arch/arm/kernel/entry-header.S
/*
* These are the registers used in the syscall handler, and allow us to
* have in theory up to 7 arguments to a function - r0 to r6.
*
* r7 is reserved for the system call number for thumb mode.
*
* Note that tbl == why is intentional.
*
* We must set at least "tsk" and "why" when calling ret_with_reschedule.
*/
scno .req r7 @ syscall number
tbl .req r8 @ syscall table pointer
why .req r8 @ Linux syscall (!= 0)
tsk .req r9 @ current thread_info
kernel4.4/include/linux/syscalls.h
#define SYSCALL_DEFINE0(sname) \
SYSCALL_METADATA(_##sname, 0); \
asmlinkage long sys_##sname(void)
include/linux/syscalls.h
#define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE4(name, ...) SYSCALL_DEFINEx(4, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE5(name, ...) SYSCALL_DEFINEx(5, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)
#define SYSCALL_DEFINEx(x, sname, ...) \
SYSCALL_METADATA(sname, x, __VA_ARGS__) \
__SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
看下syscall返回函数ret_fast_syscall
/*
* This is the fast syscall return path. We do as little as possible here,
* such as avoiding writing r0 to the stack. We only use this path if we
* have tracing and context tracking disabled - the overheads from those
* features make this path too inefficient.
*/
ret_fast_syscall:
UNWIND(.fnstart )
UNWIND(.cantunwind )
disable_irq_notrace @ disable interrupts xxxxxxxxxxxxxxxxxxxxxxxxxxx
ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
bne fast_work_pending
/* perform architecture specific actions before user return */
arch_ret_to_user r1, lr
restore_user_regs fast = 1, offset = S_OFF
UNWIND(.fnend )
ENDPROC(ret_fast_syscall)
/* Ok, we need to do extra processing, enter the slow path. */
fast_work_pending:
str r0, [sp, #S_R0+S_OFF]! @ returned r0
/* fall through to work_pending */
tst r1, #_TIF_SYSCALL_WORK
bne __sys_trace_return_nosave
slow_work_pending:
mov r0, sp @ 'regs'
mov r2, why @ 'syscall'
bl do_work_pending
cmp r0, #0
beq no_work_pending
movlt scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE)
ldmia sp, {r0 - r6} @ have to reload r0 - r6
b local_restart @ ... and off we go
ENDPROC(ret_fast_syscall)
有标志,跳到fast_work_pending--->fast_work_pending
/* Checks for any syscall work in entry-common.S */
#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP)
/*
* Change these and you break ASM code in entry-common.S
*/
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
_TIF_NOTIFY_RESUME | _TIF_UPROBE)
asmlinkage int do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
{
/*
* The assembly code enters us with IRQs off, but it hasn't
* informed the tracing code of that for efficiency reasons.
* Update the trace code with the current status.
*/
trace_hardirqs_off();
do {
if (likely(thread_flags & _TIF_NEED_RESCHED)) {
schedule();
} else {
if (unlikely(!user_mode(regs)))
return 0;
local_irq_enable();
if (thread_flags & _TIF_SIGPENDING) {
int restart = do_signal(regs, syscall);
if (unlikely(restart)) {
/*
* Restart without handlers.
* Deal with it without leaving
* the kernel space.
*/
return restart;
}
syscall = 0;
} else if (thread_flags & _TIF_UPROBE) {
uprobe_notify_resume(regs);
} else {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
}
}
local_irq_disable();
thread_flags = current_thread_info()->flags;
} while (thread_flags & _TIF_WORK_MASK);
return 0;
}
asmlinkage __visible void __sched schedule(void)
{
struct task_struct *tsk = current;
sched_submit_work(tsk);
do {
preempt_disable();
__schedule(false);
sched_preempt_enable_no_resched();
} while (need_resched());
}