ENTRY(entry_SYSCALL_64)
movq %rsp, PER_CPU_VAR(rsp_scratch) // 保存用户堆栈指针
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp // 获取tss的sp1段作为内核栈, 系统调用默认会将切换调用者到sp0, 但并不改变rsp指针
=># define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1)
/*
* On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
* the top of the kernel stack. Use an extra percpu variable to track the
* top of the kernel stack directly.
*/
DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
(unsigned long)&init_thread_union + THREAD_SIZE;
/* 讲用户寄存器压到tss段 */
/* Construct struct pt_regs on stack */
pushq $__USER_DS /* pt_regs->ss */
pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */
pushq %r11 /* pt_regs->flags */
pushq $__USER_CS /* pt_regs->cs */
pushq %rcx /* pt_regs->ip */
GLOBAL(entry_SYSCALL_64_after_hwframe)
pushq %rax /* pt_regs->orig_ax */
PUSH_AND_CLEAR_REGS rax=$-ENOSYS
=>.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
/*
* Push registers and sanitize registers of values that a
* speculation attack might otherwise want to exploit. The
* lower registers are likely clobbered well before they
* could be put to use in a speculative execution gadget.
* Interleave XOR with PUSH for better uop scheduling:
*/
.if \save_ret
pushq %rsi /* pt_regs->si */
movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */
movq %rdi, 8(%rsp) /* pt_regs->di (overwriting original return address) */
.else
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
.endif
pushq \rdx /* pt_regs->dx */
pushq %rcx /* pt_regs->cx */
pushq \rax /* pt_regs->ax */
pushq %r8 /* pt_regs->r8 */
xorl %r8d, %r8d /* nospec r8 */
pushq %r9 /* pt_regs->r9 */
xorl %r9d, %r9d /* nospec r9 */
pushq %r10 /* pt_regs->r10 */
xorl %r10d, %r10d /* nospec r10 */
pushq %r11 /* pt_regs->r11 */
xorl %r11d, %r11d /* nospec r11*/
pushq %rbx /* pt_regs->rbx */
xorl %ebx, %ebx /* nospec rbx*/
pushq %rbp /* pt_regs->rbp */
xorl %ebp, %ebp /* nospec rbp*/
pushq %r12 /* pt_regs->r12 */
xorl %r12d, %r12d /* nospec r12*/
pushq %r13 /* pt_regs->r13 */
xorl %r13d, %r13d /* nospec r13*/
pushq %r14 /* pt_regs->r14 */
xorl %r14d, %r14d /* nospec r14*/
pushq %r15 /* pt_regs->r15 */
xorl %r15d, %r15d /* nospec r15*/
UNWIND_HINT_REGS
.if \save_ret
pushq %rsi /* return address on top of stack */
.endif
.endm
/* IRQs are off. */
movq %rsp, %rdi // rsp 作为内核栈, 也就是说 tss 是内核栈
call do_syscall_64 /* returns with IRQs disabled */
=>__visible void do_syscall_64(struct pt_regs *regs)
pt_regs的定义如下
struct pt_regs {
/*
* C ABI says these regs are callee-preserved. They aren't saved on kernel entry
* unless syscall needs a complete, fully filled "struct pt_regs".
*/
unsigned long r15;
unsigned long r14;
unsigned long r13;
unsigned long r12;
unsigned long rbp;
unsigned long rbx;
/* These regs are callee-clobbered. Always saved on kernel entry. */
unsigned long r11;
unsigned long r10;
unsigned long r9;
unsigned long r8;
unsigned long rax;
unsigned long rcx;
unsigned long rdx;
unsigned long rsi;
unsigned long rdi;
/*
* On syscall entry, this is syscall#. On CPU exception, this is error code.
* On hw interrupt, it's IRQ number:
*/
unsigned long orig_rax;
/* Return frame for iretq */
unsigned long rip;
unsigned long cs;
unsigned long eflags;
unsigned long rsp;
unsigned long ss;
/* top of stack page */
};
POP_REGS pop_rdi=0 skip_r11rcx=1
=>.macro POP_REGS pop_rdi=1 skip_r11rcx=0
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbp
popq %rbx
.if \skip_r11rcx
popq %rsi
.else
popq %r11
.endif
popq %r10
popq %r9
popq %r8
popq %rax
.if \skip_r11rcx
popq %rsi
.else
popq %rcx
.endif
popq %rdx
popq %rsi
.if \pop_rdi
popq %rdi
.endif
.endm
/*
* Now all regs are restored except RSP and RDI.
* Save old stack pointer and switch to trampoline stack.
*/
movq %rsp, %rdi
movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
pushq RSP-RDI(%rdi) /* RSP */
pushq (%rdi) /* RDI */
/*
* We are on the trampoline stack. All regs except RDI are live.
* We can do future final exit work right here.
*/
SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
=>.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
pushq %rax
SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
popq %rax
.endm
popq %rdi
popq %rsp
USERGS_SYSRET64
END(entry_SYSCALL_64)
【Linux系统调用】汇编级理解Linux系统调用
https://blog.csdn.net/Alan_cqu_cj/article/details/106272204
深入理解系统调用
https://cnblogs.com/logan233/p/12972732.html
Socket与系统调用深度分析 ——X86 64环境下Linux5.0以上的内核中
https://cnblogs.com/qfdzztt/p/12057457.html
x86体系下linux中的任务切换与TSS
https://blog.csdn.net/dog250/article/details/6203529
Linux Kernel系统调用分析
https://blog.csdn.net/weixin_41943030/article/details/88651574
syscall是x86_64上的指令吗?
http://www.voidcn.com/article/p-hfqyhngm-bud.html
Is syscall an instruction on x86_64?
https://stackoverflow.com/questions/10583891/is-syscall-an-instruction-on-x86-64
What are the calling conventions for UNIX & Linux system calls (and user-space functions) on i386 and x86-64
https://stackoverflow.com/questions/2535989/what-are-the-calling-conventions-for-unix-linux-system-calls-and-user-space-f
Linux syscall过程分析(万字长文)
https://cloud.tencent.com/developer/article/1492374
[原创]简析syscall,sysret和sysenter,sysexit的具体过程
https://bbs.pediy.com/thread-226254.html
Linux内核TSS的使用
https://www.cnblogs.com/long123king/p/3501853.html