进程fork
前一段我们看了系统调用。fork 不出意外也是一个系统调用,在 x86 下同样是通过 int 0x80 陷入内核。既然如此,我们就来分析一下 fork 对应的内核入口 sys_fork。
linux0.11中的fork
/*
 * Instantiate the zero-argument system-call wrapper macro: with
 * type = int and name = fork, _syscall0 expands to a definition of
 * `int fork(void)` that traps into the kernel with `int $0x80`.
 */
static inline _syscall0(int ,fork)
/*
 * _syscall0 - define a user-space wrapper for a system call that takes
 * no arguments.
 *
 * type: return type of the generated function.
 * name: function name; it is also pasted onto `__NR_` to form the
 *       system-call number that is loaded into eax.
 *
 * The generated function executes `int $0x80` with eax = __NR_<name>
 * and reads the result back from eax.  A negative result is stored,
 * negated, in errno and the function returns -1; any other result is
 * returned cast to `type`.
 */
#define _syscall0(type,name) \
type name(void) \
{ \
	long __res; \
	__asm__ volatile ("int $0x80" \
		: "=a" (__res) \
		: "0" (__NR_##name)); \
	if (__res < 0) { \
		errno = -__res; \
		return -1; \
	} \
	return (type) __res; \
}
/*
 * fork - the result of expanding _syscall0(int, fork) by hand.
 *
 * Loads the system-call number __NR_fork into eax (the "0" constraint
 * ties the input to operand 0, which "=a" places in eax), traps into
 * the kernel with `int $0x80`, and reads the kernel's result back from
 * eax.
 *
 * Returns the non-negative result on success.  On failure the kernel
 * result is negative: its absolute value is stored in errno and -1 is
 * returned.
 *
 * Note: `__NR_##name` with name = fork pastes to `__NR_fork`; the local
 * variable `__res` is not part of the constant's name.
 */
int fork(void)
{
	long __res;

	__asm__ volatile ("int $0x80"
		: "=a" (__res)
		: "0" (__NR_fork));
	if (__res >= 0)
		return (int) __res;
	errno = -__res;
	return -1;
}
# _system_call - entry point reached via `int $0x80`.
# On entry %eax holds the system-call number and %ebx, %ecx, %edx hold
# up to three arguments.  Dispatches through _sys_call_table, pushes the
# handler's return value, and then either jumps to reschedule or falls
# through into ret_from_sys_call.
_system_call:
# Unsigned range check on the call number; out-of-range goes to bad_sys_call.
cmpl $nr_system_calls-1 ,%eax
ja bad_sys_call
# Save the caller's data segment registers.
push %ds
push %es
push %fs
# Pushed in reverse order so that %ebx ends up as the first C argument.
pushl %edx
pushl %ecx # push %ebx,%ecx,%edx as parameters
pushl %ebx # to the system call
movl $0x10 ,%edx # set up ds,es to kernel space
mov %dx,%ds
mov %dx,%es
movl $0x17 ,%edx # fs points to local data space
mov %dx,%fs
# Indirect call: table entry number %eax, 4 bytes per entry.
call _sys_call_table(,%eax,4 )
# Keep the handler's return value on the stack; it is popped back into
# %eax at label 3 in ret_from_sys_call.
pushl %eax
# Go to reschedule if the current task's state is non-zero or its
# counter has reached zero.
movl _current,%eax
cmpl $0 ,state(%eax) # state
jne reschedule
cmpl $0 ,counter(%eax) # counter
je reschedule
# ret_from_sys_call - common exit path back to the interrupted code.
# Delivers at most one pending, unblocked signal (via _do_signal) when
# the current task is not task[0] and the saved CS/SS selectors are
# 0x0f/0x17, then restores the saved registers and executes iret.
ret_from_sys_call:
movl _current,%eax # task[0] cannot have signals
cmpl _task,%eax
je 3f
cmpw $0x0f ,CS(%esp) # was old code segment supervisor ?
jne 3f
cmpw $0x17 ,OLDSS(%esp) # was stack segment = 0x17 ?
jne 3f
# %ecx = signal & ~blocked: the set of deliverable signals.
movl signal(%eax),%ebx
movl blocked(%eax),%ecx
notl %ecx
andl %ebx,%ecx
# Index of the lowest set bit; ZF is set when no bit is set.
bsfl %ecx,%ecx
je 3f
# Clear that bit in the pending mask and write the mask back.
btrl %ecx,%ebx
movl %ebx,signal(%eax)
# Bit index is 0-based; the value passed to _do_signal is index + 1.
incl %ecx
pushl %ecx
call _do_signal
# Discard the argument that was pushed for _do_signal.
popl %eax
# Pop the system-call return value that _system_call pushed after the
# handler returned, then restore registers in reverse order of the
# pushes done at entry.
3 : popl %eax
popl %ebx
popl %ecx
popl %edx
pop %fs
pop %es
pop %ds
iret
; _sys_fork - kernel-side handler for the fork system call (Intel syntax).
; Calls _find_empty_process; a negative result in eax is returned as is.
; Otherwise pushes gs, esi, edi, ebp and eax and calls _copy_process.
; Those five pushes line up with the first five parameters of
; copy_process (nr, ebp, edi, esi, gs); the parameters after them come
; from what is already on the stack: the return address of the call
; into this routine (the `none` parameter), then the registers pushed
; by _system_call, then presumably the frame saved by the `int`
; instruction itself.
align 4
_sys_fork:
call _find_empty_process ;
; Negative return value (sign flag set): skip the copy and return it.
test eax,eax
js l2
push gs
push esi
push edi
push ebp
; eax (the result of _find_empty_process) becomes the first argument, nr.
push eax
call _copy_process ;
; Drop the five 4-byte arguments pushed above.
add esp,20 ;
l2: ret
/*
 * copy_process - build the task structure for a newly forked child.
 *
 * nr is the task[] slot to fill.  The remaining parameters are the
 * register values found on the kernel stack when this function is
 * called from _sys_fork: the values pushed by _sys_fork itself, one
 * unused slot (`none`), the values pushed by _system_call, and the
 * eip/cs/eflags/esp/ss values that follow them.
 *
 * Returns last_pid on success, or -EAGAIN if no page could be obtained
 * for the task structure or if copy_mem() reports failure.
 */
int copy_process(int nr,long ebp,long edi,long esi,long gs,long none,
		long ebx,long ecx,long edx,
		long fs,long es,long ds,
		long eip,long cs,long eflags,long esp,long ss)
{
	struct task_struct *child;
	struct file *filp;
	int fd;

	/* One page holds the child's task structure. */
	child = (struct task_struct *) get_free_page();
	if (child == NULL) {
		return -EAGAIN;
	}
	task[nr] = child;

	/* Start from a copy of the parent, then override per-task fields. */
	*child = *current;
	child->state = TASK_UNINTERRUPTIBLE;	/* set to TASK_RUNNING only at the end */
	child->pid = last_pid;
	child->father = current->pid;
	child->counter = child->priority;
	child->signal = 0;
	child->alarm = 0;
	child->leader = 0;
	child->utime = child->stime = 0;
	child->cutime = child->cstime = 0;
	child->start_time = jiffies;

	/* tss.esp0 is set to the end of the page that holds the task structure. */
	child->tss.back_link = 0;
	child->tss.esp0 = PAGE_SIZE + (long) child;
	child->tss.ss0 = 0x10;

	/*
	 * The child's saved registers are the parent's values as passed
	 * in, except eax, which is 0: that is the value the child sees as
	 * the return value of fork().
	 */
	child->tss.eip = eip;
	child->tss.eflags = eflags;
	child->tss.eax = 0;
	child->tss.ecx = ecx;
	child->tss.edx = edx;
	child->tss.ebx = ebx;
	child->tss.esp = esp;
	child->tss.ebp = ebp;
	child->tss.esi = esi;
	child->tss.edi = edi;

	/* Only the low 16 bits of each segment argument are kept. */
	child->tss.es = es & 0xffff;
	child->tss.cs = cs & 0xffff;
	child->tss.ss = ss & 0xffff;
	child->tss.ds = ds & 0xffff;
	child->tss.fs = fs & 0xffff;
	child->tss.gs = gs & 0xffff;
	child->tss.ldt = _LDT(nr);
	child->tss.trace_bitmap = 0x80000000;

	/* If the parent was the last user of the math unit, save its state into the child. */
	if (last_task_used_math == current) {
		__asm__("clts ; fnsave %0" ::"m" (child->tss.i387));
	}

	/* On copy_mem() failure, release the slot and the page again. */
	if (copy_mem(nr, child)) {
		task[nr] = NULL;
		free_page((long) child);
		return -EAGAIN;
	}

	/* The child shares the parent's open files: bump each f_count. */
	for (fd = 0; fd < NR_OPEN; fd++) {
		filp = child->filp[fd];
		if (filp != NULL) {
			filp->f_count++;
		}
	}

	/* Likewise bump i_count on the pwd, root and executable inodes. */
	if (current->pwd) {
		current->pwd->i_count++;
	}
	if (current->root) {
		current->root->i_count++;
	}
	if (current->executable) {
		current->executable->i_count++;
	}

	/* Install the child's TSS and LDT descriptors in the GDT. */
	set_tss_desc(gdt + (nr << 1) + FIRST_TSS_ENTRY, &(child->tss));
	set_ldt_desc(gdt + (nr << 1) + FIRST_LDT_ENTRY, &(child->ldt));

	/* Everything is set up: mark the child runnable. */
	child->state = TASK_RUNNING;
	return last_pid;
}
为什么fork一次,返回两次?
A 进程调用 fork 时,内核在 copy_process 中创建了一个新的进程控制块 B,把 A 当时的 ebp、esp、eip 等寄存器现场写入 B 的 tss,并把 B 的 tss.eax 置为 0;然后 A 进程从系统调用返回,返回值是 B 的进程号(last_pid)。
这是A做的事情
B 被创建之后,运行所需的现场都已经设置好,下一步自然是等待被调度。我们不知道它什么时候会被调度到 CPU 上,不过只要它一开始运行,就会从 A 调用 fork 时保存的 eip 处继续执行,并且 eax 为 0——看起来就像是 A 进程中的那次 fork 返回了 0,走的也是 A 进程的指令序列。
但实际上,B 进程:
1. 被调度后执行的第一件事就是从系统调用返回,它自己并没有执行过 fork;
2. 走的是 A 进程的指令序列,因为 B 的 tss.eip 就是 A 进入系统调用时保存的 eip,两者返回到同一条指令;
3. 返回值和 A 进程不一样,是因为 A、B 两个进程的 eax 不一样:A 的 eax 是 copy_process 返回的 last_pid,而 B 的 tss.eax 被置为 0。
下面说的是 glibc-2.25 和 linux-3.10 中 fork 的调用路径
glibc路径
/*
 * ARCH_FORK - how fork() is issued on this architecture: as a `clone`
 * system call with 5 arguments.  The flags are
 * CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | SIGCHLD and the last
 * argument is the address of the calling thread's `tid` field.
 * NOTE(review): the meaning of the positional arguments (0, NULL, NULL)
 * follows the raw clone ABI of the target architecture -- confirm
 * against that architecture's syscall conventions.
 */
#define ARCH_FORK() \
INLINE_SYSCALL (clone, 5 , \
CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | SIGCHLD, 0 , \
NULL, NULL, &THREAD_SELF->tid)
/*
 * INLINE_SYSCALL - issue system call `name` with `nr` arguments and
 * convert the raw result into the C library convention.
 *
 * The raw result of INTERNAL_SYSCALL is held in `resultvar`.  If
 * INTERNAL_SYSCALL_ERROR_P reports an error, the value of the whole
 * expression is the return value of __syscall_error(), called with the
 * negated INTERNAL_SYSCALL_ERRNO value; otherwise it is the raw result
 * cast to int.  Uses a GNU statement expression, so the macro yields a
 * value.
 * NOTE(review): __syscall_error presumably sets errno and returns -1 --
 * it is not visible here; confirm against its definition.
 */
# define INLINE_SYSCALL(name, nr, args...) \
({ \
unsigned int resultvar = INTERNAL_SYSCALL (name, , nr, args); \
__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (resultvar, )) \
? __syscall_error (-INTERNAL_SYSCALL_ERRNO (resultvar, )) \
: (int ) resultvar; })
kernel路径
#ifdef __ARCH_WANT_SYS_FORK
/*
 * fork system call, compiled in only when the architecture defines
 * __ARCH_WANT_SYS_FORK.  It takes no arguments and forwards to
 * do_fork() with SIGCHLD as the only clone flag and every other
 * argument zero/NULL.  Kernels built without CONFIG_MMU return -EINVAL
 * instead.
 */
SYSCALL_DEFINE0(fork)
{
#ifndef CONFIG_MMU
	/* No MMU: fork is rejected. */
	return -EINVAL;
#else
	return do_fork(SIGCHLD, 0, 0, NULL, NULL);
#endif /* !CONFIG_MMU */
}
#endif /* __ARCH_WANT_SYS_FORK */
/*
 * do_fork - common implementation behind the fork system call (abridged
 * excerpt; the `...` lines stand for code that is not shown here).
 *
 * The part that matters for this walkthrough is the call to
 * copy_process(), which receives clone_flags, stack_start, stack_size
 * and child_tidptr from the caller.
 * NOTE(review): `p` and `trace` are not declared in the visible lines;
 * presumably they are set up in the elided code -- confirm against the
 * full source.
 */
long do_fork(unsigned long clone_flags,
unsigned long stack_start,
unsigned long stack_size,
int __user *parent_tidptr,
int __user *child_tidptr)
{
...
/* Create the child; parent_tidptr is not passed in this call. */
p = copy_process(clone_flags, stack_start, stack_size,child_tidptr, NULL, trace);
...
}