linux内核中断发展史,Linux 缺页中断发展历史

慢慢来吧~~

Linux V0.11

缺页中断响应代码:

// Page-fault entry stub. Saves the caller's registers, loads selector 0x10
// into %ds/%es/%fs, and dispatches on bit 0 of the word that was on top of
// the stack at entry: set -> _do_wp_page, clear -> _do_no_page.
.globl _page_fault

_page_fault:

// Swap %eax with the word on top of the stack: %eax now holds that word
// (presumably the CPU-pushed page-fault error code) and the old %eax is saved.
xchgl %eax,(%esp)

pushl %ecx

pushl %edx

push %ds

push %es

push %fs

// Load selector 0x10 into the data segment registers
// (presumably the kernel data segment - TODO confirm against the GDT setup).
movl $0x10,%edx

mov %dx,%ds

mov %dx,%es

mov %dx,%fs

// %cr2 is pushed first and the error code second, matching the C handlers'
// (error_code, address) parameter order.
movl %cr2,%edx

pushl %edx

pushl %eax

testl $1,%eax // test bit 0 of the error code (x86 P flag: 0 = page not present, 1 = protection violation)

jne 1f

call _do_no_page // bit 0 clear: the page is missing

jmp 2f

1:call _do_wp_page // bit 0 set: write to a present, protected page

// Drop the two arguments pushed above, then restore registers in reverse order.
2:addl $8,%esp

pop %fs

pop %es

pop %ds

popl %edx

popl %ecx

popl %eax

iret

核心接口:

1. 缺页处理接口  do_no_page

/*
 * do_no_page: handle a fault on a page that is not present.
 *
 * error_code - fault error code (not read in this body).
 * address    - faulting address; rounded down to a 4096-byte boundary here.
 *
 * With no executable, or at/after end_data, the address is handed to
 * get_empty_page(). Otherwise share_page() is tried first, and failing that
 * the page is read from current->executable and installed with put_page().
 */
void do_no_page(unsigned long error_code,unsigned long address)

{

int nr[4];

unsigned long tmp;

unsigned long page;

int block,i;

/* Clear the low 12 bits, then take the offset from start_code. */
address &= 0xfffff000;

tmp = address - current->start_code;

if (!current->executable || tmp >= current->end_data) {

get_empty_page(address);

return;

}

/* A non-zero result from share_page() means nothing more to do here. */
if (share_page(tmp))

return;

if (!(page = get_free_page()))

oom();

/* remember that 1 block is used for header */

block = 1 + tmp/BLOCK_SIZE;

/* Look up four consecutive block numbers and read them into the new page. */
for (i=0 ; i<4 ; block++,i++)

nr[i] = bmap(current->executable,block);

bread_page(page,current->executable->i_dev,nr);

/* i = number of bytes of this page lying past end_data;
 * zero that many bytes, working backwards from the end of the page. */
i = tmp + 4096 - current->end_data;

tmp = page + 4096;

while (i-- > 0) {

tmp--;

*(char *)tmp = 0;

}

/* On put_page() failure (zero return) release the page and report OOM. */
if (put_page(page,address))

return;

free_page(page);

oom();

}

流程图

03efaf8cde1f04b210457f3abd666fe6.png

2. 写时复制的处理函数: do_wp_page (实际的复制由 un_wp_page 完成)

/*
 * un_wp_page: make the page behind a page-table entry writable.
 *
 * table_entry - pointer to the page-table entry to update.
 *
 * If the page is at or above LOW_MEM and its mem_map count is 1, only bit 1
 * of the entry is set (the x86 R/W flag). Otherwise a new page is allocated,
 * the entry is pointed at it, and the old contents are copied across.
 */
void un_wp_page(unsigned long * table_entry)

{

unsigned long old_page,new_page;

/* Keep only the top 20 bits of the entry: the page address. */
old_page = 0xfffff000 & *table_entry;

if (old_page >= LOW_MEM && mem_map[MAP_NR(old_page)]==1) {

*table_entry |= 2;

invalidate();

return;

}

if (!(new_page=get_free_page()))

oom();

/* Drop one reference on the old page; pages below LOW_MEM are not counted here. */
if (old_page >= LOW_MEM)

mem_map[MAP_NR(old_page)]--;

/* | 7 sets the low three flag bits (x86: present, writable, user). */
*table_entry = new_page | 7;

invalidate();

copy_page(old_page,new_page);

}

/*
 * This routine handles present pages, when users try to write
 * to a shared page. It is done by copying the page to a new address
 * and decrementing the shared-page counter for the old page.
 *
 * If it's in code space we exit with a segment error.
 * (That check is currently compiled out by the #if 0 below.)
 *
 * error_code - fault error code (not read in this body).
 * address    - faulting address, used to locate the page-table entry.
 */

void do_wp_page(unsigned long error_code,unsigned long address)

{

#if 0

/* we cannot do this yet: the estdio library writes to code space */

/* stupid, stupid. I really want the libc.a from GNU */

if (CODE_SPACE(address))

do_exit(SIGSEGV);

#endif

/*
 * (address>>20)&0xffc is used directly as a pointer to the directory entry,
 * so the page directory is taken to start at address 0. Masking that entry
 * with 0xfffff000 gives the page table's address; adding (address>>10)&0xffc
 * gives the address of the page-table entry passed to un_wp_page().
 */
un_wp_page((unsigned long *)

(((address>>10) & 0xffc) + (0xfffff000 &

*((unsigned long *) ((address>>20) &0xffc)))));

}

3. 写前验证函数 write_verify: 若目标页存在且不可写, 则调用 un_wp_page 进行写时复制

/*
 * write_verify: if the page holding `address` is present but not writable,
 * run un_wp_page() on its page-table entry; otherwise do nothing.
 *
 * address - address to check.
 */
void write_verify(unsigned long address)

{

unsigned long page;

if (!( (page = *((unsigned long *) ((address>>20) & 0xffc)) )&1)) // directory entry not present (bit 0 clear): nothing to do

return;

page &= 0xfffff000;

page += ((address>>10) & 0xffc); // add the byte offset of the entry within the page table

if ((3 & *(unsigned long *) page) == 1) /* non-writeable, present */

un_wp_page((unsigned long *) page);

return;

}

Linux V0.12

1. 对do_wp_page 添加了参数有效性检测:

/*
 * do_wp_page (v0.12): handle a write fault on a present page, with added
 * address checks before the entry is passed to un_wp_page().
 *
 * error_code - fault error code (not read in this body).
 * address    - faulting address.
 */
void do_wp_page(unsigned long error_code,unsigned long address)

{

/* An address below TASK_SIZE is only reported; execution continues. */
if (address < TASK_SIZE)

printk("\n\rBAD! KERNEL MEMORY WP-ERR!\n\r");

/* An offset from start_code greater than TASK_SIZE ends the task with SIGSEGV. */
if (address - current->start_code > TASK_SIZE) {

printk("Bad things happen: page error in do_wp_page\n\r");

do_exit(SIGSEGV);

}

/* Directory entry read from (address>>20)&0xffc, masked to the page-table
 * address, plus the entry's byte offset (address>>10)&0xffc. */
un_wp_page((unsigned long *)

(((address>>10) & 0xffc) + (0xfffff000 &

*((unsigned long *) ((address>>20) &0xffc)))));

}

2. do_no_page 加入了地址有效性检测, 以及对换入(swap_in)和库文件(current->library)的支持:

源码:

/*
 * do_no_page (v0.12): handle a fault on a page that is not present.
 *
 * error_code - fault error code (not read in this body).
 * address    - faulting address.
 *
 * Order of handling: address checks; swap_in() when the page-table entry is
 * non-zero but not present; otherwise pick the backing inode (library or
 * executable), falling back to get_empty_page() when there is none; then try
 * share_page(), and finally read the page from the inode.
 */
void do_no_page(unsigned long error_code,unsigned long address)

{

int nr[4];

unsigned long tmp;

unsigned long page;

int block,i;

struct m_inode * inode;

/* An address below TASK_SIZE is only reported; execution continues. */
if (address < TASK_SIZE)

printk("\n\rBAD!! KERNEL PAGE MISSING\n\r");

if (address - current->start_code > TASK_SIZE) {

printk("Bad things happen: nonexistent page error in do_no_page\n\r");

do_exit(SIGSEGV);

}

/* Read the directory entry; if it is present, look at the page-table entry. */
page = *(unsigned long *) ((address >> 20) & 0xffc);

if (page & 1) {

page &= 0xfffff000;

page += (address >> 10) & 0xffc;

tmp = *(unsigned long *) page;

/* Non-zero entry with the present bit clear: hand it to swap_in(). */
if (tmp && !(1 & tmp)) {

swap_in((unsigned long *) page);

return;

}

}

address &= 0xfffff000;

tmp = address - current->start_code;

/* Choose the backing inode and the first block number from the offset. */
if (tmp >= LIBRARY_OFFSET ) {

inode = current->library;

block = 1 + (tmp-LIBRARY_OFFSET) / BLOCK_SIZE;

} else if (tmp < current->end_data) {

inode = current->executable;

block = 1 + tmp / BLOCK_SIZE;

} else {

inode = NULL;

block = 0;

}

/* No backing inode: hand the address to get_empty_page(). */
if (!inode) {

get_empty_page(address);

return;

}

if (share_page(inode,tmp))

return;

if (!(page = get_free_page()))

oom();

/* remember that 1 block is used for header */

for (i=0 ; i<4 ; block++,i++)

nr[i] = bmap(inode,block);

bread_page(page,inode->i_dev,nr);

/* i = bytes of this page past end_data; a count above 4095 is reset to 0,
 * so nothing is cleared in that case. */
i = tmp + 4096 - current->end_data;

if (i>4095)

i = 0;

/* Zero i bytes, working backwards from the end of the page. */
tmp = page + 4096;

while (i-- > 0) {

tmp--;

*(char *)tmp = 0;

}

/* On put_page() failure (zero return) release the page and report OOM. */
if (put_page(page,address))

return;

free_page(page);

oom();

}

流程图:

009f6a5cd369663cad9e1ecd349ee54d.png

Linux V0.95

1. 首先, 缺页中断的响应代码从 page.s 移除(page.s 不存在了), 加入到了 kernel/asm.s, 且直接调用 do_page_fault

// Page-fault entry: push the address of _do_page_fault and jump to the
// error_code routine (not shown here), which presumably calls the pushed handler.
_page_fault:

pushl $_do_page_fault

jmp error_code

2. 对于引起缺页中断的原因的判断放在 do_page_fault 内

/*
 * This routine handles page faults. It determines the address,
 * and the problem then passes it off to one of the appropriate
 * routines.
 *
 * esp        - not read in this body.
 * error_code - fault error code; bit 0 selects the handler.
 */

void do_page_fault (unsigned long *esp, unsigned long error_code)

{

unsigned long address;

/* get the address */

__asm__ ("movl %%cr2,%0":"=r" (address));

/* Bit 0 clear: page missing -> do_no_page(); bit 0 set -> do_wp_page(). */
if (!(error_code & 1)) {

do_no_page(error_code, address, current);

return;

} else {

do_wp_page(error_code, address);

return;

}

}

3. 写时复制调用的接口 un_wp_page 对于内存耗尽的情况做了循环重试和更多的保护, 不再直接操作 mem_map 数组的引用计数, 改用 free_page 来释放一个引用.

/*
 * un_wp_page (v0.95): make the page behind a page-table entry writable.
 *
 * table_entry - pointer to the page-table entry to update.
 *
 * The entry is re-read from the top after a new page has been allocated
 * (presumably because allocation can let the entry change - TODO confirm).
 * Every early exit frees new_page if one was already allocated.
 */
void un_wp_page(unsigned long * table_entry)

{

unsigned long old_page;

unsigned long new_page = 0;

unsigned long dirty;

repeat:

old_page = *table_entry;

/* Remember the PAGE_DIRTY bits so they carry over to the new entry. */
dirty = old_page & PAGE_DIRTY;

/* Entry not present: nothing to do. */
if (!(old_page & 1)) {

if (new_page)

free_page(new_page);

return;

}

old_page &= 0xfffff000;

/* A page address at or above HIGH_MEMORY ends the task with SIGSEGV. */
if (old_page >= HIGH_MEMORY) {

if (new_page)

free_page(new_page);

printk("bad page address\n\r");

do_exit(SIGSEGV);

}

/* mem_map count of 1: just set bit 1 of the entry (x86 R/W flag), no copy. */
if (old_page >= LOW_MEM && mem_map[MAP_NR(old_page)]==1) {

*table_entry |= 2;

invalidate();

if (new_page)

free_page(new_page);

return;

}

if (!new_page) {

if (!(new_page=get_free_page()))

oom(); // in case oom() does not terminate the program

goto repeat; // run the checks again with the new page in hand

}

copy_page(old_page,new_page);

/* | 7 sets the low three flag bits (x86: present, writable, user). */
*table_entry = new_page | dirty | 7 ;

free_page(old_page); // release one reference on the old page

invalidate();

}

4. do_no_page 加入更多检测合法性的代码.

Linux V0.95a

加入更多的合法性检测

/* Excerpt: if the page-table slot is already non-zero, log it, clear the
 * slot and call invalidate(). */
if (*page_table) {

printk("put_dirty_page: page already exists\n");

*page_table = 0;

invalidate();

}

Linux V0.95c

加入一点对进程的操作

例如在do_wp_page中

/* Increment the min_flt counter of the current task. */
++current->min_flt;

Linux V0.96b

将获取(或新建)页目录项所指页表的操作封装为 get_empty, 并在其中处理内存不足的问题. 从而像 do_no_page 之类的接口在取页表时可以不再自行考虑内存不足.

/*
 * fill in an empty page or directory if none exists
 *
 * p - pointer to the entry to fill in.
 *
 * Returns the value of *p once its bit 0 (present) is set, either because
 * it already was or because a newly allocated page was installed here.
 */

static unsigned long get_empty(unsigned long * p)

{

unsigned long page = 0;

repeat:

/* Entry already present: release our page (still 0 on the first pass)
 * and return the existing entry. */
if (1 & *p) {

free_page(page);

return *p;

}

/* Non-zero entry without the present bit: report it and clear it. */
if (*p) {

printk("get_empty: bad page entry \n");

*p = 0;

}

/* Second pass: install the allocated page with the low three flag bits set. */
if (page) {

*p = page | 7;

return *p;

}

/* First pass: allocate a page, then go back and re-check the entry. */
if (!(page = get_free_page()))

oom();

goto repeat;

}

接口do_no_page

/*
 * do_no_page (v0.96b): handle a fault on a page that is not present.
 *
 * error_code - fault error code (not read in this body).
 * address    - faulting address.
 * tsk        - task whose start_code, end_data, libraries and fault counters are used.
 * user_esp   - user stack pointer, used for the stack-growth check below.
 *
 * Also updates the rss, min_flt and maj_flt counters of tsk.
 */
void do_no_page(unsigned long error_code, unsigned long address,

struct task_struct *tsk, unsigned long user_esp)

{

static unsigned int last_checked = 0;

int nr[4];

unsigned long tmp;

unsigned long page;

unsigned int block,i;

struct inode * inode;

/* Thrashing ? Make it interruptible, but don't penalize otherwise */

/* If this page matches a recently recorded fault, zero the current task's
 * counter and call schedule(). */
for (i = 0; i < CHECK_LAST_NR; i++)

if ((address & 0xfffff000) == last_pages[i]) {

current->counter = 0;

schedule();

}

/* Record this page in last_pages[], wrapping the index at CHECK_LAST_NR. */
last_checked++;

if (last_checked >= CHECK_LAST_NR)

last_checked = 0;

last_pages[last_checked] = address & 0xfffff000;

if (address < TASK_SIZE) {

printk("\n\rBAD!! KERNEL PAGE MISSING\n\r");

do_exit(SIGSEGV);

}

if (address - tsk->start_code >= TASK_SIZE) {

printk("Bad things happen: nonexistent page error in do_no_page\n\r");

do_exit(SIGSEGV);

}

page = get_empty((unsigned long *) ((address >> 20) & 0xffc)); // get_empty() returns the directory entry, allocating a page for it if needed; its out-of-memory case is handled inside get_empty()

/* Mask to the page-table address and add the entry's byte offset. */
page &= 0xfffff000;

page += (address >> 10) & 0xffc;

tmp = *(unsigned long *) page;

/* Entry already present: nothing for this handler to do. */
if (tmp & 1) {

printk("bogus do_no_page\n");

return;

}

++tsk->rss;

/* Non-zero entry without the present bit: hand it to swap_in(). */
if (tmp) {

++tsk->maj_flt;

swap_in((unsigned long *) page);

return;

}

address &= 0xfffff000;

tmp = address - tsk->start_code;

inode = NULL;

block = 0;

/* Pick the backing inode: the executable below end_data, otherwise the
 * library whose [start, start+length) range contains the offset. */
if (tmp < tsk->end_data) {

inode = tsk->executable;

block = 1 + tmp / BLOCK_SIZE;

} else {

i = tsk->numlibraries;

while (i-- > 0) {

if (tmp < tsk->libraries[i].start)

continue;

/* block temporarily holds the byte offset into this library. */
block = tmp - tsk->libraries[i].start;

if (block >= tsk->libraries[i].length)

continue;

inode = tsk->libraries[i].library;

block = 1 + block / BLOCK_SIZE;

break;

}

}

/* No backing inode: hand the address to get_empty_page(). For the current
 * task, SIGSEGV is then sent unless the offset is at/above LIBRARY_OFFSET,
 * below brk, or tmp+8192 reaches the page containing user_esp. */
if (!inode) {

++tsk->min_flt;

get_empty_page(address);

if (tsk != current)

return;

if (tmp >= LIBRARY_OFFSET || tmp < tsk->brk)

return;

if (tmp+8192 >= (user_esp & 0xfffff000))

return;

send_sig(SIGSEGV,tsk,1);

return;

}

/* share_page() is only tried when tsk is the current task. */
if (tsk == current)

if (share_page(inode,tmp)) {

++tsk->min_flt;

return;

}

++tsk->maj_flt;

if (!(page = get_free_page()))

oom();

/* Look up four consecutive block numbers and read them into the new page. */
for (i=0 ; i<4 ; block++,i++)

nr[i] = bmap(inode,block);

bread_page(page,inode->i_dev,nr);

/* i = bytes of this page past end_data; a count above 4095 is reset to 0,
 * so nothing is cleared in that case. i is unsigned here. */
i = tmp + 4096 - tsk->end_data;

if (i>4095)

i = 0;

/* Zero i bytes, working backwards from the end of the page. */
tmp = page + 4096;

while (i--) {

tmp--;

*(char *)tmp = 0;

}

/* On put_page() failure (zero return) release the page and report OOM. */
if (put_page(page,address))

return;

free_page(page);

oom();

}

之后的版本不再默认一级页表总是在地址0

越来越复杂了

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值