mm/oom_kill.c
*
* 总结:忙(占用CPU多)、运行了很久、占用内存少、权限高、毫不谦让(nice值不大于0)、
* 直接操作硬件的进程,有更多生还机会
*
超级纯粹的一个模块,实现out of memory killer.当内存严重不足的时候选择
一个"弱者",通过强制信号kill掉,释放出内存。
提供了两个接口:
void oom_kill(void)
int out_of_memory(void)
并且只有一个地方使用,vmscan.c:
/* Excerpt of kswapd; the dotted line stands for code elided from this note. */
int kswapd(void *unused)
{
........
/* When out_of_memory() returns non-zero, call oom_kill(). */
else if (out_of_memory()) {
oom_kill();
}
}
分析filemap.c的时候,了解过kswapd的作用,罗列一下:
*****kswapd (mm/vmscan.c)
+---->do_try_to_free_pages (如果内存已经不够用)
+-->page_launder
| +-->扫描 <inactive_dirty_list>
| +-->对dirty页启动回写(包括mapping和buffer cache)
+-->refill_inactive
+-->refill_inactive_scan
+-->扫描<active_list>,选择合适页面移入
<inactive_dirty_list>
+-->swap_out,对进程启动页面换出
+-->try_to_swap_out将选中页面放入
<inactive_dirty_list>
+----->refill_inactive_scan
先来看看什么叫做内存严重不足:
int out_of_memory(void)
{
struct sysinfo swp_info;
/* Enough free memory? Not OOM. */
if (nr_free_pages() > freepages.min)//每个node,每个zone的buddy系统页面余量不足
return 0;
if (nr_free_pages() + nr_inactive_clean_pages() > freepages.low)//即使算上可以立即
return 0; //回收的页面,也少的可怜
/* Enough swap space left? Not OOM. */
si_swapinfo(&swp_info);
if (swp_info.freeswap > 0)//连磁盘上的空间都不够用了
return 0;
/* Else... */
return 1; //请找个替死鬼吧
}
然后就是选择哪个进程的问题了:
/*
 * oom_kill - signal the task chosen by select_bad_process() and then yield.
 *
 * NOTE(review): the "..." line below marks code elided from this excerpt;
 * whether p is checked for NULL there cannot be seen from here.
 */
void oom_kill(void)
{
// Pick the victim process.
struct task_struct *p = select_bad_process();
...
// Grant the chosen process a very high priority so it can finish quickly.
p->counter = 5 * HZ;
p->flags |= PF_MEMALLOC;
/* This process has hardware access, be more careful. */
// Force-deliver the signal that ends the process: SIGTERM when it holds
// CAP_SYS_RAWIO, SIGKILL otherwise.
if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO)) {
force_sig(SIGTERM, p);
} else {
force_sig(SIGKILL, p);
}
/*
* Make kswapd go out of the way, so "p" has a good chance of
* killing itself before someone else gets the chance to ask
* for more memory.
*/
// Per the note, only kswapd calls this function, so this makes kswapd give
// up the CPU; on reschedule the chosen process can hopefully finish quickly
// and release its resources.
current->policy |= SCHED_YIELD;
schedule();
return;
}
kswapd(oom_kill)选中要kill的进程,发送一个信号给这个进程,然后kswapd让
出cpu,使选中的进程可以得到调度.当选中的进程开始运行的时候,因为有一个信号
处于pending状态,所以系统建立一个运行signal的环境,并开始处理信号.最后被选
中的进程退出运行,释放资源.至于更详细的过程等到分析信号的时候再议.
select_bad_process选择badness值最高的进程,我们来看看什么样的进程最有
可能为系统牺牲:
/*
 * badness - score a task as a candidate victim for the OOM killer.
 * @p: task to evaluate
 *
 * Returns 0 for a task without an mm. Otherwise the score starts from the
 * task's total_vm, is divided down by terms derived from CPU time and run
 * time, doubled for positive nice values, and quartered for privileged
 * tasks and again for tasks with raw I/O capability. A higher score means
 * a more likely victim.
 *
 * NOTE(review): the two divisions assume int_sqrt() never returns 0;
 * int_sqrt() is not shown here - confirm.
 */
static int badness(struct task_struct *p)
{
	int score, cpu_used, alive_for;
	int privileged, raw_io;

	if (!p->mm)
		return 0;

	/* Memory size is the base: the more a task holds, the likelier it dies. */
	score = p->mm->total_vm;

	/*
	 * Both times are scaled down to coarse units by shifting; the original
	 * comment describes them as seconds (CPU time) and minutes (run time)
	 * and states that this is not safe against jiffies wrap-around.
	 */
	cpu_used = (p->times.tms_utime + p->times.tms_stime) >> (SHIFT_HZ + 3);
	alive_for = (jiffies - p->start_time) >> (SHIFT_HZ + 10);

	/* Busier tasks (more CPU time) are more likely to survive. */
	score = score / int_sqrt(cpu_used);
	/* Longer-running tasks are more likely to survive as well. */
	score = score / int_sqrt(int_sqrt(alive_for));

	/* Niced tasks are treated as less important: double their score. */
	if (p->nice > 0)
		score = score * 2;

	/* Administrator tasks (CAP_SYS_ADMIN, uid 0 or euid 0) are protected. */
	privileged = (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_ADMIN)) ||
		     p->uid == 0 || p->euid == 0;
	if (privileged)
		score = score / 4;

	/*
	 * Tasks with direct hardware access get a further reduction: killing
	 * them could upset the hardware, and users tend to grant this
	 * capability only to programs they consider important.
	 */
	raw_io = (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO)) != 0;
	if (raw_io)
		score = score / 4;

#ifdef DEBUG
	printk(KERN_DEBUG "OOMkill: task %d (%s) got %d points\n",
	       p->pid, p->comm, score);
#endif
	return score;
}
*
* 总结:忙(占用CPU多)、运行了很久、占用内存少、权限高、毫不谦让(nice值不大于0)、
* 直接操作硬件的进程,有更多生还机会
*
超级纯粹的一个模块,实现out of memory killer.当内存严重不足的时候选择
一个"弱者",通过强制信号kill掉,释放出内存。
提供了两个接口:
void oom_kill(void)
int out_of_memory(void)
并且只有一个地方使用,vmscan.c:
/* Excerpt of kswapd; the dotted line stands for code elided from this note. */
int kswapd(void *unused)
{
........
/* When out_of_memory() returns non-zero, call oom_kill(). */
else if (out_of_memory()) {
oom_kill();
}
}
分析filemap.c的时候,了解过kswapd的作用,罗列一下:
*****kswapd (mm/vmscan.c)
+---->do_try_to_free_pages (如果内存已经不够用)
+-->page_launder
| +-->扫描 <inactive_dirty_list>
| +-->对dirty页启动回写(包括mapping和buffer cache)
+-->refill_inactive
+-->refill_inactive_scan
+-->扫描<active_list>,选择合适页面移入
<inactive_dirty_list>
+-->swap_out,对进程启动页面换出
+-->try_to_swap_out将选中页面放入
<inactive_dirty_list>
+----->refill_inactive_scan
先来看看什么叫做内存严重不足:
int out_of_memory(void)
{
struct sysinfo swp_info;
/* Enough free memory? Not OOM. */
if (nr_free_pages() > freepages.min)//每个node,每个zone的buddy系统页面余量不足
return 0;
if (nr_free_pages() + nr_inactive_clean_pages() > freepages.low)//即使算上可以立即
return 0; //回收的页面,也少的可怜
/* Enough swap space left? Not OOM. */
si_swapinfo(&swp_info);
if (swp_info.freeswap > 0)//连磁盘上的空间都不够用了
return 0;
/* Else... */
return 1; //请找个替死鬼吧
}
然后就是选择哪个进程的问题了:
/*
 * oom_kill - signal the task chosen by select_bad_process() and then yield.
 *
 * NOTE(review): the "..." line below marks code elided from this excerpt;
 * whether p is checked for NULL there cannot be seen from here.
 */
void oom_kill(void)
{
// Pick the victim process.
struct task_struct *p = select_bad_process();
...
// Grant the chosen process a very high priority so it can finish quickly.
p->counter = 5 * HZ;
p->flags |= PF_MEMALLOC;
/* This process has hardware access, be more careful. */
// Force-deliver the signal that ends the process: SIGTERM when it holds
// CAP_SYS_RAWIO, SIGKILL otherwise.
if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO)) {
force_sig(SIGTERM, p);
} else {
force_sig(SIGKILL, p);
}
/*
* Make kswapd go out of the way, so "p" has a good chance of
* killing itself before someone else gets the chance to ask
* for more memory.
*/
// Per the note, only kswapd calls this function, so this makes kswapd give
// up the CPU; on reschedule the chosen process can hopefully finish quickly
// and release its resources.
current->policy |= SCHED_YIELD;
schedule();
return;
}
kswapd(oom_kill)选中要kill的进程,发送一个信号给这个进程,然后kswapd让
出cpu,使选中的进程可以得到调度.当选中的进程开始运行的时候,因为有一个信号
处于pending状态,所以系统建立一个运行signal的环境,并开始处理信号.最后被选
中的进程退出运行,释放资源.至于更详细的过程等到分析信号的时候再议.
select_bad_process选择badness值最高的进程,我们来看看什么样的进程最有
可能为系统牺牲:
/*
 * badness - score a task as a candidate victim for the OOM killer.
 * @p: task to evaluate
 *
 * Returns 0 for a task without an mm. Otherwise the score starts from the
 * task's total_vm, is divided down by terms derived from CPU time and run
 * time, doubled for positive nice values, and quartered for privileged
 * tasks and again for tasks with raw I/O capability. A higher score means
 * a more likely victim.
 *
 * NOTE(review): the two divisions assume int_sqrt() never returns 0;
 * int_sqrt() is not shown here - confirm.
 */
static int badness(struct task_struct *p)
{
	int score, cpu_used, alive_for;
	int privileged, raw_io;

	if (!p->mm)
		return 0;

	/* Memory size is the base: the more a task holds, the likelier it dies. */
	score = p->mm->total_vm;

	/*
	 * Both times are scaled down to coarse units by shifting; the original
	 * comment describes them as seconds (CPU time) and minutes (run time)
	 * and states that this is not safe against jiffies wrap-around.
	 */
	cpu_used = (p->times.tms_utime + p->times.tms_stime) >> (SHIFT_HZ + 3);
	alive_for = (jiffies - p->start_time) >> (SHIFT_HZ + 10);

	/* Busier tasks (more CPU time) are more likely to survive. */
	score = score / int_sqrt(cpu_used);
	/* Longer-running tasks are more likely to survive as well. */
	score = score / int_sqrt(int_sqrt(alive_for));

	/* Niced tasks are treated as less important: double their score. */
	if (p->nice > 0)
		score = score * 2;

	/* Administrator tasks (CAP_SYS_ADMIN, uid 0 or euid 0) are protected. */
	privileged = (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_ADMIN)) ||
		     p->uid == 0 || p->euid == 0;
	if (privileged)
		score = score / 4;

	/*
	 * Tasks with direct hardware access get a further reduction: killing
	 * them could upset the hardware, and users tend to grant this
	 * capability only to programs they consider important.
	 */
	raw_io = (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO)) != 0;
	if (raw_io)
		score = score / 4;

#ifdef DEBUG
	printk(KERN_DEBUG "OOMkill: task %d (%s) got %d points\n",
	       p->pid, p->comm, score);
#endif
	return score;
}