Linux 1.0 memory.c Study Notes

/*
 *  linux/mm/memory.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * demand-loading started 01.12.91 - seems it is high on the list of
 * things wanted, and it should be easy to implement. - Linus
 */

/*
 * Ok, demand-loading was easy, shared pages a little bit tricker. Shared
 * pages started 02.12.91, seems to work. - Linus.
 *
 * Tested sharing by executing about 30 /bin/sh: under the old kernel it
 * would have taken more than the 6M I have free, but it worked well as
 * far as I could see.
 *
 * Also corrected some "invalidate()"s - I wasn't doing enough of them.
 */

/*
 * Real VM (paging to/from disk) started 18.12.91. Much more work and
 * thought has to go into this. Oh, well..
 * 19.12.91  -  works, somewhat. Sometimes I get faults, don't know why.
 *        Found it. Everything seems to work now.
 * 20.12.91  -  Ok, making the swap-device changeable like the root.
 */

#include <asm/system.h>
#include <linux/config.h>

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/head.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>

unsigned long high_memory = 0;

extern unsigned long pg0[1024];        /* page table for 0-4MB for everybody */

extern void sound_mem_init(void);
extern void die_if_kernel(char *,struct pt_regs *,long);

int nr_swap_pages = 0;
int nr_free_pages = 0;
unsigned long free_page_list = 0;
/*
 * The secondary free_page_list is used for malloc() etc things that
 * may need pages during interrupts etc. Normal get_free_page() operations
 * don't touch it, so it stays as a kind of "panic-list", that can be
 * accessed when all other mm tricks have failed.
 */
int nr_secondary_pages = 0;
unsigned long secondary_page_list = 0;

#define copy_page(from,to) \
__asm__("cld ; rep ; movsl": :"S" (from),"D" (to),"c" (1024):"cx","di","si")

unsigned short * mem_map = NULL;

#define CODE_SPACE(addr,p) ((addr) < (p)->end_code)
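/*
 * Study note: mem_map[] keeps one unsigned short per physical page, indexed by
 * MAP_NR(addr) (the physical page number, addr >> PAGE_SHIFT).  The value is a
 * use count, with MAP_PAGE_RESERVED set for pages the allocator must never hand
 * out (kernel image, mem_map itself, etc.).  The 386 uses two-level paging: the
 * top 10 address bits index the page directory, the next 10 bits index a page
 * table, and the low 12 bits are the offset within the page.  In the code below,
 * PAGE_DIR_OFFSET() (from the mm headers) yields a pointer to the page-directory
 * entry for a given cr3 and address, and PAGE_PTR() the byte offset of the
 * corresponding entry within a page table.
 */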

/*
 * oom() prints a message (so that the user knows why the process died),
 * and gives the process an untrappable SIGSEGV.
 */
void oom(struct task_struct * task)
{
    printk("\nout of memory\n");
    task->sigaction[SIGKILL-1].sa_handler = NULL;
    task->blocked &= ~(1<<(SIGKILL-1));
    send_sig(SIGKILL,task,1);
}

static void free_one_table(unsigned long * page_dir)
{
    int j;
    unsigned long pg_table = *page_dir;
    unsigned long * page_table;

    if (!pg_table)
        return;
    *page_dir = 0;
    if (pg_table >= high_memory || !(pg_table & PAGE_PRESENT)) {
        printk("Bad page table: [%p]=%08lx\n",page_dir,pg_table);
        return;
    }
    if (mem_map[MAP_NR(pg_table)] & MAP_PAGE_RESERVED)
        return;
    page_table = (unsigned long *) (pg_table & PAGE_MASK);
    for (j = 0 ; j < PTRS_PER_PAGE ; j++,page_table++) {
        unsigned long pg = *page_table;
        
        if (!pg)
            continue;
        *page_table = 0;
        if (pg & PAGE_PRESENT)
            free_page(PAGE_MASK & pg);
        else
            swap_free(pg);
    }
    free_page(PAGE_MASK & pg_table);
}

/*
 * This function clears all user-level page tables of a process - this
 * is needed by execve(), so that old pages aren't in the way. Note that
 * unlike 'free_page_tables()', this function still leaves a valid
 * page-table-tree in memory: it just removes the user pages. The two
 * functions are similar, but there is a fundamental difference.
 */
void clear_page_tables(struct task_struct * tsk)
{
    int i;
    unsigned long pg_dir;
    unsigned long * page_dir;

    if (!tsk)
        return;
    if (tsk == task[0])
        panic("task[0] (swapper) doesn't support exec()\n");
    pg_dir = tsk->tss.cr3;
    page_dir = (unsigned long *) pg_dir;
    if (!page_dir || page_dir == swapper_pg_dir) {
        printk("Trying to clear kernel page-directory: not good\n");
        return;
    }
    if (mem_map[MAP_NR(pg_dir)] > 1) {
        unsigned long * new_pg;

        if (!(new_pg = (unsigned long*) get_free_page(GFP_KERNEL))) {
            oom(tsk);
            return;
        }
        for (i = 768 ; i < 1024 ; i++)
            new_pg[i] = page_dir[i];
        free_page(pg_dir);
        tsk->tss.cr3 = (unsigned long) new_pg;
        return;
    }
    for (i = 0 ; i < 768 ; i++,page_dir++)
        free_one_table(page_dir);
    invalidate();
    return;
}

/*
 * This function frees up all page tables of a process when it exits.
 */
void free_page_tables(struct task_struct * tsk)
{
    int i;
    unsigned long pg_dir;
    unsigned long * page_dir;

    if (!tsk)
        return;
    if (tsk == task[0]) {
        printk("task[0] (swapper) killed: unable to recover\n");
        panic("Trying to free up swapper memory space");
    }
    pg_dir = tsk->tss.cr3;
    if (!pg_dir || pg_dir == (unsigned long) swapper_pg_dir) {
        printk("Trying to free kernel page-directory: not good\n");
        return;
    }
    tsk->tss.cr3 = (unsigned long) swapper_pg_dir;
    if (tsk == current)
        __asm__ __volatile__("movl %0,%%cr3": :"a" (tsk->tss.cr3));
    if (mem_map[MAP_NR(pg_dir)] > 1) {
        free_page(pg_dir);
        return;
    }
    page_dir = (unsigned long *) pg_dir;
    for (i = 0 ; i < PTRS_PER_PAGE ; i++,page_dir++)
        free_one_table(page_dir);
    free_page(pg_dir);
    invalidate();
}

/*
 * clone_page_tables() clones the page table for a process - both
 * processes will have the exact same pages in memory. There are
 * probably races in the memory management with cloning, but we'll
 * see..
 */
int clone_page_tables(struct task_struct * tsk)
{
    unsigned long pg_dir;

    pg_dir = current->tss.cr3;
    mem_map[MAP_NR(pg_dir)]++;
    tsk->tss.cr3 = pg_dir;
    return 0;
}

/*
 * copy_page_tables() just copies the whole process memory range:
 * note the special handling of RESERVED (ie kernel) pages, which
 * means that they are always shared by all processes.
 */
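/*
 * Study note: for present, writable, copy-on-write pages the loop below clears
 * PAGE_RW in both the parent's and the child's page table entry and bumps the
 * mem_map use count; nothing is copied at fork time.  The first write from
 * either process then faults into do_wp_page(), which copies the page or simply
 * re-enables write access if it is no longer shared.
 */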
int copy_page_tables(struct task_struct * tsk)
{
    int i;
    unsigned long old_pg_dir, *old_page_dir;
    unsigned long new_pg_dir, *new_page_dir;

    if (!(new_pg_dir = get_free_page(GFP_KERNEL)))
        return -ENOMEM;
    old_pg_dir = current->tss.cr3;
    tsk->tss.cr3 = new_pg_dir;
    old_page_dir = (unsigned long *) old_pg_dir;
    new_page_dir = (unsigned long *) new_pg_dir;
    for (i = 0 ; i < PTRS_PER_PAGE ; i++,old_page_dir++,new_page_dir++) {
        int j;
        unsigned long old_pg_table, *old_page_table;
        unsigned long new_pg_table, *new_page_table;

        old_pg_table = *old_page_dir;
        if (!old_pg_table)
            continue;
        if (old_pg_table >= high_memory || !(old_pg_table & PAGE_PRESENT)) {
            printk("copy_page_tables: bad page table: "
                "probable memory corruption");
            *old_page_dir = 0;
            continue;
        }
        if (mem_map[MAP_NR(old_pg_table)] & MAP_PAGE_RESERVED) {
            *new_page_dir = old_pg_table;
            continue;
        }
        if (!(new_pg_table = get_free_page(GFP_KERNEL))) {
            free_page_tables(tsk);
            return -ENOMEM;
        }
        old_page_table = (unsigned long *) (PAGE_MASK & old_pg_table);
        new_page_table = (unsigned long *) (PAGE_MASK & new_pg_table);
        for (j = 0 ; j < PTRS_PER_PAGE ; j++,old_page_table++,new_page_table++) {
            unsigned long pg;
            pg = *old_page_table;
            if (!pg)
                continue;
            if (!(pg & PAGE_PRESENT)) {
                *new_page_table = swap_duplicate(pg);
                continue;
            }
            if ((pg & (PAGE_RW | PAGE_COW)) == (PAGE_RW | PAGE_COW))
                pg &= ~PAGE_RW;
            *new_page_table = pg;
            if (mem_map[MAP_NR(pg)] & MAP_PAGE_RESERVED)
                continue;
            *old_page_table = pg;
            mem_map[MAP_NR(pg)]++;
        }
        *new_page_dir = new_pg_table | PAGE_TABLE;
    }
    invalidate();
    return 0;
}

/*
 * a more complete version of free_page_tables which performs with page
 * granularity.
 */
int unmap_page_range(unsigned long from, unsigned long size)
{
    unsigned long page, page_dir;
    unsigned long *page_table, *dir;
    unsigned long poff, pcnt, pc;

    if (from & ~PAGE_MASK) {
        printk("unmap_page_range called with wrong alignment\n");
        return -EINVAL;
    }
    size = (size + ~PAGE_MASK) >> PAGE_SHIFT;
    dir = PAGE_DIR_OFFSET(current->tss.cr3,from);
    poff = (from >> PAGE_SHIFT) & (PTRS_PER_PAGE-1);
    if ((pcnt = PTRS_PER_PAGE - poff) > size)
        pcnt = size;

    for ( ; size > 0; ++dir, size -= pcnt,
         pcnt = (size > PTRS_PER_PAGE ? PTRS_PER_PAGE : size)) {
        if (!(page_dir = *dir))    {
            poff = 0;
            continue;
        }
        if (!(page_dir & PAGE_PRESENT)) {
            printk("unmap_page_range: bad page directory.");
            continue;
        }
        page_table = (unsigned long *)(PAGE_MASK & page_dir);
        if (poff) {
            page_table += poff;
            poff = 0;
        }
        for (pc = pcnt; pc--; page_table++) {
            if ((page = *page_table) != 0) {
                *page_table = 0;
                if (1 & page) {
                    if (!(mem_map[MAP_NR(page)] & MAP_PAGE_RESERVED))
                        if (current->rss > 0)
                            --current->rss;
                    free_page(PAGE_MASK & page);
                } else
                    swap_free(page);
            }
        }
        if (pcnt == PTRS_PER_PAGE) {
            *dir = 0;
            free_page(PAGE_MASK & page_dir);
        }
    }
    invalidate();
    return 0;
}

int zeromap_page_range(unsigned long from, unsigned long size, int mask)
{
    unsigned long *page_table, *dir;
    unsigned long poff, pcnt;
    unsigned long page;

    if (mask) {
        if ((mask & (PAGE_MASK|PAGE_PRESENT)) != PAGE_PRESENT) {
            printk("zeromap_page_range: mask = %08x\n",mask);
            return -EINVAL;
        }
        mask |= ZERO_PAGE;
    }
    if (from & ~PAGE_MASK) {
        printk("zeromap_page_range: from = %08lx\n",from);
        return -EINVAL;
    }
    dir = PAGE_DIR_OFFSET(current->tss.cr3,from);
    size = (size + ~PAGE_MASK) >> PAGE_SHIFT;
    poff = (from >> PAGE_SHIFT) & (PTRS_PER_PAGE-1);
    if ((pcnt = PTRS_PER_PAGE - poff) > size)
        pcnt = size;

    while (size > 0) {
        if (!(PAGE_PRESENT & *dir)) {
                /* clear page needed here?  SRB. */
            if (!(page_table = (unsigned long*) get_free_page(GFP_KERNEL))) {
                invalidate();
                return -ENOMEM;
            }
            if (PAGE_PRESENT & *dir) {
                free_page((unsigned long) page_table);
                page_table = (unsigned long *)(PAGE_MASK & *dir++);
            } else
                *dir++ = ((unsigned long) page_table) | PAGE_TABLE;
        } else
            page_table = (unsigned long *)(PAGE_MASK & *dir++);
        page_table += poff;
        poff = 0;
        for (size -= pcnt; pcnt-- ;) {
            if ((page = *page_table) != 0) {
                *page_table = 0;
                if (page & PAGE_PRESENT) {
                    if (!(mem_map[MAP_NR(page)] & MAP_PAGE_RESERVED))
                        if (current->rss > 0)
                            --current->rss;
                    free_page(PAGE_MASK & page);
                } else
                    swap_free(page);
            }
            *page_table++ = mask;
        }
        pcnt = (size > PTRS_PER_PAGE ? PTRS_PER_PAGE : size);
    }
    invalidate();
    return 0;
}

/*
 * maps a range of physical memory into the requested pages. the old
 * mappings are removed. any references to nonexistent pages results
 * in null mappings (currently treated as "copy-on-access")
 */
int remap_page_range(unsigned long from, unsigned long to, unsigned long size, int mask)
{
    unsigned long *page_table, *dir;
    unsigned long poff, pcnt;
    unsigned long page;

    if (mask) {
        if ((mask & (PAGE_MASK|PAGE_PRESENT)) != PAGE_PRESENT) {
            printk("remap_page_range: mask = %08x\n",mask);
            return -EINVAL;
        }
    }
    if ((from & ~PAGE_MASK) || (to & ~PAGE_MASK)) {
        printk("remap_page_range: from = %08lx, to=%08lx\n",from,to);
        return -EINVAL;
    }
    dir = PAGE_DIR_OFFSET(current->tss.cr3,from);
    size = (size + ~PAGE_MASK) >> PAGE_SHIFT;
    poff = (from >> PAGE_SHIFT) & (PTRS_PER_PAGE-1);
    if ((pcnt = PTRS_PER_PAGE - poff) > size)
        pcnt = size;

    while (size > 0) {
        if (!(PAGE_PRESENT & *dir)) {
            /* clearing page here, needed?  SRB. */
            if (!(page_table = (unsigned long*) get_free_page(GFP_KERNEL))) {
                invalidate();
                return -1;
            }
            *dir++ = ((unsigned long) page_table) | PAGE_TABLE;
        }
        else
            page_table = (unsigned long *)(PAGE_MASK & *dir++);
        if (poff) {
            page_table += poff;
            poff = 0;
        }

        for (size -= pcnt; pcnt-- ;) {
            if ((page = *page_table) != 0) {
                *page_table = 0;
                if (PAGE_PRESENT & page) {
                    if (!(mem_map[MAP_NR(page)] & MAP_PAGE_RESERVED))
                        if (current->rss > 0)
                            --current->rss;
                    free_page(PAGE_MASK & page);
                } else
                    swap_free(page);
            }

            /*
             * the first condition should return an invalid access
             * when the page is referenced. current assumptions
             * cause it to be treated as demand allocation in some
             * cases.
             */
            if (!mask)
                *page_table++ = 0;    /* not present */
            else if (to >= high_memory)
                *page_table++ = (to | mask);
            else if (!mem_map[MAP_NR(to)])
                *page_table++ = 0;    /* not present */
            else {
                *page_table++ = (to | mask);
                if (!(mem_map[MAP_NR(to)] & MAP_PAGE_RESERVED)) {
                    ++current->rss;
                    mem_map[MAP_NR(to)]++;
                }
            }
            to += PAGE_SIZE;
        }
        pcnt = (size > PTRS_PER_PAGE ? PTRS_PER_PAGE : size);
    }
    invalidate();
    return 0;
}

/*
 * This function puts a page in memory at the wanted address.
 * It returns the physical address of the page gotten, 0 if
 * out of memory (either when trying to access page-table or
 * page.)
 */
unsigned long put_page(struct task_struct * tsk,unsigned long page,
    unsigned long address,int prot)
{
    unsigned long *page_table;

    if ((prot & (PAGE_MASK|PAGE_PRESENT)) != PAGE_PRESENT)
        printk("put_page: prot = %08x\n",prot);
    if (page >= high_memory) {
        printk("put_page: trying to put page %08lx at %08lx\n",page,address);
        return 0;
    }
    page_table = PAGE_DIR_OFFSET(tsk->tss.cr3,address);
    if ((*page_table) & PAGE_PRESENT)
        page_table = (unsigned long *) (PAGE_MASK & *page_table);
    else {
        printk("put_page: bad page directory entry\n");
        oom(tsk);
        *page_table = BAD_PAGETABLE | PAGE_TABLE;
        return 0;
    }
    page_table += (address >> PAGE_SHIFT) & (PTRS_PER_PAGE-1);
    if (*page_table) {
        printk("put_page: page already exists\n");
        *page_table = 0;
        invalidate();
    }
    *page_table = page | prot;
/* no need for invalidate */
    return page;
}

/*
 * The previous function doesn't work very well if you also want to mark
 * the page dirty: exec.c wants this, as it has earlier changed the page,
 * and we want the dirty-status to be correct (for VM). Thus the same
 * routine, but this time we mark it dirty too.
 */
unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsigned long address)
{
    unsigned long tmp, *page_table;

    if (page >= high_memory)
        printk("put_dirty_page: trying to put page %08lx at %08lx\n",page,address);
    if (mem_map[MAP_NR(page)] != 1)
        printk("mem_map disagrees with %08lx at %08lx\n",page,address);
    page_table = PAGE_DIR_OFFSET(tsk->tss.cr3,address);
    if (PAGE_PRESENT & *page_table)
        page_table = (unsigned long *) (PAGE_MASK & *page_table);
    else {
        if (!(tmp = get_free_page(GFP_KERNEL)))
            return 0;
        if (PAGE_PRESENT & *page_table) {
            free_page(tmp);
            page_table = (unsigned long *) (PAGE_MASK & *page_table);
        } else {
            *page_table = tmp | PAGE_TABLE;
            page_table = (unsigned long *) tmp;
        }
    }
    page_table += (address >> PAGE_SHIFT) & (PTRS_PER_PAGE-1);
    if (*page_table) {
        printk("put_dirty_page: page already exists\n");
        *page_table = 0;
        invalidate();
    }
    *page_table = page | (PAGE_DIRTY | PAGE_PRIVATE);
/* no need for invalidate */
    return page;
}

/*
 * This routine handles present pages, when users try to write
 * to a shared page. It is done by copying the page to a new address
 * and decrementing the shared-page counter for the old page.
 *
 * Note that we do many checks twice (look at do_wp_page()), as
 * we have to be careful about race-conditions.
 *
 * Goto-purists beware: the only reason for goto's here is that it results
 * in better assembly code.. The "default" path will see no jumps at all.
 */
static void __do_wp_page(unsigned long error_code, unsigned long address,
    struct task_struct * tsk, unsigned long user_esp)
{
    unsigned long *pde, pte, old_page, prot;
    unsigned long new_page;

    new_page = __get_free_page(GFP_KERNEL);
    pde = PAGE_DIR_OFFSET(tsk->tss.cr3,address);
    pte = *pde;
    if (!(pte & PAGE_PRESENT))
        goto end_wp_page;
    if ((pte & PAGE_TABLE) != PAGE_TABLE || pte >= high_memory)
        goto bad_wp_pagetable;
    pte &= PAGE_MASK;
    pte += PAGE_PTR(address);
    old_page = *(unsigned long *) pte;
    if (!(old_page & PAGE_PRESENT))
        goto end_wp_page;
    if (old_page >= high_memory)
        goto bad_wp_page;
    if (old_page & PAGE_RW)
        goto end_wp_page;
    tsk->min_flt++;
    prot = (old_page & ~PAGE_MASK) | PAGE_RW;
    old_page &= PAGE_MASK;
    if (mem_map[MAP_NR(old_page)] != 1) {
        if (new_page) {
            if (mem_map[MAP_NR(old_page)] & MAP_PAGE_RESERVED)
                ++tsk->rss;
            copy_page(old_page,new_page);
            *(unsigned long *) pte = new_page | prot;
            free_page(old_page);
            invalidate();
            return;
        }
        free_page(old_page);
        oom(tsk);
        *(unsigned long *) pte = BAD_PAGE | prot;
        invalidate();
        return;
    }
    *(unsigned long *) pte |= PAGE_RW;
    invalidate();
    if (new_page)
        free_page(new_page);
    return;
bad_wp_page:
    printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page);
    *(unsigned long *) pte = BAD_PAGE | PAGE_SHARED;
    send_sig(SIGKILL, tsk, 1);
    goto end_wp_page;
bad_wp_pagetable:
    printk("do_wp_page: bogus page-table at address %08lx (%08lx)\n",address,pte);
    *pde = BAD_PAGETABLE | PAGE_TABLE;
    send_sig(SIGKILL, tsk, 1);
end_wp_page:
    if (new_page)
        free_page(new_page);
    return;
}

/*
 * check that a page table change is actually needed, and call
 * the low-level function only in that case..
 */
void do_wp_page(unsigned long error_code, unsigned long address,
    struct task_struct * tsk, unsigned long user_esp)
{
    unsigned long page;
    unsigned long * pg_table;

    pg_table = PAGE_DIR_OFFSET(tsk->tss.cr3,address);
    page = *pg_table;
    if (!page)
        return;
    if ((page & PAGE_PRESENT) && page < high_memory) {
        pg_table = (unsigned long *) ((page & PAGE_MASK) + PAGE_PTR(address));
        page = *pg_table;
        if (!(page & PAGE_PRESENT))
            return;
        if (page & PAGE_RW)
            return;
        if (!(page & PAGE_COW)) {
            if (user_esp && tsk == current) {
                current->tss.cr2 = address;
                current->tss.error_code = error_code;
                current->tss.trap_no = 14;
                send_sig(SIGSEGV, tsk, 1);
                return;
            }
        }
        if (mem_map[MAP_NR(page)] == 1) {
            *pg_table |= PAGE_RW | PAGE_DIRTY;
            invalidate();
            return;
        }
        __do_wp_page(error_code, address, tsk, user_esp);
        return;
    }
    printk("bad page directory entry %08lx\n",page);
    *pg_table = 0;
}
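
/*
 * Study note: __verify_write() walks every page of a user buffer and forces a
 * write-protect fault on it via do_wp_page().  As far as I can tell it backs
 * verify_area() on i386 CPUs that ignore the WP bit in supervisor mode (see the
 * wp_works_ok test in mem_init below): there the kernel would silently write
 * through read-only COW pages, so copy-on-write has to be triggered by hand
 * before the kernel writes to user memory.
 */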

int __verify_write(unsigned long start, unsigned long size)
{
    size--;
    size += start & ~PAGE_MASK;
    size >>= PAGE_SHIFT;
    start &= PAGE_MASK;
    do {
        do_wp_page(1,start,current,0);
        start += PAGE_SIZE;
    } while (size--);
    return 0;
}

static inline void get_empty_page(struct task_struct * tsk, unsigned long address)
{
    unsigned long tmp;

    if (!(tmp = get_free_page(GFP_KERNEL))) {
        oom(tsk);
        tmp = BAD_PAGE;
    }
    if (!put_page(tsk,tmp,address,PAGE_PRIVATE))
        free_page(tmp);
}

/*
 * try_to_share() checks the page at address "address" in the task "p",
 * to see if it exists, and if it is clean. If so, share it with the current
 * task.
 *
 * NOTE! This assumes we have checked that p != current, and that they
 * share the same executable or library.
 *
 * We may want to fix this to allow page sharing for PIC pages at different
 * addresses so that ELF will really perform properly. As long as the vast
 * majority of sharable libraries load at fixed addresses this is not a
 * big concern. Any sharing of pages between the buffer cache and the
 * code space reduces the need for this as well.  - ERY
 */
static int try_to_share(unsigned long address, struct task_struct * tsk,
    struct task_struct * p, unsigned long error_code, unsigned long newpage)
{
    unsigned long from;
    unsigned long to;
    unsigned long from_page;
    unsigned long to_page;

    from_page = (unsigned long)PAGE_DIR_OFFSET(p->tss.cr3,address);
    to_page = (unsigned long)PAGE_DIR_OFFSET(tsk->tss.cr3,address);
/* is there a page-directory at from? */
    from = *(unsigned long *) from_page;
    if (!(from & PAGE_PRESENT))
        return 0;
    from &= PAGE_MASK;
    from_page = from + PAGE_PTR(address);
    from = *(unsigned long *) from_page;
/* is the page clean and present? */
    if ((from & (PAGE_PRESENT | PAGE_DIRTY)) != PAGE_PRESENT)
        return 0;
    if (from >= high_memory)
        return 0;
    if (mem_map[MAP_NR(from)] & MAP_PAGE_RESERVED)
        return 0;
/* is the destination ok? */
    to = *(unsigned long *) to_page;
    if (!(to & PAGE_PRESENT))
        return 0;
    to &= PAGE_MASK;
    to_page = to + PAGE_PTR(address);
    if (*(unsigned long *) to_page)
        return 0;
/* share them if read - do COW immediately otherwise */
    if (error_code & PAGE_RW) {
        if(!newpage)    /* did the page exist?  SRB. */
            return 0;
        copy_page((from & PAGE_MASK),newpage);
        to = newpage | PAGE_PRIVATE;
    } else {
        mem_map[MAP_NR(from)]++;
        from &= ~PAGE_RW;
        to = from;
        if(newpage)    /* only if it existed. SRB. */
            free_page(newpage);
    }
    *(unsigned long *) from_page = from;
    *(unsigned long *) to_page = to;
    invalidate();
    return 1;
}

/*
 * share_page() tries to find a process that could share a page with
 * the current one. Address is the address of the wanted page relative
 * to the current data space.
 *
 * We first check if it is at all feasible by checking executable->i_count.
 * It should be >1 if there are other tasks sharing this inode.
 */
int share_page(struct vm_area_struct * area, struct task_struct * tsk,
    struct inode * inode,
    unsigned long address, unsigned long error_code, unsigned long newpage)
{
    struct task_struct ** p;

    if (!inode || inode->i_count < 2 || !area->vm_ops)
        return 0;
    for (p = &LAST_TASK ; p > &FIRST_TASK ; --p) {
        if (!*p)
            continue;
        if (tsk == *p)
            continue;
        if (inode != (*p)->executable) {
              if(!area) continue;
            /* Now see if there is something in the VMM that
               we can share pages with */
            if(area){
              struct vm_area_struct * mpnt;
              for (mpnt = (*p)->mmap; mpnt; mpnt = mpnt->vm_next) {
                if (mpnt->vm_ops == area->vm_ops &&
                   mpnt->vm_inode->i_ino == area->vm_inode->i_ino&&
                   mpnt->vm_inode->i_dev == area->vm_inode->i_dev){
                  if (mpnt->vm_ops->share(mpnt, area, address))
                break;
                };
              };
              if (!mpnt) continue;  /* Nope.  Nuthin here */
            };
        }
        if (try_to_share(address,tsk,*p,error_code,newpage))
            return 1;
    }
    return 0;
}

/*
 * fill in an empty page-table if none exists.
 */
static inline unsigned long get_empty_pgtable(struct task_struct * tsk,unsigned long address)
{
    unsigned long page;
    unsigned long *p;

    p = PAGE_DIR_OFFSET(tsk->tss.cr3,address);
    if (PAGE_PRESENT & *p)
        return *p;
    if (*p) {
        printk("get_empty_pgtable: bad page-directory entry \n");
        *p = 0;
    }
    page = get_free_page(GFP_KERNEL);
    p = PAGE_DIR_OFFSET(tsk->tss.cr3,address);
    if (PAGE_PRESENT & *p) {
        free_page(page);
        return *p;
    }
    if (*p) {
        printk("get_empty_pgtable: bad page-directory entry \n");
        *p = 0;
    }
    if (page) {
        *p = page | PAGE_TABLE;
        return *p;
    }
    oom(current);
    *p = BAD_PAGETABLE | PAGE_TABLE;
    return 0;
}
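
/*
 * Study note: do_no_page() handles faults on not-present pages.  If the page
 * table entry is non-zero but not present it is a swap entry, so the page is
 * swapped back in; if the address falls inside a vm_area with a nopage
 * operation (e.g. a mmap'ed file) that operation supplies the page; otherwise
 * the fault is demand allocation (BSS/brk, or stack growth handled just below)
 * and a fresh page comes from get_empty_page(); anything else is a SIGSEGV.
 */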

void do_no_page(unsigned long error_code, unsigned long address,
    struct task_struct *tsk, unsigned long user_esp)
{
    unsigned long tmp;
    unsigned long page;
    struct vm_area_struct * mpnt;

    page = get_empty_pgtable(tsk,address);
    if (!page)
        return;
    page &= PAGE_MASK;
    page += PAGE_PTR(address);
    tmp = *(unsigned long *) page;
    if (tmp & PAGE_PRESENT)
        return;
    ++tsk->rss;
    if (tmp) {
        ++tsk->maj_flt;
        swap_in((unsigned long *) page);
        return;
    }
    address &= 0xfffff000;
    tmp = 0;
    for (mpnt = tsk->mmap; mpnt != NULL; mpnt = mpnt->vm_next) {
        if (address < mpnt->vm_start)
            break;
        if (address >= mpnt->vm_end) {
            tmp = mpnt->vm_end;
            continue;
        }
        if (!mpnt->vm_ops || !mpnt->vm_ops->nopage) {
            ++tsk->min_flt;
            get_empty_page(tsk,address);
            return;
        }
        mpnt->vm_ops->nopage(error_code, mpnt, address);
        return;
    }
    if (tsk != current)
        goto ok_no_page;
    if (address >= tsk->end_data && address < tsk->brk)
        goto ok_no_page;
    if (mpnt && mpnt == tsk->stk_vma &&
        address - tmp > mpnt->vm_start - address &&
        tsk->rlim[RLIMIT_STACK].rlim_cur > mpnt->vm_end - address) {
        mpnt->vm_start = address;
        goto ok_no_page;
    }
    tsk->tss.cr2 = address;
    current->tss.error_code = error_code;
    current->tss.trap_no = 14;
    send_sig(SIGSEGV,tsk,1);
    if (error_code & 4)    /* user level access? */
        return;
ok_no_page:
    ++tsk->min_flt;
    get_empty_page(tsk,address);
}

/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
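/*
 * Study note: the i386 page-fault error code used below is a bit mask: bit 0
 * set means a protection violation on a present page (clear means the page was
 * not present), bit 1 means the access was a write, and bit 2 means it happened
 * in user mode.  Hence "error_code & 1" selects the write-protect/COW path and
 * "error_code & 4" tests for a user-mode access.
 */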
asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
    unsigned long address;
    unsigned long user_esp = 0;
    unsigned int bit;

    /* get the address */
    __asm__("movl %%cr2,%0":"=r" (address));
    if (address < TASK_SIZE) {
        if (error_code & 4) {    /* user mode access? */
            if (regs->eflags & VM_MASK) {
                bit = (address - 0xA0000) >> PAGE_SHIFT;
                if (bit < 32)
                    current->screen_bitmap |= 1 << bit;
            } else
                user_esp = regs->esp;
        }
        if (error_code & 1)
            do_wp_page(error_code, address, current, user_esp);
        else
            do_no_page(error_code, address, current, user_esp);
        return;
    }
    address -= TASK_SIZE;
    if (wp_works_ok < 0 && address == 0 && (error_code & PAGE_PRESENT)) {
        wp_works_ok = 1;
        pg0[0] = PAGE_SHARED;
        printk("This processor honours the WP bit even when in supervisor mode. Good.\n");
        return;
    }
    if (address < PAGE_SIZE) {
        printk("Unable to handle kernel NULL pointer dereference");
        pg0[0] = PAGE_SHARED;
    } else
        printk("Unable to handle kernel paging request");
    printk(" at address %08lx\n",address);
    die_if_kernel("Oops", regs, error_code);
    do_exit(SIGKILL);
}

/*
 * BAD_PAGE is the page that is used for page faults when linux
 * is out-of-memory. Older versions of linux just did a
 * do_exit(), but using this instead means there is less risk
 * for a process dying in kernel mode, possibly leaving a inode
 * unused etc..
 *
 * BAD_PAGETABLE is the accompanying page-table: it is initialized
 * to point to BAD_PAGE entries.
 *
 * ZERO_PAGE is a special page that is used for zero-initialized
 * data and COW.
 */
unsigned long __bad_pagetable(void)
{
    extern char empty_bad_page_table[PAGE_SIZE];

    __asm__ __volatile__("cld ; rep ; stosl":
        :"a" (BAD_PAGE + PAGE_TABLE),
         "D" ((long) empty_bad_page_table),
         "c" (PTRS_PER_PAGE)
        :"di","cx");
    return (unsigned long) empty_bad_page_table;
}

unsigned long __bad_page(void)
{
    extern char empty_bad_page[PAGE_SIZE];

    __asm__ __volatile__("cld ; rep ; stosl":
        :"a" (0),
         "D" ((long) empty_bad_page),
         "c" (PTRS_PER_PAGE)
        :"di","cx");
    return (unsigned long) empty_bad_page;
}

unsigned long __zero_page(void)
{
    extern char empty_zero_page[PAGE_SIZE];

    __asm__ __volatile__("cld ; rep ; stosl":
        :"a" (0),
         "D" ((long) empty_zero_page),
         "c" (PTRS_PER_PAGE)
        :"di","cx");
    return (unsigned long) empty_zero_page;
}

void show_mem(void)
{
    int i,free = 0,total = 0,reserved = 0;
    int shared = 0;

    printk("Mem-info:\n");
    printk("Free pages:      %6dkB\n",nr_free_pages<<(PAGE_SHIFT-10));
    printk("Secondary pages: %6dkB\n",nr_secondary_pages<<(PAGE_SHIFT-10));
    printk("Free swap:       %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
    i = high_memory >> PAGE_SHIFT;
    while (i-- > 0) {
        total++;
        if (mem_map[i] & MAP_PAGE_RESERVED)
            reserved++;
        else if (!mem_map[i])
            free++;
        else
            shared += mem_map[i]-1;
    }
    printk("%d pages of RAM\n",total);
    printk("%d free pages\n",free);
    printk("%d reserved pages\n",reserved);
    printk("%d pages shared\n",shared);
    show_buffers();
}

/*
 * paging_init() sets up the page tables - note that the first 4MB are
 * already mapped by head.S.
 *
 * This routines also unmaps the page at virtual kernel address 0, so
 * that we can trap those pesky NULL-reference errors in the kernel.
 */
// The 1GB of virtual address space from 0xC0000000 to 0xFFFFFFFF is kernel space, so no matter how much
// physical memory is installed, at most 1GB of it can be mapped there.
// Kernel virtual addresses starting at 0xC0000000 (3GB) map linearly onto physical addresses starting at 0.
// These notes assume a machine with at most 16MB of physical memory; then only the virtual range
// 0xC0000000 .. 0xC0000000+16MB is mapped, onto physical 0 .. 16MB.

// So the 16MB of kernel virtual addresses starting at 0xC0000000 map one-to-one onto physical 0 .. 16MB:
// adding 0xC0000000 to a kernel physical address gives the kernel virtual address, and subtracting
// 0xC0000000 from a kernel virtual address gives the physical address.
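// In other words, converting between a kernel virtual address and a physical address is just an add or a
// subtract of 0xC0000000. Linux 1.0 has no __pa()/__va() helpers (those came much later); as far as I can
// tell the +0xC0000000 here is applied by the kernel segment base, which is why this file can dereference
// page-table physical addresses directly. A minimal illustration of the relationship (made-up names, not
// part of memory.c):

#define KERNEL_VIRT_BASE_EXAMPLE 0xC0000000UL
#define virt_to_phys_example(vaddr) ((unsigned long)(vaddr) - KERNEL_VIRT_BASE_EXAMPLE)
#define phys_to_virt_example(paddr) ((unsigned long)(paddr) + KERNEL_VIRT_BASE_EXAMPLE)

// e.g. physical address 0x00100000 (the 1MB mark, where mem_init below counts kernel code pages from)
// corresponds to kernel virtual address 0xC0100000.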
unsigned long paging_init(unsigned long start_mem, unsigned long end_mem)
{
    unsigned long * pg_dir;
    unsigned long * pg_table;
    unsigned long tmp;
    unsigned long address;

/*
 * Physical page 0 is special; it's not touched by Linux since BIOS
 * and SMM (for laptops with [34]86/SL chips) may need it.  It is read
 * and write protected to detect null pointer references in the
 * kernel.
 */
#if 0
    memset((void *) 0, 0, PAGE_SIZE);
#endif
    start_mem = PAGE_ALIGN(start_mem); // start_mem is the first physical address after the end of the kernel image
    address = 0; // the physical addresses being mapped start at 0
    pg_dir = swapper_pg_dir;   // swapper_pg_dir is the page directory set up in head.S; its entries 0 and 768 both point at page table pg0
    while (address < end_mem) {
        tmp = *(pg_dir + 768);        /* at virtual addr 0xC0000000 */
        if (!tmp) {
            tmp = start_mem | PAGE_TABLE;
            *(pg_dir + 768) = tmp;// second-level page tables are allocated starting at start_mem, right after the kernel image
            start_mem += PAGE_SIZE;// each page table is one 4096-byte page
        }
        // In the page directory, entries 0 and 768 both point at pg0 (physical 0-4MB); entries 1 and 769 both point at the newly allocated page table (physical 4-8MB); and so on until all memory (16MB in this example) is mapped.
        *pg_dir = tmp;            /* also map it in at 0x0000000 for init */
        pg_dir++;
        pg_table = (unsigned long *) (tmp & PAGE_MASK);// tmp is the page-table address combined with its attribute bits, so masking with PAGE_MASK strips the attributes and leaves the page-table address

        // PTRS_PER_PAGE is 1024, so each of the 1024 entries of the new page table is filled with a physical page address plus its attribute bits.
        // Entry 768's table is pg0, whose 0-4MB mapping was already set up in head.S; it is rewritten here, but to the same 0-4MB mapping, so no harm done. The mapping paging_init really adds is for the physical memory above 4MB (4-16MB in this example).

        for (tmp = 0 ; tmp < PTRS_PER_PAGE ; tmp++,pg_table++) {
            if (address < end_mem)    // while there is still physical memory left to map
                *pg_table = address | PAGE_SHARED;
            else
                *pg_table = 0;
            address += PAGE_SIZE; // each entry advances the physical address by one 4096-byte page
        }
    }
    invalidate();
    return start_mem;

}


// mem_init() reports how much memory there is in total and marks which pages are free and usable.

// start_low_mem is the start of the usable memory below 1MB (i.e. before the kernel image); start_mem is the
// start of the usable memory after the kernel image; end_mem is the upper end of usable memory.

void mem_init(unsigned long start_low_mem,
          unsigned long start_mem, unsigned long end_mem)
{
    int codepages = 0;
    int reservedpages = 0;
    int datapages = 0;
    unsigned long tmp;
    unsigned short * p;
    extern int etext;

    cli();
    end_mem &= PAGE_MASK;
    high_memory = end_mem;
    start_mem +=  0x0000000f;
    start_mem &= ~0x0000000f;
    tmp = MAP_NR(end_mem); // tmp = number of physical pages, derived from the top memory address
    mem_map = (unsigned short *) start_mem; // the mem_map array is carved out of the start of the currently free memory
    p = mem_map + tmp; // mem_map occupies tmp entries (tmp * sizeof(unsigned short) bytes) starting at start_mem
    start_mem = (unsigned long) p; // free memory now starts right after the mem_map array
    while (p > mem_map)  // initialise all tmp entries of mem_map to MAP_PAGE_RESERVED
        *--p = MAP_PAGE_RESERVED;
    start_low_mem = PAGE_ALIGN(start_low_mem);
    start_mem = PAGE_ALIGN(start_mem);
    while (start_low_mem < 0xA0000) { // memory from start_low_mem up to 0xA0000 (640KB) is usable
        mem_map[MAP_NR(start_low_mem)] = 0; // MAP_NR() gives the physical page number, e.g. index 1 for address 0x1000 (the 4KB-8KB page), while mem_map[0] is the 0-4KB page; a count of 0 marks the page free
        start_low_mem += PAGE_SIZE; // move on to the next low-memory page
    }
    while (start_mem < end_mem) {  // start_mem now points past the kernel image, the page tables allocated in paging_init and the mem_map array
        mem_map[MAP_NR(start_mem)] = 0; // MAP_NR(start_mem), i.e. start_mem >> PAGE_SHIFT, is this page's index into mem_map; 0 marks it free
        start_mem += PAGE_SIZE; // move on to the next page
    }
#ifdef CONFIG_SOUND
    sound_mem_init();
#endif
    free_page_list = 0;
    nr_free_pages = 0;
    for (tmp = 0 ; tmp < end_mem ; tmp += PAGE_SIZE) {
        if (mem_map[MAP_NR(tmp)]) {// apart from the low-memory pages and the start_mem..end_mem pages freed above, everything is still MAP_PAGE_RESERVED, i.e. not available
            if (tmp >= 0xA0000 && tmp < 0x100000) // 0xA0000-0x100000 is video memory and the BIOS area, permanently reserved
                reservedpages++;
            else if (tmp < (unsigned long) &etext) // etext is the end of the kernel text, so 0x100000..etext counts as kernel code pages
                codepages++;
            else    // everything else counts as data pages, e.g. 0x0-0x1000 (interrupt vectors and BIOS data area) and the page tables and mem_map array that sit after the kernel image
                datapages++;
            continue;
        }
        *(unsigned long *) tmp = free_page_list;
        free_page_list = tmp;
        nr_free_pages++;
    }
    tmp = nr_free_pages << PAGE_SHIFT;
    printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data)\n",
        tmp >> 10,
        end_mem >> 10,
        codepages << (PAGE_SHIFT-10),
        reservedpages << (PAGE_SHIFT-10),
        datapages << (PAGE_SHIFT-10));
/* test if the WP bit is honoured in supervisor mode */
    wp_works_ok = -1;
    pg0[0] = PAGE_READONLY;
    invalidate();
    __asm__ __volatile__("movb 0,%%al ; movb %%al,0": : :"ax", "memory");
    pg0[0] = 0;
    invalidate();
    if (wp_works_ok < 0)
        wp_works_ok = 0;
    return;
}
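
// Study note: free_page_list built above is an intrusive singly-linked list -- the first word of every free
// page holds the address of the next free page, and free_page_list points at the head. The real allocator
// (get_free_page and friends, in mm/swap.c in this tree if I remember right) also deals with the secondary
// list, interrupts and swapping; the sketch below only shows what taking a page off the list and putting one
// back amount to for this data structure (illustrative only, not part of memory.c):

static unsigned long example_pop_free_page(void)
{
    unsigned long page = free_page_list;

    if (page) {
        free_page_list = *(unsigned long *) page;   /* unlink the head */
        *(unsigned long *) page = 0;                /* don't leak the link word */
        nr_free_pages--;
    }
    return page;
}

static void example_push_free_page(unsigned long page)
{
    *(unsigned long *) page = free_page_list;       /* chain the old head */
    free_page_list = page;
    nr_free_pages++;
}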

void si_meminfo(struct sysinfo *val)
{
    int i;

    i = high_memory >> PAGE_SHIFT;
    val->totalram = 0;
    val->freeram = 0;
    val->sharedram = 0;
    val->bufferram = buffermem;
    while (i-- > 0)  {
        if (mem_map[i] & MAP_PAGE_RESERVED)
            continue;
        val->totalram++;
        if (!mem_map[i]) {
            val->freeram++;
            continue;
        }
        val->sharedram += mem_map[i]-1;
    }
    val->totalram <<= PAGE_SHIFT;
    val->freeram <<= PAGE_SHIFT;
    val->sharedram <<= PAGE_SHIFT;
    return;
}


/* This handles a generic mmap of a disk file */
void file_mmap_nopage(int error_code, struct vm_area_struct * area, unsigned long address)
{
    struct inode * inode = area->vm_inode;
    unsigned int block;
    unsigned long page;
    int nr[8];
    int i, j;
    int prot = area->vm_page_prot;

    address &= PAGE_MASK;
    block = address - area->vm_start + area->vm_offset;
    block >>= inode->i_sb->s_blocksize_bits;

    page = get_free_page(GFP_KERNEL);
    if (share_page(area, area->vm_task, inode, address, error_code, page)) {
        ++area->vm_task->min_flt;
        return;
    }

    ++area->vm_task->maj_flt;
    if (!page) {
        oom(current);
        put_page(area->vm_task, BAD_PAGE, address, PAGE_PRIVATE);
        return;
    }
    for (i=0, j=0; i< PAGE_SIZE ; j++, block++, i += inode->i_sb->s_blocksize)
        nr[j] = bmap(inode,block);
    if (error_code & PAGE_RW)
        prot |= PAGE_RW | PAGE_DIRTY;
    page = bread_page(page, inode->i_dev, nr, inode->i_sb->s_blocksize, prot);

    if (!(prot & PAGE_RW)) {
        if (share_page(area, area->vm_task, inode, address, error_code, page))
            return;
    }
    if (put_page(area->vm_task,page,address,prot))
        return;
    free_page(page);
    oom(current);
}

void file_mmap_free(struct vm_area_struct * area)
{
    if (area->vm_inode)
        iput(area->vm_inode);
#if 0
    if (area->vm_inode)
        printk("Free inode %x:%d (%d)\n",area->vm_inode->i_dev,
                 area->vm_inode->i_ino, area->vm_inode->i_count);
#endif
}

/*
 * Compare the contents of the mmap entries, and decide if we are allowed to
 * share the pages
 */
int file_mmap_share(struct vm_area_struct * area1,
            struct vm_area_struct * area2,
            unsigned long address)
{
    if (area1->vm_inode != area2->vm_inode)
        return 0;
    if (area1->vm_start != area2->vm_start)
        return 0;
    if (area1->vm_end != area2->vm_end)
        return 0;
    if (area1->vm_offset != area2->vm_offset)
        return 0;
    if (area1->vm_page_prot != area2->vm_page_prot)
        return 0;
    return 1;
}

struct vm_operations_struct file_mmap = {
    NULL,            /* open */
    file_mmap_free,        /* close */
    file_mmap_nopage,    /* nopage */
    NULL,            /* wppage */
    file_mmap_share,    /* share */
    NULL,            /* unmap */
};
