目录
1.进程地址描述符
/* Excerpt of the task descriptor: only the address-space pointers are shown. */
struct task_struct{
/* mm is the descriptor of this task's address space; active_mm is not
 * explained in this excerpt. */
struct mm_struct *mm, *active_mm;
}
Linux 内核使用 task_struct 描述一个进程(线程),该结构体在 sched.h 文件中定义;task_struct 的 mm 成员变量是该进程地址空间的描述符。
/*
 * Address-space descriptor of a process; task_struct->mm points to one of
 * these. The VMAs it owns are reachable both through a linked list (mmap)
 * and through a red-black tree (mm_rb).
 */
struct mm_struct {
struct vm_area_struct *mmap; /* list of VMAs */
struct rb_root mm_rb; /* root of the red-black tree holding the same VMAs */
u64 vmacache_seqnum; /* per-thread vmacache */
#ifdef CONFIG_MMU
/* Hook used to pick an unmapped address range for a new mapping
 * (NOTE(review): semantics inferred from the name and signature -- confirm). */
unsigned long (*get_unmapped_area) (struct file *filp,
unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags);
#endif
unsigned long mmap_base; /* base of mmap area */
unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */
unsigned long task_size; /* size of task vm space */
unsigned long highest_vm_end; /* highest vma end address */
pgd_t * pgd; /* base of the page directory */
atomic_t mm_users; /* How many users with user space? */
atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */
atomic_long_t nr_ptes; /* PTE page table pages */
int map_count; /* number of VMAs */
spinlock_t page_table_lock; /* Protects page tables and some counters */
/* NOTE(review): presumably the lock serializing changes to the VMA list and
 * tree (vm_area_struct comments mention mmap_sem as a serializer) -- confirm. */
struct rw_semaphore mmap_sem;
struct list_head mmlist; /* List of maybe swapped mm's. These are globally strung
* together off init_mm.mmlist, and are protected
* by mmlist_lock
*/
unsigned long hiwater_rss; /* High-watermark of RSS usage */
unsigned long hiwater_vm; /* High-water virtual memory usage */
unsigned long total_vm; /* Total pages mapped */
unsigned long locked_vm; /* Pages that have PG_mlocked set */
unsigned long pinned_vm; /* Refcount permanently increased */
unsigned long shared_vm; /* Shared pages (files) */
unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE */
unsigned long stack_vm; /* VM_GROWSUP/DOWN */
unsigned long def_flags;
/* NOTE(review): the names suggest the bounds of the code/data segments, the
 * heap (brk), the stack start and the argument/environment areas -- confirm. */
unsigned long start_code, end_code, start_data, end_data;
unsigned long start_brk, brk, start_stack;
unsigned long arg_start, arg_end, env_start, env_end;
unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
/*
* Special counters, in some configurations protected by the
* page_table_lock, in other configurations by being atomic.
*/
struct mm_rss_stat rss_stat;
struct linux_binfmt *binfmt;
cpumask_var_t cpu_vm_mask_var;
/* Architecture-specific MM context */
mm_context_t context;
unsigned long flags; /* Must use atomic bitops to access the bits */
struct core_state *core_state; /* coredumping support */
struct user_namespace *user_ns;
/* store ref to file /proc/<pid>/exe symlink points to */
struct file __rcu *exe_file;
struct uprobes_state uprobes_state;
};
其中,描述各地址段的描述符(vma)同时以链表和红黑树两种形式组织,分别方便遍历和查找。链表按地址从小到大的顺序链接,mmap 指向第一个 vma;每个 vm_area_struct 都表示一个拥有相同权限的连续地址段。
struct vm_area_struct *mmap; /* list of VMAs (linked-list representation) */
struct rb_root mm_rb; /* red-black tree representation */
/*
 * Describes one contiguous range of a process address space whose pages all
 * share the same permissions. Each VMA is linked into its mm's
 * address-sorted list (vm_next/vm_prev) and carries an rbtree node (vm_rb).
 */
struct vm_area_struct {
/* The first cache line has the info for VMA tree walking. */
unsigned long vm_start; /* Our start address within vm_mm. */
unsigned long vm_end; /* The first byte after our end address
within vm_mm. */
/* linked list of VM areas per task, sorted by address */
struct vm_area_struct *vm_next, *vm_prev;
struct rb_node vm_rb;
/*
* Largest free memory gap in bytes to the left of this VMA.
* Either between this VMA and vma->vm_prev, or between one of the
* VMAs below us in the VMA rbtree and its ->vm_prev. This helps
* get_unmapped_area find a free area of the right size.
*/
unsigned long rb_subtree_gap;
/* Second cache line starts here. */
struct mm_struct *vm_mm; /* The address space we belong to. */
pgprot_t vm_page_prot; /* Access permissions of this VMA. */
unsigned long vm_flags; /* Flags, see mm.h. */
/*
* For areas with an address space and backing store,
* linkage into the address_space->i_mmap interval tree.
*/
struct {
struct rb_node rb;
unsigned long rb_subtree_last;
} shared;
/*
* A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
* list, after a COW of one of the file pages. A MAP_SHARED vma
* can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
* or brk vma (with NULL file) can only be in an anon_vma list.
*/
struct list_head anon_vma_chain; /* Serialized by mmap_sem &
* page_table_lock */
struct anon_vma *anon_vma; /* Serialized by page_table_lock */
/* Function pointers to deal with this struct. */
const struct vm_operations_struct *vm_ops;
/* Information about our backing store: */
unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE
units, *not* PAGE_CACHE_SIZE */
struct file * vm_file; /* File we map to (can be NULL). */
void * vm_private_data; /* was vm_pte (shared mem) */
#ifndef CONFIG_MMU
struct vm_region *vm_region; /* NOMMU mapping region */
#endif
#ifdef CONFIG_NUMA
struct mempolicy *vm_policy; /* NUMA policy for the VMA */
#endif
struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
};
mm_struct 中还有一个成员用来描述页目录的基地址:
pgd_t * pgd; // per the original note: usually an integer type (unsigned long int)
2.进程地址区间查看
在 proc 文件系统中可以查看进程的地址区间:进入 /proc/<进程id> 目录,执行 cat maps 即可查看:
3.关于地址区间的查找
/*
 * Look up the VMA that contains addr or, if there is none, the first VMA
 * whose start address lies above addr.
 */
struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
/*
 * Variant of the lookup that also reports, through *pprev, a pointer to the
 * VMA that precedes addr.
 * NOTE(review): the original note said the return value is the first VMA
 * below addr; in mainline kernels the return value matches find_vma() and
 * only *pprev is the preceding VMA -- confirm for the kernel version in use.
 */
struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
struct vm_area_struct **pprev);
/*
 * Return the first VMA that intersects the region start_addr..end_addr.
 * (Only the signature is quoted here; the inline body is omitted.)
 */
static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr)
4.创建删除地址区间mmap与do_mmap
内核使用 do_mmap 创建一个新的线性地址区间(这里说的是线性地址区间,不一定会新建 vma:如果新区间与已有的 vma 相邻或相交且属性相同,就会被合并到该进程地址空间中原有的 vma 里):
/*
 * Create a new linear address range in an address space.
 *
 * file   file to map; 0 means an anonymous mapping not backed by any file
 * pgoff  offset within the file (called "offset" in the original note;
 *        NOTE(review): presumably in pages -- confirm)
 * len    length of the range in bytes
 * addr   optional; where to start searching for a free range
 * prot   access permissions (read/write/execute), defined in mman.h
 * flags  mapping type / behaviour modifiers, defined in mman.h
 *
 * vm_flags and populate are not described by the original note.
 *
 * Per the original note a negative value is returned for invalid arguments
 * or when the mapping fails. NOTE(review): the return type is unsigned long,
 * so the error is presumably an encoded errno value -- confirm.
 */
unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,
vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate);
如果新创建的区间不能合并到原有区间,则从 slab 中分配一个新的 vm_area_struct 结构体,使用 vma_link() 函数将新分配的区域添加到地址空间,并更新 total_vm、mmap、mm_rb 等成员。
/*
 * Used by vma_adjust() when split_vma needs a VMA inserted: hook @vma into
 * @mm's VMA list and rbtree and bump the VMA count. The VMA is expected to
 * be in the interval tree already.
 *
 * The slot is located with find_vma_links() over [vm_start, vm_end); a
 * non-zero result from that lookup is treated as fatal (BUG()).
 */
static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
{
	struct rb_node **link;
	struct rb_node *parent;
	struct vm_area_struct *before;

	if (find_vma_links(mm, vma->vm_start, vma->vm_end,
			   &before, &link, &parent)) {
		BUG();
	}

	__vma_link(mm, vma, before, link, parent);
	mm->map_count += 1;
}
应用程序通过 mmap 系统调用最终调用到内核的 do_mmap 函数;在较新的 Linux 内核中,mmap 系统调用的入口已经改为 mmap2。mmap 映射函数的使用方式可以通过 man 命令查看。
/* Kernel-side entry point of the mmap2 system call (declaration only). */
asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long pgoff);
应用程序使用 munmap() 系统调用(内核入口为 sys_munmap),最终由内核函数 do_munmap() 删除地址区间:
/* User-space call for removing a mapping. */
int munmap(void *addr, size_t length);
long sys_munmap(unsigned long addr, size_t len);// passes the current process's mm to do_munmap
/* Kernel routine that removes the address range from the given mm. */
int do_munmap(struct mm_struct *, unsigned long, size_t);
5.在字符设备中实现mmap调用
字符设备的 file_operations 函数集中包含 mmap 指针,驱动实现该函数后,应用程序在使用该字符设备时就可以通过 mmap 进行映射:
int test_mmap(struct file *f, struct vm_area_struct *vma)
{
int ret=0;
struct dev_test*p = f->private_data;
printk("test_mmap virt_to_phys(p->buffer) = %0x\n",virt_to_phys(p->buffer));
long addr = virt_to_phys(p->buffer) >> PAGE_SHIFT ;
ret = remap_pfn_range(vma, vma->vm_start,
addr,
BUFFER_SIZE, vma->vm_page_prot);
return ret;
}
这里主要是使用 remap_pfn_range函数:
/*
 * remap_pfn_range - map a range of page frames into a user VMA.
 * @vma:  user VMA to map into
 * @addr: user virtual address at which the mapping starts
 * @pfn:  first page frame number to map
 * @size: size of the area in bytes; rounded up to a whole number of pages
 * @prot: page protection for the new mapping
 *
 * For copy-on-write mappings the range must cover the VMA exactly, and the
 * starting pfn is recorded in vma->vm_pgoff. The VMA is flagged
 * VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP.
 *
 * Returns 0 on success, -EINVAL if the COW check or track_pfn_remap() fails,
 * or the error reported while filling in the page tables.
 */
int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
		    unsigned long pfn, unsigned long size, pgprot_t prot)
{
	pgd_t *pgd;
	unsigned long next;
	unsigned long end = addr + PAGE_ALIGN(size);
	struct mm_struct *mm = vma->vm_mm;
	/*
	 * Keep the caller's pfn: 'pfn' is biased by addr >> PAGE_SHIFT below,
	 * and the tracking calls must see the unbiased value.
	 */
	unsigned long remap_pfn = pfn;
	int err;

	if (is_cow_mapping(vma->vm_flags)) {
		if (addr != vma->vm_start || end != vma->vm_end)
			return -EINVAL;
		vma->vm_pgoff = pfn;
	}

	err = track_pfn_remap(vma, &prot, remap_pfn, addr, PAGE_ALIGN(size));
	if (err)
		return -EINVAL;

	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;

	BUG_ON(addr >= end);
	/* Bias pfn so that pfn + (addr >> PAGE_SHIFT) yields the frame for addr. */
	pfn -= addr >> PAGE_SHIFT;
	pgd = pgd_offset(mm, addr);
	flush_cache_range(vma, addr, end);
	do {
		next = pgd_addr_end(addr, end);
		err = remap_pud_range(mm, pgd, addr, next,
				      pfn + (addr >> PAGE_SHIFT), prot);
		if (err)
			break;
	} while (pgd++, addr = next, addr != end);

	/* Undo the tracking with the same pfn that was passed to track_pfn_remap(). */
	if (err)
		untrack_pfn(vma, remap_pfn, PAGE_ALIGN(size));

	return err;
}
EXPORT_SYMBOL(remap_pfn_range);
这里注意:unsigned long end = addr + PAGE_ALIGN(size); 结束地址等于 addr 加上按页向上对齐后的 size,所以实际映射的长度可能会比 size 大一些,并且映射长度一定是页大小的整数倍。
参考资料:《linux内核设计与实现》