上一篇文章详细介绍了Linux内存映射之mmap,这篇文章主要描述mmap的具体实现,我的编译环境是2.6.35.6内核版本。在我的例子中,分别采用了remap_pfn_range与nopage两种建立内存映射的方式(在2.6.35内核中,nopage方法已被vm_operations_struct的fault方法取代,下面的代码实现的是fault回调)。
内核驱动程序:
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/kernel.h> /* printk() */
#include <linux/slab.h> /* kmalloc() */
#include <linux/fs.h> /* everything... */
#include <linux/errno.h> /* error codes */
#include <linux/types.h> /* size_t */
#include <linux/mm.h>
#include <linux/kdev_t.h>
#include <asm/page.h>
#include <linux/cdev.h>
#include <linux/device.h>
/*
 * Major number used for both devices. Zero (the implicit initial value of a
 * static) makes simple_init() ask for a dynamically allocated major; a
 * non-zero value can be supplied as a module parameter at load time.
 */
static int simple_major;
module_param(simple_major, int, 0);
/*
 * open() handler shared by both devices.
 * There is no per-open state to set up, so it always reports success (0).
 */
static int simple_open(struct inode *inode, struct file *filp)
{
	return 0;
}
/*
 * release() handler shared by both devices.
 * Nothing was acquired in simple_open(), so there is nothing to undo here;
 * it always reports success (0).
 */
static int simple_release(struct inode *inode, struct file *filp)
{
	return 0;
}
void simple_vma_open(struct vm_area_struct *vma)
{
printk(KERN_NOTICE "Simple VMA open, virt %lx, phys %lx\n",
vma->vm_start, vma->vm_pgoff << PAGE_SHIFT);
}
/*
 * VMA close callback: only logs that the area is going away; this driver
 * keeps no per-VMA state that would need releasing.
 */
void simple_vma_close(struct vm_area_struct *vma)
{
	printk(KERN_NOTICE "Simple VMA close.\n");
}
/*
 * VMA callbacks for the remap_pfn_range() device. No fault handler is
 * installed here; simple_remap_mmap() maps the whole range up front.
 */
static struct vm_operations_struct simple_remap_vm_ops = {
	.close	= simple_vma_close,
	.open	= simple_vma_open,
};
/*
 * mmap() handler for device 0: maps the whole requested range in one go with
 * remap_pfn_range(), using vm_pgoff as the first page frame number.
 *
 * Returns 0 on success, or -EAGAIN if remap_pfn_range() reports any failure.
 */
static int simple_remap_mmap(struct file *filp, struct vm_area_struct *vma)
{
	unsigned long length = vma->vm_end - vma->vm_start;
	int rc;

	rc = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, length,
			     vma->vm_page_prot);
	if (rc != 0)
		return -EAGAIN;

	vma->vm_ops = &simple_remap_vm_ops;
	/* Run the open callback by hand for this first mapping. */
	simple_vma_open(vma);
	return 0;
}
/************************************************************
当访问的页面不在内存,即发生缺页中断时,nopage就会被调用。
这是因为,当发生缺页中断时,系统会经过如下处理过程:
1.找到缺页的虚拟地址所在的VMA。
2.如果必要,分配中间页目录表和页表。
3.如果页表项对应的物理页面不存在,则调用nopage函数,它返回物理页面的页描述符。
4.将物理页面的地址填充到页表中。
**********************************************************************/
int simple_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct page *pageptr;
unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
unsigned long physaddr = (unsigned long)vmf->virtual_address - vma->vm_start+offset;
unsigned long pageframe = physaddr >> PAGE_SHIFT;
printk (KERN_NOTICE "---- Nopage, off %lx phys %lx\n", offset, physaddr);
printk (KERN_NOTICE "VA is %p\n", __va (physaddr));
printk (KERN_NOTICE "Page at %p\n", virt_to_page (__va (physaddr)));
if(!pfn_valid(pageframe))
return 0;
pageptr = pfn_to_page(pageframe);
printk (KERN_NOTICE "page->index = %ld mapping %p\n", pageptr->index, pageptr->mapping);
printk (KERN_NOTICE "Page frame %ld\n", pageframe);
get_page(pageptr);
vmf->page = pageptr;
return 0;
}
/*
 * VMA callbacks for the fault-driven device: same open/close logging as the
 * remap device, plus the fault handler that supplies pages on demand.
 */
static struct vm_operations_struct simple_nopage_vm_ops = {
	.fault	= simple_vma_fault,
	.close	= simple_vma_close,
	.open	= simple_vma_open,
};
/*
 * mmap() handler for device 1: no pages are mapped here; it only tags the VMA
 * and installs simple_nopage_vm_ops so that pages are supplied by the fault
 * handler.
 *
 * VM_RESERVED is always set. VM_IO is set as well when the mapping offset is
 * at or beyond __pa(high_memory), or when the file was opened with O_SYNC.
 *
 * Always returns 0.
 */
static int simple_nopage_mmap(struct file *filp, struct vm_area_struct *vma)
{
	unsigned long phys_start = vma->vm_pgoff << PAGE_SHIFT;
	unsigned long new_flags = VM_RESERVED;

	if ((filp->f_flags & O_SYNC) || phys_start >= __pa(high_memory))
		new_flags |= VM_IO;
	vma->vm_flags |= new_flags;

	vma->vm_ops = &simple_nopage_vm_ops;
	/* Run the open callback by hand for this first mapping. */
	simple_vma_open(vma);
	return 0;
}
static void simple_setup_cdev(struct cdev *dev, int minor, struct file_operations *fops)
{
int err, devno = MKDEV(simple_major, minor);
cdev_init(dev, fops);
dev->owner = THIS_MODULE;
dev->ops= fops;
err = cdev_add(dev, devno, 1);
if(err){
printk (KERN_NOTICE "Error %d adding simple%d", err, minor);
}
}
/*************************************************
当用户调用mmap系统调用时,内核会进行如下处理:
1.在进程的虚拟空间查找一块VMA.
2.将这块VMA进行映射.
3.如果设备驱动程序中定义了mmap函数,则调用它.
4.将这个VMA插入到进程的VMA链表中.
内存映射工作大部分由内核完成,驱动程序中的mmap函数只需要为该地址范围建立合适的页表,
并将vma->vm_ops替换为一系列的新操作就可以了。
有两种建立页表的方法:一是使用remap_pfn_range函数一次性全部建立,
二是通过nopage方法(较新的内核中为fault方法)在发生缺页时逐页建立。
**************************************************/
/*
 * File operations for device 0 (minor 0): its mmap() maps the whole range
 * immediately via remap_pfn_range().
 */
static struct file_operations simple_remap_ops = {
	.owner		= THIS_MODULE,
	.mmap		= simple_remap_mmap,
	.open		= simple_open,
	.release	= simple_release,
};
/*
 * File operations for device 1 (minor 1): its mmap() defers page setup to
 * the VMA fault handler ("nopage" style).
 */
static struct file_operations simple_nopage_ops = {
	.owner		= THIS_MODULE,
	.mmap		= simple_nopage_mmap,
	.open		= simple_open,
	.release	= simple_release,
};
/* Number of character devices provided by this module. */
#define MAX_SIMPLE_DEV 2
/*
 * One cdev per device: simple_init() registers index 0 with the
 * remap_pfn_range operations and index 1 with the nopage operations.
 */
static struct cdev SimpleDevs[MAX_SIMPLE_DEV];
/*
 * Module entry point: reserve one char device number per entry of SimpleDevs
 * and register the two devices.
 *
 * If simple_major is non-zero that major is requested statically; otherwise
 * a major is allocated dynamically and stored in simple_major.
 *
 * Returns 0 on success, or the negative error code from the device number
 * registration. Failures inside simple_setup_cdev() are not propagated,
 * because that helper returns nothing.
 */
static int simple_init(void)
{
	/* Derive the count from the array so the two can never disagree. */
	const unsigned int count = sizeof(SimpleDevs) / sizeof(SimpleDevs[0]);
	dev_t dev = MKDEV(simple_major, 0);
	int result;

	if (simple_major)
		result = register_chrdev_region(dev, count, "simple");
	else
		result = alloc_chrdev_region(&dev, 0, count, "simple");

	if (result < 0) {
		printk(KERN_WARNING "simple: unable to get major %d\n", simple_major);
		return result;
	}

	/*
	 * Only read the major back once the reservation has succeeded. In the
	 * static case this is a no-op; in the dynamic case it records the major
	 * chosen by the kernel.
	 */
	simple_major = MAJOR(dev);

	simple_setup_cdev(SimpleDevs, 0, &simple_remap_ops);
	simple_setup_cdev(SimpleDevs + 1, 1, &simple_nopage_ops);
	return 0;
}
/*
 * Module exit point: remove both character devices, then give back the two
 * device numbers starting at (simple_major, 0).
 */
static void simple_cleanup(void)
{
	int i;

	for (i = 0; i < 2; i++)
		cdev_del(&SimpleDevs[i]);

	unregister_chrdev_region(MKDEV(simple_major, 0), 2);
}
/* Entry and exit points of the module. */
module_init(simple_init);
module_exit(simple_cleanup);

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Fang Xieyun");
测试程序:
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <string.h>
/*
 * User-space test for the driver: maps 100 bytes of /dev/simple, prints the
 * current contents as a string, writes a marker string through the mapping
 * and prints the contents again.
 *
 * Returns 0 on success and EXIT_FAILURE if the device cannot be opened, the
 * buffer cannot be allocated, or the mapping fails.
 */
int main(void )
{
	enum { BUF_LEN = 100 };
	int status = EXIT_FAILURE;
	char *start;
	char *buf;
	int fd;

	fd = open("/dev/simple",O_RDWR);
	if (fd < 0) {
		perror("open /dev/simple");
		return EXIT_FAILURE;
	}

	buf = calloc(1, BUF_LEN);
	if (buf == NULL) {
		perror("calloc");
		goto out_close;
	}

	start = mmap(NULL, BUF_LEN, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	if (start == MAP_FAILED) {
		perror("mmap");
		goto out_free;
	}

	/*
	 * The mapped memory is not guaranteed to hold a NUL within BUF_LEN
	 * bytes, so copy a bounded amount and terminate explicitly instead of
	 * using strcpy().
	 */
	memcpy(buf, start, BUF_LEN - 1);
	buf[BUF_LEN - 1] = '\0';
	sleep(1);
	printf("buf = %s\n",buf);

	strcpy(start,"Buf Is Not Null!");
	memset(buf, 0, BUF_LEN);
	memcpy(buf, start, BUF_LEN - 1);
	buf[BUF_LEN - 1] = '\0';
	sleep(1);
	printf("buf 2 = %s\n",buf);

	munmap(start, BUF_LEN);
	status = 0;

out_free:
	free(buf);
out_close:
	close(fd);
	return status;
}