系统调用mmap()通过映射一个普通文件实现共享内存。系统V则是通过映射特殊文件系统shm中的文件实现进程间的共享内存通信。也就是说,每个共享内存区域对应特殊文件系统shm中的一个文件(这是通过shmid_kernel结构联系起来的)。
/*
 * Kernel-side descriptor of one System V shared memory segment.
 * Each segment corresponds to one file in the special "shm" filesystem;
 * this structure is what links the segment to that file (via shm_file).
 */
struct shmid_kernel /* private to the kernel */
{
struct kern_ipc_perm shm_perm;
struct file * shm_file; // key field: sharing is achieved through this file
int id;
unsigned long shm_nattch; /* presumably the current attach count -- confirm */
unsigned long shm_segsz; /* presumably the segment size -- confirm */
time_t shm_atim;
time_t shm_dtim;
time_t shm_ctim;
pid_t shm_cprid;
pid_t shm_lprid;
};
sys_shmget创建一个shm对象,保存在全局数组shm_ids中,同时创建一个shm文件。
sys_shmat通过调用do_mmap在当前进程中创建一段线性地址区间vm_area(即下面的vm_area_struct):
/*
 * Excerpt of the descriptor for one linear-address region of a process.
 * Only the fields relevant to the shared-memory discussion are shown;
 * the remaining fields are omitted.
 */
struct vm_area_struct {
	struct mm_struct *vm_mm;	/* The address space we belong to. */
	unsigned long vm_start;		/* start of the linear address range */
	unsigned long vm_end;		/* end of the linear address range */
	/* ... other fields omitted ... */
	struct file *vm_file;		/* associated file (for shared memory
					   this is shmid_kernel->shm_file) */
};
比如进程A调用shmat(1, NULL, 0)映射的共享内存线性地址为vm_A, 进程B调用shmat(1, NULL, 0)映射的共享内存线性地址为vm_B。当进程A第一次访问vm_A中的地址时,会产生一个缺页中断(因为物理地址还没映射),内核会调用do_no_page为其映射一页物理地址。下面我们看看进程B是如何把它映射到同一页物理地址的:
/*
 * Fault handler for a page that has no mapping yet (excerpt: the tail of
 * the function is elided in this listing).
 *
 * mm           - address space in which the fault happened
 * vma          - linear-address region containing the faulting address
 * address      - faulting linear address
 * write_access - forwarded to do_anonymous_page() in the anonymous case
 * page_table   - page table entry for the faulting address
 */
static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
unsigned long address, int write_access, pte_t *page_table)
{
struct page * new_page;
pte_t entry;
/* No nopage handler on this area: handle it as anonymous memory. */
if (!vma->vm_ops || !vma->vm_ops->nopage)
return do_anonymous_page(mm, vma, page_table, write_access, address);
/* Drop the page table lock (presumably taken by the caller -- not
 * visible here) before calling into the area's handler. */
spin_unlock(&mm->page_table_lock);
new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, 0); // obtain the physical page for the masked fault address
....................................
}
/*
 * Look up page number idx of the given shm inode (excerpt: everything
 * after the initial lookup, including the "done" and "failed" labels,
 * is elided in this listing).
 *
 * inode - inode whose mapping is searched
 * idx   - page index within the file
 * pagep - in: optional page supplied by the caller (see the comment
 *         before "repeat"); presumably also receives the result -- the
 *         code that stores it is not visible here
 * sgp   - lookup mode; its use is not visible in this excerpt
 *
 * Jumps to "failed" with error set to -EFBIG when idx is at or beyond
 * SHMEM_MAX_INDEX.
 */
static int shmem_getpage(struct inode *inode, unsigned long idx, struct page **pagep, enum sgp_type sgp)
{
struct address_space *mapping = inode->i_mapping; // address space belonging to shm_file
struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_sb_info *sbinfo;
struct page *filepage = *pagep;
struct page *swappage;
swp_entry_t *entry;
swp_entry_t swap;
int error = 0;
if (idx >= SHMEM_MAX_INDEX) {
error = -EFBIG;
goto failed;
}
/*
* Normally, filepage is NULL on entry, and either found
* uptodate immediately, or allocated and zeroed, or read
* in under swappage, which is then assigned to filepage.
* But shmem_readpage and shmem_prepare_write pass in a locked
* filepage, which may be found not uptodate by other callers
* too, and may need to be copied from the swappage read in.
*/
repeat:
if (!filepage)
filepage = find_lock_page(mapping, idx); // look up by page index whether the file's mapping already holds this page
if (filepage && Page_Uptodate(filepage)) // found and up to date: done; otherwise a new page is set up (elided below)
goto done;
.............................
}
mapping的定义:
/*
 * Per-object page bookkeeping: the pages belonging to the object
 * identified by host, kept on clean/dirty/locked lists, plus the
 * private and shared memory areas that map it.
 */
struct address_space {
struct list_head clean_pages; /* list of clean pages */
struct list_head dirty_pages; /* list of dirty pages */
struct list_head locked_pages; /* list of locked pages */
unsigned long nrpages; /* number of total pages */
struct address_space_operations *a_ops; /* methods */
struct inode *host; /* owner: inode, block_device */
struct vm_area_struct *i_mmap; /* list of private mappings */
struct vm_area_struct *i_mmap_shared; /* list of shared mappings */
spinlock_t i_shared_lock; /* and spinlock protecting it */
int gfp_mask; /* how to allocate the pages */
};
clean_pages、dirty_pages、locked_pages是当前地址空间已映射的page,find_lock_page(mapping, idx)通过查找这些page确认是否已映射物理内存,查找的依据是page->mapping、page->index与(mapping, idx)的对应关系(实际查找经由页缓存哈希表完成,见下面struct page中的next_hash/pprev_hash)。
typedef struct page {
struct list_head list; /* ->mapping has some page lists. */
struct address_space *mapping; /* 当前页属于那个地址空间 */
unsigned long index; /* 当前页在地址空间的相对位置. */
struct page *next_hash; /* Next page sharing our hash bucket in
the pagecache hash table. */
atomic_t count; /* Usage count, see below. */
unsigned long flags; /* atomic flags, some possibly
updated asynchronously */
struct list_head lru; /* Pageout list, eg. active_list;
protected by pagemap_lru_lock !! */
struct page **pprev_hash; /* Complement to *next_hash. */
struct buffer_head * buffers; /* Buffer maps us to a disk block. */