mmap原理
mmap完成的是将物理内存映射到用户态虚拟内存,中间不需要任何的内存拷贝。文件映射实质上就是在内核中创建文件的时候,给文件挂上一个mmap钩子,下面将讲解mmap系统调用是如何一步步调用到文件的mmap钩子函数的。
首先是系统调用。由于mmap是对物理内存的映射,因此需要遵从MMU在不同CPU架构上的差异,这里选择了arm64架构的函数实现,系统调用函数入口在arch/arm64/kernel/sys.c中,不同架构的实现方式基本相同。对于不同的内核版本,有不同的实现方式:对于4.X内核,系统调用内部调用的是一个隐藏的系统调用函数sys_mmap_pgoff,实质上就是SYSCALL_DEFINE6(mmap_pgoff…),逻辑上没有差异;新版本内核也有该函数,调用的也是ksys_mmap_pgoff()。下面主要以5.12.1版本的内核进行讲解:
/*
 * mmap system call entry point (arm64 variant shown).
 *
 * @addr: address passed in from user space, used as the desired location of
 *        the mapping; usually NULL so that the kernel picks the address.
 * @len: size of the mapping in bytes. Later in the call chain (do_mmap) the
 *        length is rounded up to a whole number of pages, because a mapping
 *        always covers whole pages.
 * @prot: protection of the mapping, one or more of:
 *        PROT_EXEC:  pages may be executed;
 *        PROT_READ:  pages may be read;
 *        PROT_WRITE: pages may be written;
 *        PROT_NONE:  pages may not be accessed.
 * @flags: mapping flags. They decide whether updates are visible to other
 *        processes mapping the same region and whether they are carried
 *        through to the underlying file. Commonly used values are MAP_SHARED,
 *        MAP_PRIVATE and MAP_HUGETLB; see the mmap manual page for details.
 * @fd: file descriptor obtained from open().
 * @off: offset of the mapping in BYTES, usually 0. It must be a multiple of
 *        the page size (the check below rejects anything else); with 4K
 *        pages the valid values are 0, 4096, 8192, ...
 * @return: the result of ksys_mmap_pgoff(), or -EINVAL for an unaligned
 *        offset. NOTE(review): the MAP_FAILED/errno convention described in
 *        the manual page is presumably applied by the C library wrapper, not
 *        by this function - confirm.
 */
SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags,
unsigned long, fd, unsigned long, off)
{
/* The byte offset must be page-aligned. */
if (offset_in_page(off) != 0)
return -EINVAL;
/* Hand over to the common helper, converting the byte offset to pages. */
return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
}
/*
 * Common mmap helper that takes the file offset in pages (pgoff).
 *
 * For a file-backed request it resolves fd to a struct file; for an anonymous
 * MAP_HUGETLB request it creates a hugetlb file to back the mapping. It then
 * calls vm_mmap_pgoff() and drops the file reference taken here before
 * returning. Returns the value of vm_mmap_pgoff() or a negative errno.
 */
unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long pgoff)
{
struct file *file = NULL;
unsigned long retval;
if (!(flags & MAP_ANONYMOUS)) {// file-backed mapping (MAP_ANONYMOUS is not set)
audit_mmap_fd(fd, flags);
file = fget(fd);
if (!file)
return -EBADF;
if (is_file_hugepages(file)) {// huge-page file: round len up to the huge page size
len = ALIGN(len, huge_page_size(hstate_file(file)));
} else if (unlikely(flags & MAP_HUGETLB)) {
/* MAP_HUGETLB on a file that is not a huge-page file is rejected. */
retval = -EINVAL;
goto out_fput;
}
} else if (flags & MAP_HUGETLB) {// anonymous huge-page mapping
struct user_struct *user = NULL;
struct hstate *hs;
/* The huge page size is encoded in the flag bits above MAP_HUGE_SHIFT. */
hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
if (!hs)
return -EINVAL;
len = ALIGN(len, huge_page_size(hs));
/*
* VM_NORESERVE is used because the reservations will be
* taken when vm_ops->mmap() is called
* A dummy user value is used because we are not locking
* memory so no accounting is necessary
*/
file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
VM_NORESERVE,
&user, HUGETLB_ANONHUGE_INODE,
(flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
if (IS_ERR(file))
return PTR_ERR(file);
}
flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);// these two flags are ignored
retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
out_fput:
/* Drop the reference taken by fget()/hugetlb_file_setup() above. */
if (file)
fput(file);
return retval;
}
/*
 * Run the security check for the request, then perform the mapping with the
 * current process's mmap lock held for writing. After the lock is released,
 * pre-fault the range if do_mmap() asked for it through 'populate'.
 * Returns the value of do_mmap(), the error from security_mmap_file(), or
 * -EINTR when taking the lock was interrupted.
 */
unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot,
unsigned long flag, unsigned long pgoff)
{
unsigned long ret;
struct mm_struct *mm = current->mm;
unsigned long populate;
LIST_HEAD(uf);
/*
 * Security check of the file being mapped. Per the original author's note
 * this relies on the security hook for mmap_file and on ima_file_mmap to
 * collect and store the file measurement (neither is shown here).
 */
ret = security_mmap_file(file, prot, flag);
if (!ret) {
if (mmap_write_lock_killable(mm))
return -EINTR;
// perform the actual mmap work
ret = do_mmap(file, addr, len, prot, flag, pgoff, &populate,
&uf);
mmap_write_unlock(mm);
userfaultfd_unmap_complete(mm, &uf);
/* do_mmap() sets populate to a non-zero length when pre-faulting is wanted. */
if (populate)
mm_populate(ret, populate);
}
return ret;
}
/*
 * Validate an mmap request, translate prot/flags into vm_flags, choose the
 * target address and hand the request to mmap_region().
 *
 * On return *populate is len when the range should be pre-faulted by the
 * caller (VM_LOCKED, or MAP_POPULATE without MAP_NONBLOCK) and 0 otherwise.
 * Returns the mapped address or a negative errno encoded in the address.
 * Parts of the file-backed switch are elided ("...") in this excerpt.
 */
unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot,
unsigned long flags, unsigned long pgoff,
unsigned long *populate, struct list_head *uf)
{
struct mm_struct *mm = current->mm;
vm_flags_t vm_flags;
int pkey = 0;
*populate = 0;
if (!len)
return -EINVAL;
/*
* Does the application expect PROT_READ to imply PROT_EXEC?
*
* (the exception is when the underlying filesystem is noexec
* mounted, in which case we dont add PROT_EXEC.)
*/
if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
if (!(file && path_noexec(&file->f_path)))
prot |= PROT_EXEC;
/* force arch specific MAP_FIXED handling in get_unmapped_area */
if (flags & MAP_FIXED_NOREPLACE)
flags |= MAP_FIXED;
if (!(flags & MAP_FIXED))
addr = round_hint_to_min(addr);
/* Careful about overflows.. */
len = PAGE_ALIGN(len);// round len up to whole pages; a wrap to 0 is rejected below
if (!len)
return -ENOMEM;
/* offset overflow? */
if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)// reject when pgoff plus the page count wraps around
return -EOVERFLOW;
/* Too many mappings? */
if (mm->map_count > sysctl_max_map_count)// mm->map_count is bounded by sysctl_max_map_count
return -ENOMEM;
/* Obtain the address to map to. we verify (or select) it and ensure
* that it represents a valid section of the address space.
*/
addr = get_unmapped_area(file, addr, len, pgoff, flags);// find a range that is not mapped yet
if (IS_ERR_VALUE(addr))
return addr;
/* MAP_FIXED_NOREPLACE: fail instead of replacing an existing mapping. */
if (flags & MAP_FIXED_NOREPLACE) {
struct vm_area_struct *vma = find_vma(mm, addr);
if (vma && vma->vm_start < addr + len)
return -EEXIST;
}
/* Execute-only request: try to get a protection key, fall back to key 0. */
if (prot == PROT_EXEC) {
pkey = execute_only_pkey(mm);
if (pkey < 0)
pkey = 0;
}
/* Do simple checking here so the lower-level routines won't have
* to. we assume access permissions have been handled by the open
* of the memory object, so we don't do any here.
*/
vm_flags = calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
if (flags & MAP_LOCKED)
if (!can_do_mlock())
return -EPERM;
if (mlock_future_check(mm, vm_flags, len))
return -EAGAIN;
if (file) {
struct inode *inode = file_inode(file);
unsigned long flags_mask;
if (!file_mmap_ok(file, inode, pgoff, len))// check from file and inode whether this range may be mapped
return -EOVERFLOW;
flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags;
switch (flags & MAP_TYPE) {
case MAP_SHARED:
/*
* Force use of MAP_SHARED_VALIDATE with non-legacy
* flags. E.g. MAP_SYNC is dangerous to use with
* MAP_SHARED as you don't know which consistency model
* you will get. We silently ignore unsupported flags
* with MAP_SHARED to preserve backward compatibility.
*/
flags &= LEGACY_MAP_MASK;
fallthrough;
case MAP_SHARED_VALIDATE:
...
case MAP_PRIVATE:
...
break;
default:
return -EINVAL;
}
} else {// no struct file (anonymous mapping): only MAP_SHARED and MAP_PRIVATE are accepted
switch (flags & MAP_TYPE) {
case MAP_SHARED:
if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
return -EINVAL;
/*
* Ignore pgoff.
*/
pgoff = 0;
vm_flags |= VM_SHARED | VM_MAYSHARE;
break;
case MAP_PRIVATE:
/*
* Set pgoff according to addr for anon_vma.
*/
pgoff = addr >> PAGE_SHIFT;
break;
default:
return -EINVAL;
}
}
/*
* Set 'VM_NORESERVE' if we should not account for the
* memory use of this mapping.
*/
if (flags & MAP_NORESERVE) {
/* We honor MAP_NORESERVE if allowed to overcommit */
if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
vm_flags |= VM_NORESERVE;
/* hugetlb applies strict overcommit unless MAP_NORESERVE */
if (file && is_file_hugepages(file))
vm_flags |= VM_NORESERVE;
}
addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);// the mapping of the region is created here
/* Ask the caller to pre-fault: locked mapping, or MAP_POPULATE without MAP_NONBLOCK. */
if (!IS_ERR_VALUE(addr) &&
((vm_flags & VM_LOCKED) ||
(flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
*populate = len;
return addr;
}
/*
 * Create the user-space virtual memory area for the mapping: allocate a
 * struct vm_area_struct and record the start address, length, flags and page
 * offset in it. When a struct file is present, the mmap hook attached to that
 * file is invoked through call_mmap() so that the file's owner can implement
 * its own mapping logic - this is where a driver can map physical memory it
 * created in the kernel into user space.
 *
 * Returns the mapped address, or a negative errno after unwinding through the
 * labels at the end of the function. Two bodies are elided ("...", "..") in
 * this excerpt.
 */
unsigned long mmap_region(struct file *file, unsigned long addr,
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
struct list_head *uf)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma, *prev, *merge;
int error;
struct rb_node **rb_link, *rb_parent;
unsigned long charged = 0;
/* Check against address space limit. */
if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
...
}
/* Clear old mappings, set up prev, rb_link, rb_parent, and uf */
if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf))
return -ENOMEM;
/*
* Private writable mapping: check memory availability
*/
if (accountable_mapping(file, vm_flags)) {
..
}
/*
 * Can an existing mapping simply be expanded (merged) to cover the request?
 */
vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX);
if (vma)
goto out;
/*
* Determine the object being mapped and call the appropriate
* specific mapper. the address has already been validated, but
* not unmapped, but the maps are removed from the list.
*/
vma = vm_area_alloc(mm);
if (!vma) {
error = -ENOMEM;
goto unacct_error;
}
/* Describe the new region in the freshly allocated VMA. */
vma->vm_start = addr;
vma->vm_end = addr + len;
vma->vm_flags = vm_flags;
vma->vm_page_prot = vm_get_page_prot(vm_flags);
vma->vm_pgoff = pgoff;
if (file) {
if (vm_flags & VM_DENYWRITE) {
error = deny_write_access(file);
if (error)
goto free_vma;
}
if (vm_flags & VM_SHARED) {
error = mapping_map_writable(file->f_mapping);
if (error)
goto allow_write_and_free_vma;
}
/* ->mmap() can change vma->vm_file, but must guarantee that
* vma_link() below can deny write-access if VM_DENYWRITE is set
* and map writably if VM_SHARED is set. This usually means the
* new file must not have been exposed to user-space, yet.
*/
vma->vm_file = get_file(file);// store the file in the VMA (get_file() result)
/*
 * Call the mmap hook attached to the file. This enters the mmap member of
 * the file's struct file_operations, which performs the custom mapping;
 * see the example later in this document for a concrete implementation.
 */
error = call_mmap(file, vma);
if (error)
goto unmap_and_free_vma;
/* Can addr have changed??
*
* Answer: Yes, several device drivers can do it in their
* f_op->mmap method. -DaveM
* Bug: If addr is changed, prev, rb_link, rb_parent should
* be updated for vma_link()
*/
WARN_ON_ONCE(addr != vma->vm_start);
addr = vma->vm_start;
/* If vm_flags changed after call_mmap(), we should try merge vma again
* as we may succeed this time.
*/
if (unlikely(vm_flags != vma->vm_flags && prev)) {
merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags,
NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX);
if (merge) {
/* ->mmap() can change vma->vm_file and fput the original file. So
* fput the vma->vm_file here or we would add an extra fput for file
* and cause general protection fault ultimately.
*/
fput(vma->vm_file);
vm_area_free(vma);
vma = merge;
/* Update vm_flags to pick up the change. */
vm_flags = vma->vm_flags;
goto unmap_writable;
}
}
vm_flags = vma->vm_flags;
} else if (vm_flags & VM_SHARED) {
/* Shared anonymous mapping: back it through shmem_zero_setup(). */
error = shmem_zero_setup(vma);
if (error)
goto free_vma;
} else {
/* Private anonymous mapping. */
vma_set_anonymous(vma);
}
/* Allow architectures to sanity-check the vm_flags */
if (!arch_validate_flags(vma->vm_flags)) {
error = -EINVAL;
if (file)
goto unmap_and_free_vma;
else
goto free_vma;
}
vma_link(mm, vma, prev, rb_link, rb_parent);
/* Once vma denies write, undo our temporary denial count */
if (file) {
unmap_writable:
if (vm_flags & VM_SHARED)
mapping_unmap_writable(file->f_mapping);
if (vm_flags & VM_DENYWRITE)
allow_write_access(file);
}
file = vma->vm_file;
out:
perf_event_mmap(vma);
vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
if (vm_flags & VM_LOCKED) {
if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
is_vm_hugetlb_page(vma) ||
vma == get_gate_vma(current->mm))
vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
else
mm->locked_vm += (len >> PAGE_SHIFT);
}
if (file)
uprobe_mmap(vma);
/*
* New (or expanded) vma always get soft dirty status.
* Otherwise user-space soft-dirty page tracker won't
* be able to distinguish situation when vma area unmapped,
* then new mapped in-place (which must be aimed as
* a completely new data area).
*/
vma->vm_flags |= VM_SOFTDIRTY;
vma_set_page_prot(vma);
return addr;
/* Error unwinding: each label undoes one more step, then falls through. */
unmap_and_free_vma:
fput(vma->vm_file);
vma->vm_file = NULL;
/* Undo any partial mapping done by a device driver. */
unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
charged = 0;
if (vm_flags & VM_SHARED)
mapping_unmap_writable(file->f_mapping);
allow_write_and_free_vma:
if (vm_flags & VM_DENYWRITE)
allow_write_access(file);
free_vma:
vm_area_free(vma);
unacct_error:
if (charged)
vm_unacct_memory(charged);
return error;
}
mmap映射类型
上面先讲解了mmap的内核实现原理及其过程。由此可以看出,mmap映射可以按是否和文件关联来区分:当file参数为空时表示不关联文件,当file数据存在时表示关联上了文件。因此可以将Linux内核中的映射分为匿名映射(不关联文件的映射)和文件映射(需要内核文件的数据结构struct file_operations挂上mmap钩子,实现自定义逻辑),详述如下:
匿名映射
私有匿名映射
私有匿名映射通常用于内存分配。调用mmap时不需要open文件,fd参数传入-1,且flags为MAP_ANONYMOUS | MAP_PRIVATE时,创建的mmap映射就是私有匿名映射。私有匿名映射最常见的用途是glibc分配大块内存:通常情况下,malloc分配内存时先查找堆中可用的空闲部分(该部分不一定是连续的),不够的情况下,会通过brk系统调用扩展堆来分配剩余的部分;当分配的内存大于128KB(即MMAP_THRESHOLD)时,glibc会使用mmap代替默认的brk来分配内存。需要注意的是,小于128KB时,在第一次读写之前,用户态分配的内存只有虚拟内存,还不存在物理内存,第一次读写之后,才会通过伙伴系统分配对应的物理内存;当大于128KB时,不再使用brk而是通过mmap分配内存,此时得到的是一段独立的匿名映射区域,只有在映射时指定了MAP_POPULATE或MAP_LOCKED(见上文do_mmap()中对populate的设置),才会在映射完成后立即为其分配物理内存。
共享匿名映射
共享匿名映射通常用于进程间共享内存。调用mmap时fd参数传入-1,且flags为MAP_ANONYMOUS | MAP_SHARED时,创建的mmap映射就是共享匿名映射。共享匿名映射能够让相关进程共享一块内存,通常用于父子进程间通信,创建共享匿名映射的方式有以下两种:
(1)若没有通过open打开/dev/zero设备文件,且此时满足共享匿名映射的条件,则在do_mmap()->mmap_region()中,会通过shmem_zero_setup()函数来完成映射。这里实质上还是借助一个名为"dev/zero"的shmem文件来完成最终的映射,该方式映射出来的是一块可读写的内存区域。shmem_zero_setup函数及其相关实现如下:
/*
 * File operations installed on shmem files (see alloc_file_pseudo() in
 * __shmem_file_setup()).
 *
 * Per the original author's note, shmem_mmap handles the page-mapping logic
 * and also attaches the global struct vm_operations_struct shmem_vm_ops to
 * the vm_ops member of the struct vm_area_struct (shmem_mmap itself is not
 * shown here). shmem_zero_setup() attaches the same shmem_vm_ops directly, so
 * when the mapping is not made through the dev/zero file these hooks are of
 * little use.
 */
static const struct file_operations shmem_file_operations = {
.mmap = shmem_mmap,
.get_unmapped_area = shmem_get_unmapped_area,
/* The remaining file hooks are only built when tmpfs is configured. */
#ifdef CONFIG_TMPFS
.llseek = shmem_file_llseek,
.read_iter = shmem_file_read_iter,
.write_iter = generic_file_write_iter,
.fsync = noop_fsync,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = shmem_fallocate,
#endif
};
/*
 * Create an unlinked shmem file of the given size on mount 'mnt'.
 *
 * Returns the new struct file, or an ERR_PTR: the error carried by 'mnt',
 * -EINVAL for a size that is negative or above MAX_LFS_FILESIZE, -ENOMEM when
 * shmem_acct_size() fails, -ENOSPC when no inode could be obtained, or the
 * error from ramfs_nommu_expand_for_mapping()/alloc_file_pseudo().
 */
static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, loff_t size,
unsigned long flags, unsigned int i_flags)
{
struct inode *inode;
struct file *res;
if (IS_ERR(mnt))
return ERR_CAST(mnt);
if (size < 0 || size > MAX_LFS_FILESIZE)
return ERR_PTR(-EINVAL);
if (shmem_acct_size(flags, size))
return ERR_PTR(-ENOMEM);
// obtain an inode on the mount's superblock
inode = shmem_get_inode(mnt->mnt_sb, NULL, S_IFREG | S_IRWXUGO, 0,
flags);
if (unlikely(!inode)) {
/* Undo the size accounting done above. */
shmem_unacct_size(flags, size);
return ERR_PTR(-ENOSPC);
}
inode->i_flags |= i_flags;
inode->i_size = size;
clear_nlink(inode); /* It is unlinked */
res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size));
if (!IS_ERR(res))
res = alloc_file_pseudo(inode, mnt, name, O_RDWR,
&shmem_file_operations);// create an O_RDWR file for the inode whose hooks are shmem_file_operations
if (IS_ERR(res))
iput(inode);
return res;
}
/*
 * Create a shmem file on shm_mnt with the S_PRIVATE inode flag.
 *
 * Per the original author's note, shm_mnt is a global struct vfsmount
 * variable recording the mount information of the virtual file system
 * (mount root, superblock, mount user namespace, ...), with struct vfsmount
 * declared in include/linux/mount.h. NOTE(review): the definition of shm_mnt
 * is not visible in this excerpt - confirm where it lives.
 */
struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags)
{
return __shmem_file_setup(shm_mnt, name, size, flags, S_PRIVATE);
}
/*
 * Back a VMA with a newly created shmem file named "dev/zero".
 *
 * The file is sized to the VMA (vm_end - vm_start). Any file already attached
 * to the VMA is released, the new file is installed as vma->vm_file and
 * shmem_vm_ops becomes the VMA's vm_ops. Returns 0 on success or the error
 * from shmem_kernel_file_setup().
 */
int shmem_zero_setup(struct vm_area_struct *vma)
{
struct file *file;
loff_t size = vma->vm_end - vma->vm_start;
/*
* Cloning a new file under mmap_lock leads to a lock ordering conflict
* between XFS directory reading and selinux: since this file is only
* accessible to the user through its mapping, use S_PRIVATE flag to
* bypass file security, in the same way as shmem_kernel_file_setup().
*/
file = shmem_kernel_file_setup("dev/zero", size, vma->vm_flags);
if (IS_ERR(file))
return PTR_ERR(file);
/* Replace whatever file the VMA had with the new shmem file. */
if (vma->vm_file)
fput(vma->vm_file);
vma->vm_file = file;
vma->vm_ops = &shmem_vm_ops;
/*
 * With transparent huge pages enabled, register the VMA with khugepaged
 * when the range contains at least one PMD-aligned huge-page span.
 */
if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
(vma->vm_end & HPAGE_PMD_MASK)) {
khugepaged_enter(vma, vma->vm_flags);
}
return 0;
}
文件映射
文件映射实则上就是内核自定义了一个实现struct file_operations钩子的文件,且挂上了mmap。
私有文件映射
共享文件映射
mmap文件映射用例
内核态代码
#include <cgel.h>
#include <linux/gfp.h>
#include <linux/init_task.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <asm/uaccess.h>
/* Number of task_info entries requested by the most recent write(). */
static int task_num = 0;
/* task_num as of the previous write(); compared against it to detect a change. */
static int old_task_num = 0;
/* Buffer of task_info entries: filled by read(), exposed to user space by mmap(). */
static struct task_info *g_taskinfo;
/*
 * Count the digits needed to write the magnitude of @num in @base.
 *
 * @num:  value to measure; the sign is ignored (a '-' is not counted).
 * @base: radix, must be at least 2.
 *
 * Returns the number of digits (at least 1, since 0 is written as "0"), or 0
 * when @base is smaller than 2, where no positional representation exists.
 * The previous version divided by zero for base 0 and never terminated for
 * base 1. It also compared the signed value against the unsigned base, so
 * negative numbers were measured as huge unsigned values.
 */
static int get_bits(int num, unsigned int base)
{
	unsigned int remaining;
	int count = 1;

	if (base < 2)
		return 0;

	/* Unsigned negation yields the magnitude without overflowing on INT_MIN. */
	remaining = num < 0 ? 0u - (unsigned int)num : (unsigned int)num;

	while (remaining >= base) {
		remaining /= base;
		count++;
	}

	return count;
}
/* Number of decimal digits needed to print task_num. */
#define NUM_TASKINFO get_bits(task_num, 10)

/*
 * mmap() hook: map the task_info buffer into the calling process.
 *
 * @file: file being mapped (unused).
 * @vma:  VMA describing the requested user range.
 *
 * Returns 0 on success, -EINVAL when no buffer has been set up yet, -ERANGE
 * when the request has a non-zero offset or is larger than the buffer rounded
 * up to whole pages, or the error from remap_pfn_range().
 *
 * The previous check compared a page count against a byte count, so nearly
 * any length was accepted and memory beyond the buffer could be mapped.
 *
 * NOTE(review): remap_pfn_range() works on whole pages, so this assumes that
 * g_taskinfo is page-aligned and owns every page up to the rounded length -
 * verify against the allocator used for g_taskinfo.
 */
static int taskinfo_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long map_len = vma->vm_end - vma->vm_start;
	unsigned long buf_len;
	unsigned long pfn;

	if (!g_taskinfo || task_num <= 0)
		return -EINVAL;

	/* Mappings are page granular, so compare against the rounded-up size. */
	buf_len = PAGE_ALIGN((unsigned long)task_num * sizeof(struct task_info));

	/* The buffer is always mapped from its start; an offset would be ignored. */
	if (vma->vm_pgoff || map_len > buf_len) {
		pr_err("invalid mem range, size %lu, task size :%lu\n",
		       map_len, buf_len);
		return -ERANGE;
	}

	pfn = virt_to_phys(g_taskinfo) >> PAGE_SHIFT;

	return remap_pfn_range(vma, vma->vm_start, pfn, map_len,
			       vma->vm_page_prot);
}
/*
 * read() hook: snapshot up to task_num processes into g_taskinfo and return
 * the number of entries captured as a decimal string.
 *
 * @fs:  file being read (unused).
 * @buf: user buffer receiving the decimal count (digits only, no NUL).
 * @len: size of @buf in bytes.
 * @lf:  file position (unused, not advanced).
 *
 * Returns the number of bytes written to @buf, -EINVAL when no buffer has
 * been set up or @buf is too small, -ERANGE when init_task fails the address
 * check, or -EFAULT when the copy to user space fails.
 *
 * Fixes compared to the previous version:
 *  - entries are stored with array indexing; the old pointer expression
 *    multiplied by sizeof() a second time and wrote far out of bounds;
 *  - the loop stops before writing when the buffer is full (the old test ran
 *    through the whole process list when task_num was 0);
 *  - g_taskinfo is checked before use;
 *  - the process list is walked under rcu_read_lock();
 *  - the caller's @len is honoured and a fixed buffer replaces the VLA;
 *  - copy_to_user() returns the number of bytes NOT copied, never a negative
 *    value, so any non-zero result is reported as -EFAULT.
 */
static ssize_t get_taskinfo(struct file *fs,
		char __user *buf, size_t len, loff_t *lf)
{
	char statistic[16];	/* large enough for any int in decimal plus NUL */
	struct task_struct *task;
	int count = 0;
	int digits;

	if (!g_taskinfo || task_num <= 0)
		return -EINVAL;

	/*
	 * NOTE(review): access_ok() succeeds for user-space addresses, so these
	 * checks reject kernel objects that look like user pointers. They are
	 * kept from the original; confirm they are still wanted.
	 */
	if (unlikely(access_ok(VERIFY_READ, &init_task,
			       sizeof(init_task)))) {
		pr_err("init_task is in user space\n");
		return -ERANGE;
	}

	rcu_read_lock();
	for_each_process(task) {
		if (count >= task_num)
			break;
		if (unlikely(access_ok(VERIFY_READ, task,
				       sizeof(*task)))) {
			pr_err("task is in the user-mem space\n");
			continue;
		}
		memcpy(&g_taskinfo[count], &task->taskinfo,
		       sizeof(struct task_info));
		count++;
	}
	rcu_read_unlock();

	digits = snprintf(statistic, sizeof(statistic), "%d", count);
	if (len < (size_t)digits)
		return -EINVAL;

	if (copy_to_user(buf, statistic, digits)) {
		pr_err("Failed to copy to user, ret %d\n", -EFAULT);
		return -EFAULT;
	}

	return digits;
}
static ssize_t set_taskinfo(struct file *fs,
const char __user *buf, size_t len, loff_t *lf)
{
char num[len];
char *end;
int ret;
if (!buf) {
return -EINVAL;
}
memset(num, 0, len);
if((ret = copy_from_user(num, buf, len)) < 0) {
return ret;
}
task_num = simple_strtoll(num, &end, 10);
if (old_task_num != task_num) {
g_taskinfo = kzalloc(task_num * sizeof(struct task_info),
GFP_USER);
if (!g_taskinfo)
return -ENOMEM;
}
old_task_num = task_num;
return len;
}
static int taskinfo_close(struct inode *node, struct file *fs)
{
if (g_taskinfo)
kfree(g_taskinfo);
return 0;
}
/*
 * File operations of the task-info file: write() sets the number of entries,
 * read() fills the buffer and reports how many entries were captured, mmap()
 * exposes the buffer to user space and release() frees it.
 */
static struct file_operations opts = {
.owner = THIS_MODULE,
.open = simple_open,
.read = get_taskinfo,
.write = set_taskinfo,
.mmap = taskinfo_mmap,
.release = taskinfo_close,
};
用户态代码
编译Makefile
# Build the user-space taskinfo test program.
# CROSS_COMPILE may be set to a toolchain prefix (e.g. aarch64-linux-gnu-).
CROSS_COMPILE ?=
ARCH ?=
CC = $(CROSS_COMPILE)gcc
CFLAGS += -g
SRC += taskinfo.c
OUT = taskinfo

# all/clean are commands, not files.
.PHONY: all clean

all: $(OUT)

# Rebuild only when a source file changed.
$(OUT): $(SRC)
	$(CC) $(CFLAGS) $(SRC) -o $(OUT)

# Remove the built binary too, not just object files.
clean:
	rm -f *.o $(OUT)
源码实现如下
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
struct taskinfo {
int pid;
int tgid;
};
/*
 * Count the digits needed to write the magnitude of num in the given base.
 *
 * num:  value to measure; the sign is ignored (a '-' is not counted).
 * base: radix, must be at least 2.
 *
 * Returns the number of digits (at least 1, since 0 is written as "0"), or 0
 * when base is smaller than 2. The previous version truncated num to int,
 * compared it as unsigned (so negative values were measured as huge numbers),
 * divided by zero for base 0 and never terminated for base 1.
 */
static int get_bits(ssize_t num, unsigned int base)
{
	size_t remaining;
	int count = 1;

	if (base < 2)
		return 0;

	/* Unsigned negation yields the magnitude without signed overflow. */
	remaining = num < 0 ? (size_t)0 - (size_t)num : (size_t)num;

	while (remaining >= base) {
		remaining /= base;
		count++;
	}

	return count;
}
/*
 * Test driver for the task-info file.
 *
 * Usage: taskinfo <file> <task count>
 *
 * Writes the requested entry count to the file, reads back how many entries
 * the kernel captured, maps the buffer read-only and prints pid/tgid of each
 * captured entry.
 *
 * Returns 0 on success, -1 for bad arguments or a failed allocation, write
 * or read, -2 when the file cannot be opened and -3 when mmap fails.
 *
 * Fixes compared to the previous version:
 *  - the count string gets room for its terminator (the last digit used to be
 *    cut off) and is sent NUL-terminated;
 *  - the reply is read into a zeroed buffer, so it is terminated before it is
 *    parsed;
 *  - the mmap result itself is compared with MAP_FAILED (an uninitialised
 *    variable used to be tested);
 *  - entries are accessed by index; the old pointer update multiplied by
 *    sizeof() a second time and then passed the moved pointer to munmap();
 *  - every error path releases the descriptor and the heap buffer.
 */
int main (int argc, char ** argv)
{
	enum { MAX_TASKS = 1 << 20, READ_BUF_LEN = 32 };
	struct taskinfo *task = MAP_FAILED;
	char read_buf[READ_BUF_LEN] = {0};
	char *buf = NULL;
	char *end = NULL;
	char *file;
	long parsed;
	size_t map_len;
	ssize_t ret;
	int num;
	int fd;
	int i;
	int count;
	int task_num;
	int rc = 0;

	if (argc <= 2) {
		fprintf(stderr, "invalid arg\nusage: %s <file> <task count>\n",
			argc > 0 ? argv[0] : "taskinfo");
		return -1;
	}

	file = argv[1];
	parsed = strtol(argv[2], &end, 10);
	if (end == argv[2] || *end != '\0' || parsed <= 0 || parsed > MAX_TASKS) {
		fprintf(stderr, "invalid task count '%s'\n", argv[2]);
		return -1;
	}
	num = (int)parsed;
	map_len = (size_t)num * sizeof(*task);

	fd = open(file, O_RDWR);
	if (fd < 0) {
		printf("can't open %s\n", file);
		return -2;
	}

	/* One extra byte so that snprintf() can store the terminator. */
	count = get_bits(num, 10);
	buf = calloc((size_t)count + 1, 1);
	if (!buf) {
		perror("malloc failed");
		rc = -1;
		goto out;
	}
	snprintf(buf, (size_t)count + 1, "%d", num);

	/* Send the terminator too, so the receiver gets a complete string. */
	ret = write(fd, buf, (size_t)count + 1);
	if (ret < 0) {
		perror("write failed");
		rc = -1;
		goto out;
	}

	sleep(1);

	/* Leave the last byte untouched so the reply stays NUL-terminated. */
	ret = read(fd, read_buf, sizeof(read_buf) - 1);
	if (ret < 0) {
		perror("read failed");
		rc = -1;
		goto out;
	}

	parsed = strtol(read_buf, &end, 10);
	if (end == read_buf || parsed < 0) {
		fprintf(stderr, "unexpected reply from %s\n", file);
		rc = -1;
		goto out;
	}
	/* Never print more entries than were requested and mapped. */
	task_num = parsed < num ? (int)parsed : num;

	task = mmap(NULL, map_len, PROT_READ, MAP_SHARED, fd, 0);
	if (task == MAP_FAILED) {
		perror("mmap failed");
		rc = -3;
		goto out;
	}

	printf("pid\t\ttgid\n");
	for (i = 0; i < task_num; i++)
		printf("%d\t\t%d\n", task[i].pid, task[i].tgid);

	sleep(1);
	printf("start munmap\n");
	munmap(task, map_len);
	sleep(1);
	printf("start close\n");

out:
	close(fd);
	free(buf);
	if (rc == 0)
		sleep(1);
	return rc;
}