arm64采用4级页表,分别是pgd->pud->pmd->pte。每个页表项占64bit(8Byte),因此一个4K的物理页可以存放512个页表项。如果把pmd表项直接当作pte来用,就简化成三级,则每个pmd表项可以表示512*4KB=2MB的size。
因此在设置CONFIG_ARM64_4K_PAGES=y的情况下,再打开CONFIG_HUGETLB_PAGE和CONFIG_HUGETLBFS,就可以通过hugetlbfs让某个目录下的文件使用hugepage来访问。
要使用hugepage,必须mount hugetlbfs。
mount none /root/hugepagetest -t hugetlbfs
然后上层就可以通过下面的sample code来访问test
#include <fcntl.h>
#include <sys/mman.h>
#include <errno.h>
#include <stdio.h>	/* perror */
#include <unistd.h>	/* close, unlink */
#define MAP_LENGTH (10*1024*1024)
/*
 * Minimal hugetlbfs demo: create a file under the hugetlbfs mount point,
 * map MAP_LENGTH bytes of it into this process, then unmap it and remove
 * the file again.
 *
 * Returns 0 on success, -1 if open() or mmap() fails.
 */
int main(void)
{
	int fd;
	void *addr;

	/*
	 * Create a file in hugetlb fs. O_CREAT requires an explicit mode
	 * argument; without it the permission bits are indeterminate.
	 */
	fd = open("/root/hugepagetest/test", O_CREAT | O_RDWR, 0755);
	if (fd < 0) {
		perror("Err: ");
		return -1;
	}

	/* map the file into address space of current application process */
	addr = mmap(0, MAP_LENGTH, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (addr == MAP_FAILED) {
		perror("Err: ");
		close(fd);
		unlink("/root/hugepagetest/test");
		return -1;
	}

	/* from now on, you can store application data on huge pages via addr */

	munmap(addr, MAP_LENGTH);
	close(fd);
	unlink("/root/hugepagetest/test");
	return 0;
}
这样可以在kernel使用4k page的情况下让/root/hugepagetest/test 这个文件使用2M的pagesize。
hugepage的初始化函数是mm/hugetlb.c中的static int __init hugetlb_init(void)。
实际可以使用cat /proc/meminfo 来查询HugePages的信息
root@ubuntu:~# cat /proc/meminfo
MemTotal: 267795008 kB
MemFree: 259260224 kB
HugePages_Total: 0
HugePages_Free: 0
HugePages_Rsvd: 0
HugePages_Surp: 0
Hugepagesize: 2048 kB
像普通的pagefault一样,会在handle_mm_fault 中通过is_vm_hugetlb_page 来判断是否是hugepage fault
/*
 * Page-fault entry point as quoted here: updates the fault counters,
 * enables memcg OOM handling for user-space faults, rejects accesses the
 * architecture does not permit with VM_FAULT_SIGSEGV, and hands faults on
 * hugetlb VMAs to hugetlb_fault().
 *
 * @vma:     VMA in which the fault occurred
 * @address: faulting address, passed through to hugetlb_fault()
 * @flags:   FAULT_FLAG_* bits describing the access
 *
 * NOTE(review): this listing looks abbreviated. As shown there is no path
 * for non-hugetlb VMAs and `ret` is never returned; presumably the rest of
 * the function was omitted from the excerpt.
 */
int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
unsigned int flags)
{
int ret;
__set_current_state(TASK_RUNNING);
count_vm_event(PGFAULT);
mem_cgroup_count_vm_event(vma->vm_mm, PGFAULT);
/* do counter updates before entering really critical section. */
check_sync_rss_stat(current);
/*
* Enable the memcg OOM handling for faults triggered in user
* space. Kernel faults are handled more gracefully.
*/
if (flags & FAULT_FLAG_USER)
mem_cgroup_oom_enable();
/* Refuse the fault outright if the arch hook rejects this kind of access. */
if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
flags & FAULT_FLAG_INSTRUCTION,
flags & FAULT_FLAG_REMOTE))
return VM_FAULT_SIGSEGV;
/* VMAs flagged VM_HUGETLB are handled by the dedicated hugetlb path. */
if (unlikely(is_vm_hugetlb_page(vma)))
ret = hugetlb_fault(vma->vm_mm, vma, address, flags);
}
/*
 * Report whether @vma is a hugetlb mapping, i.e. whether the VM_HUGETLB
 * bit is set in its vm_flags.
 */
static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma)
{
	return (vma->vm_flags & VM_HUGETLB) != 0;
}
原来就是看vma的vm_flags是否带有VM_HUGETLB 这个标识来确定是否是hugepage。
如果是hugepage则调用hugetlb_fault->huge_pte_alloc
/*
 * Find or allocate the page-table slot that will hold the huge-page entry
 * for @addr in @mm, for a huge page of @sz bytes.
 *
 * The slot that is returned depends on @sz:
 *   PUD_SIZE               - the pud entry itself
 *   PAGE_SIZE * CONT_PTES  - a pte obtained through pte_alloc_map()
 *   PMD_SIZE               - a pmd entry (from huge_pmd_share() when
 *                            CONFIG_ARCH_WANT_HUGE_PMD_SHARE is enabled and
 *                            the pud is empty, otherwise from pmd_alloc())
 *   PMD_SIZE * CONT_PMDS   - a pmd entry from pmd_alloc()
 *
 * Returns NULL when the pud or pmd allocation fails or when @sz matches
 * none of the sizes above; otherwise returns the slot cast to pte_t *.
 */
pte_t *huge_pte_alloc(struct mm_struct *mm,
		      unsigned long addr, unsigned long sz)
{
	pgd_t *pgd;
	pud_t *pud;
	pte_t *pte = NULL;

	pr_debug("%s: addr:0x%lx sz:0x%lx\n", __func__, addr, sz);
	pgd = pgd_offset(mm, addr);
	pud = pud_alloc(mm, pgd, addr);
	if (!pud)
		return NULL;

	if (sz == PUD_SIZE) {
		pte = (pte_t *)pud;
	} else if (sz == (PAGE_SIZE * CONT_PTES)) {
		pmd_t *pmd = pmd_alloc(mm, pud, addr);

		WARN_ON(addr & (sz - 1));
		/*
		 * pmd_alloc() can fail just like pud_alloc() above; do not
		 * hand a NULL pmd to pte_alloc_map().
		 */
		if (!pmd)
			return NULL;
		/*
		 * Note that if this code were ever ported to the
		 * 32-bit arm platform then it will cause trouble in
		 * the case where CONFIG_HIGHPTE is set, since there
		 * will be no pte_unmap() to correspond with this
		 * pte_alloc_map().
		 */
		pte = pte_alloc_map(mm, pmd, addr);
	} else if (sz == PMD_SIZE) {
		if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
		    pud_none(*pud))
			pte = huge_pmd_share(mm, addr, pud);
		else
			pte = (pte_t *)pmd_alloc(mm, pud, addr);
	} else if (sz == (PMD_SIZE * CONT_PMDS)) {
		pmd_t *pmd;

		pmd = pmd_alloc(mm, pud, addr);
		WARN_ON(addr & (sz - 1));
		return (pte_t *)pmd;
	}

	/*
	 * pte is still NULL if an allocation failed or sz matched no branch,
	 * so only dereference it for the debug print when it is valid.
	 */
	pr_debug("%s: addr:0x%lx sz:0x%lx ret pte=%p/0x%llx\n", __func__, addr,
		 sz, pte, pte ? pte_val(*pte) : 0ULL);
	return pte;
}
从huge_pte_alloc 可以发现hugepage的size可以分为以下四种
即pagesize = PUD_SIZE、(PAGE_SIZE * CONT_PTES)、PMD_SIZE、(PMD_SIZE * CONT_PMDS)
我们这里pagesize是2M的话,这里的size就等于PMD_SIZE
最后再通过pmd_alloc 来申请pmd页表(或者通过huge_pmd_share 共享已有的pmd页表),返回的pmd表项被强制转换成pte_t *,当作huge pte来使用。
其次从开机log中也可以看到使用的是多大的pagesize
[ 5.340776] HugeTLB registered 2 MB page size, pre-allocated 0 pages
因此在设置CONFIG_ARM64_4K_PAGES=y的情况下,再打开CONFIG_HUGETLB_PAGE和CONFIG_HUGETLBFS,就可以通过hugetlbfs让某个目录下的文件使用hugepage来访问。
要使用hugepage,必须mount hugetlbfs。
mount none /root/hugepagetest -t hugetlbfs
然后上层就可以通过下面的sample code来访问test
#include <fcntl.h>
#include <sys/mman.h>
#include <errno.h>
#include <stdio.h>	/* perror */
#include <unistd.h>	/* close, unlink */
#define MAP_LENGTH (10*1024*1024)
/*
 * Minimal hugetlbfs demo: create a file under the hugetlbfs mount point,
 * map MAP_LENGTH bytes of it into this process, then unmap it and remove
 * the file again.
 *
 * Returns 0 on success, -1 if open() or mmap() fails.
 */
int main(void)
{
	int fd;
	void *addr;

	/*
	 * Create a file in hugetlb fs. O_CREAT requires an explicit mode
	 * argument; without it the permission bits are indeterminate.
	 */
	fd = open("/root/hugepagetest/test", O_CREAT | O_RDWR, 0755);
	if (fd < 0) {
		perror("Err: ");
		return -1;
	}

	/* map the file into address space of current application process */
	addr = mmap(0, MAP_LENGTH, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (addr == MAP_FAILED) {
		perror("Err: ");
		close(fd);
		unlink("/root/hugepagetest/test");
		return -1;
	}

	/* from now on, you can store application data on huge pages via addr */

	munmap(addr, MAP_LENGTH);
	close(fd);
	unlink("/root/hugepagetest/test");
	return 0;
}
这样可以在kernel使用4k page的情况下让/root/hugepagetest/test 这个文件使用2M的pagesize。
hugepage的初始化函数是mm/hugetlb.c中的static int __init hugetlb_init(void)。
实际可以使用cat /proc/meminfo 来查询HugePages的信息
root@ubuntu:~# cat /proc/meminfo
MemTotal: 267795008 kB
MemFree: 259260224 kB
HugePages_Total: 0
HugePages_Free: 0
HugePages_Rsvd: 0
HugePages_Surp: 0
Hugepagesize: 2048 kB
像普通的pagefault一样,会在handle_mm_fault 中通过is_vm_hugetlb_page 来判断是否是hugepage fault
/*
 * Page-fault entry point as quoted here: updates the fault counters,
 * enables memcg OOM handling for user-space faults, rejects accesses the
 * architecture does not permit with VM_FAULT_SIGSEGV, and hands faults on
 * hugetlb VMAs to hugetlb_fault().
 *
 * @vma:     VMA in which the fault occurred
 * @address: faulting address, passed through to hugetlb_fault()
 * @flags:   FAULT_FLAG_* bits describing the access
 *
 * NOTE(review): this listing looks abbreviated. As shown there is no path
 * for non-hugetlb VMAs and `ret` is never returned; presumably the rest of
 * the function was omitted from the excerpt.
 */
int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
unsigned int flags)
{
int ret;
__set_current_state(TASK_RUNNING);
count_vm_event(PGFAULT);
mem_cgroup_count_vm_event(vma->vm_mm, PGFAULT);
/* do counter updates before entering really critical section. */
check_sync_rss_stat(current);
/*
* Enable the memcg OOM handling for faults triggered in user
* space. Kernel faults are handled more gracefully.
*/
if (flags & FAULT_FLAG_USER)
mem_cgroup_oom_enable();
/* Refuse the fault outright if the arch hook rejects this kind of access. */
if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
flags & FAULT_FLAG_INSTRUCTION,
flags & FAULT_FLAG_REMOTE))
return VM_FAULT_SIGSEGV;
/* VMAs flagged VM_HUGETLB are handled by the dedicated hugetlb path. */
if (unlikely(is_vm_hugetlb_page(vma)))
ret = hugetlb_fault(vma->vm_mm, vma, address, flags);
}
/*
 * Report whether @vma is a hugetlb mapping, i.e. whether the VM_HUGETLB
 * bit is set in its vm_flags.
 */
static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma)
{
	return (vma->vm_flags & VM_HUGETLB) != 0;
}
原来就是看vma的vm_flags是否带有VM_HUGETLB 这个标识来确定是否是hugepage。
如果是hugepage则调用hugetlb_fault->huge_pte_alloc
/*
 * Find or allocate the page-table slot that will hold the huge-page entry
 * for @addr in @mm, for a huge page of @sz bytes.
 *
 * The slot that is returned depends on @sz:
 *   PUD_SIZE               - the pud entry itself
 *   PAGE_SIZE * CONT_PTES  - a pte obtained through pte_alloc_map()
 *   PMD_SIZE               - a pmd entry (from huge_pmd_share() when
 *                            CONFIG_ARCH_WANT_HUGE_PMD_SHARE is enabled and
 *                            the pud is empty, otherwise from pmd_alloc())
 *   PMD_SIZE * CONT_PMDS   - a pmd entry from pmd_alloc()
 *
 * Returns NULL when the pud or pmd allocation fails or when @sz matches
 * none of the sizes above; otherwise returns the slot cast to pte_t *.
 */
pte_t *huge_pte_alloc(struct mm_struct *mm,
		      unsigned long addr, unsigned long sz)
{
	pgd_t *pgd;
	pud_t *pud;
	pte_t *pte = NULL;

	pr_debug("%s: addr:0x%lx sz:0x%lx\n", __func__, addr, sz);
	pgd = pgd_offset(mm, addr);
	pud = pud_alloc(mm, pgd, addr);
	if (!pud)
		return NULL;

	if (sz == PUD_SIZE) {
		pte = (pte_t *)pud;
	} else if (sz == (PAGE_SIZE * CONT_PTES)) {
		pmd_t *pmd = pmd_alloc(mm, pud, addr);

		WARN_ON(addr & (sz - 1));
		/*
		 * pmd_alloc() can fail just like pud_alloc() above; do not
		 * hand a NULL pmd to pte_alloc_map().
		 */
		if (!pmd)
			return NULL;
		/*
		 * Note that if this code were ever ported to the
		 * 32-bit arm platform then it will cause trouble in
		 * the case where CONFIG_HIGHPTE is set, since there
		 * will be no pte_unmap() to correspond with this
		 * pte_alloc_map().
		 */
		pte = pte_alloc_map(mm, pmd, addr);
	} else if (sz == PMD_SIZE) {
		if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
		    pud_none(*pud))
			pte = huge_pmd_share(mm, addr, pud);
		else
			pte = (pte_t *)pmd_alloc(mm, pud, addr);
	} else if (sz == (PMD_SIZE * CONT_PMDS)) {
		pmd_t *pmd;

		pmd = pmd_alloc(mm, pud, addr);
		WARN_ON(addr & (sz - 1));
		return (pte_t *)pmd;
	}

	/*
	 * pte is still NULL if an allocation failed or sz matched no branch,
	 * so only dereference it for the debug print when it is valid.
	 */
	pr_debug("%s: addr:0x%lx sz:0x%lx ret pte=%p/0x%llx\n", __func__, addr,
		 sz, pte, pte ? pte_val(*pte) : 0ULL);
	return pte;
}
从huge_pte_alloc 可以发现hugepage的size可以分为以下四种
即pagesize = PUD_SIZE、(PAGE_SIZE * CONT_PTES)、PMD_SIZE、(PMD_SIZE * CONT_PMDS)
我们这里pagesize是2M的话,这里的size就等于PMD_SIZE
最后再通过pmd_alloc 来申请pmd页表(或者通过huge_pmd_share 共享已有的pmd页表),返回的pmd表项被强制转换成pte_t *,当作huge pte来使用。
其次从开机log中也可以看到使用的是多大的pagesize
[ 5.340776] HugeTLB registered 2 MB page size, pre-allocated 0 pages