上篇文章介绍了如何查看内核使用的分页模式,笔者电脑上内核使用的分页模式是 4-level paging,这篇文章讲解 4-level paging分页模式如何将线性地址转换成物理地址.
先思考2个问题:
- 内核初始化时,线性地址0xffffffff8220a000转换成物理地址是多少?
- 内核初始化时,线性地址0xffff88800220a000转换成物理地址是多少?
1. 4-level paging 转换过程
先看一张intel手册上的4-level paging 4KB大小的页的转换图
下面介绍这张图表达的内容:
2. cr3寄存器介绍
CR3寄存器又叫页目录基址寄存器(Page Directory Base Register, PDBR), CR3中存放着当前任务页表目录的物理地址.
3. 内核中线性地址和物理地址转换宏
下面代码使用内核中定义的宏打印物理地址,代码中的宏选自linux5.4.34 arch/x86/include/asm/page.h 和
arch/x86/include/asm/page_64.h
#include <stdio.h>
// Token-pasting helpers: _AC(X, Y) glues the suffix Y onto the constant X, e.g. _AC(1, UL) -> (1UL)
#define __AC(X,Y) (X##Y)
#define _AC(X,Y) __AC(X,Y)
// Offset that __va() adds to a physical address (value for 4-level paging)
#define __PAGE_OFFSET_BASE_L4 _AC(0xffff888000000000, UL)
#define __PAGE_OFFSET __PAGE_OFFSET_BASE_L4
#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
// Start of the linear-address range used for kernel symbols (addresses beginning with 0xffffffff8)
#define __START_KERNEL_map _AC(0xffffffff80000000, UL)
// __va converts a physical address to a linear address: it is simply physical address + 0xffff888000000000
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
/*
 * Convert a linear address to a physical address.
 *
 * Two linear ranges are handled:
 *   - x >= __START_KERNEL_map: result is x - __START_KERNEL_map + phys_base
 *   - x <  __START_KERNEL_map: result is x - PAGE_OFFSET
 *
 * phys_base is hard-coded to 0 here because it is 0 on the author's machine.
 */
static inline unsigned long __phys_addr_nodebug(unsigned long x)
{
    const unsigned long delta = x - __START_KERNEL_map;

    /*
     * The subtraction is unsigned: when x is below __START_KERNEL_map it
     * wraps around and delta ends up larger than x. That comparison is how
     * the two ranges are told apart.
     */
    if (x > delta) {
        return delta + 0 /* phys_base */;
    }

    /* delta + (__START_KERNEL_map - PAGE_OFFSET) == x - PAGE_OFFSET */
    return delta + (__START_KERNEL_map - PAGE_OFFSET);
}
// __phys_addr is implemented by the non-debug helper __phys_addr_nodebug
#define __phys_addr(x) __phys_addr_nodebug(x)
// Plain subtraction of __START_KERNEL_map; assumes x is a symbol address at or above it
#define __phys_addr_symbol(x) \
((unsigned long)(x) - __START_KERNEL_map + 0 /* phys_base */) // phys_base is 0
#define __phys_reloc_hide(x) (x)
// __pa converts a linear address to a physical address
#define __pa(x) __phys_addr((unsigned long)(x))
// __pa_symbol also converts a linear address to a physical address; it is meant for the
// symbols defined in vmlinux.lds.S, whose addresses start with 0xffffffff8
#define __pa_symbol(x) \
__phys_addr_symbol(__phys_reloc_hide((unsigned long)(x)))
/*
 * Print the physical address that the conversion macros produce for the two
 * linear addresses discussed in the article.
 */
int main() {
    /* linear address in the PAGE_OFFSET-based range */
    const unsigned long direct_map_addr = 0xffff88800220a000UL;
    /* linear address in the __START_KERNEL_map-based range */
    const unsigned long kernel_map_addr = 0xffffffff8220a000UL;

    printf("address: 0x%lx\n", __pa(direct_map_addr));
    printf("address: 0x%lx\n", __pa(kernel_map_addr));
    printf("address: 0x%lx\n", __pa_symbol(kernel_map_addr));
}
上面代码输出如下,所以问题1和问题2的答案为0x220a000,下面讲解怎么转换成0x220a000
address: 0x220a000
address: 0x220a000
address: 0x220a000
4. 各级页目录索引
下面代码使用内核中定义的宏和索引函数打印各级页目录索引,代码中的宏和索引函数选自linux5.4.34 arch/x86/include/asm/pgtable.h
#include <stdio.h>
// For each paging level: the bit position at which its index starts inside a
// linear address (*_SHIFT) and the number of entries in one table (PTRS_PER_*).
// 512 entries per table means each index is 9 bits wide.
#define PAGE_SHIFT 12
#define PGDIR_SHIFT 39
#define PTRS_PER_PGD 512
#define PUD_SHIFT 30
#define PTRS_PER_PUD 512
#define PMD_SHIFT 21
#define PTRS_PER_PMD 512
#define PTRS_PER_PTE 512
// Index into the top-level table: bits 47:39 of the address
#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
/* Index into the PUD table: bits 38:30 of the linear address. */
static inline unsigned long pud_index(unsigned long address)
{
    const unsigned long slot_mask = PTRS_PER_PUD - 1;
    const unsigned long shifted = address >> PUD_SHIFT;

    return shifted & slot_mask;
}
/* Index into the PMD table: bits 29:21 of the linear address. */
static inline unsigned long pmd_index(unsigned long address)
{
    const unsigned long slot_mask = PTRS_PER_PMD - 1;
    const unsigned long shifted = address >> PMD_SHIFT;

    return shifted & slot_mask;
}
/* Index into the PTE table: bits 20:12 of the linear address. */
static inline unsigned long pte_index(unsigned long address)
{
    const unsigned long slot_mask = PTRS_PER_PTE - 1;
    const unsigned long shifted = address >> PAGE_SHIFT;

    return shifted & slot_mask;
}
/*
 * Print a linear address followed by the index it selects at each of the
 * four paging levels.
 *
 * All four index expressions have type unsigned long, so they are printed
 * with %lu (the original used the signed %ld conversion).
 */
void printIndex(unsigned long address) {
    printf("address: 0x%lx\n", address);
    printf("pgd_index: %lu\n", pgd_index(address)); // "PML4" in the figure of section 1
    printf("pud_index: %lu\n", pud_index(address)); // "Directory Ptr" in the figure of section 1
    printf("pmd_index: %lu\n", pmd_index(address)); // "Directory" in the figure of section 1
    printf("pte_index: %lu\n", pte_index(address)); // "Table" in the figure of section 1
}
/* Print the per-level indexes for the two linear addresses from the article. */
int main() {
    const unsigned long samples[] = {
        0xffffffff8220a000UL,
        0xffff88800220a000UL,
    };
    unsigned int k;

    for (k = 0; k < sizeof(samples) / sizeof(samples[0]); k++) {
        printIndex(samples[k]);
    }
}
代码输出如下所示:
address: 0xffffffff8220a000
pgd_index: 511
pud_index: 510
pmd_index: 17
pte_index: 10
address: 0xffff88800220a000
pgd_index: 273
pud_index: 0
pmd_index: 17
pte_index: 10
5. 在内核中添加代码打印页表
在内核代码文件arch/x86/mm/init.c
中添加代码
// 添加打印页表代码开始
/*
 * Dump every non-zero entry of one PTE-level table.
 *
 * parent: the upper-level entry that refers to this table. Its flag bits are
 *         stripped with PTE_PFN_MASK and PAGE_OFFSET is added so that the
 *         table can be read through a linear address.
 */
void printPTETable(unsigned long parent) {
	unsigned long *pte = (unsigned long *)((parent & PTE_PFN_MASK) + PAGE_OFFSET);
	int i;

	/* %lx expects an unsigned long, so the pointer is converted explicitly */
	printk("------pte: 0x%lx\n", (unsigned long)pte);
	for (i = 0; i < PTRS_PER_PTE /* 512 */; i++) {
		unsigned long entry = pte[i];

		/* all-zero slots are skipped */
		if (entry) {
			printk("--------index: %d pysical address: 0x%lx\n", i, entry);
		}
	}
}
/*
 * Dump every non-zero entry of one PMD-level table and descend into the
 * PTE table of each entry that does not have bit 7 set.
 *
 * parent: the upper-level entry that refers to this table. Its flag bits are
 *         stripped with PTE_PFN_MASK and PAGE_OFFSET is added so that the
 *         table can be read through a linear address.
 */
void printPMDTable(unsigned long parent) {
	unsigned long *pmd = (unsigned long *)((parent & PTE_PFN_MASK) + PAGE_OFFSET);
	int i;

	/* %lx expects an unsigned long, so the pointer is converted explicitly */
	printk("----pmd: 0x%lx\n", (unsigned long)pmd);
	for (i = 0; i < PTRS_PER_PMD /* 512 */; i++) {
		unsigned long entry = pmd[i];

		/* all-zero slots are skipped */
		if (!entry) {
			continue;
		}

		printk("------index: %d pte entry: 0x%lx\n", i, entry);
		/*
		 * Bit 7 is the PS flag: when it is set the entry maps a page
		 * directly and there is no lower-level table to walk.
		 */
		if ((entry >> 7) & 1) {
			printk("--------pysical address: 0x%lx\n", entry);
		} else {
			printPTETable(entry);
		}
	}
}
/*
 * Dump every non-zero entry of one PUD-level table and descend into the
 * PMD table of each entry that does not have bit 7 set.
 *
 * parent: the upper-level entry that refers to this table. Its flag bits are
 *         stripped with PTE_PFN_MASK and PAGE_OFFSET is added so that the
 *         table can be read through a linear address.
 */
void printPUDTable(unsigned long parent) {
	unsigned long *pud = (unsigned long *)((parent & PTE_PFN_MASK) + PAGE_OFFSET);
	int i;

	/* %lx expects an unsigned long, so the pointer is converted explicitly */
	printk("--pud: 0x%lx\n", (unsigned long)pud);
	for (i = 0; i < PTRS_PER_PUD /* 512 */; i++) {
		unsigned long entry = pud[i];

		/* all-zero slots are skipped */
		if (!entry) {
			continue;
		}

		printk("----index: %d pud entry: 0x%lx\n", i, entry);
		/*
		 * Bit 7 is the PS flag: when it is set the entry maps a page
		 * directly and there is no lower-level table to walk.
		 */
		if ((entry >> 7) & 1) {
			printk("----pysical address: 0x%lx\n", entry);
		} else {
			printPMDTable(entry);
		}
	}
}
/*
 * Dump the page tables reachable from the current CR3 value: every non-zero
 * top-level entry is printed and the table it refers to is walked.
 */
void printPGDTable(void) {
	/* read the CR3 register and turn the physical address into a linear one */
	unsigned long *pgd = (unsigned long *)(native_read_cr3_pa() + PAGE_OFFSET);
	int i;

	/* %lx expects an unsigned long, so the pointer is converted explicitly */
	printk("cr3 pgd: 0x%lx\n", (unsigned long)pgd);
	for (i = 0; i < PTRS_PER_PGD /* 512 */; i++) {
		unsigned long entry = pgd[i];

		/* all-zero slots are skipped */
		if (entry) {
			printk("--index: %d pgd entry: 0x%lx\n", i, entry);
			printPUDTable(entry);
		}
	}
}
// 添加打印页表代码结束
// Excerpt of init_mem_mapping(): only the lines around the CR3 switch are shown
void __init init_mem_mapping(void)
{
// ... omitted
printPGDTable();
load_cr3(swapper_pg_dir); // switch CR3; the page tables are printed before and after the switch
printPGDTable();
// ... omitted
}
下面截取部分输出内容,截取部分为上一节打印出来的索引对应的物理地址.
[ 0.000000] cr3 pgd: 0xffff88800269e000
[ 0.000000] --index: 273 pgd entry: 0x26a0063
[ 0.000000] --pud: 0xffff8880026a0000
[ 0.000000] ----index: 0 pud entry: 0x26a1063
[ 0.000000] ----pmd: 0xffff8880026a1000
省略
[ 0.000000] ------index: 17 pte entry: 0x80000000022000e3
[ 0.000000] --------pysical address: 0x80000000022000e3
省略
[ 0.000000] --index: 511 pgd entry: 0x220c067
[ 0.000000] --pud: 0xffff88800220c000
[ 0.000000] ----index: 510 pud entry: 0x220d063
[ 0.000000] ----pmd: 0xffff88800220d000
省略
[ 0.000000] ------index: 17 pte entry: 0x22001e3
[ 0.000000] --------pysical address: 0x22001e3
省略
切换CR3
[ 0.000000] cr3 pgd: 0xffff88800220a000
[ 0.000000] --index: 273 pgd entry: 0x2801067
[ 0.000000] --pud: 0xffff888002801000
[ 0.000000] ----index: 0 pud entry: 0x2802067
省略
[ 0.000000] ----pmd: 0xffff888002802000
[ 0.000000] ------index: 17 pte entry: 0x80000000022001e3
[ 0.000000] --------pysical address: 0x80000000022001e3
省略
[ 0.000000] --index: 511 pgd entry: 0x220c067
[ 0.000000] --pud: 0xffff88800220c000
[ 0.000000] ----index: 510 pud entry: 0x220d063
[ 0.000000] ----pmd: 0xffff88800220d000
省略
[ 0.000000] ------index: 17 pte entry: 0x22001e3
[ 0.000000] --------pysical address: 0x22001e3
省略
可以看出最后的物理地址为0x80000000022000e3
, 0x22001e3
, 0x80000000022001e3
其中0x80000000022000e3
中最高位的8即第63位(XD位)设置为1,表示这块内存是不可执行的
其中0x0e3
和0x1e3
即低12位表示内存的FLAG,具体含义我们先不讨论
所以通过页表找出来的物理内存地址为0x2200000
,这里有个问题,只转换了3次就结束了,和第一节图中转换了4次不一样,这里我迷惑了很久,搜了很多资料没找到答案,最终在intel手册里找到了答案,手册里原文如下:
Because a PDE is identified using bits 47:21 of the linear address, it controls access to a 2-MByte region of the
linear-address space. Use of the PDE depends on its PS flag:
• If the PDE’s PS flag is 1, the PDE maps a 2-MByte page (see Table 4-18). The final physical address is computed
as follows:
— Bits 51:21 are from the PDE.
— Bits 20:0 are from the original linear address.
The linear address’s protection key is the value of bits 62:59 of the PDE (see Section 4.6.2).
• If the PDE’s PS flag is 0, a 4-KByte naturally aligned page table is located at the physical address specified in
bits 51:12 of the PDE (see Table 4-19). A page table comprises 512 64-bit entries (PTEs). A PTE is selected
using the physical address defined as follows:
— Bits 51:12 are from the PDE.
— Bits 11:3 are bits 20:12 of the linear address.
— Bits 2:0 are all 0.
意思是entry的PS flag(第7位)是否设置处理不一样,如果没设置就需要继续找下一级,设置了转换过程就结束了,所以我们这里转换出来的最终物理地址为0xffff88800220a000
(0xffffffff8220a000
也一样)的低21位(即0xa000) + 0x2200000
即0x220a000
参考手册:
Intel® 64 and IA-32 architectures software developer’s manual volume 3A: System programming guide, part 1 CHAPTER 4 4.5
linux版本:5.4.34
水平有限,如有不对的地方请指正