目录
1.地址转换
进程并不直接操作物理地址,CPU在运行时指定的地址需要经过内存管理单元MMU
转换后才能访问到真正的物理内存,地址转换的过程分为两块:分段和分页.
分段机制简单地来说就是将进程的代码,数据,栈分在不同的虚拟地址段上,从而避免
进程间的互相影响,分段之前的地址我们称之为逻辑地址,由两部分组成:高位的段选择
符和低位的段内偏移,在分段时,我们先用段选择符在相应的段描述符表中找到段描述符--
也就是某个段的基地址,再加上段内偏移量就得到对应的线性地址,线性地址也称之为
虚拟地址,在实际应用中linux为了增加可移植性并没有完整的使用分段机制,它将所有的
段都指向相同的地址范围,段的基地址都为0,这样逻辑地址和线性地址在数值上就相同了.
所以今天分析的重点在分页,也就是由线性地址到物理地址转换的过程.
2.Linux4级页表
linux为了兼容32和64位的CPU,需要一个统一的页面地址模型.目前常用的就是四级页表模型,
里面有PGD-页全局目录,PUD-页上级目录,PMD-页中级目录,PTE-页表这四个部分.
根据不同的需要其中的部分页表可能未被使用,线性地址(图中的linear Address)中
的每部分索引的大小会根据计算机体系结构灵活调整,比如说:
对于没有启用物理地址扩展功能的32位系统来说,两级页表就足够了,linux首先会在
线性地址中将页上级目录索引和页中级目录索引这两个字段的位数置为0,从根本上取消这两个字段,
但是这两个页目录在指针序列中的位置仍然被保留下来,也就是说寻址的过程中不能跳过
页上级目录和页中级目录直接由页全局目录到页表,内核会将这两个页目录的表项数都置为1.
3.64位线性地址
由于64位处理器硬件的限制,它的地址线只有48条,所以线性地址实际使用的也只有48位,
在linux 64位中使用了4级页表的结构,线性地址的划分如图,其中
PGD-页全局目录索引
PUD-页上级目录索引
PMD-页中级目录索引
PTE-页表的索引
分别占了9位,页内偏移占了12位,共计48位,剩下的高位都保留,留作以后扩展使用,
这种情况下,每一个页面的大小都是4K,每个页表项的大小是8字节(64位),这个页表可以映射
的空间是256TB,而新的intel芯片的MMU硬件规定可以进行5级的页表管理,所以在4.15
的内核中,linux已经在页全局目录和页上级目录之间添加了一个新的页目录(P4D页目录),
这个页目录同32位中的情况中一样,现在还没有被使用.它的页目录项只有一个,
目前的线性地址中也没有它的索引位置.
此处有一个非常重要的寄存器,CR3,这个寄存器是CPU中一系列控制寄存器之一,这些
控制寄存器主要是用来保存控制系统级别的操作和信息的,这个CR3就是用来保存当前
进程的页全局目录的地址的,寻址就是从这个页全局目录开始的,页全局目录的地址又在哪里呢?
内核在创建一个进程的时候就会为它分配页全局目录,在进程描述符task_struct结构中有一个
指向mm_struct结构的指针mm,而mm_struct这个结构就是用来描述进程的虚拟地址空间的,在
mm_struct中有一个字段pgd就是保存该进程的全局目录的地址的,所以在进程切换的时候,
操作系统通过访问task_struct中的mm_struct结构中的pgd字段取得新进程的页全局目录地址,
填充到CR3寄存器中,就完成了页表的切换.
4.实际观察寻页的过程
在内核中先申请一个页面然后用内核提供的函数按照寻页的步骤一步一步查询各级页目录,
最终找到所对应的物理地址.这些步骤就相当于我们手动模拟了MMU的寻页过程.
/*
page_lowmem.c
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
#include <linux/export.h>
/* Snapshots of the CR0/CR3 control registers; filled in by get_pgtable_macro(). */
static unsigned long cr0,cr3;
/* File-scope page address; this is the variable v2p_exit() passes to free_page(). */
static unsigned long vaddr = 0;
/*
 * Log the key parameters of the paging mechanism: the CR0/CR3 control
 * registers, the shift and entry count of each page-table level, and the
 * page mask. The register values are also stored in the file-scope
 * variables cr0 and cr3.
 */
static void get_pgtable_macro(void)
{
	/* Control registers. */
	cr0 = read_cr0();
	cr3 = read_cr3();
	printk("cr0 = 0x%lx,cr3 = 0x%lx\n",cr0,cr3);

	/*
	 * *_SHIFT: base-2 logarithm of the size of the region that the
	 * corresponding field of a linear address can map (a 4K page gives
	 * PAGE_SHIFT = 12). The P4D level is deliberately not printed here.
	 */
	printk("PGDIR_SHIFT = %d\n",PGDIR_SHIFT);
	printk("PUD_SHIFT = %d\n",PUD_SHIFT);
	printk("PMD_SHIFT = %d\n",PMD_SHIFT);
	printk("PAGE_SHIFT = %d\n",PAGE_SHIFT);

	/*
	 * PTRS_PER_*: number of entries in the table at each level; these
	 * drive the index arithmetic during a page-table walk.
	 */
	printk("PTRS_PER_PGD = %d\n",PTRS_PER_PGD);
	printk("PTRS_PER_PUD = %d\n",PTRS_PER_PUD);
	printk("PTRS_PER_PMD = %d\n",PTRS_PER_PMD);
	printk("PTRS_PER_PTE = %d\n",PTRS_PER_PTE);

	/* PAGE_MASK: mask that clears the in-page offset bits of an address. */
	printk("PAGE_MASK = 0x%lx\n",PAGE_MASK);
}
// 线性地址到物理地址转换的函数
static unsigned long vaddr2paddr(unsigned long vaddr)
{
pgd_t *pgd;
// p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
unsigned long paddr = 0;
unsigned long page_addr = 0;
unsigned long page_offset = 0;
pgd = pgd_offset(current->mm,vaddr);
printk("pgd_val = 0x%lx,pgd_index = %lu\n",pgd_val(*pgd),pgd_index(vaddr));
if(pgd_none(*pgd))
{
printk("not mapped in pgd");
return -1;
}
/*这个需要4.15以上版本的内核才支持,我的实验的内核版本是4.10的,所以将此段注释掉
p4d = pdg_offset(pgd,vaddr);
printk("p4d_val = 0x%lx,p4d_index = %lu\n",p4d_val(*p4d),p4d_index(vaddr));
if(p4d_none(*p4d))
{
printk("not mapped in p4d");
return -1;
}
*/
pud = pud_offset(pgd,vaddr);
printk("pud_val = 0x%lx,pud_index = %lu\n",pud_val(*pud),pud_index(vaddr));
if(pud_none(*pud))
{
printk("not mapped in pud");
return -1;
}
pmd = pmd_offset(pud,vaddr);
printk("pmd_val = 0x%lx,pmd_index = %lu\n",pmd_val(*pmd),pmd_index(vaddr));
if(pmd_none(*pmd))
{
printk("not mapped in pmd\n");
return -1;
}
pte = pte_offset_kernel(pmd,vaddr);
printk("pte_val = 0x%lx,pte_index = %lu\n",pte_val(*pte),pte_index(vaddr));
if(pte_none(*pte))
{
printk("not mapped in pte line 76\n");
return -1;
}
// 从页表项中取出该页表所映射页框的物理地址,将其与PAGE_MASK做位或操作,取出其高48位,就得到了页框的物理地址
page_addr = pte_val(*pte) & PAGE_MASK;
// 取出线性地址中的第12位,将 PAGE_MASK按位取反,然后与vaddr做了位或的操作,然后取出页内偏移
page_offset = vaddr & ~PAGE_MASK;
// 将上面两个地址拼接起来就可以得到想要的物理地址
paddr = page_addr | page_offset;
printk("line 85 --- page_addr = %lx,page_offset = %lx\n",page_addr,page_offset);
printk("line 86 --- vaddr = %lx,paddr = %lx\n",vaddr,paddr);
return paddr;
}
/*
 * Module entry point: print the paging parameters, allocate one page,
 * write a marker string into it and walk the page tables for its address.
 *
 * The page address is stored in the file-scope 'vaddr' (not in a local that
 * shadows it) so that v2p_exit() frees the page that was really allocated.
 *
 * Return: 0 on success, -ENOMEM when no page could be allocated.
 */
static int __init v2p_init(void)
{
	printk("vaddr to paddr is running .\n");
	get_pgtable_macro();
	printk("\n");

	vaddr = __get_free_page(GFP_KERNEL);
	if (vaddr == 0) {
		printk("get free page failed.\n");
		/* Report the failure instead of leaving a useless module loaded. */
		return -ENOMEM;
	}

	/* Marker string that can later be looked up through the physical address. */
	snprintf((char *)vaddr, PAGE_SIZE, "hello world from kernel");
	printk("get_page_vaddr=0x%lx",vaddr);
	vaddr2paddr(vaddr);
	return 0;
}
/* Module unload hook: log a farewell line, then free the page at file-scope vaddr. */
static void __exit v2p_exit(void)
{
	printk("vaddr to paddr module is leaving.\n");
	free_page(vaddr);
}
module_init(v2p_init);
module_exit(v2p_exit);
//-------------------------------------------------------------------
// dram.c
//
// This module implements a Linux character-mode device-driver
// for the processor's installed physical memory. It utilizes
// the kernel's 'kmap()' function, as a uniform way to provide
// access to all the memory-zones (including the "high memory"
// on systems with more than 896MB of installed physical ram).
// The access here is 'read-only' because we deem it too risky
// to the stable functioning of our system to allow every user
// the unrestricted ability to arbitrarily modify memory-areas
// which might contain some "critical" kernel data-structures.
// We implement an 'llseek()' method so that users can readily
// find out how much physical processor-memory is installed.
//
// NOTE: Developed and tested with Linux kernel version 2.6.10
//
// programmer: ALLAN CRUSE
// written on: 30 JAN 2005
// revised on: 28 JAN 2008 -- for Linux kernel version 2.6.22.5
// revised on: 06 FEB 2008 -- for machines having 4GB of memory
//-------------------------------------------------------------------
#include <linux/module.h> // for module_init()
#include <linux/highmem.h> // for kmap(), kunmap()
#include <asm/uaccess.h> // for copy_to_user()
char modname[] = "dram"; // for displaying driver's name
int my_major = 85; // note static major assignment
// set by dram_init() to num_physpages << PAGE_SHIFT; upper bound for my_read() and my_llseek()
loff_t dram_size; // total bytes of system memory
// forward declarations of the two file-operation handlers referenced by my_fops
loff_t my_llseek( struct file *file, loff_t offset, int whence );
ssize_t my_read( struct file *file, char *buf, size_t count, loff_t *pos );
// File-operations table registered for the device by dram_init(); only
// seeking and reading are provided, so the device is read-only.
// (C99 designated initializers replace the obsolete GNU 'field:' syntax.)
struct file_operations
my_fops = {
	.owner	= THIS_MODULE,
	.llseek	= my_llseek,
	.read	= my_read,
};
// Module entry point: logs the installation, computes the amount of
// installed physical memory into 'dram_size', and registers the character
// device under the static major number 'my_major'.
// Returns whatever register_chrdev() returns.
static int __init dram_init( void )
{
	// KERN_ALERT is used instead of a literal "<1>" prefix: newer kernels
	// no longer parse the literal form and would print it verbatim. The
	// two halves of the first line are emitted by a single printk() so
	// the line cannot be split by another message.
	printk( KERN_ALERT "\nInstalling \'%s\' module (major=%d)\n",
		modname, my_major );

	// NOTE(review): 'num_physpages' only exists on older kernels; newer
	// ones provide get_num_physpages() instead -- confirm for the target.
	dram_size = (loff_t)num_physpages << PAGE_SHIFT;
	printk( KERN_ALERT " ramtop=%08llX (%llu MB)\n", dram_size, dram_size >> 20 );
	return register_chrdev( my_major, modname, &my_fops );
}
// Module exit point: unregisters the character device and logs the removal.
static void __exit dram_exit( void )
{
	unregister_chrdev( my_major, modname );
	// KERN_ALERT instead of a literal "<1>" prefix, which newer kernels
	// no longer parse and would print verbatim.
	printk( KERN_ALERT "Removing \'%s\' module\n", modname );
}
// Read handler: copies up to 'count' bytes of physical memory, starting at
// file position '*pos', into the user buffer 'buf'.
//
// A single call never crosses a page boundary, so callers may receive fewer
// bytes than requested and should loop.
//
// Returns the number of bytes copied, 0 at or beyond the end of memory, or
// -EFAULT when the user buffer could not be written completely.
ssize_t my_read( struct file *file, char *buf, size_t count, loff_t *pos )
{
	struct page	*pp;
	char		*from;
	unsigned long	page_number, page_indent, more;

	// we cannot read beyond the end-of-file
	if ( *pos >= dram_size ) return 0;

	// determine which physical page to temporarily map
	// and how far into that page to begin reading from
	// (unsigned long rather than int, so large page numbers fit)
	page_number = *pos >> PAGE_SHIFT;
	page_indent = *pos & ( PAGE_SIZE - 1 );

	// map the designated physical page into kernel space;
	// pfn_to_page() works for every memory model, whereas indexing
	// 'mem_map[]' directly is only valid with a flat memory map
	pp = pfn_to_page( page_number );
	from = (char *)kmap( pp ) + page_indent;

	// cannot reliably read beyond the end of this mapped page
	// (compared this way round so that a huge 'count' cannot wrap the sum)
	if ( count > PAGE_SIZE - page_indent ) count = PAGE_SIZE - page_indent;

	// now transfer count bytes from mapped page to user-supplied buffer
	more = copy_to_user( buf, from, count );

	// ok now to discard the temporary page mapping
	kunmap( pp );

	// an error occurred if less than count bytes got copied
	if ( more ) return -EFAULT;

	// otherwise advance file-pointer and report number of bytes read
	*pos += count;
	return count;
}
// Seek handler: computes the new file position from 'offset' and 'whence'
// (0 = from start, 1 = from current position, 2 = from end of memory),
// stores it in file->f_pos and returns it. Returns -EINVAL for an unknown
// 'whence' or a result outside the range 0..dram_size.
loff_t my_llseek( struct file *file, loff_t offset, int whence )
{
	loff_t	target;

	if ( whence == 0 )		// SEEK_SET
		target = offset;
	else if ( whence == 1 )		// SEEK_CUR
		target = file->f_pos + offset;
	else if ( whence == 2 )		// SEEK_END
		target = dram_size + offset;
	else
		return -EINVAL;

	if ( target < 0 ) return -EINVAL;
	if ( target > dram_size ) return -EINVAL;

	file->f_pos = target;
	return target;
}
MODULE_LICENSE("GPL");
module_init( dram_init );
module_exit( dram_exit );
//----------------------------------------------------------------
// fileview.cpp
//
// This program displays the contents of a specified file
// in hexadecimal and ascii formats (including any device
// special files representing storage media). A user may
// navigate the file's contents using arrow-key commands,
// or may adjust the format of the hexadecimal display to
// select from among five data-sizes: byte (B), word (W),
// doubleword (D), quadword (Q) or octaword (O). It also
// is possible to seek to a specified position within the
// file by hitting the <ENTER>-key and then typing in the
// desired (hexadecimal) address. Type <ESCAPE> to quit.
//
// compile-and-link using: $ make fileview
//
// programmer: ALLAN CRUSE
// written on: 26 OCT 2002
// revised on: 07 JUN 2006 -- removed reliance on 'ncurses'
//----------------------------------------------------------------
#include <stdio.h> // for printf(), perror(), fflush()
#include <fcntl.h> // for open()
#include <string.h> // for strncpy()
#include <unistd.h> // for read(), lseek64()
#include <stdlib.h> // for exit()
#include <termios.h> // for tcgetattr(), tcsetattr()
// maximum filename length copied from the command line
#define MAXNAME 80
// the dump window is BUFHIGH lines of BUFWIDE bytes each (BUFSIZE bytes total)
#define BUFHIGH 16
#define BUFWIDE 16
#define BUFSIZE 256
// screen row and column of the first dump line
#define ROW 6
#define COL 2
// Key codes as main() sees them: the bytes returned by one read() on stdin
// are stored into an integer and masked to the low 24 bits, so a multi-byte
// key arrives as its first three bytes packed together
// (e.g. 0x00415B1B = ESC '[' 'A').
// NOTE(review): this packing assumes a little-endian host -- confirm.
#define KB_SEEK 0x0000000A
#define KB_QUIT 0x0000001B
#define KB_BACK 0x0000007F
#define KB_HOME 0x00315B1B
#define KB_LNUP 0x00415B1B
#define KB_PGUP 0x00355B1B
#define KB_LEFT 0x00445B1B
#define KB_RGHT 0x00435B1B
#define KB_LNDN 0x00425B1B
#define KB_PGDN 0x00365B1B
#define KB_END 0x00345B1B
#define KB_DEL 0x00335B1B
// program title shown centred on the first screen row
char progname[] = "FILEVIEW";
// name of the file being viewed (one extra byte keeps it NUL-terminated)
char filename[ MAXNAME + 1 ];
// bytes of the file currently shown in the dump window
char buffer[ BUFSIZE ];
// one formatted output line, assembled before being printed
char outline[ 80 ];
// Program entry point: opens the file named by argv[1] read-only and runs
// an interactive full-screen hexadecimal/ascii viewer on it until the user
// presses <ESCAPE> or standard input reaches end-of-file.
//
// Exits with status 1 when no filename is given, when the file cannot be
// opened, or when its size cannot be determined; returns 0 otherwise.
int main( int argc, char *argv[] )
{
	// setup the filename (if supplied), else terminate
	// ('filename' is a zero-filled global one byte longer than MAXNAME,
	// so the copy always stays NUL-terminated)
	if ( argc > 1 ) strncpy( filename, argv[1], MAXNAME );
	else { fprintf( stderr, "argument needed\n" ); exit(1); }

	// open the file for reading
	int	fd = open( filename, O_RDONLY );
	if ( fd < 0 ) { perror( filename ); exit(1); }

	// obtain the filesize (if possible)
	long long	filesize = lseek64( fd, 0LL, SEEK_END );
	if ( filesize < 0LL )
		{
		fprintf( stderr, "cannot locate \'end-of-file\' \n" );
		exit(1);
		}

	long long	incmin = ( 1LL << 8 );
	long long	incmax = ( 1LL << 36 );
	long long	posmin = 0LL;
	long long	posmax = (filesize - 241LL)&~0xF;
	if ( posmax < posmin ) posmax = posmin;

	// initiate noncanonical terminal input; when stdin is not a terminal
	// the settings are left alone and nothing is restored at exit
	struct termios	tty_orig;
	int	have_tty = ( tcgetattr( STDIN_FILENO, &tty_orig ) == 0 );
	if ( have_tty )
		{
		struct termios	tty_work = tty_orig;
		tty_work.c_lflag &= ~( ECHO | ICANON );  // | ISIG );
		tty_work.c_cc[ VMIN ] = 1;
		tty_work.c_cc[ VTIME ] = 0;
		tcsetattr( STDIN_FILENO, TCSAFLUSH, &tty_work );
		}
	printf( "\e[H\e[J" );

	// display the legend (column arithmetic is done in 'int' and clamped,
	// so a long name cannot produce a negative or wrapped column)
	int	i, j, k;
	k = (77 - (int)strlen( progname ))/2;
	if ( k < 1 ) k = 1;
	printf( "\e[%d;%dH %s ", 1, k, progname );
	k = (77 - (int)strlen( filename ))/2;
	if ( k < 1 ) k = 1;
	printf( "\e[%d;%dH\'%s\'", 3, k, filename );
	char	infomsg[ 80 ];
	snprintf( infomsg, sizeof( infomsg ), "filesize: %llu (=0x%013llX)",
		(unsigned long long)filesize, (unsigned long long)filesize );
	k = (78 - (int)strlen( infomsg ));
	if ( k < 1 ) k = 1;
	printf( "\e[%d;%dH%s", 24, k, infomsg );
	fflush( stdout );

	// main loop to navigate the file
	long long	pageincr = incmin;
	long long	lineincr = 16LL;
	long long	position = 0LL;
	long long	location = 0LL;
	int		format = 1;
	int		done = 0;
	(void)lineincr;		// kept from the original; not used by the loop
	while ( !done )
		{
		// erase prior buffer contents (every byte becomes 0xFF)
		memset( buffer, ~0, BUFSIZE );

		// restore 'pageincr' to prescribed bounds
		if ( pageincr == 0LL ) pageincr = incmax;
		else if ( pageincr < incmin ) pageincr = incmin;
		else if ( pageincr > incmax ) pageincr = incmax;

		// get current location of file-pointer position
		location = lseek64( fd, position, SEEK_SET );

		// try to fill 'buffer[]' with data from the file
		char	*where = buffer;
		int	to_read = BUFSIZE;
		while ( to_read > 0 )
			{
			int	nbytes = read( fd, where, to_read );
			if ( nbytes <= 0 ) break;
			to_read -= nbytes;
			where += nbytes;
			}

		// display the data just read into the 'buffer[]' array
		for (i = 0; i < BUFHIGH; i++)
			{
			const char	*row = &buffer[ i*BUFWIDE ];
			int		linelen;

			// draw the line-location (13-digit hexadecimal)
			linelen = sprintf( outline, "%013llX ", location );

			// draw the line in the selected hexadecimal format;
			// multi-byte items are fetched with memcpy() so the
			// 'char' buffer is never read through a pointer of
			// another type
			switch ( format )
				{
				case 1:	// 'byte' format
				for (j = 0; j < BUFWIDE; j++)
					linelen += sprintf( outline+linelen,
						"%02X ", (unsigned char)row[j] );
				break;

				case 2:	// 'word' format
				for (j = 0; j < BUFWIDE/2; j++)
					{
					unsigned short	w;
					memcpy( &w, row + j*2, sizeof( w ) );
					linelen += sprintf( outline+linelen,
						" %04X ", w );
					}
				break;

				case 4:	// 'dword' format
				for (j = 0; j < BUFWIDE/4; j++)
					{
					unsigned int	d;
					memcpy( &d, row + j*4, sizeof( d ) );
					linelen += sprintf( outline+linelen,
						" %08X ", d );
					}
				break;

				case 8:	// 'qword' format
				for (j = 0; j < BUFWIDE/8; j++)
					{
					unsigned long long	q;
					memcpy( &q, row + j*8, sizeof( q ) );
					linelen += sprintf( outline+linelen,
						" %016llX ", q );
					}
				break;

				case 16: // 'octaword'
					{
					unsigned long long	q[2];
					memcpy( q, row, sizeof( q ) );
					linelen += sprintf( outline+linelen, " " );
					linelen += sprintf( outline+linelen,
						" %016llX%016llX ", q[1], q[0] );
					linelen += sprintf( outline+linelen, " " );
					}
				break;
				}

			// draw the line in ascii format
			for (j = 0; j < BUFWIDE; j++)
				{
				char	ch = row[ j ];
				if (( ch < 0x20 )||( ch > 0x7E )) ch = '.';
				linelen += sprintf( outline+linelen, "%c", ch);
				}

			// transfer this output-line to the screen
			printf( "\e[%d;%dH%s", ROW+i, COL, outline );

			// advance 'location' for the next output-line
			location += BUFWIDE;
			}
		printf( "\e[%d;%dH", 23, COL );
		fflush( stdout );

		// await keypress; end-of-file or an error on stdin ends the
		// program instead of redrawing the screen forever
		long long	inch = 0LL;
		if ( read( STDIN_FILENO, &inch, sizeof( inch ) ) <= 0 )
			{
			done = 1;
			continue;
			}
		printf( "\e[%d;%dH%60s", 23, COL, " " );

		// interpret navigation or formatting command
		inch &= 0x00FFFFFFLL;
		switch ( inch )
			{
			// move to the file's beginning/ending
			case 'H': case 'h':
			case KB_HOME:	position = posmin; break;
			case 'E': case 'e':
			case KB_END:	position = posmax; break;

			// move forward/backward by one line
			case KB_LNDN:	position += BUFWIDE; break;
			case KB_LNUP:	position -= BUFWIDE; break;

			// move forward/backward by one page
			case KB_PGDN:	position += pageincr; break;
			case KB_PGUP:	position -= pageincr; break;

			// increase/decrease the page-size increment
			case KB_RGHT:	pageincr >>= 4; break;
			case KB_LEFT:	pageincr <<= 4; break;

			// reset the hexadecimal output-format
			case 'B': case 'b':	format = 1; break;
			case 'W': case 'w':	format = 2; break;
			case 'D': case 'd':	format = 4; break;
			case 'Q': case 'q':	format = 8; break;
			case 'O': case 'o':	format = 16; break;

			// seek to a user-specified file-position
			case KB_SEEK:
			printf( "\e[%d;%dHAddress: ", 23, COL );
			fflush( stdout );
			{
			char	inbuf[ 16 ] = {0};
			int	len = 0;
			while ( len < 15 )
				{
				long long	ch = 0;
				// treat end-of-file like a cancelled entry
				if ( read( STDIN_FILENO, &ch, sizeof( ch ) ) <= 0 )
					{ inbuf[0] = 0; break; }
				ch &= 0xFFFFFF;
				if ( ch == '\n' ) break;
				if ( ch == KB_QUIT ) { inbuf[0] = 0; break; }
				if ( ch == KB_LEFT ) ch = KB_BACK;
				if ( ch == KB_DEL ) ch = KB_BACK;
				if (( ch == KB_BACK )&&( len > 0 ))
					{
					inbuf[--len] = 0;
					printf( "\b \b" );
					fflush( stdout );
					}
				if (( ch < 0x20 )||( ch > 0x7E )) continue;
				inbuf[ len++ ] = (char)ch;
				// "%c" takes an int, not a long long
				printf( "%c", (int)ch );
				fflush( stdout );
				}
			printf( "\e[%d;%dH%70s", 23, COL, " " );
			fflush( stdout );
			position = strtoull( inbuf, NULL, 16 );
			position &= ~0xFLL;	// paragraph align
			}
			break;

			// program termination
			case KB_QUIT:	done = 1; break;

			default:
			printf( "\e[%d;%dHHit <ESC> to quit", 23, 2 );
			}
		fflush( stdout );

		// insure that 'position' remains within bounds
		if ( position < posmin ) position = posmin;
		if ( position > posmax ) position = posmax;
		}

	// restore canonical terminal behavior
	if ( have_tty ) tcsetattr( STDIN_FILENO, TCSAFLUSH, &tty_orig );
	printf( "\e[%d;%dH\e[0J\n", 23, 0 );
	close( fd );
	return 0;
}