用户态的程序都是在内存保护模式下使用内存,无法直接访问物理内存。同时用户程序使用的地址,也并不是物理地址,而是逻辑地址。至于这些逻辑地址对应的物理内存在哪里,用户进程本身并不知道。
用户程序若想访问物理内存,必须借助内核才能实现。本文基于内核模块的方式,实现在Linux中用户态程序访问所有物理内存。
1、内核模块编写
通过文件读写的方式,实现物理地址访问。将物理地址,作为参数pos传递。
ssize_t my_read( struct file *file, char *buf, size_t count, loff_t *pos )
在内核代码中,是无法直接访问物理地址的,代码能访问的都是逻辑地址。此时我们需要先将物理地址转换成逻辑地址,才能在代码中对地址读写。
物理地址转换成逻辑地址方法:
1)根据物理地址,计算出对应的页面号和页内偏移
page_number = *pos / PAGE_SIZE;
page_indent = *pos % PAGE_SIZE;
2)将页面号找到对应的页面指针
注意:在2.6.32及以上内核中,没有导出mem_map符号,只能通过
pfn_to_page()来找到对应的页面指针(即需要把下面片段中的 #if 0 改为 #if 1);只有更早的内核才能直接使用mem_map数组。
#if 0
pp =pfn_to_page( page_number);
#else
pp = &mem_map[ page_number ];
#endif
3)通过kmap映射成逻辑地址
from = kmap( pp ) + page_indent;
映射成逻辑地址后,我们直接通过from指针来访问物理地址pos了。
2.x86_64 Address_mapping
1.查看物理内存的工具fileview.c
//----------------------------------------------------------------
// fileview.cpp
//
// This program displays the contents of a specified file
// in hexadecimal and ascii formats (including any device
// special files representing storage media). A user may
// navigate the file's contents using arrow-key commands,
// or may adjust the format of the hexadecimal display to
// select from among five data-sizes: byte (B), word (W),
// doubleword (D), quadword (Q) or octaword (O). It also
// is possible to seek to a specified position within the
// file by hitting the <ENTER>-key and then typing in the
// desired (hexadecimal) address. Type <ESCAPE> to quit.
//
// compile-and-link using: $ make fileview
//
// programmer: ALLAN CRUSE
// written on: 26 OCT 2002
// revised on: 07 JUN 2006 -- removed reliance on 'ncurses'
//----------------------------------------------------------------
#include <stdio.h> // for printf(), perror(), fflush()
#include <fcntl.h> // for open()
#include <string.h> // for strncpy()
#include <unistd.h> // for read(), lseek64()
#include <stdlib.h> // for exit()
#include <termios.h> // for tcgetattr(), tcsetattr()
// Longest filename (in bytes) copied from the command line.
#define MAXNAME 80
// The dump shows BUFHIGH rows of BUFWIDE bytes each; BUFSIZE is the
// number of bytes fetched per screen (16 rows x 16 bytes = 256).
#define BUFHIGH 16
#define BUFWIDE 16
#define BUFSIZE 256
// Screen row/column (1-based, ANSI cursor addressing) of the first dump line.
#define ROW 6
#define COL 2
// Keyboard codes. main() reads the raw bytes of a keypress into an
// integer and keeps the low three bytes, so a multi-byte escape sequence
// shows up with its first byte in the least-significant position:
// e.g. 0x00415B1B is the byte sequence 1B 5B 41 = ESC '[' 'A'.
// NOTE(review): this packing assumes a little-endian host -- confirm
// before building for a big-endian target.
#define KB_SEEK 0x0000000A	// newline ('\n')
#define KB_QUIT 0x0000001B	// ESC
#define KB_BACK 0x0000007F	// DEL (0x7F)
#define KB_HOME 0x00315B1B	// ESC [ 1
#define KB_LNUP 0x00415B1B	// ESC [ A
#define KB_PGUP 0x00355B1B	// ESC [ 5
#define KB_LEFT 0x00445B1B	// ESC [ D
#define KB_RGHT 0x00435B1B	// ESC [ C
#define KB_LNDN 0x00425B1B	// ESC [ B
#define KB_PGDN 0x00365B1B	// ESC [ 6
#define KB_END 0x00345B1B	// ESC [ 4
#define KB_DEL 0x00335B1B	// ESC [ 3
// Title shown centered on the first screen line.
char progname[] = "FILEVIEW";
// Name of the file being viewed (zero-initialized; one spare byte for the NUL).
char filename[ MAXNAME + 1 ];
// Holds the BUFSIZE bytes currently displayed.
char buffer[ BUFSIZE ];
// Scratch buffer in which one formatted screen line is assembled.
char outline[ 80 ];
// Interactive hexadecimal/ascii viewer for the file named by argv[1].
//
// The file is shown BUFSIZE bytes at a time (BUFHIGH rows of BUFWIDE
// bytes).  Keys: arrows / PgUp / PgDn / Home / End navigate, B/W/D/Q/O
// select the hexadecimal data-size, <ENTER> prompts for a hexadecimal
// address to seek to, and <ESCAPE> quits.  The terminal is switched to
// non-canonical, no-echo mode while the viewer runs and is restored on
// normal exit.
//
// Returns 0 on normal termination; exits with status 1 if no filename
// was given, the file cannot be opened, its size cannot be determined,
// or standard input is not a terminal.
int main( int argc, char *argv[] )
{
	// setup the filename (if supplied), else terminate
	// ('filename' is a zero-initialized global holding MAXNAME+1 bytes,
	// so copying at most MAXNAME bytes always leaves it NUL-terminated)
	if ( argc > 1 ) strncpy( filename, argv[1], MAXNAME );
	else { fprintf( stderr, "argument needed\n" ); exit(1); }

	// open the file for reading
	int fd = open( filename, O_RDONLY );
	if ( fd < 0 ) { perror( filename ); exit(1); }

	// obtain the filesize (if possible); the raw result is kept in a
	// signed variable so that a failed seek (-1) is actually detected
	long long endpos = lseek64( fd, 0LL, SEEK_END );
	if ( endpos < 0LL )
	{
		fprintf( stderr, "cannot locate \'end-of-file\' \n" );
		exit(1);
	}
	unsigned long long filesize = endpos;

	long long incmin = ( 1LL << 8 );
	long long incmax = ( 1LL << 36 );
	long long posmin = 0LL;
	// last paragraph-aligned position that still fills (most of) a screen;
	// goes negative for files shorter than 241 bytes and is then clamped
	long long posmax = (endpos - 241LL) & ~0xFLL;
	if ( posmax < posmin ) posmax = posmin;

	// initiate noncanonical terminal input
	struct termios tty_orig;
	if ( tcgetattr( STDIN_FILENO, &tty_orig ) < 0 )
	{
		// without valid settings we could neither configure nor
		// later restore the terminal
		perror( "tcgetattr" );
		exit(1);
	}
	struct termios tty_work = tty_orig;
	tty_work.c_lflag &= ~( ECHO | ICANON ); // | ISIG );
	tty_work.c_cc[ VMIN ] = 1;
	tty_work.c_cc[ VTIME ] = 0;
	tcsetattr( STDIN_FILENO, TCSAFLUSH, &tty_work );
	printf( "\e[H\e[J" );

	// display the legend
	int i, j, k;
	k = (77 - strlen( progname ))/2;
	printf( "\e[%d;%dH %s ", 1, k, progname );
	k = (77 - strlen( filename ))/2;
	printf( "\e[%d;%dH\'%s\'", 3, k, filename );
	char infomsg[ 80 ];
	sprintf( infomsg, "filesize: %llu (=0x%013llX)", filesize, filesize );
	k = (78 - strlen( infomsg ));
	printf( "\e[%d;%dH%s", 24, k, infomsg );
	fflush( stdout );

	// main loop to navigate the file
	long long pageincr = incmin;
	long long lineincr = 16LL;
	long long position = 0LL;
	long long location = 0LL;
	int format = 1;
	int done = 0;
	while ( !done )
	{
		// erase prior buffer contents (bytes not read show as FF)
		for (j = 0; j < BUFSIZE; j++) buffer[ j ] = ~0;

		// restore 'pageincr' to prescribed bounds
		// (a left-shift past the top bit wraps to zero => use the maximum)
		if ( pageincr == 0LL ) pageincr = incmax;
		else if ( pageincr < incmin ) pageincr = incmin;
		else if ( pageincr > incmax ) pageincr = incmax;

		// get current location of file-pointer position
		location = lseek64( fd, position, SEEK_SET );

		// try to fill 'buffer[]' with data from the file
		// (a single read() may return fewer bytes than requested)
		char *where = buffer;
		int to_read = BUFSIZE;
		while ( to_read > 0 )
		{
			int nbytes = read( fd, where, to_read );
			if ( nbytes <= 0 ) break;
			to_read -= nbytes;
			where += nbytes;
		}

		// display the data just read into the 'buffer[]' array
		unsigned char *bp;
		unsigned short *wp;
		unsigned int *dp;
		unsigned long long *qp;
		for (i = 0; i < BUFHIGH; i++)
		{
			int linelen;

			// draw the line-location (13-digit hexadecimal)
			linelen = sprintf( outline, "%013llX ", location );

			// draw the line in the selected hexadecimal format
			switch ( format )
			{
			case 1: // 'byte' format
				bp = (unsigned char*)&buffer[ i*BUFWIDE ];
				for (j = 0; j < BUFWIDE; j++)
					linelen += sprintf( outline+linelen,
						"%02X ", bp[j] );
				break;
			case 2: // 'word' format
				wp = (unsigned short*)&buffer[ i*BUFWIDE ];
				for (j = 0; j < BUFWIDE/2; j++)
					linelen += sprintf( outline+linelen,
						" %04X ", wp[j] );
				break;
			case 4: // 'dword' format
				dp = (unsigned int*)&buffer[ i*BUFWIDE ];
				for (j = 0; j < BUFWIDE/4; j++)
					linelen += sprintf( outline+linelen,
						" %08X ", dp[j] );
				break;
			case 8: // 'qword' format
				qp = (unsigned long long*)&buffer[ i*BUFWIDE ];
				for (j = 0; j < BUFWIDE/8; j++)
					linelen += sprintf( outline+linelen,
						" %016llX ", qp[j] );
				break;
			case 16: // 'octaword' (high quadword printed first)
				qp = (unsigned long long*)&buffer[ i*BUFWIDE ];
				linelen += sprintf( outline+linelen, " " );
				linelen += sprintf( outline+linelen,
					" %016llX%016llX ", qp[1], qp[0] );
				linelen += sprintf( outline+linelen, " " );
				break;
			}

			// draw the line in ascii format
			for (j = 0; j < BUFWIDE; j++)
			{
				char ch = buffer[ i*BUFWIDE + j ];
				if (( ch < 0x20 )||( ch > 0x7E )) ch = '.';
				linelen += sprintf( outline+linelen, "%c", ch);
			}

			// transfer this output-line to the screen
			printf( "\e[%d;%dH%s", ROW+i, COL, outline );

			// advance 'location' for the next output-line
			location += BUFWIDE;
		}
		printf( "\e[%d;%dH", 23, COL );
		fflush( stdout );

		// await keypress; the bytes of an escape sequence are read
		// straight into an integer (see the KB_* codes).  End-of-input
		// or a read error ends the session instead of spinning forever.
		long long inch = 0LL;
		if ( read( STDIN_FILENO, &inch, sizeof( inch ) ) <= 0 ) break;
		printf( "\e[%d;%dH%60s", 23, COL, " " );

		// interpret navigation or formatting command
		inch &= 0x00FFFFFFLL;
		switch ( inch )
		{
		// move to the file's beginning/ending
		case 'H': case 'h':
		case KB_HOME: position = posmin; break;
		case 'E': case 'e':
		case KB_END: position = posmax; break;

		// move forward/backward by one line
		case KB_LNDN: position += BUFWIDE; break;
		case KB_LNUP: position -= BUFWIDE; break;

		// move forward/backward by one page
		case KB_PGDN: position += pageincr; break;
		case KB_PGUP: position -= pageincr; break;

		// increase/decrease the page-size increment
		case KB_RGHT: pageincr >>= 4; break;
		case KB_LEFT: pageincr <<= 4; break;

		// reset the hexadecimal output-format
		case 'B': case 'b': format = 1; break;
		case 'W': case 'w': format = 2; break;
		case 'D': case 'd': format = 4; break;
		case 'Q': case 'q': format = 8; break;
		case 'O': case 'o': format = 16; break;

		// seek to a user-specified file-position
		case KB_SEEK:
			printf( "\e[%d;%dHAddress: ", 23, COL );
			fflush( stdout );
			{
				char inbuf[ 16 ] = {0};
				//tcsetattr( STDIN_FILENO, TCSANOW, &tty_orig );
				int len = 0;	// characters collected so far
				while ( len < 15 )
				{
					long long ch = 0;
					// stop collecting on end-of-input or error
					if ( read( STDIN_FILENO, &ch, sizeof( ch ) ) <= 0 ) break;
					ch &= 0xFFFFFF;
					if ( ch == '\n' ) break;
					// <ESCAPE> discards whatever was typed
					if ( ch == KB_QUIT ) { inbuf[0] = 0; break; }
					// left-arrow and delete act as backspace
					if ( ch == KB_LEFT ) ch = KB_BACK;
					if ( ch == KB_DEL ) ch = KB_BACK;
					if (( ch == KB_BACK )&&( len > 0 ))
					{
						inbuf[--len] = 0;
						printf( "\b \b" );
						fflush( stdout );
					}
					// ignore anything that is not printable ascii
					if (( ch < 0x20 )||( ch > 0x7E )) continue;
					inbuf[ len++ ] = ch;
					// "%c" expects an int, not a long long
					printf( "%c", (int)ch );
					fflush( stdout );
				}
				printf( "\e[%d;%dH%70s", 23, COL, " " );
				fflush( stdout );
				position = strtoull( inbuf, NULL, 16 );
				position &= ~0xFLL; // paragraph align
			}
			break;

		// program termination
		case KB_QUIT: done = 1; break;

		default:
			printf( "\e[%d;%dHHit <ESC> to quit", 23, 2 );
		}
		fflush( stdout );

		// insure that 'position' remains within bounds
		if ( position < posmin ) position = posmin;
		if ( position > posmax ) position = posmax;
	}

	// restore canonical terminal behavior
	tcsetattr( STDIN_FILENO, TCSAFLUSH, &tty_orig );
	printf( "\e[%d;%dH\e[0J\n", 23, 0 );
	return 0;
}
这是国外某位大神写的,可以在用户程序查看物理内存,原理很简单,但实现比较复杂(有些涉及到终端控制台的地方没怎么接触,反正会用就行)。通过向内核注册一个字符设备,本着linux“万物皆文件”的原则字符设备文件也不例外,通过把所有物理内存映射到这个字符设备,这样就可以访问查看物理内存了。
2.注册字符设备
//-------------------------------------------------------------------
// dram.c
//
// This module implements a Linux character-mode device-driver
// for the processor's installed physical memory. It utilizes
// the kernel's 'kmap()' function, as a uniform way to provide
// access to all the memory-zones (including the "high memory"
// on systems with more than 896MB of installed physical ram).
// The access here is 'read-only' because we deem it too risky
// to the stable functioning of our system to allow every user
// the unrestricted ability to arbitrarily modify memory-areas
// which might contain some "critical" kernel data-structures.
// We implement an 'llseek()' method so that users can readily
// find out how much physical processor-memory is installed.
//
// NOTE: Developed and tested with Linux kernel version 2.6.10
//
// programmer: ALLAN CRUSE
// written on: 30 JAN 2005
// revised on: 28 JAN 2008 -- for Linux kernel version 2.6.22.5
// revised on: 06 FEB 2008 -- for machines having 4GB of memory
//-------------------------------------------------------------------
#include <linux/module.h> // for module_init()
#include <linux/highmem.h> // for kmap(), kunmap()
#include <asm/uaccess.h> // for copy_to_user()
char modname[] = "dram"; // for displaying driver's name
int my_major = 85; // note static major assignment
unsigned long dram_size; // total bytes of system memory

// forward declarations of the two file-operations this driver provides
loff_t my_llseek( struct file *file, loff_t offset, int whence );
ssize_t my_read( struct file *file, char *buf, size_t count, loff_t *pos );

// Operations table handed to register_chrdev(): the device supports
// seeking and reading only (no write method is supplied).
// Standard C99 designated initializers replace the obsolete GNU
// 'field: value' syntax.
struct file_operations
my_fops = {
	.owner = THIS_MODULE,
	.llseek = my_llseek,
	.read = my_read,
};
// Module entry point: logs the module name and major number, computes
// 'dram_size' from the kernel's page count, logs it, and registers the
// character device.  Returns whatever register_chrdev() returns.
static int __init dram_init( void )
{
	int status;

	printk( "<1>\nInstalling '%s' module ", modname );
	printk( "(major=%d)\n", my_major );

	// total bytes of memory = number of physical pages * page size
	dram_size = (unsigned long)num_physpages << PAGE_SHIFT;
	printk( "<1> ramtop=%08lX (%lu MB)\n", dram_size, dram_size >> 20 );

	status = register_chrdev( my_major, modname, &my_fops );
	return status;
}
// Module exit point: unregisters the character device that dram_init()
// registered, then logs the removal.
static void __exit dram_exit( void )
{
	unregister_chrdev( my_major, modname );

	printk( "<1>Removing '%s' module\n", modname );
}
// read() method of the 'dram' device: copies bytes of physical memory,
// starting at physical address *pos, into the user buffer 'buf'.
//
// At most one page is handled per call: the page containing *pos is
// temporarily mapped with kmap() and the request is trimmed so that it
// never crosses the end of that page (callers simply read again for
// more data).
//
// Returns the number of bytes copied (advancing *pos by that amount),
// 0 when *pos is at or beyond the end of memory, or -EFAULT if the
// user buffer could not be written.
ssize_t my_read( struct file *file, char *buf, size_t count, loff_t *pos )
{
	struct page *pp;
	void *from;
	int page_number, page_indent;
	unsigned long more;

	// we cannot read beyond the end-of-file
	if ( *pos >= dram_size ) return 0;

	// determine which physical page to temporarily map
	// and how far into that page to begin reading from
	page_number = *pos / PAGE_SIZE;
	page_indent = *pos % PAGE_SIZE;

	// map the designated physical page into kernel space
	// (kernel version 2.6.32 and later no longer export 'mem_map', so
	// pfn_to_page() from asm-generic/memory_model.h is used instead of
	// indexing mem_map[] directly)
	pp = pfn_to_page( page_number);
	from = kmap( pp ) + page_indent;

	// cannot reliably read beyond the end of this mapped page
	if ( page_indent + count > PAGE_SIZE ) count = PAGE_SIZE - page_indent;

	// now transfer count bytes from mapped page to user-supplied buffer;
	// copy_to_user() returns the number of bytes it could NOT copy
	more = copy_to_user( buf, from, count );

	// ok now to discard the temporary page mapping
	kunmap( pp );

	// an error occurred if any bytes were left uncopied
	// (the former test 'more < count' was true on every successful
	// copy, so every read failed with -EFAULT)
	if ( more ) return -EFAULT;

	// otherwise advance file-pointer and report number of bytes read
	*pos += count;
	return count;
}
// llseek() method of the 'dram' device.  The "file size" is 'dram_size',
// so seeking to the end reports how much memory was counted.
//
// 'whence' is 0 (SEEK_SET), 1 (SEEK_CUR) or 2 (SEEK_END).  Returns the
// new file position, or -EINVAL if 'whence' is not one of those values
// or the resulting position lies outside [0, dram_size].
loff_t my_llseek( struct file *file, loff_t offset, int whence )
{
	// signed and as wide as the offset, so that a negative result is
	// detectable and large offsets are not truncated
	loff_t newpos;

	switch( whence )
	{
	case 0: newpos = offset; break; // SEEK_SET
	case 1: newpos = file->f_pos + offset; break; // SEEK_CUR
	case 2: newpos = (loff_t)dram_size + offset; break; // SEEK_END
	default: return -EINVAL; // unknown 'whence'
	}

	if (( newpos < 0 )||( newpos > (loff_t)dram_size )) return -EINVAL;
	file->f_pos = newpos;
	return newpos;
}
MODULE_LICENSE("GPL");
module_init( dram_init );
module_exit( dram_exit );
3.读取cr3
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/desc.h>
//#include <asm/system.h>
static char modname[] = "sys_reg";

// In-memory image stored by the 'sgdt' instruction: a 16-bit table
// limit immediately followed by the table's base address.  The address
// member is packed so that no padding is inserted after 'limit'.
// 'limit' is unsigned: as a signed short, any limit of 0x8000 or above
// would turn negative and be sign-extended when printed.
struct gdtr_struct{
	unsigned short limit;
	unsigned long address __attribute__((packed));
};

// most recently sampled control-register values and GDTR contents
static unsigned long cr0,cr3,cr4;
static struct gdtr_struct gdtr;
// Read handler for the /proc entry: samples CR0, CR3, CR4 and the GDTR
// on the current CPU and formats them, together with the page-directory
// pointer of the current task's active mm, as text into 'buf'.
//
// The handler is registered through create_proc_read_entry(), which
// invokes its callback with six arguments (page, start, off, count,
// eof, data); the parameter list therefore follows that read_proc_t
// convention.  'start', 'off', 'count' and 'data' are not used.  '*eof'
// is set because the whole report is produced in a single call.
//
// Returns the number of bytes written to 'buf'.
static int my_get_info( char *buf, char **start, off_t off, int count,
			int *eof, void *data )
{
	int len = 0;
	struct mm_struct *mm;

	mm = current->active_mm;
	cr0 = read_cr0();
	cr3 = read_cr3();
	cr4 = read_cr4();
	// store the GDT register (limit + base) into 'gdtr'
	asm(" sgdt %0\n":"=m"(gdtr));

	len += sprintf( buf+len, "cr4=%lX ", cr4 );
	len += sprintf( buf+len, "PSE=%lX ", (cr4>>4)&1 );	// CR4 bit 4
	len += sprintf( buf+len, "PAE=%lX ", (cr4>>5)&1 );	// CR4 bit 5
	len += sprintf( buf+len, "\n" );
	len += sprintf( buf+len, "cr3=%lX cr0=%lX\n",cr3,cr0);
	len += sprintf( buf+len, "pgd:0x%lX\n",(unsigned long)mm->pgd);
	// go through 'unsigned short' so the 16-bit limit is never printed
	// sign-extended
	len += sprintf( buf+len, "gdtr address:%lX, limit:%X\n", gdtr.address,
			(unsigned int)(unsigned short)gdtr.limit);

	*eof = 1;
	return len;
}
int init_module( void )
{
printk( "<1>\nInstalling \'%s\' module\n", modname );
create_proc_read_entry( modname, 0, NULL, my_get_info, NULL);
return 0;
}
// Module exit point: removes the /proc entry named after 'modname',
// then logs the removal.
void cleanup_module( void )
{
	remove_proc_entry( modname, NULL );

	printk( "<1>Removing '%s' module\n", modname );
}
MODULE_LICENSE("GPL");
4.用户测试程序
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#define REGISTERINFO "/proc/sys_reg"
#define BUFSIZE 4096
static char buf[BUFSIZE];
static unsigned long addr;
/*
 * FILE_TO_BUF(filename, fd): read the start of 'filename' into the
 * global 'buf' and NUL-terminate it.
 *
 * 'fd' must be an int lvalue.  If it is -1 the file is opened read-only
 * and the descriptor is stored back into 'fd'; otherwise the already
 * open descriptor is rewound and reused.  At most sizeof buf - 1 bytes
 * are read.  On failure a message is printed and the process exits with
 * status 102 (open failed) or 103 (read failed).
 *
 * The open-failure message names the file that was actually requested
 * instead of a hard-coded path.
 */
#define FILE_TO_BUF(filename, fd) do{ \
	static int local_n; \
	if ((fd) == - 1 && ((fd) = open((filename), O_RDONLY)) == - 1) { \
		fprintf(stderr, "Open %s failed! \n", (filename)); \
		fflush(NULL); \
		_exit(102); \
	} \
	lseek((fd), 0L, SEEK_SET); \
	if ((local_n = read((fd), buf , sizeof buf - 1)) < 0) { \
		perror(filename); \
		fflush(NULL); \
		_exit(103); \
	} \
	buf [local_n] = 0; \
}while(0)
/*
 * Test program: places a recognizable 64-bit pattern in a stack
 * variable, prints that variable's virtual address followed by the
 * register report read from REGISTERINFO, and then stays alive forever
 * so the pattern can be located in physical memory while the process
 * still exists.  Terminate it with a signal (e.g. Ctrl-C).
 */
int main()
{
	/* volatile: the store must really reach memory even though the
	   program never reads the value back */
	volatile unsigned long tmp;
	static int cr_fd = - 1;

	tmp = 0x12345678beaf5dde;
	printf("tmp address:0x%lX\n", (unsigned long)&tmp);
	FILE_TO_BUF(REGISTERINFO, cr_fd);
	printf("%s", buf );

	/* the process never exits, so push the output out explicitly
	   (otherwise nothing appears when stdout is a pipe or a file) */
	fflush(stdout);

	/* sleep until a signal arrives instead of spinning a CPU */
	for (;;)
		pause();

	return 0;	/* not reached */
}
5、64位地址映射分析
5.1.段映射
5.1.1、内核定义段描述符表
arch/x86/kernel/cpu/common.c
87 #ifdef CONFIG_X86_64
88 /*
89 * We need valid kernel segments for data and code in long mode too
90 * IRET will check the segment types kkeil 2000/10/28
91 * Also sysret mandates a special GDT layout
92 *
93 * TLS descriptors are currently at a different place compared to i386.
94 * Hopefully nobody expects them at a fixed place (Wine?)
95 */
96 [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
97 [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
98 [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff),
99 [GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff),
100 [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff),
101 [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff),
102 #else
103 [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff),
104 [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
105 [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff),
106 [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff),
/*a是低32位,b是高32位*/
37 #define GDT_ENTRY_INIT(flags, base, limit) { { { \
38 .a = ((limit) & 0xffff) | (((base) & 0xffff) << 16), \
39 .b = (((base) & 0xff0000) >> 16) | (((flags) & 0xf0ff) << 8) | \
40 ((limit) & 0xf0000) | ((base) & 0xff000000), \
41 } } }
以上面的用户数据段 GDT_ENTRY_DEFAULT_USER_DS 为例,其段描述符为:
GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff)展开后 a = 0x0000ffff、b = 0x00cff300,合起来就是 0x00 cff3 000000 ffff
5.1.2、实验验证
linux/x86/include/asm/segment.h
162 #define GDT_ENTRY_DEFAULT_USER32_CS 4
163 #define GDT_ENTRY_DEFAULT_USER_DS 5
164 #define GDT_ENTRY_DEFAULT_USER_CS 6
165 #define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8+3)
166 #define __USER32_DS __USER_DS
187 #define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8)
188 #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
189 #define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8+3)
190 #define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8+3)
故__USER_DS = 5*8 + 3 = 43
0000 0000 00101 011 (段索引是5)
我们的测试代码中gdt的逻辑地址是gdtr address:0xFFFF88011FC84000,对应的物理地址是0x11fc84000(内核直接映射区的起始地址是0xFFFF880000000000,该区域内的虚拟地址减去这个起始地址就是物理地址)。以5为索引(每一项64位(8字节),偏移为5*8=0x28),故物理地址(0x000011FC84028)处的内容即是段描述符信息:(FF FF 00 00 00 F3 CF 00),表示为64位段描述符即0x00 CFF3 000000 FFFF(正是5.1.1中的用户数据段(ds)描述符信息)
可见段基地址是0(搞了半天,段映射根本没用呢),逻辑地址就是线性地址(虚拟地址)0x7FFF363E1158
5.2.页映射
tmp 48bit virtual address:0x7FFF363E1158
011111111 111111100 110110001 111100001 000101011000
0ff 1fc 1b1 1e1 158
计算过程:
cr3=BD3F8000
CR3寄存器的值为0xBD3F8000,这是第一级映射表的起始物理地址。
1.(0ff*8)7f8 + 0xBD3F8000 = 0xBD3F87f8(该物理地址处存放的表项给出第二级映射表的物理地址,*8表示每一项8字节(64位地址值))
67 90 F1 A6 00 00 00 00 ------> 00000000 A6F19067 ----------->0xA6F19000(52位用作物理地址,后12位067是页属性)(第二级映射(上层页目录表)的物理地址)
2.(1fc*8)fe0 + 0xA6F19000 = 0xA6F19fe0
67 C0 D4 A6 00 00 00 00 ------> 00000000 A6D4C067 ----------->0xA6D4C000(52位用作物理地址,后12位067是页属性)(第三级映射(中间页目录表)的物理地址)
3.(1b1*8)d88 + 0xA6D4C000 = 0xA6D4Cd88
67 30 4C 8D 00 00 00 00 ------> 00000000 8D4C3067 ----------->0x8D4C3000(52位用作物理地址,后12位067是页属性)(第四级映射(页表)的物理地址)
4.(1e1*8)f08 + 0x8D4C3000 = 0x8D4C3f08
67 50 6A 76 00 00 00 00 ------> 00000000 766A5067 ----------->0x766A5000(52位用作物理地址,后12位067是页属性)(页表项指向的最终物理页面(页框)的起始物理地址)
5.158 + 0x766A5000 = 0x766A5158
DE 5D AF BE 78 56 34 12 (0x12345678beaf5dde)
得到验证了,果然是tmp的值,这就是整个逻辑地址到物理地址映射的全过程
这是那个读cr3寄存器的内核实现
arch/x86/include/asm/system.h
312 #define read_cr3() (native_read_cr3())
246 static inline unsigned long native_read_cr3(void)
247 {
248 unsigned long val;
249 asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order));
250 return val;
251 }