《程序员的自我修养》一书在最后一章专门介绍了一款小型的C语言运行库(MiniCRT),并给出了详细的代码实现。阅读该代码实现,可以对C语言运行库提供的语言抽象层有更深的理解。Talk is cheap, show me the code!
minicrt.h: MiniCRT的文件头
#ifndef __MINI_CRT_H__
#define __MINI_CRT_H__
/*
 * minicrt.h - public declarations of the MiniCRT runtime.
 *
 * The include guard encloses the whole header, so including it more than
 * once in a translation unit does not repeat the declarations below
 * (a repeated typedef is an error before C11).
 */
/* Keep C linkage (unmangled symbol names) when the header is used from C++. */
#ifdef __cplusplus
extern "C" {
#endif
// heap
#ifndef NULL
#define NULL (0)
#endif
void free(void* ptr);
void* malloc( unsigned size );
/* brk() is a file-local (static) helper of malloc.c and is therefore not
   declared in this shared header. */
int mini_crt_init_heap();
// strings
char* itoa(int n, char* str, int radix);
int strcmp(const char* src, const char* dst);
char* strcpy(char* dest, const char* src);
unsigned strlen(const char* str);
// files and I/O
typedef int FILE;
#define EOF (-1)
/* A FILE* is not the address of a FILE structure here: on Windows it carries
   the handle returned by GetStdHandle, on Linux it carries the file
   descriptor number (0, 1, 2 for the standard streams). */
#ifdef WIN32
#define stdin ( (FILE*) (GetStdHandle(STD_INPUT_HANDLE)) )
#define stdout ( (FILE*) (GetStdHandle(STD_OUTPUT_HANDLE)) )
#define stderr ( (FILE*) (GetStdHandle (STD_ERROR_HANDLE)) )
#else
#define stdin ((FILE*) 0)
#define stdout ((FILE*) 1)
#define stderr ((FILE*) 2)
#endif
/* MiniCRT has no stream buffering and no open-file table, so the I/O layer
   needs no real initialisation; this function is effectively empty. */
int mini_crt_init_io();
FILE* fopen( const char* filename, const char* mode);
int fread(void* buffer, int size, int count, FILE* stream);
int fwrite(const void* buffer, int size, int count, FILE* stream);
int fclose(FILE* fp);
int fseek(FILE* fp, int offset, int set);
// printf family
int fputc( int c, FILE* stream );
int fputs( const char* str, FILE* stream);
int printf( const char* format, ...);
int fprintf(FILE* stream, const char* format,...);
// internal
void do_global_ctors();
void mini_crt_call_exit_routine();
// atexit
typedef void (*atexit_func_t) (void);
int atexit(atexit_func_t func);
#ifdef __cplusplus
}
#endif
#endif /* __MINI_CRT_H__ */
entry.c:运行库入口函数定义
#include "minicrt.h"
#ifdef WIN32
#include <Windows.h>
#endif
//entry.c
extern int main(int argc, char* argv[]);//申明程序员编写的程序中的main函数
void exit(int);
/*
 * Abort start-up after an unrecoverable runtime initialisation failure.
 * The message is accepted but not reported (printing is disabled); the
 * process simply terminates with exit status 1.
 */
static void crt_fatal_error(const char* msg)
{
    (void)msg; /* intentionally unused while reporting is disabled */
    exit(1);
}
/*
 * mini_crt_entry - process entry point of MiniCRT.
 *
 * Builds argc/argv, initialises the heap and the I/O layer, calls the
 * program's main() and passes its return value to exit().  Never returns.
 *
 * Windows: the command line returned by GetCommandLineA() is split in place
 * on runs of spaces that are outside double quotes.  At most 16 arguments
 * are recorded; further arguments are ignored instead of overflowing argv.
 *
 * Linux: argc and argv are read relative to the frame pointer (ebp+4 and
 * ebp+8).  NOTE(review): this assumes the function is entered directly from
 * the kernel with a standard `push ebp` prologue - confirm when changing
 * compiler flags.
 */
void mini_crt_entry(void)
{
    int ret;
#ifdef WIN32
    int flag = 0;               /* non-zero while inside a quoted string */
    int argc = 0;
    char* argv[16];             /* at most 16 arguments */
    int sequentialSpaceNum = 0; /* length of the current run of spaces */
    char* cl = GetCommandLineA();
    /* parse the command line in place */
    argv[0] = cl;
    argc++;
    while(*cl) {
        if(*cl == '\"'){
            /* spaces inside quotes belong to the argument */
            flag = !flag;
        }
        else if(*cl == ' ' && flag == 0) {
            /* measure the whole run of separating spaces */
            sequentialSpaceNum = 1;
            while( *(cl+sequentialSpaceNum) == ' '){
                sequentialSpaceNum++;
            }
            /* record the next argument only if one follows and there is
               still room in argv */
            if( *(cl+sequentialSpaceNum)
                && argc < (int)(sizeof(argv) / sizeof(argv[0])) ) {
                argv[argc] = cl + sequentialSpaceNum;
                argc++;
            }
            *cl = '\0';
            cl = cl + sequentialSpaceNum;
            /* cl now points at the first character of the next argument (or
               at the terminator); re-examine it instead of stepping over it,
               so a leading quote is seen and the NUL is never skipped */
            continue;
        }
        cl++;
    }
#else
    int argc;
    char** argv;
    char* ebp_reg;
    asm(" movl %%ebp, %0 \n\t":"=r"(ebp_reg));
    argc = *(int*)(ebp_reg + 4);
    argv = (char**)(ebp_reg + 8);
#endif
    if (!mini_crt_init_heap())
        crt_fatal_error("heap initialize failed");
    if (!mini_crt_init_io())
        crt_fatal_error("IO initialize failed");
    ret = main(argc, argv);
    exit(ret);
}
/*
 * exit - terminate the process with the given status code.
 *
 * Windows: delegates to ExitProcess().
 * Linux (i386): loads exitCode into ebx and 1 into eax, then issues
 * `int $0x80` (system call number 1 is exit on i386 Linux).
 */
void exit(int exitCode)
{
/* The atexit routine hook is currently disabled: */
//mini_crt_call_exit_routine();
#ifdef WIN32
ExitProcess(exitCode);
#else
/* `hlt` after the system call is presumably a guard in case the call
   were ever to return - TODO confirm. */
asm("movl %0, %%ebx \n\t"
"movl $1, %%eax \n\t"
"int $0x80 \n\t"
"hlt \n\t"::"m"(exitCode));
#endif
}
malloc.c:堆的初始化和功能实现
//堆的实现
/*在遵循Mini CRT的原则下,我们将Mini CRT堆的实现归纳为以下几条
1.实现一个以空闲链表算法为基础的堆空间分配算法;
2.为了简单起见,堆空间大小固定为32MB,初始化后空间不再扩展或缩小;
3.在Windows平台下不使用HeapAlloc等堆分配算法,采用VirtualAlloc 向系统直接申请32MB空间,由我们自己的堆分配算法实现malloc
4.在Linux平台下,使用brk将数据段结束地址向后调整32MB,将这块空间作为堆空间
*/
/*
brk系统调用可以设置进程的数据段(.data)边界,而sbrk可以移动进程的数据段边界,显然如果将数据段边界后移,就相当于分配了一定量的内存。但是这段内存初始只是分配了虚拟空间,这些空间的申请一开始是不会提交的(即不会分配物理页面),当进程试图访问一个地址的时候,操作系统会检测到缺页异常,从而为被访问的地址所在的页分配物理页面。
故而这种被动的物理内存分配,又被称为按需分配,即不访问就不分配。
*/
#include "minicrt.h"
/*
 * Header stored in front of every heap block.  Blocks form a doubly linked
 * list in address order; `type` holds a magic number telling whether the
 * block is free or in use.
 */
typedef struct _heap_header
{
    enum{
        HEAP_BLOCK_FREE = 0xABABABAB, // magic number of a free block
        HEAP_BLOCK_USED = 0xCDCDCDCD, // magic number of an allocated block
    }type;
    unsigned size; // size of the block in bytes, header included
    struct _heap_header* next;
    struct _heap_header* prev;
}heap_header;
/* Byte-wise pointer arithmetic: address `a` advanced by `o` bytes.  Both
   arguments are parenthesised so any expression may be passed as offset. */
#define ADDR_ADD(a,o) ( ((char*) (a)) + (o) )
#define HEADER_SIZE (sizeof(heap_header))
/* First block of the heap; NULL until mini_crt_init_heap() has run. */
static heap_header* list_head = NULL;
/*
 * free - return a block obtained from malloc() to the heap.
 *
 * ptr may be NULL (no-op).  A pointer whose header does not carry the
 * "used" magic number (e.g. a block that was already freed) is ignored.
 * The released block is merged with a free predecessor and/or a free
 * successor so that adjacent free blocks never stay split.
 */
void free(void* ptr)
{
    heap_header* header;
    if (ptr == NULL)
        return;
    /* the header sits immediately in front of the user pointer */
    header = (heap_header*) ADDR_ADD(ptr, -(int)HEADER_SIZE);
    if(header->type != HEAP_BLOCK_USED)
        return;
    header->type = HEAP_BLOCK_FREE;
    if(header->prev != NULL && header->prev->type == HEAP_BLOCK_FREE) {
        /* previous block is free too: let it absorb this block */
        header->prev->next = header->next;
        if(header->next != NULL)
            header->next->prev = header->prev;
        header->prev->size += header->size;
        header = header->prev;
    }
    if(header->next != NULL && header->next->type == HEAP_BLOCK_FREE) {
        /* next block is free too: absorb it */
        header->size += header->next->size;
        header->next = header->next->next;
        /* the block after the absorbed one must point back at us,
           otherwise its prev link dangles into the merged area */
        if(header->next != NULL)
            header->next->prev = header;
    }
}
/*
 * malloc - first-fit allocation from the MiniCRT free list.
 *
 * Returns a pointer to `size` usable bytes, or NULL when size is 0, when
 * the request is too large to be represented, or when no free block is big
 * enough.  A free block is handed out whole when the remainder would be too
 * small to hold another header plus at least one byte; otherwise it is
 * split and the tail stays on the list as a free block.
 */
void* malloc( unsigned size )
{
    heap_header* header;
    unsigned needed;
    if(size == 0)
        return NULL;
    /* size + 2 headers must not wrap around */
    if (size > (unsigned)-1 - 2 * HEADER_SIZE)
        return NULL;
    needed = size + HEADER_SIZE;
    for (header = list_head; header != NULL; header = header->next) {
        if (header->type == HEAP_BLOCK_USED)
            continue;
        if (header->size < needed)
            continue;   /* too small */
        if (header->size <= needed + HEADER_SIZE) {
            /* exact fit, or the leftover could not host a useful block:
               give away the whole block */
            header->type = HEAP_BLOCK_USED;
            return ADDR_ADD(header, HEADER_SIZE);
        }
        {
            /* split: the tail becomes a new free block */
            heap_header* next = (heap_header*) ADDR_ADD(header, needed);
            next->prev = header;
            next->next = header->next;
            next->type = HEAP_BLOCK_FREE;
            next->size = header->size - needed;
            if (header->next != NULL)
                header->next->prev = next;
            header->next = next;
            header->size = needed;
            header->type = HEAP_BLOCK_USED;
            return ADDR_ADD(header, HEADER_SIZE);
        }
    }
    return NULL;
}
#ifndef WIN32
//Linux brk system call
static int brk(void* end_data_segment) {
int ret = 0;
//brk system call number:45
//in /usr/include/asm-i386/unistd.h:
//#define __NR_brk 45
asm("movl $45, %%eax \n\t"
"movl %1, %%ebx \n\t"
"int $0x80 \n\t"
"movl %%eax, %0 \n\t"
:"=r"(ret):"m"(end_data_segment) );
}
#endif
#ifdef WIN32
#include <Windows.h>
#endif
/*
 * mini_crt_init_heap - reserve the fixed 32 MB heap and set up the free list.
 *
 * Windows: the memory comes from VirtualAlloc().
 * Linux: the program break is moved forward by 32 MB with brk().
 * On success the whole area becomes one free block that list_head points to.
 *
 * Returns 1 on success, 0 on failure.
 */
int mini_crt_init_heap()
{
    void* base = NULL;
    heap_header* header = NULL;
    // 32MB heap size
    unsigned heap_size = 1024*1024*32;
#ifdef WIN32
    base = VirtualAlloc(0, heap_size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
    if (base == NULL)
        return 0;
#else
    void* end;
    base = (void*)brk(0);           /* current program break */
    end = ADDR_ADD(base, heap_size);
    /* the raw system call returns the new break; anything other than the
       requested address means the break was not moved */
    if (base == NULL || (void*)brk(end) != end)
    {
        printf("Linux create heap fail\n");
        return 0;
    }
#endif
    /* base is known to be valid here, so it is safe to write the header */
    header = (heap_header*) base;
    header->size = heap_size;
    header->type = HEAP_BLOCK_FREE;
    header->next = NULL;
    header->prev = NULL;
    list_head = header;
    return 1;
}
string.c:字符串相关函数的封装和实现
/*这部分实现的是字符串相关的操作,主要是包括计算字符串长度、比较两个字符串、整数与字符串之间的转换等,由于这部分无需涉及任何和内核的交互,是纯粹的用户态的计算。实现较为简单*/
#include "minicrt.h"
/*
 * itoa - convert an integer to its textual form.
 *
 * n      value to convert
 * str    destination buffer; must be large enough for the digits, an
 *        optional '-' and the terminating NUL
 * radix  numeric base, 2..36 (digits above 9 are written as 'A'..'Z')
 *
 * Returns str.  Nothing is written when str is NULL, radix is out of range,
 * or n is negative and radix is not 10 (negative values are only supported
 * in base 10).
 */
char* itoa(int n, char* str, int radix)
{
    static const char digit[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    char* p = str;
    char* head = str;
    unsigned value;
    if(!p || radix < 2 || radix > 36)
        return p;
    if(radix != 10 && n < 0)
        return p;
    if (n < 0)
    {
        *p++ = '-';
        head = p;                    /* the sign must not take part in the reversal */
        value = 0u - (unsigned)n;    /* magnitude without overflowing on INT_MIN */
    }
    else
    {
        value = (unsigned)n;
    }
    /* emit digits least-significant first; do/while also covers n == 0 */
    do
    {
        *p++ = digit[value % (unsigned)radix];
        value /= (unsigned)radix;
    } while (value != 0);
    *p = '\0';
    /* the digits were produced in reverse order: swap them into place */
    for (--p; head < p; ++head, --p)
    {
        char temp = *head;
        *head = *p;
        *p = temp;
    }
    return str;
}
/*
 * strcmp - compare two NUL-terminated strings byte by byte (as unsigned
 * characters).  Returns -1, 0 or 1 when src sorts before, equal to, or
 * after dst.
 */
int strcmp (const char* src, const char* dst)
{
    const unsigned char* lhs = (const unsigned char*)src;
    const unsigned char* rhs = (const unsigned char*)dst;
    int diff;
    for (;;)
    {
        diff = *lhs - *rhs;
        /* stop at the first difference or at the common terminator */
        if (diff != 0 || *rhs == 0)
            break;
        ++lhs;
        ++rhs;
    }
    /* collapse the difference to its sign */
    return (diff > 0) - (diff < 0);
}
/*
 * strcpy - copy the NUL-terminated string src, terminator included, into
 * dest.  Returns dest (the start of the copy).
 */
char* strcpy(char *dest, const char* src)
{
    char* out = dest;
    /* the assignment copies each byte; the loop ends right after the
       terminator has been copied */
    while ((*out++ = *src++) != '\0')
    {
    }
    return dest;
}
/*
 * strlen - number of bytes in str before the terminating NUL.
 * A NULL pointer is treated as an empty string and yields 0.
 *
 * Only the NUL byte ends the string; bytes with value 204 (0xCC) are
 * ordinary characters and are counted like any other.
 */
unsigned strlen(const char* str)
{
    unsigned cnt = 0;
    if (!str)
        return 0;
    while (str[cnt] != '\0')
        ++cnt;
    return cnt;
}
/*
 * fputc - write the character c (converted to unsigned char) to stream.
 *
 * Returns the character written as a non-negative int, or EOF on failure.
 * The byte is copied into an unsigned char first so that exactly that byte
 * is written regardless of endianness, and so that bytes >= 0x80 are never
 * reported as a negative value that callers would mistake for EOF.
 */
int fputc(int c, FILE* stream)
{
    unsigned char ch = (unsigned char)c;
    if( fwrite(&ch, 1, 1, stream) != 1)
        return EOF;
    return ch;
}
/*
 * fputs - write the string str (without its terminator) to stream.
 * Returns the number of bytes written, or EOF when fwrite() reports a
 * different count than requested.
 */
int fputs(const char* str, FILE* stream)
{
    int len = strlen(str);
    int written = fwrite(str, 1, len, stream);
    return (written == len) ? len : EOF;
}
printf.c:printf输出函数的封装和实现
#include "minicrt.h"
#ifndef WIN32
/*
 * Minimal variable-argument support for the non-Windows build.
 * NOTE(review): these macros walk the caller's arguments as a contiguous
 * byte array starting right after the last named parameter, which matches
 * the i386 cdecl convention this runtime is built for (-m32) - they are not
 * portable to ABIs that pass arguments in registers.
 *
 * Every argument occupies a whole number of int-sized stack slots, so sizes
 * are rounded up to a multiple of sizeof(int) before the cursor is moved.
 */
#define MINICRT_INTSIZEOF(t) ( (sizeof(t) + sizeof(int) - 1) & ~(sizeof(int) - 1) )
#define va_list char*
#define va_start(ap,arg) ( (ap) = (va_list)&(arg) + MINICRT_INTSIZEOF(arg) )
#define va_arg(ap, t) ( *(t*) ( ((ap) += MINICRT_INTSIZEOF(t)) - MINICRT_INTSIZEOF(t) ) )
#define va_end(ap) ( (ap) = (va_list) 0 )
#else
#include <Windows.h>
#endif
//Mini CRT 中并不支持特殊的格式操作,仅支持%d和%s两种简单的转换
/*
 * vfprintf - minimal formatted output.
 *
 * Supported conversions: %d (int, decimal), %s (string) and %% (literal
 * percent sign).  Any other character following '%' is written unchanged.
 *
 * Returns the number of characters written, or EOF as soon as a write
 * fails.
 */
int vfprintf(FILE* stream, const char* format, va_list arglist )
{
    int translating = 0; /* non-zero right after an unmatched '%' */
    int ret = 0;         /* number of characters written so far */
    const char* p = 0;
    for (p = format; *p != '\0'; ++p)
    {
        switch (*p)
        {
        case '%':
            if (!translating)
                translating = 1; /* the next character selects the conversion */
            else
            {
                /* "%%" prints a single '%' */
                if (fputc('%', stream) < 0)
                    return EOF;
                ++ret;
                translating = 0;
            }
            break;
        case 'd':
            if (translating) /* %d */
            {
                /* 16 bytes hold any 32-bit decimal value with sign and NUL.
                   buf lives on the stack and must NOT be passed to free(). */
                char buf[16] = {0};
                translating = 0;
                itoa( va_arg(arglist, int), buf, 10);
                if (fputs(buf, stream) < 0)
                    return EOF;
                ret += strlen(buf);
            }
            else if (fputc('d', stream) < 0)
                return EOF;
            else
                ++ret;
            break;
        case 's':
            if (translating) /* %s */
            {
                const char* str = va_arg(arglist, const char*);
                translating = 0;
                if (fputs(str, stream) < 0)
                    return EOF;
                ret += strlen(str);
            }
            else if (fputc ('s' , stream) < 0)
                return EOF;
            else
                ++ret;
            break;
        default:
            /* unsupported conversion or ordinary character: echo it */
            if (translating)
                translating = 0;
            if ( fputc(*p, stream) < 0 )
                return EOF;
            else
                ++ret;
            break;
        }
    }
    return ret;
}
/*
 * printf - formatted output to stdout; see vfprintf() for the supported
 * conversions.  Returns what vfprintf() returns.
 */
int printf(const char* format, ...)
{
    va_list arglist;
    int ret;
    va_start(arglist, format);
    ret = vfprintf(stdout, format, arglist);
    va_end(arglist); /* every va_start must be paired with va_end */
    return ret;
}
/*
 * fprintf - formatted output to the given stream; see vfprintf() for the
 * supported conversions.  Returns what vfprintf() returns.
 */
int fprintf(FILE* stream, const char* format, ...)
{
    va_list arglist;
    int ret;
    va_start(arglist, format);
    ret = vfprintf(stream, format, arglist);
    va_end(arglist); /* every va_start must be paired with va_end */
    return ret;
}
stdio.c:IO初始化和函数实现
//stdio.h
/*
1.为了简单起见,这里miniCRT不实现此前介绍的附带的buffer机制,不对Windows下的换行机制进行转换,即\r\n与\n不进行转换。
2.在Windows下,文件基本操作使用了Windows API(CreateFile,ReadFile,WriteFile,CloseHandle,SetFilePointer)
3.在Linux下,则使用系统调用open\read\write\close\seek
4.fopen时仅区分"r""w""+"这几种模式及它们的组合,不对文本模式和二进制模式进行区分,不支持追加模式("a")。
*/
#include "minicrt.h"
/*
 * mini_crt_init_io - initialise the I/O layer.
 * There is nothing to set up, so this always reports success (1).
 */
int mini_crt_init_io()
{
    const int io_ready = 1;
    return io_ready;
}
#ifdef WIN32
#include <Windows.h>
FILE* fopen(const char* filename, const char* mode){
Handle hFile = 0;
int access = 0;
int creation = 0;
if(strcmp(mode, "w") == 0) {
access != GENERIC_WRITE;
creation |= CREATE_ALWAYS;
}
if(strcmp(mode, "w+") == 0) {
access |= GENERIC_WRITE | GENERIC_READ;
creation |= CREATR_ALWAYS;
}
if(strcmp(mode, "r") == 0) {
access |= GENERIC_READ;
creation += OPEN_EXISTING;
}
if(strcmp(mode, "r+") == 0) {
access |= GENERIC_WRITE | GENERIC_READ;
creation |= TRUNCATE_EXISTING;
}
hFile = CreateFileA(filename, access, 0, 0, creation, 0, 0);
if (hFile == INVALID_HANDLE_VALUE)
return 0;
return (FILE*)hFile;
}
/*
 * fread (Windows) - read up to size*count bytes from stream into buffer
 * with ReadFile().  Returns the number of BYTES read, or 0 on failure.
 */
int fread(void* buffer, int size, int count, FILE* stream)
{
    DWORD read = 0; /* ReadFile reports the byte count through an LPDWORD */
    if (!ReadFile( (HANDLE)stream, buffer, (DWORD)(size*count), &read, 0))
        return 0;
    return (int)read;
}
/*
Windows API的ReadFILE()
BOOL ReadFile(
HANDLE hFile,//hFile为要读取的文件句柄,对应的是fread函数中stream参数
LPVOID lpBuffer,//lpBuffer是存储缓冲区的起始地址,对应fread函数中的buffer
DWORD nNumberofBytesToRead,//nNumberofBytesToRead代表要读取字节总数,等于fread函数中count * elementSize
LPDWORD lpNumberofBytesRead,//lpNumberOfBytesRead代表一个指向DWORD类型的指针,用来表示读取了多少个字节
LPOVERLAPPED lpOverlapped//lpOverlapped没用
);
*/
/*
 * fwrite (Windows) - write size*count bytes from buffer to stream with
 * WriteFile().  Returns the number of BYTES written, or 0 on failure.
 */
int fwrite(const void* buffer, int size, int count, FILE* stream)
{
    DWORD written = 0; /* WriteFile reports the byte count through an LPDWORD */
    if (!WriteFile( (HANDLE)stream, buffer, (DWORD)(size*count), &written, 0))
        return 0;
    return (int)written;
}
/*
 * fclose (Windows) - close the handle behind fp.
 * Returns 0 on success and EOF on failure, matching the convention of the
 * Linux build (CloseHandle itself returns non-zero on success).
 */
int fclose(FILE* fp)
{
    return CloseHandle((HANDLE)fp) ? 0 : EOF;
}
/*
 * fseek (Windows) - move the file pointer of fp by offset relative to the
 * origin `set`, via SetFilePointer().  The value returned by
 * SetFilePointer() is passed back to the caller unchanged.
 */
int fseek(FILE* fp, int offset, int set)
{
    HANDLE file = (HANDLE)fp;
    return SetFilePointer(file, offset, 0, set);
}
/*
Windows API的SetFilePointer(
HANDLE hFile,//hFile为要移动文件指针的文件句柄,对应的是fseek函数中的fp参数
LONG IDistanceToMove, //偏移量(低位),指向64位偏移量的低32位
PLONG lpDistanceToMoveHigh, //偏移量(高位),指向64位偏移量的高32位
DWORD dwMoveMethod//基准位置,文件开始位置FILE_BEGIN/文件当前位置FILE_CURRENT/文件结束位置FILE_END
);
*/
#else //#ifdef WIN32
//movl这种AT&T汇编语言是UNIX下惯用的汇编语言Assembly Language
//mov这种intel汇编语言则是Windows下常用,
static int open(const char* pathname, int flags, int mode)
{
int fd = 0;
asm("movl $5, %%eax \n\t"
"movl %1, %%ebx \n\t"
"movl %2, %%ecx \n\t"
"movl %3, %%edx \n\t"
"int $0x80 \n\t"
"movl %%eax, %0 \n\t":
"=m"(fd):"m"(pathname), "m"(flags), "m"(mode) );
}
static int read( int fd, void* buffer, unsigned size)
{
int ret = 0;
asm("movl $3, %%eax \n\t"
"movl %1, %%ebx \n\t"
"movl %2, %%ecx \n\t"
"movl %3, %%edx \n\t"
"int $0x80 \n\t"
"movl %%eax, %0 \n\t":
"=m"(ret):"m"(fd), "m"(buffer), "m"(size) );
return ret;
}
static int write( int fd, const void* buffer, unsigned size)
{
int ret = 0;
asm("movl $4, %%eax \n\t"
"movl %1, %%ebx \n\t"
"movl %2, %%ecx \n\t"
"movl %3, %%edx \n\t"
"int $0x80 \n\t"
"movl %%eax, %0 \n\t":
"=m"(ret):"m"(fd), "m"(buffer), "m"(size) );
return ret;
}
static int close(int fd)
{
int ret = 0;
asm("movl $6, %%eax \n\t"
"movl %1, %%ebx \n\t"
"int $0x80 \n\t"
"movl %%eax, %0 \n\t":
"=m"(ret):"m"(fd) );
return ret;
}
static int seek(int fd, int offset, int mode)
{
int ret = 0;
asm("movl $19, %%eax \n\t"
"movl %1, %%ebx \n\t"
"movl %2, %%ecx \n\t"
"movl %3, %%edx \n\t"
"int $0x80 \n\t"
"movl %%eax, %0 \n\t":
"=m"(ret):"m"(fd), "m"(offset), "m"(mode) );
return ret;
}
FILE* fopen(const char* filename, const char* mode)
{
int fd = -1;
int flags = 0;
int access = 00700; //创建文件的权限
//来自于/usr/include/bits/fcntl.h
//注意:以0开始的数字是八进制的
#define O_RDONLY 00
#define O_WRONLY 01
#define O_RDWR 02
#define O_CREAT 0100
#define O_TRUNC 01000
#define O_APPEND 02000
if(strcmp(mode, "w") == 0)
flags |= O_WRONLY | O_CREAT | O_TRUNC;
if(strcmp(mode, "w+") == 0)
flags |= O_RDWR | O_CREAT | O_TRUNC;
if(strcmp(mode, "r") == 0)
flags |= O_RDONLY;
if(strcmp(mode, "r+") == 0)
flags |= O_RDWR | O_CREAT;
fd = open(filename, flags, access);
return (FILE*)fd;
}
/*
 * fread (Linux) - read size*count bytes from the descriptor behind stream.
 * Returns whatever the read system call wrapper returns.
 */
int fread(void* buffer, int size, int count, FILE* stream)
{
    int fd = (int)stream;
    return read( fd, buffer, size*count );
}
/*
 * fwrite (Linux) - write size*count bytes to the descriptor behind stream.
 * Returns whatever the write system call wrapper returns.
 */
int fwrite(const void* buffer, int size, int count, FILE* stream)
{
    int fd = (int)stream;
    return write( fd, buffer, size*count );
}
/*
 * fclose (Linux) - close the descriptor behind fp.
 * Returns whatever the close system call wrapper returns.
 */
int fclose(FILE* fp)
{
    int fd = (int)fp;
    return close(fd);
}
/*
 * fseek (Linux) - reposition the descriptor behind fp by offset relative
 * to the origin `set`.  Returns whatever the seek system call wrapper
 * returns.
 */
int fseek(FILE* fp, int offset, int set)
{
    int fd = (int)fp;
    return seek(fd, offset, set);
}
#endif
test.c:程序员的测试代码
#include "minicrt.h"
int main(int argc, char* argv[]) //莫名奇妙的形参只能传递使用一次
{
int i;
FILE* fp;
char** v =(char**) malloc( argc * sizeof(char*) );
char** tempArgv = argv;
int tempArgc = argc;
for(i=0; i<argc; ++i)
{
v[i] = (char*) malloc( strlen(tempArgv[i]) + 1);
strcpy(v[i], tempArgv[i]);
}
fp = fopen("test.txt", "w");
for(i = 0; i<tempArgc; ++i)
{
int len = strlen(v[i]);
fwrite(&len, 1, sizeof(int), fp);
fwrite(v[i], 1, len, fp);
}
fclose(fp);
fp = fopen("test.txt", "r");
for(i=0; i<tempArgc; ++i)
{
int len;
char* buf;
fread(&len, 1, sizeof(int), fp);
buf =(char*) malloc(len + 1);
fread(buf, 1, len, fp);
buf[len] = '\0';
printf("%d %s\n", len, buf);
free(buf);
free(v[i]);
}
fclose(fp);
}
MiniCRT运行库设计成可以兼容Linux和Windows,故而运行该程序需要分系统讨论
Linux下运行命令
$gcc -c -fno-builtin -nostdlib -fno-stack-protector entry.c malloc.c stdio.c string.c printf.c -m32 -g
$gcc -c -ggdb -fno-builtin -nostdlib -fno-stack-protector test.c -m32 -g
$ar -rs minicrt.a malloc.o printf.o stdio.o string.o
$ld -static -e mini_crt_entry entry.o test.o minicrt.a -o test -m elf_i386
-fno-builtin参数:关闭GCC的内置函数功能,默认情况下GCC会把strlen\strcmp等函数展开成它内部的实现;
-nostdlib:表示不使用任何来自Glibc、GCC的库文件和启动文件,它包含了-nostartfiles这个参数;
-fno-stack-protector:关闭堆栈保护功能,最新版本的GCC在处理变长参数函数的情况下会要求实现对堆栈的保护函数;
由于系统是64位ubuntu,故而需要在上面的gcc命令中注明-m32,并在ld命令中注明-m elf_i386,以生成32位目标文件和可执行文件。
运行结果应该如下:
$ ./test arg1 arg2 124
6 ./test
4 arg1
4 arg2
3 124
Windows下运行命令
>cl /c /DWIN32 /GS- entry.c malloc.c printf.c stdio.c string.c
>lib entry.obj malloc.obj printf.obj stdio.obj string.obj /OUT:minicrt.lib
>cl /c /DWIN32 test.c
>link test.obj minicrt.lib kernel32.lib /NODEFAULTLIB /entry:mini_crt_entry
/DWIN32:启用cl的宏定义功能,即定义WIN32这个宏,这是代码中区分平台的关键宏;
/GS-:关闭堆栈保护功能,否则会在链接阶段发生"__security_cookie"和"__security_check_cookie"符号未定义错误。