4.1.2 解压缩内核
解压缩内核使用的是decompress_kernel函数,来自arch/x86/boot/compressed/misc.c:
301asmlinkage void decompress_kernel(void *rmode, memptr heap, 302 unsigned char *input_data, 303 unsigned long input_len, 304 unsigned char *output) 305{ 306 real_mode = rmode; 307 308 if (real_mode->hdr.loadflags & QUIET_FLAG) 309 quiet = 1; 310 311 if (real_mode->screen_info.orig_video_mode == 7) { 312 vidmem = (char *) 0xb0000; 313 vidport = 0x3b4; 314 } else { 315 vidmem = (char *) 0xb8000; 316 vidport = 0x3d4; 317 } 318 319 lines = real_mode->screen_info.orig_video_lines; 320 cols = real_mode->screen_info.orig_video_cols; 321 322 free_mem_ptr = heap; /* Heap */ 323 free_mem_end_ptr = heap + BOOT_HEAP_SIZE; 324 325 if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) 326 error("Destination address inappropriately aligned"); 327#ifdef CONFIG_X86_64 328 if (heap > 0x3fffffffffffUL) 329 error("Destination address too large"); 330#else 331 if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff)) 332 error("Destination address too large"); 333#endif 334#ifndef CONFIG_RELOCATABLE 335 if ((unsigned long)output != LOAD_PHYSICAL_ADDR) 336 error("Wrong destination address"); 337#endif 338 339 if (!quiet) 340 putstr("/nDecompressing Linux... "); 341 decompress(input_data, input_len, NULL, NULL, output, NULL, error); 342 parse_elf(output); 343 if (!quiet) 344 putstr("done./nBooting the kernel./n"); 345 return; 346} |
看到他的参数,共有5个,是刚才依次压栈而得到的。最先压栈的是最后一个参数output(还记得吧,倒着来的),在刚才的131行看到的,来自z_extract_offset_negative,作为解压缩输出的目标首地址;第二个,input_len,其值等于z_input_len,表示待解压的压缩内核大小;第三个,input_data,来自input_data,表示待解压的压缩内核地址;第四个参数,heap,32位下是32位,来自boot_heap,表示解压缩阶段的堆;最后压栈的是第一个参数rmode,来自我们刚才用到的esi寄存器,从函数体可以看到它被赋给real_mode,并通过它访问hdr和screen_info,也就是指向实模式阶段准备好的启动参数结构。
假设我配置的是CONFIG_KERNEL_BZIP2,那么会调用顶层lib/decompress_bunzip2.c中的decompress函数,最终会调用位于同一文件的bunzip2函数(注意,在我们制作bzImage的时候使用的压缩程序只有一个,如果指定的是bzip2,那么解压缩代码都在decompress_bunzip2.c里):
674/* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip2 data, 675 not end of file.) */ 676STATIC int INIT bunzip2(unsigned char *buf, int len, 677 int(*fill)(void*, unsigned int), 678 int(*flush)(void*, unsigned int), 679 unsigned char *outbuf, 680 int *pos, 681 void(*error_fn)(char *x)) 682{ 683 struct bunzip_data *bd; 684 int i = -1; 685 unsigned char *inbuf; 686 687 set_error_fn(error_fn); 688 if (flush) 689 outbuf = malloc(BZIP2_IOBUF_SIZE); 690 691 if (!outbuf) { 692 error("Could not allocate output bufer"); 693 return RETVAL_OUT_OF_MEMORY; 694 } 695 if (buf) 696 inbuf = buf; 697 else 698 inbuf = malloc(BZIP2_IOBUF_SIZE); 699 if (!inbuf) { 700 error("Could not allocate input bufer"); 701 i = RETVAL_OUT_OF_MEMORY; 702 goto exit_0; 703 } 704 i = start_bunzip(&bd, inbuf, len, fill); 705 if (!i) { 706 for (;;) { 707 i = read_bunzip(bd, outbuf, BZIP2_IOBUF_SIZE); 708 if (i <= 0) 709 break; 710 if (!flush) 711 outbuf += i; 712 else 713 if (i != flush(outbuf, i)) { 714 i = RETVAL_UNEXPECTED_OUTPUT_EOF; 715 break; 716 } 717 } 718 } 719 /* Check CRC and release memory */ 720 if (i == RETVAL_LAST_BLOCK) { 721 if (bd->headerCRC != bd->totalCRC) 722 error("Data integrity error when decompressing."); 723 else 724 i = RETVAL_OK; 725 } else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) { 726 error("Compressed file ends unexpectedly"); 727 } 728 if (!bd) 729 goto exit_1; 730 if (bd->dbuf) 731 large_free(bd->dbuf); 732 if (pos) 733 *pos = bd->inbufPos; 734 free(bd); 735exit_1: 736 if (!buf) 737 free(inbuf); 738exit_0: 739 if (flush) 740 free(outbuf); 741 return i; 742} |
bunzip2函数的第一、二个参数是待解压的压缩内核的首地址和长度;第三、四是两个函数参数,传进来的时候为空;第五个参数是解压缩后的地址,也是刚才传进来的。683行首先声明一个指向bunzip_data数据结构的指针bd,待会再去说他。我们没有传入flush函数,所以688行不会另外分配输出缓冲区,解压结果直接写到outbuf;又因为buf不为空,695~696行让局部指针inbuf指向刚才传递进来的压缩内核首地址buf,随后704行调用函数start_bunzip:
626static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len, 627 int (*fill)(void*, unsigned int)) 628{ 629 struct bunzip_data *bd; 630 unsigned int i, j, c; 631 const unsigned int BZh0 = 632 (((unsigned int)'B') << 24)+(((unsigned int)'Z') << 16) 633 +(((unsigned int)'h') << 8)+(unsigned int)'0'; 634 635 /* Figure out how much data to allocate */ 636 i = sizeof(struct bunzip_data); 637 638 /* Allocate bunzip_data. Most fields initialize to zero. */ 639 bd = *bdp = malloc(i); 640 if (!bd) 641 return RETVAL_OUT_OF_MEMORY; 642 memset(bd, 0, sizeof(struct bunzip_data)); 643 /* Setup input buffer */ 644 bd->inbuf = inbuf; 645 bd->inbufCount = len; 646 if (fill != NULL) 647 bd->fill = fill; 648 else 649 bd->fill = nofill; 650 651 /* Init the CRC32 table (big endian) */ 652 for (i = 0; i < 256; i++) { 653 c = i << 24; 654 for (j = 8; j; j--) 655 c = c&0x80000000 ? (c << 1)^0x04c11db7 : (c << 1); 656 bd->crc32Table[i] = c; 657 } 658 659 /* Ensure that file starts with "BZh['1'-'9']." */ 660 i = get_bits(bd, 32); 661 if (((unsigned int)(i-BZh0-1)) >= 9) 662 return RETVAL_NOT_BZIP_DATA; 663 664 /* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of 665 uncompressed data. Allocate intermediate buffer for block. */ 666 bd->dbufSize = 100000*(i-BZh0); 667 668 bd->dbuf = large_malloc(bd->dbufSize * sizeof(int)); 669 if (!bd->dbuf) 670 return RETVAL_OUT_OF_MEMORY; 671 return RETVAL_OK; 672} |
bdp是bunzip2中局部指针bd的地址(&bd),用来把分配好的结构传回去;fill是空的,所以传入这个函数的有效输入仅仅是压缩内核首地址及其长度,内部变量i先被赋为数据结构bunzip_data的长度。而631行BZh0常量很重要,它是bzip2文件头魔数(magic number)的基准值。我们看到这段代码:
const unsigned int BZh0 =
(((unsigned int)'B') << 24)+(((unsigned int)'Z') << 16)
+(((unsigned int)'h') << 8)+(unsigned int)'0';
就是压缩常量的值,'B'、'Z'、'h'、'0'是ASCII码,分别为0x42、0x5A、0x68、0x30,计算出来就是(0x42<<24)+(0x5A<<16)+(0x68<<8)+0x30,最后等于0x425A6830。
继续走,分配i个字节的内存,由指针bd指上,同时bunzip2函数的内部变量bd也指到它了,然后642行将该结构清零(这些都是规定动作,值得我们程序员学习)。看来我们不得不把数据结构bunzip_data列出来了:
91/* Structure holding all the housekeeping data, including IO buffers and 92 memory that persists between calls to bunzip */ 93struct bunzip_data { 94 /* State for interrupting output loop */ 95 int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent; 96 /* I/O tracking data (file handles, buffers, positions, etc.) */ 97 int (*fill)(void*, unsigned int); 98 int inbufCount, inbufPos /*, outbufPos*/; 99 unsigned char *inbuf /*,*outbuf*/; 100 unsigned int inbufBitCount, inbufBits; 101 /* The CRC values stored in the block header and calculated from the 102 data */ 103 unsigned int crc32Table[256], headerCRC, totalCRC, writeCRC; 104 /* Intermediate buffer and its size (in bytes) */ 105 unsigned int *dbuf, dbufSize; 106 /* These things are a bit too big to go on the stack */ 107 unsigned char selectors[32768]; /* nSelectors = 15 bits */ 108 struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ 109 int io_error; /* non-zero if we have IO error */ 110}; |
644和645行初始化bunzip_data的inbuf和inbufCount字段。随后652~657行给crc32Table字段赋值,这是一张256项的CRC32查找表(大端),后面read_bunzip计算writeCRC时要查这张表;结构体注释里所说的“存储在块头中的CRC值”指的是同一行声明的headerCRC等字段,而不是这张表。随后调用get_bits函数:
113/* Return the next nnn bits of input. All reads from the compressed input 114 are done through this function. All reads are big endian */ 115static unsigned int INIT get_bits(struct bunzip_data *bd, char bits_wanted) 116{ 117 unsigned int bits = 0; 118 119 /* If we need to get more data from the byte buffer, do so. 120 (Loop getting one byte at a time to enforce endianness and avoid 121 unaligned access.) */ 122 while (bd->inbufBitCount < bits_wanted) { 123 /* If we need to read more data from file into byte buffer, do 124 so */ 125 if (bd->inbufPos == bd->inbufCount) { 126 if (bd->io_error) 127 return 0; 128 bd->inbufCount = bd->fill(bd->inbuf, BZIP2_IOBUF_SIZE); 129 if (bd->inbufCount <= 0) { 130 bd->io_error = RETVAL_UNEXPECTED_INPUT_EOF; 131 return 0; 132 } 133 bd->inbufPos = 0; 134 } 135 /* Avoid 32-bit overflow (dump bit buffer to top of output) */ 136 if (bd->inbufBitCount >= 24) { 137 bits = bd->inbufBits&((1 << bd->inbufBitCount)-1); 138 bits_wanted -= bd->inbufBitCount; 139 bits <<= bits_wanted; 140 bd->inbufBitCount = 0; 141 } 142 /* Grab next 8 bits of input from buffer. */ 143 bd->inbufBits = (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++]; 144 bd->inbufBitCount += 8; 145 } 146 /* Calculate result */ 147 bd->inbufBitCount -= bits_wanted; 148 bits |= (bd->inbufBits >> bd->inbufBitCount)&((1 << bits_wanted)-1); 149 150 return bits; 151} |
这个函数比较简单,一开始inbufBitCount肯定小于32,因为被清零了。由于是解压缩阶段,bd->inbufPos也为0,不可能等于inbufCount。所以第一次循环直接来到143行:bd->inbufBits原本为0,左移8位后再或上bd->inbuf[bd->inbufPos++],由于是后置自增,取到的是bd->inbuf[0],所以bd->inbufBits = bd->inbuf[0],bd->inbufPos随后变成1,144行bd->inbufBitCount为8了;
第二次循环,bd->inbufBitCount为8还是小于32,也小于24,所以又赋值bd->inbufBits为bd->inbuf[0]<<8|bd->inbuf[1],bd->inbufPos变成2,bd->inbufBitCount成了16。
第三次循环,bd->inbufBitCount为16还是小于32,也小于24,所以又赋值bd->inbufBits为bd->inbuf[0]<<16|bd->inbuf[1]<<8|bd->inbuf[2],bd->inbufPos变成3,bd->inbufBitCount成了24。
第四次循环,bd->inbufBitCount为24了,还是小于32,但是要进入136行的条件块。137行先得到一个bits值,这个值是:
(bd->inbuf[0]<<16|bd->inbuf[1]<<8|bd->inbuf[2])&((1 << bd->inbufBitCount)-1)
换算过来就是(bd->inbuf[0]<<16|bd->inbuf[1]<<8|bd->inbuf[2])&0xffffff。138行把bits_wanted减去24,变成8(32-24=8);139行再把bits左移8位,给最低的一个字节腾出位置;140行把bd->inbufBitCount清零。是不是要进入一个死循环了呢?没有,143行继续把bd->inbuf[3]移入bd->inbufBits的低8位,144行bd->inbufBitCount变成8,此时它不再小于bits_wanted(也是8),所以跳出循环了。现在感受到内核代码的变态了吧,下面的更变态:147行bd->inbufBitCount减去bits_wanted又变成0,148行bits还要继续去|= (bd->inbufBits >> 0)&((1 << 8)-1),也就是把bd->inbuf[3]补到最低字节。
最后返回这个bits值,即bd->inbuf[0]<<24|bd->inbuf[1]<<16|bd->inbuf[2]<<8|bd->inbuf[3],也就是按大端顺序读出的压缩数据最前面4个字节。对bzip2数据来说,这4个字节就是文件头“BZh”再加上一个'1'~'9'的数字字符(start_bunzip中659行和664行的注释说得很清楚),这个数字表示块大小,单位是100k未压缩数据。对这里的代码有更深入理解的同志,欢迎联系我交流,一定会请你喝酒的。
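回到start_bunzip中,刚才bits在660行返回给了i。661行的i-BZh0-1转换成无符号整型后必须小于9:当i的前三个字节是“BZh”时,i-BZh0就是第四个字节与'0'的差,合法取值为1~9,减1之后落在0~8;其他任何开头都会得到一个不小于9的无符号数,函数返回RETVAL_NOT_BZIP_DATA,就不可能继续走了。我们假设i-BZh0为8(即数据以“BZh8”开头)。最后666行给bd->dbufSize赋值为100000*8,即800000个元素;668行为bd->dbuf分配bd->dbufSize * sizeof(int)字节的中间缓冲区。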
最后返回到bunzip2中,start_bunzip最后返回的RETVAL_OK为0,所以进入705这个条件语句中,一来就进入read_bunzip函数:
/* Undo burrows-wheeler transform on intermediate buffer to produce output. 514 If start_bunzip was initialized with out_fd =-1, then up to len bytes of 515 data are written to outbuf. Return value is number of bytes written or 516 error (all errors are negative numbers). If out_fd!=-1, outbuf and len 517 are ignored, data is written to out_fd and return is RETVAL_OK or error. 518*/ 519 520static int INIT read_bunzip(struct bunzip_data *bd, char *outbuf, int len) 521{ 522 const unsigned int *dbuf; 523 int pos, xcurrent, previous, gotcount; 524 525 /* If last read was short due to end of file, return last block now */ 526 if (bd->writeCount < 0) 527 return bd->writeCount; 528 529 gotcount = 0; 530 dbuf = bd->dbuf; 531 pos = bd->writePos; 532 xcurrent = bd->writeCurrent; 533 534 /* We will always have pending decoded data to write into the output 535 buffer unless this is the very first call (in which case we haven't 536 Huffman-decoded a block into the intermediate buffer yet). */ 537 538 if (bd->writeCopies) { 539 /* Inside the loop, writeCopies means extra copies (beyond 1) */ 540 --bd->writeCopies; 541 /* Loop outputting bytes */ 542 for (;;) { 543 /* If the output buffer is full, snapshot 544 * state and return */ 545 if (gotcount >= len) { 546 bd->writePos = pos; 547 bd->writeCurrent = xcurrent; 548 bd->writeCopies++; 549 return len; 550 } 551 /* Write next byte into output buffer, updating CRC */ 552 outbuf[gotcount++] = xcurrent; 553 bd->writeCRC = (((bd->writeCRC) << 8) 554 ^bd->crc32Table[((bd->writeCRC) >> 24) 555 ^xcurrent]); 556 /* Loop now if we're outputting multiple 557 * copies of this byte */ 558 if (bd->writeCopies) { 559 --bd->writeCopies; 560 continue; 561 } 562decode_next_byte: 563 if (!bd->writeCount--) 564 break; 565 /* Follow sequence vector to undo 566 * Burrows-Wheeler transform */ 567 previous = xcurrent; 568 pos = dbuf[pos]; 569 xcurrent = pos&0xff; 570 pos >>= 8; 571 /* After 3 consecutive copies of the same 572 byte, the 4th is a repeat 
count. We count 573 down from 4 instead *of counting up because 574 testing for non-zero is faster */ 575 if (--bd->writeRunCountdown) { 576 if (xcurrent != previous) 577 bd->writeRunCountdown = 4; 578 } else { 579 /* We have a repeated run, this byte 580 * indicates the count */ 581 bd->writeCopies = xcurrent; 582 xcurrent = previous; 583 bd->writeRunCountdown = 5; 584 /* Sometimes there are just 3 bytes 585 * (run length 0) */ 586 if (!bd->writeCopies) 587 goto decode_next_byte; 588 /* Subtract the 1 copy we'd output 589 * anyway to get extras */ 590 --bd->writeCopies; 591 } 592 } 593 /* Decompression of this block completed successfully */ 594 bd->writeCRC = ~bd->writeCRC; 595 bd->totalCRC = ((bd->totalCRC << 1) | 596 (bd->totalCRC >> 31)) ^ bd->writeCRC; 597 /* If this block had a CRC error, force file level CRC error. */ 598 if (bd->writeCRC != bd->headerCRC) { 599 bd->totalCRC = bd->headerCRC+1; 600 return RETVAL_LAST_BLOCK; 601 } 602 } 603 604 /* Refill the intermediate buffer by Huffman-decoding next 605 * block of input */ 606 /* (previous is just a convenient unused temp variable here) */ 607 previous = get_next_block(bd); 608 if (previous) { 609 bd->writeCount = previous; 610 return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount; 611 } 612 bd->writeCRC = 0xffffffffUL; 613 pos = bd->writePos; 614 xcurrent = bd->writeCurrent; 615 goto decode_next_byte; 616} |
看到这个函数,我都冒冷汗了。具体的我实在没能力去分析他了,感兴趣的同志可以帮帮我,谢谢。最后解压后的内核会存放到outbuf开始的内存中。bunzip2在706~717行的循环里反复调用read_bunzip,直到返回值小于等于0才跳出;如果返回的是RETVAL_LAST_BLOCK并且720~724行的CRC校验通过,bunzip2就把返回值改为RETVAL_OK并结束,decompress也就结束了。回到boot/compressed/head_32.S的代码中:
141#if CONFIG_RELOCATABLE
……
165#endif
由于我们.config没有CONFIG_RELOCATABLE,所以不去详细分析141~165行的代码,直接来到170行:
170 xorl %ebx, %ebx
171 jmp *%ebp
开始执行解压缩后的第一条代码,即第二个startup_32()函数。这个函数主要是为第一个Linux进程(进程0)建立执行环境。该函数主要执行以下操作:
1. 把段寄存器初始化为最终值。
2. 把内核的bss段填充为0。
3. 初始化包含在swapper_pg_dir的临时内核页表,并初始化pg0,以使线性地址一致地映射同一物理地址。
4. 把页全局目录的地址存放在cr3寄存器中,并通过设置cr0寄存器的PG位启用分页。
5. 把从BIOS中获得的系统参数和传递给操作系统的参数boot_params放入第一个页框中。
6. 为进程0建立内核态堆栈。
7. 该函数再一次清零eflags寄存器的所有位。
8. 调用setup_idt用空的中断处理程序填充中断描述符表IDT。
9. 识别处理器的型号。
10. 用编译好的GDT和IDT表的地址来填充gdtr和idtr寄存器。
11. 初始化虚拟机监视器Xen。
12. 向start_kernel()函数进发。
文章出自:http://blog.csdn.net/yunsongice/article/details/6110667