[u-boot] uboot启动过程中的代码重定位-CSDN博客

本文链接：https://blog.csdn.net/qinmoge8586/article/details/147722219

Uboot中的代码重定位

为什么需要代码重定位

启用了SPL_BUILD、TPL_BUILD选项，uboot 完整镜像比较大，uboot刚启动时，DRAM还未初始化，无法直接将完整镜像加载到DRAM；只能加载一个spl.bin到sysram这种内存上初始化DDR。待DRAM可用后，需要将uboot重定位到DRAM运行
一开始uboot在低位地址执行，但通常kernel也需要在低地址空间启动，因此uboot需要重定位到一段高位地址空间，为设备树、ramdisk、kernel image让路
Flash XIP，Flash一般速度较慢，为了执行速度，也需要重定位到RAM空间执行

链接地址(Link Address)

lds文件中，链接地址为0x0000_0000，实际上，很多SoC芯片中，RAM并不是从0地址开始的，uboot的加载地址也不在0地址上，为什么链接地址是0地址呢？

#include <config.h>
#include <asm/psci.h>

OUTPUT_FORMAT("elf64-littleaarch64", "elf64-littleaarch64", "elf64-littleaarch64")
OUTPUT_ARCH(aarch64)
ENTRY(_start)
SECTIONS
{
	. = 0x00000000;																			//这个地址就是链接地址吗？
	. = ALIGN(8);
	__image_copy_start = ADDR(.text);
	.text :
	{
		CPUDIR/start.o (.text*)
	}

	/* This needs to come before *(.text*) */
	.efi_runtime : {
                __efi_runtime_start = .;

	......

直到我在Makefile找到了这一句，.text段的链接地址是CONFIG_TEXT_BASE定义的地址

LDFLAGS_u-boot += -Ttext $(CONFIG_TEXT_BASE)			//代码段被链接到CONFIG_TEXT_BASE

加载地址(Load Address)和运行地址(Running Address)

早期的启动阶段、uboot还没打开MMU，即使后面MMU Enable的情况下，也是做的一一映射（虚拟地址 = 物理地址），在这个前提下，加载地址就是运行地址
uboot是被前级Bootloader加载到RAM上的，通常情况下，前级BL也是把镜像搬移到运行地址上去

uboot中的第一次重定位

__rel_dyn_start和__rel_dyn_end，这段地址空间是如何生成的？保存的是什么数据？如何进行的重定位？重定位的目的

		.rela.dyn : {
			__rel_dyn_start = .;
			*(.rela*)
			__rel_dyn_end = .;
		}

从uboot文档中得知：链接参数包含-pie才会生成.rela的数据

    To make relocation on arm working, the following changes are done:
    
    At arch level: add linker flag -pie
    
            This causes the linker to generate fixup tables .rel.dyn and .dynsym,
            which must be applied to the relocated image before transferring
            control to it.
    
            These fixups are described in the ARM ELF documentation as type 23
            (program-base-relative) and 2 (symbol-relative)

   pie_fixup:
   	/* 检查链接地址和运行地址是否相同，不一样的话，需要重定位符号表 */
   	adr	x0, _start							/*运行地址*/
   	ldr	x1, _TEXT_BASE						/*链接地址*/
   	subs	x9, x0, x1						/*运行地址和链接地址的偏移*/
   	beq	pie_fixup_done
   	/*对.rela段进行重定向*/
   	adrp    x2, __rel_dyn_start				/*.rela段的起始地址*/
   	add     x2, x2, #:lo12:__rel_dyn_start
   	adrp    x3, __rel_dyn_end				/*.rela段的结束地址*/
   	add     x3, x3, #:lo12:__rel_dyn_end
   pie_fix_loop:
   	ldp	x0, x1, [x2], #16					/*从x2地址处读取16B的数据，分别保存在x0和x1，x2 += 16*/
   	ldr	x4, [x2], #8						/*继续读8B数据，保存到x4*/
   	cmp	w1, #1027							/*判断x1的低32bit是否为1027，不是1027表示无需重定位*/
   	bne	pie_skip_reloc
   	/* relative fix: store addend plus offset at dest location */
   	add	x0, x0, x9							/*x0 += x9, x9是运行地址和链接地址之间的偏移量*/
   	add	x4, x4, x9							/*x4 += x9*/
   	str	x4, [x0]							/*x4的值保存在x0的地址处*/

相信不光是只有我一个人到这里看懵逼了，X1寄存器保存的是类似于Type或者类型的东西，应该能猜出来。X4寄存器中的数据和X2中的地址有什么联系？
这里不得不提到重定位的两种类型了, 输入man elf

Relocation entries (Rel & Rela)
	Relocation  is the process of connecting symbolic references with symbolic definitions.  
	Relocatable files must have information that describes how to modify their section contents, 
	thus allowing executable and shared object files to hold the right information for a process's program image.
	Relocation entries are these data.
    
    Relocation structures that do not need an addend:
    
       typedef struct {
           Elf32_Addr r_offset;
           uint32_t   r_info;
       } Elf32_Rel;
    
       typedef struct {
           Elf64_Addr r_offset;
           uint64_t   r_info;
       } Elf64_Rel;
    
    Relocation structures that need an addend:
    
       typedef struct {
           Elf32_Addr r_offset;
           uint32_t   r_info;
           int32_t    r_addend;
       } Elf32_Rela;
    
       typedef struct {
           Elf64_Addr r_offset;
           uint64_t   r_info;
           int64_t    r_addend;
       } Elf64_Rela;

毫无疑问：对于AARCH64 平台，重定位数据的格式是下面这种格式，X0和X1分别是r_offset和r_info， X4为r_addend

   typedef struct {
      Elf64_Addr r_offset;
      uint64_t   r_info;
      int64_t    r_addend;
   } Elf64_Rela;

r_offset
This member gives the location at which to apply the relocation action. For a relocatable file, the value is the byte offset from the beginning of the section to the storage unit affected by the relocation. For an executable file or shared object, the value is the virtual address of the storage unit affected by the relocation.

对于一个可重定位的文件，值是相对于Section起始地址的字节偏移；对于一个可执行文件或者共享库（.so），值是受重定位影响的存储单元的虚拟地址

r_info

This member gives both the symbol table index with respect to which the relocation must be made and the type of relocation to apply. Relocation types are processor-specific. When the text refers to a relocation entry’s relocation type or symbol table index, it means the result of applying ELF[32|64]_R_TYPE or ELF[32|64]_R_SYM, respectively, to the entry’s r_info member.

r_addend
This member specifies a constant addend used to compute the value to be stored into the relocatable field.
该成员指定了一个常量加数，用于计算要存储到可重定位字段中的值
看到这里，还是云里雾里的，还是没搞懂r_addend和r_info的关系，那我们在uboot中增加如下测试代码

    /* Target function of the relocation experiment: only prints a marker line. */
    void rely_test_function(void)
    {
    	printf("relocate test function\n");
    }
    
    /*
     * Global pointer initialised with the address of rely_test_function.
     * Its initial value is an absolute address fixed at link time.
     */
    void * rely_test_function_addr = rely_test_function;
    /* Plain initialised global; the pattern makes it easy to spot in a dump. */
    unsigned int test_val = 0xa55a5aa5;
    
    /*
     * Overwrites test_val, prints the stored function pointer and the new
     * value of test_val, then calls rely_test_function directly.
     */
    void rely_test(void)
    {
    	test_val = 0xffffffff;
    	/* %p matches the void * argument; %x expects an unsigned int. */
    	printf("rely_test_function address %p\n", rely_test_function_addr);
    	/* %u matches unsigned int; %d would print 0xffffffff as -1. */
    	printf("rely_test_val %u\n", test_val);
    	rely_test_function();
    }

编译后，对带符号信息的ELF文件u-boot（而不是裸二进制u-boot.bin）执行 aarch64-none-elf-objdump -D u-boot > u-boot.dump 生成反汇编文件，从文件中提取如下信息

 Disassembly of section .text:
  
  0000000000000000 <__image_copy_start>:
     0:	1400000a 	b	28 <reset>
     4:	d503201f 	nop
  ......
  
  000000000002d82c <rely_test>:
     2d82c:	a9be7bfd 	stp	x29, x30, [sp, #-32]!
     2d830:	12800000 	mov	w0, #0xffffffff								// 赋值的过程，value保存到W0寄存器
     2d834:	910003fd 	mov	x29, sp
     2d838:	f9000bf3 	str	x19, [sp, #16]
     2d83c:	b0000573 	adrp	x19, da000 <tpm1_commands+0x178>
     2d840:	b9090a60 	str	w0, [x19, #2312]                         // 0xda000 + 2312(0x908) = 0xda908, x19 + 2312 就是test_val的地址
     2d844:	b0000560 	adrp	x0, da000 <tpm1_commands+0x178>	// 0xda000 + 2320(0x910) = 0xda910, 即rely_test_function_addr的地址，下一条ldr把其中保存的指针值读到x1
     2d848:	f9448801 	ldr	x1, [x0, #2320]
     2d84c:	f00004e0 	adrp	x0, cc000 <net_null_ethaddr+0xda9>
     2d850:	91363000 	add	x0, x0, #0xd8c
     2d854:	9401ae66 	bl	991ec <printf>
     2d858:	b9490a61 	ldr	w1, [x19, #2312]
     2d85c:	f00004e0 	adrp	x0, cc000 <net_null_ethaddr+0xda9>
     2d860:	9136ac00 	add	x0, x0, #0xdab
     2d864:	9401ae62 	bl	991ec <printf>
     2d868:	f9400bf3 	ldr	x19, [sp, #16]
     2d86c:	a8c27bfd 	ldp	x29, x30, [sp], #32
     2d870:	17fffeec 	b	2d420 <rely_test_function>
   
  00000000000da908 <test_val>:											// da908初始值0xa55a5aa5就是我们一开始赋的初值
     da908:	a55a5aa5 	ld1w	{z5.s}, p6/z, [x21, x26, lsl #2]
     da90c:	00000000 	udf	#0
  
  00000000000da910 <rely_test_function_addr>:					// da910地址存储的初始值就是rely_test_function函数的地址
     da910:	0002d420 	.inst	0x0002d420 ; undefined
     da914:	00000000 	udf	#0
  
  000000000002d420 <rely_test_function>:
     2d420:	f00004e0 	adrp	x0, cc000 <net_null_ethaddr+0xda9>
     2d424:	9136f400 	add	x0, x0, #0xdbd
     2d428:	1401af71 	b	991ec <printf>
  
  Disassembly of section .rela.dyn:
  
  00000000000e5728 <__image_copy_end>:
  	......
  
    1037b8:	000da8f0 	.inst	0x000da8f0 ; undefined
    1037bc:	00000000 	udf	#0
    1037c0:	00000403 	udf	#1027
    1037c4:	00000000 	udf	#0
    1037c8:	000d4e62 	.inst	0x000d4e62 ; undefined
    1037cc:	00000000 	udf	#0
    1037d0:	000da910 	.inst	0x000da910 ; undefined		// r_offset,这个地址上的值0xda910
    1037d4:	00000000 	udf	#0
    1037d8:	00000403 	udf	#1027							// r_info, 是不是和上面的w1对上了：1027(0x403)就是R_AARCH64_RELATIVE，所以代码里才会cmp w1, #1027
    1037dc:	00000000 	udf	#0
    1037e0:	0002d420 	.inst	0x0002d420 ; undefined		// r_addend, 这个地址的值0x2d420
    1037e4:	00000000 	udf	#0
    1037e8:	000da918 	.inst	0x000da918 ; undefined
    1037ec:	00000000 	udf	#0
    1037f0:	00000403 	udf	#1027
    1037f4:	00000000 	udf	#0
    1037f8:	0002d984 	.inst	0x0002d984 ; undefined
    1037fc:	00000000 	udf	#0
    103800:	000da920 	.inst	0x000da920 ; undefined
    103804:	00000000 	udf	#0

看到这里大概明白了：全局变量、全局指针、函数都是按链接地址编址的，像rely_test_function_addr这样的全局指针，其初始值就是链接时确定的绝对地址。当链接地址和运行地址有差异时，镜像实际位于运行地址上，因此存放指针的位置（r_offset）需要加上偏移量，指针指向的函数地址（r_addend）也需要加上同样的偏移量。在Link Addr和Running Addr不等的情况下，需要按.rela.dyn重定位表逐项修正这些绝对地址，避免uboot运行过程中，相关变量、指针的引用找不到正确的地址。

所以上面的问题是不是也就迎刃而解了！.rela.dyn段存储的是重定位表（注意不是符号表），每一项遵循ELF规范的Elf64_Rela格式；当镜像运行在与链接地址不同的位置时，表项记录的绝对地址就需要加上偏移量进行修正

uboot中的第二次代码重定位

第二次的代码重定位目的是将uboot搬移到高位地址区间运行

#if !defined(CONFIG_XPL_BUILD)
    
    	ldr	x0, [x18, #GD_START_ADDR_SP]			/* x0 <- gd->start_addr_sp */
    	bic	sp, x0, #0xf							/* 16-byte alignment for ABI compliance */
    	ldr	x18, [x18, #GD_NEW_GD]					/* x18 <- gd->new_gd */
    
    	ldr	x0, [x18, #GD_FLAGS]					/* x0 <- gd->flags, 如果跳过重定位的标志位 为1，直接跳过*/
    	tbnz	x0, 11, relocation_return			/* GD_FLG_SKIP_RELOC is bit 11 */
    
    	adr	lr, relocation_return
    #if CONFIG_POSITION_INDEPENDENT
    	/* Add in link-vs-runtime offset */
    	adrp	x0, _start							/* x0 <- Runtime value of _start， 计算出运行地址和链接地址之间的offset 保存在x9*/
    	add	x0, x0, #:lo12:_start
    	ldr	x9, _TEXT_BASE							/* x9 <- Linked value of _start */
    	sub	x9, x9, x0								/* x9 <- Run-vs-link offset */
    	add	lr, lr, x9
    #if defined(CONFIG_SYS_RELOC_GD_ENV_ADDR)
    	ldr	x0, [x18, #GD_ENV_ADDR]					/* x0 <- gd->env_addr ，如果环境变量也需要重定位，那就更新gd中的环境变量地址*/
    	add	x0, x0, x9
    	str	x0, [x18, #GD_ENV_ADDR]
    #endif
    #endif
    	/* Add in link-vs-relocation offset */
    	ldr	x9, [x18, #GD_RELOC_OFF]			/* x9 <- gd->reloc_off ，找出来reloc offset*/
    	add	lr, lr, x9							/* new return address after relocation ，更新lr的地址，代码执行完后的返回*/
    	ldr	x0, [x18, #GD_RELOCADDR]			/* x0 <- gd->relocaddr ，获取到relocaddr， 也就是重定位的目标地址*/
    	b	relocate_code
    
    relocation_return:
    ......

relocate_code的实现如下（arch/arm/lib/relocate_64.S）：

    /*
     * relocate_code - copy the image to a new address and patch the absolute
     * addresses listed in .rela.dyn.
     *
     * In:   x0 = destination address of the copy
     * Uses: x1-x4 and x9-x11 as scratch in this routine. x29/x30 are saved in
     *       a 32-byte stack frame; the two spare slots hold the destination
     *       start ([sp, #16]) and destination end ([sp, #24]), which are
     *       reloaded as x0/x1 before the d-cache flush call.
     */
    ENTRY(relocate_code)
    	stp	x29, x30, [sp, #-32]!					/* create a stack frame */
    	mov	x29, sp
    	str	x0, [sp, #16]							/* save the destination start address */
    
    	adrp	x1, __image_copy_start				/* x1 <- current (PC-relative) address of __image_copy_start */
    	add	x1, x1, :lo12:__image_copy_start
    	subs	x9, x0, x1							/* x9 <- destination minus current address */
    	b.eq	relocate_done						/* x0 is the function argument (relocaddr); the image already sits at the destination, so skip the copy and the fixups */
    	/*
    	 * Don't ldr x1, __image_copy_start here, since if the code is already
    	 * running at an address other than it was linked to, that instruction
    	 * will load the relocated value of __image_copy_start. To
    	 * correctly apply relocations, we need to know the linked value.
    	 *
    	 * Linked &__image_copy_start, which we know was at
    	 * CONFIG_TEXT_BASE, which is stored in _TEXT_BASE, as a non-
    	 * relocated value, since it isn't a symbol reference.
    	 */
    	ldr	x1, _TEXT_BASE						/* x1 <- link address of the image */
    	subs	x9, x0, x1						/* x9 <- offset from the link address to the destination */
    
      adrp	x1, __image_copy_start				/* x1 <- copy source start (current address) */
    	add	x1, x1, :lo12:__image_copy_start
    	adrp	x2, __image_copy_end				/* x2 <- copy source end (current address) */
    	add	x2, x2, :lo12:__image_copy_end
    copy_loop:									/* copy [__image_copy_start, __image_copy_end) to the destination in x0 */
    	ldp	x10, x11, [x1], #16					/* load 16 bytes from the source, x1 += 16 */
    	stp	x10, x11, [x0], #16					/* store them to the destination, x0 += 16 */
    	cmp	x1, x2
    	b.lo	copy_loop						/* repeat while x1 < x2 (unsigned) */
    	str	x0, [sp, #24]							/* save the destination end address */
    
    	/*
    	 * Fix .rela.dyn relocations
    	 */
    	adrp	x2, __rel_dyn_start					/* x2 <- first entry of the relocation table */
    	add	x2, x2, :lo12:__rel_dyn_start
    	adrp	x3, __rel_dyn_end					/* x3 <- end of the relocation table */
    	add	x3, x3, :lo12:__rel_dyn_end
    fixloop:									/* walk the 24-byte .rela.dyn entries; same method as the first (pie_fixup) pass */
    	ldp	x0, x1, [x2], #16					/* x0 <- r_offset, x1 <- r_info, x2 += 16 */
    	ldr	x4, [x2], #8						/* x4 <- r_addend, x2 += 8 */
    	and	x1, x1, #0xffffffff					/* keep only the low 32 bits of r_info (the relocation type) */
    	cmp	x1, #R_AARCH64_RELATIVE
    	bne	fixnext								/* entries of any other type are left untouched */
    
    	add	x0, x0, x9							/* location to patch, moved by the link-to-destination offset */
    	add	x4, x4, x9							/* value to store, moved by the same offset */
    	str	x4, [x0]
    fixnext:
    	cmp	x2, x3
    	b.lo	fixloop							/* repeat while x2 < x3 (unsigned) */
    
    relocate_done:
    	/* switch_el is a macro defined elsewhere; presumably it branches to 3:/2:/1: for EL3/EL2/EL1 (confirm) */
    	switch_el x1, 3f, 2f, 1f
    	bl	hang
    3:	mrs	x0, sctlr_el3
    	b	0f
    2:	mrs	x0, sctlr_el2
    	b	0f
    1:	mrs	x0, sctlr_el1
    0:	tbz	w0, #2, 5f							/* SCTLR bit 2 clear (D-cache disabled): jump to 5, no cache maintenance */
    	tbz	w0, #12, 4f							/* SCTLR bit 12 clear (I-cache disabled): jump to 4, skip the I-cache invalidate */
    	ic	iallu								/* invalidate the instruction cache */
    	isb	sy
    	/*
    	 * NOTE(review): when the early b.eq to relocate_done is taken,
    	 * [sp, #24] has not been written, so x1 below is whatever was in
    	 * that stack slot - confirm this is acceptable.
    	 */
    4:	ldp	x0, x1, [sp, #16]					/* x0, x1 <- saved destination start and end, passed to the d-cache flush */
    	bl	__asm_flush_dcache_range
    	bl     __asm_flush_l3_dcache			/* L3 cache flush */
    5:	ldp	x29, x30, [sp],#32					/* restore x29/x30 and release the frame */
    	ret
    ENDPROC(relocate_code)