/*
* linux/arch/arm/boot/compressed/head.S
*
* Copyright (C) 1996-2002 Russell King
* Copyright (C) 2004 Hyok S. Choi (MPU support)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
/*
 * Debugging stuff
 *
 * Note that these macros must not contain any code which is not
 * 100% relocatable.  Any attempt to do so will result in a crash.
 * Please select one of the following when turning on debugging.
 */
#ifdef DEBUG				/* debug macros - middle layer */

#if defined(CONFIG_DEBUG_ICEDCC)	/* use the CP14 debug comms channel */

#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
		.macro	loadsp, rb, tmp
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c0, c5, 0
		.endm
#elif defined(CONFIG_CPU_XSCALE)
		.macro	loadsp, rb, tmp
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c8, c0, 0
		.endm
#else
		.macro	loadsp, rb, tmp
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c1, c0, 0
		.endm
#endif

#else					/* use a UART as the debug channel */

#include <mach/debug-macro.S>		/* arch-specific low-level debug macros */

		.macro	writeb,	ch, rb
		senduart \ch, \rb
		.endm

#if defined(CONFIG_ARCH_SA1100)
		.macro	loadsp, rb, tmp
		mov	\rb, #0x80000000	@ physical base address
#ifdef CONFIG_DEBUG_LL_SER3
		add	\rb, \rb, #0x00050000	@ Ser3
#else
		add	\rb, \rb, #0x00010000	@ Ser1
#endif
		.endm
#elif defined(CONFIG_ARCH_S3C2410)
		.macro	loadsp, rb, tmp
		mov	\rb, #0x50000000
		add	\rb, \rb, #0x4000 * CONFIG_S3C_LOWLEVEL_UART_PORT
		.endm
#else
		.macro	loadsp,	rb, tmp
		addruart \rb, \tmp
		.endm
#endif
#endif
#endif					/* DEBUG */
/* 调试宏-上层 */
.macrokputc,val/* 打印字符 */
movr0, \val
blputc
.endm
.macrokphex,val,len/* 打印十六进制数 */
movr0, \val
movr1, #\len
blphex
.endm
.macrodebug_reloc_start/* 重定位内核调试宏-开始 */
#ifdef DEBUG
kputc#'\n'
kphexr6, 8/* 处理器 id */
kputc#':'
kphexr7, 8/* 构架 id */
#ifdef CONFIG_CPU_CP15
kputc#':'
mrcp15, 0, r0, c1, c0
kphexr0, 8/* 控制寄存器 */
#endif
kputc#'\n'
kphexr5, 8/* 解压后的内核起始地址 */
kputc#'-'
kphexr9, 8/* 解压后的内核结束地址 */
kputc#'>'
kphexr4, 8/* 内核执行地址 */
kputc#'\n'
#endif
.endm
.macrodebug_reloc_end/* 重定位内核调试宏-结束 */
#ifdef DEBUG
kphexr5, 8/* 内核结束地址 */
kputc#'\n'
movr0, r4
blmemdump/* 打印内核起始处 256 字节 */
#endif
.endm
.section ".start", #alloc, #execinstr
/*
* 清理不同的调用约定
*/
.align
.arm@ 启动总是进入ARM状态
start:
.typestart,#function
.rept7
movr0, r0
.endr
ARM(movr0, r0)
ARM(b1f)
THUMB(adrr12, BSYM(1f))
THUMB(bxr12)
.word0x016f2818@ 用于boot loader的魔数
.wordstart@ 加载/运行zImage的绝对地址(编译时确定)
.word_edata@ zImage结束地址
THUMB(.thumb)
1:movr7, r1@ 保存构架ID到r7(此前由bootloader放入r1)
movr8, r2@ 保存内核启动参数地址到r8(此前由bootloader放入r2)
#ifndef __ARM_ARCH_2__
/*
* 通过Angel调试器启动 - 必须进入 SVC模式且关闭FIQs/IRQs
* (numeric definitions from angel arm.h source).
* 如果进入时在user模式下,我们只需要做这些
*/
mrsr2, cpsr@ 获取当前模式
tstr2, #3@ 判断是否是user模式
bnenot_angel
movr0, #0x17@ angel_SWIreason_EnterSVC
ARM(swi0x123456)@ angel_SWI_ARM
THUMB(svc0xab)@ angel_SWI_THUMB
not_angel:
mrsr2, cpsr@ 关闭中断
orrr2, r2, #0xc0@ 以保护调试器的运作
msrcpsr_c, r2
#else
teqppc, #0x0c000003@ 关闭中断(此外bootloader已设置模式为SVC)
#endif
/*
* 注意一些缓存的刷新和其他事务可能需要在这里完成
* - is there an Angel SWI call for this?
*/
/*
* 一些构架的特定代码可以在这里被连接器插入,
* 但是不应使用 r7(保存构架ID), r8(保存内核启动参数地址), and r9.
*/
.text
/*
* 此处确定解压后的内核映像的绝对地址(物理地址),保存于r4
* 由于配置的不同可能有的结果
* (1)定义了CONFIG_AUTO_ZRELADDR
* ZRELADDR是已解压内核最终存放的物理地址
* 如果AUTO_ZRELADDR被选择了, 这个地址将会在运行是确定:
* 将当pc值和0xf8000000做与操作,
* 并加上TEXT_OFFSET(内核最终存放的物理地址与内存起始的偏移)
* 这里假定zImage被放在内存开始的128MB内
* (2)没有定义CONFIG_AUTO_ZRELADDR
* 直接使用zreladdr(此值位于arch/arm/mach-xxx/Makefile.boot文件确定)
*/
#ifdef CONFIG_AUTO_ZRELADDR
@ 确定内核映像地址
movr4, pc
andr4, r4, #0xf8000000
addr4, r4, #TEXT_OFFSET
#else
ldrr4, =zreladdr
#endif
blcache_on/* 开启缓存(以及MMU) */
restart:adrr0, LC0
ldmiar0, {r1, r2, r3, r6, r10, r11, r12}
ldrsp, [r0, #28]
/*
* 我们可能运行在一个与编译时定义的不同地址上,
* 所以我们必须修正变量指针
*/
subr0, r0, r1@ 计算偏移量
addr6, r6, r0@ 重新计算_edata
addr10, r10, r0@ 重新获得压缩后的内核大小数据位置
/*
*内核编译系统将解压后的内核大小数据
*以小端格式
*附加在压缩数据的后面(其实是“gzip -f -9”命令的结果)
* 下面代码的作用是将解压后的内核大小数据正确地放入r9中(避免了大小端问题)
*/
ldrbr9, [r10, #0]
ldrblr, [r10, #1]
orrr9, r9, lr, lsl #8
ldrblr, [r10, #2]
ldrbr10, [r10, #3]
orrr9, r9, lr, lsl #16
orrr9, r9, r10, lsl #24
/*
* 下面代码的作用是将正确的当前执行映像的结束地址放入r10
*/
#ifndef CONFIG_ZBOOT_ROM
/* malloc 获取的内存空间位于重定向的栈指针之上 (64k max) */
addsp, sp, r0
addr10, sp, #0x10000
#else
/*
* 如果定义了 ZBOOT_ROM, bss/stack 是非可重定位的,
* 但有些人依然可以将其放在RAM中运行,
* 这时我们可以参考 _edata.
*/
movr10, r6
#endif
/*
* 检测我们是否会发生自我覆盖的问题
* r4 = 解压后的内核起始地址(最终执行位置)
* r9 = 解压后内核的大小
* r10 = 当前执行映像的结束地址, 包含了 bss/stack/malloc 空间(假设是非XIP执行的)
* 我们的基本需求是:
* (若最终执行位置r4在当前映像之后)r4 - 16k 页目录 >= r10 -> OK
* (若最终执行位置r4在当前映像之前)r4 + 解压后的内核大小 <= 当前位置 (pc) -> OK
* 如果上面的条件不满足,就会自我覆盖,必须先搬运当前映像
*/
addr10, r10, #16384
cmpr4, r10 @ 假设最终执行位置r4在当前映像之后
bhswont_overwrite
addr10, r4, r9 @ 假设最终执行位置r4在当前映像之前
ARM(cmpr10, pc)@ r10 = 解压后的内核结束地址
THUMB(movlr, pc)
THUMB(cmpr10, lr)
blswont_overwrite
/*
*将当前的映像重定向到解压后的内核之后(会发生自我覆盖时才执行,否则就被跳过)
* r6 = _edata(已校正)
* r10 = 解压后的内核结束地址
*因为我们要把当前映像向后移动, 所以我们必须由后往前复制代码,
*以防原数据和目标数据的重叠
*/
/*
* 将解压后的内核结束地址r10扩展(reloc_code_end - restart),
* 并对齐到下一个256B边界。
* 这样避免了当搬运的偏移较小时的自我覆盖
*/
addr10, r10, #((reloc_code_end - restart + 256) & ~255)
bicr10, r10, #255
/* 获取需要搬运的当前映像的起始位置r5,并向下做32B对齐. */
adrr5, restart
bicr5, r5, #31
subr9, r6, r5@ _edata - restart(已向下对齐)= 需要搬运的大小
addr9, r9, #31
bicr9, r9, #31@ 做32B对齐 ,r9 = 需要搬运的大小
addr6, r9, r5@ r6 = 当前映像需要搬运的结束地址
addr9, r9, r10@ r9 = 当前映像搬运的目的地的结束地址
/* 搬运当前执行映像,不包含 bss/stack/malloc 空间*/
1:ldmdbr6!, {r0 - r3, r10 - r12, lr}
cmpr6, r5
stmdbr9!, {r0 - r3, r10 - r12, lr}
bhi1b
/* 保存偏移量,用来修改sp和实现代码跳转 */
subr6, r9, r6
#ifndef CONFIG_ZBOOT_ROM
/* cache_clean_flush 可能会使用栈,所以重定向sp指针 */
addsp, sp, r6
#endif
blcache_clean_flush@ 刷新缓存
/* 通过搬运的偏移和当前的实际 restart 地址来实现代码跳转*/
adrr0, BSYM(restart)
addr0, r0, r6
movpc, r0
/* 在上面的跳转之后,程序又从restart开始。
* 但这次在检查自我覆盖的时候,新的执行位置必然满足
* 最终执行位置r4在当前映像之前,r4 + 压缩后的内核大小 <= 当前位置 (pc)
* 所以必然直接跳到了下面的wont_overwrite执行
*/
wont_overwrite:
		/*
		 * If delta (runtime address minus link-time address) is zero,
		 * we are running at the address we were linked at.
		 *   r0  = delta
		 *   r2  = BSS start (link-time)
		 *   r3  = BSS end (link-time)
		 *   r4  = final (physical) kernel execution address
		 *   r7  = architecture ID (from bootloader)
		 *   r8  = boot params pointer (from bootloader)
		 *   r11 = GOT start (link-time)
		 *   r12 = GOT end (link-time)
		 *   sp  = stack pointer (already fixed up)
		 */
		teq	r0, #0			@ test the delta
		beq	not_relocated		@ zero: no GOT/BSS fixup needed
		add	r11, r11, r0		@ relocate GOT start
		add	r12, r12, r0		@ relocate GOT end

#ifndef CONFIG_ZBOOT_ROM
		/*
		 * If we are configured with CONFIG_ZBOOT_ROM = n,
		 * we have to fix up the BSS pointers as well.
		 * Note that sp is already fixed up.
		 */
		add	r2, r2, r0		@ relocate BSS start
		add	r3, r3, r0		@ relocate BSS end

		/*
		 * Relocate all entries in the GOT table.
		 */
1:		ldr	r1, [r11, #0]		@ relocate GOT entry
		add	r1, r1, r0		@ this fixes up C references
		str	r1, [r11], #4
		cmp	r11, r12
		blo	1b
#else
		/*
		 * Relocate entries in the GOT table.  We only relocate
		 * the entries outside the (relocated) BSS region.
		 */
1:		ldr	r1, [r11, #0]		@ relocate GOT entry
		cmp	r1, r2			@ entry < bss_start ||
		cmphs	r3, r1			@ _end < entry
		addlo	r1, r1, r0		@ this fixes up C references
		str	r1, [r11], #4
		cmp	r11, r12
		blo	1b
#endif

		/*
		 * Relocation and fixup of this image are now complete;
		 * the real work can start.
		 */
not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss
		str	r0, [r2], #4
		str	r0, [r2], #4
		str	r0, [r2], #4
		cmp	r2, r3
		blo	1b

		/*
		 * The C runtime environment should now be setup sufficiently.
		 * Set up some pointers, and start decompressing.
		 *   r4 = final (physical) kernel execution address
		 *   r7 = architecture ID
		 *   r8 = boot params pointer
		 *
		 * r0-r3 below are the arguments of decompress_kernel():
		 *   r0 = output start address for the decompressed kernel
		 *   r1 = start address of free RAM
		 *   r2 = end address of free RAM
		 *   r3 = architecture ID
		 * decompress_kernel() (a C function) is what prints
		 * "Uncompressing Linux..." and " done, booting the kernel.\n".
		 */
		mov	r0, r4
		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #0x10000	@ 64k max
		mov	r3, r7
		bl	decompress_kernel
		/*
		 * decompress_kernel (misc.c) calls
		 * do_decompress (decompress.c), which calls
		 * decompress (lib/decompress_xxxx.c, depending on the
		 * configured compression method).
		 */

		/* Get ready to jump into the decompressed kernel. */
		bl	cache_clean_flush
		bl	cache_off		@ the kernel requires D-cache off
		mov	r0, #0			@ r0 must be 0
		mov	r1, r7			@ restore architecture number
		mov	r2, r8			@ restore boot params pointer
		mov	pc, r4			@ jump into Image entry (arch/arm/kernel/head.S)
/*
 * Link-time values saved here; used to work out the difference between
 * our run-time address and the address we were linked at.
 */
		.align	2
		.type	LC0, #object
LC0:		.word	LC0			@ r1
		.word	__bss_start		@ r2
		.word	_end			@ r3
		.word	_edata			@ r6
		.word	input_data_end - 4	@ r10 (inflated size location)
		.word	_got_start		@ r11
		.word	_got_end		@ ip
		.word	.L_user_stack_end	@ sp
		.size	LC0, . - LC0

#ifdef CONFIG_ARCH_RPC
		.globl	params
params:		ldr	r0, =0x10000100		@ params_phys for RPC
		mov	pc, lr
		.ltorg
		.align
#endif
/*
 * Turn on the cache.  We need to set up some page tables (and enable
 * the MMU) before both the data and instruction caches can be enabled.
 * We place the page tables (section descriptors) in the 16k (0x4000)
 * of memory below the kernel execution address, and we hope that
 * nobody else is using that address range.  If we are, things will
 * break!
 *
 * On entry,
 *  r4 = final (physical) kernel execution address
 *  r7 = architecture number
 *  r8 = boot params pointer
 * On exit,
 *  r0, r1, r2, r3, r9, r10, r12 corrupted
 * This routine must preserve:
 *  r4, r7, r8
 */
		.align	5
cache_on:	mov	r3, #8			@ cache_on function offset
		b	call_cache_fn
/*
 * Initialize the highest priority protection region, PR7
 * to cover all 32bit address and cacheable and bufferable.
 */
__armv4_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
		mcr	p15, 0, r0, c6, c7, 1

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ ...I .... ..D. WC.M
		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
		orr	r0, r0, #0x1000		@ ...1 .... .... ....
		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mov	pc, lr
__armv3_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 0	@ access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		/*
		 * ?? ARMv3 MMU does not allow reading the control register,
		 * does this really work on ARMv3 MPU?
		 */
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ .... .... .... WC.M
		orr	r0, r0, #0x000d		@ .... .... .... 11.1
		/* ?? this overwrites the value constructed above? */
		mov	r0, #0
		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		/* ?? invalidate for the second time? */
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr
/*
 * Set up the MMU page tables.
 * The 16K below the final kernel address holds section descriptors
 * that can map the whole 4GB address space
 * (16KB / 4B = 4K descriptors, each mapping 1MB; 4K * 1MB = 4GB).
 *
 * On entry,
 *  r4 = final (physical) kernel execution address
 *  r7 = architecture number
 *  r8 = boot params pointer
 * On exit,
 *  r0, r1, r2, r3, r9, r10 corrupted
 * This routine must preserve:
 *  r4, r7, r8
 */
__setup_mmu:	sub	r3, r4, #16384		@ page directory size is 16K
		bic	r3, r3, #0xff		@ align the pointer
		bic	r3, r3, #0x3f00		@ ... down to 16KB
		/*
		 * This alignment is required by the MMU hardware: the
		 * translation table base register (CP15 register 2) holds
		 * the physical address of the first-level table.  Only
		 * bits[31:14] are valid; bits[13:0] should be zero (SBZ),
		 * so the first-level table must be 16KB aligned.
		 */
/*
 * Initialise the page tables, turning on the cacheable and bufferable
 * bits for the RAM area only (up to 256MB).
 *  r3 = page directory base (16K below the final kernel address)
 */
		mov	r0, r3			@ r0 = page directory pointer
		mov	r9, r0, lsr #18
		mov	r9, r9, lsl #18		@ clear low 18 bits: start of RAM (guess, r9)
		add	r10, r9, #0x10000000	@ add a reasonable RAM size: end of RAM (guess, r10)
		mov	r1, #0x12
		orr	r1, r1, #3 << 10	@ section descriptor r1 = 0b110000010010 (full access:domain 0:XN:section)
		add	r2, r3, #16384		@ r2 = final kernel address (likely)
1:		cmp	r1, r9			@ if virt > start of RAM (enable cache/buffer for RAM)
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
		orrhs	r1, r1, #0x08		@ set cacheable
#else
		orrhs	r1, r1, #0x0c		@ set cacheable, bufferable
#endif
		cmp	r1, r10			@ if virt > end of RAM
		bichs	r1, r1, #0x0c		@ clear cacheable, bufferable
		str	r1, [r0], #4		@ 1:1 mapping (virtual address == physical address)
		add	r1, r1, #1048576	@ +1MB (per-section span): next section descriptor
		teq	r0, r2
		bne	1b
/*
 * If ever we are running from Flash, then we surely want the cache
 * to be enabled also for our execution instance...  We map 2MB of it
 * so there is no map overlap problem for up to 1 MB compressed kernel.
 * If we are running in RAM, the above mapping is sufficient and this
 * merely repeats it.
 */
		mov	r1, #0x1e
		orr	r1, r1, #3 << 10	@ section descriptor r1 = 0b110000011110 (full access:domain 0:XN:C:B:section)
		mov	r2, pc
		mov	r2, r2, lsr #20		@ section base of the current execution address
		orr	r1, r1, r2, lsl #20	@ build the section descriptor
		add	r0, r3, r2, lsl #2	@ corresponding page directory entry
		str	r1, [r0], #4		@ 1:1 mapping (virtual address == physical address)
		add	r1, r1, #1048576	@ +1MB: next section descriptor
		str	r1, [r0]		@ map 2MB total
		mov	pc, lr
ENDPROC(__setup_mmu)
__arm926ejs_mmu_cache_on:
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
		mov	r0, #4			@ put dcache in WT mode
		mcr	p15, 7, r0, c15, c0, 0
#endif
		@ falls through to the generic ARMv4 MMU enable below

__armv4_mmu_cache_on:
		mov	r12, lr
#ifdef CONFIG_MMU
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x0030
#ifdef CONFIG_CPU_ENDIAN_BE8
		orr	r0, r0, #1 << 25	@ big-endian page tables
#endif
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
#endif
		mov	pc, r12
__armv7_mmu_cache_on:
		mov	r12, lr			@ save lr in r12
#ifdef CONFIG_MMU
		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0 (memory model features)
		tst	r11, #0xf		@ test VMSA (virtual memory system arch); A8 = 0x3
		blne	__setup_mmu		@ VMSA present: build the page tables (sections)
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer (ensure writes above complete)
		tst	r11, #0xf		@ test VMSA again
		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
#endif
		mrc	p15, 0, r0, c1, c0, 0	@ read system control register
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x003c		@ write buffer
#ifdef CONFIG_MMU
#ifdef CONFIG_CPU_ENDIAN_BE8
		orr	r0, r0, #1 << 25	@ big-endian page tables
#endif
		orrne	r0, r0, #1		@ set the MMU enable bit
		movne	r1, #-1
		mcrne	p15, 0, r3, c2, c0, 0	@ load page table base into TTBR0
		mcrne	p15, 0, r1, c3, c0, 0	@ load DACR (all domains Manager, so XN is ignored)
#endif
		mcr	p15, 0, r0, c1, c0, 0	@ write system control register
		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 4	@ ISB (ensure the above completes before return)
		mov	pc, r12			@ return (MMU on, RAM now cached)
__fa526_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x1000		@ I-cache enable
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
		mov	pc, r12
__arm6_mmu_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	r0, #0x30
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, r12
__common_mmu_cache_on:
#ifndef CONFIG_THUMB2_KERNEL
#ifndef DEBUG
		orr	r0, r0, #0x000d		@ Write buffer, mmu
#endif
		mov	r1, #-1
		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
		b	1f
		.align	5			@ cache line aligned
1:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
#endif
#define PROC_ENTRY_SIZE (4*5)

/*
 * Here follow the relocatable cache support functions for the various
 * processors.  This is a generic hook for locating an entry and
 * jumping to an instruction at the specified offset from the start of
 * the block.  Please note this is all position independent code.
 *
 *  r1  = corrupted
 *  r2  = corrupted
 *  r3  = block offset of the wanted function (on:#8 | off:#12 | flush:#16)
 *  r9  = corrupted
 *  r12 = corrupted
 */
call_cache_fn:	adr	r12, proc_types
#ifdef CONFIG_CPU_CP15
		mrc	p15, 0, r9, c0, c0	@ get processor ID at run time
#else
		ldr	r9, =CONFIG_PROCESSOR_ID @ use the compile-time processor ID
#endif
1:		ldr	r1, [r12, #0]		@ get value
		ldr	r2, [r12, #4]		@ get mask
		eor	r1, r1, r9		@ (real ^ match) - check for a match
		tst	r1, r2			@       & mask
 ARM(		addeq	pc, r12, r3	)	@ call cache function on match
 THUMB(		addeq	r12, r3		)
 THUMB(		moveq	pc, r12		)	@ call cache function
		add	r12, r12, #PROC_ENTRY_SIZE @ no match: skip to the next entry
		b	1b
/*
 * Table for cache operations.  This is basically:
 *   - CPU ID match
 *   - CPU ID mask
 *   - 'cache on' method instruction
 *   - 'cache off' method instruction
 *   - 'cache flush' method instruction
 *
 * We match an entry using: ((real_id ^ match) & mask) == 0
 *
 * Writethrough caches generally only need 'on' and 'off'
 * methods.  Writeback caches _must_ have the flush method
 * defined.
 */
		.align	2
		.type	proc_types,#object
proc_types:
		.word	0x41560600		@ ARM6/610
		.word	0xffffffe0
		W(b)	__arm6_mmu_cache_off	@ works, but slow
		W(b)	__arm6_mmu_cache_off
		mov	pc, lr
 THUMB(		nop				)
@		b	__arm6_mmu_cache_on		@ untested
@		b	__arm6_mmu_cache_off
@		b	__armv3_mmu_cache_flush

		.word	0x00000000		@ old ARM ID
		.word	0x0000f000
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41007000		@ ARM7/710
		.word	0xfff8fe00
		W(b)	__arm7_mmu_cache_off
		W(b)	__arm7_mmu_cache_off
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41807200		@ ARM720T (writethrough)
		.word	0xffffff00
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41007400		@ ARM74x
		.word	0xff00ff00
		W(b)	__armv3_mpu_cache_on
		W(b)	__armv3_mpu_cache_off
		W(b)	__armv3_mpu_cache_flush

		.word	0x41009400		@ ARM94x
		.word	0xff00ff00
		W(b)	__armv4_mpu_cache_on
		W(b)	__armv4_mpu_cache_off
		W(b)	__armv4_mpu_cache_flush

		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
		.word	0xff0ffff0
		W(b)	__arm926ejs_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

		.word	0x00007000		@ ARM7 IDs
		.word	0x0000f000
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		@ Everything from here on will be the new ID system.

		.word	0x4401a100		@ sa110 / sa1100
		.word	0xffffffe0
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x6901b110		@ sa1110
		.word	0xfffffff0
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x56056900
		.word	0xffffff00		@ PXA9xx
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x56158000		@ PXA168
		.word	0xfffff000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

		.word	0x56050000		@ Feroceon
		.word	0xff0f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

#ifdef CONFIG_CPU_FEROCEON_OLD_ID
		/* this conflicts with the standard ARMv5TE entry */
		.long	0x41009260		@ Old Feroceon
		.long	0xff00fff0
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv5tej_mmu_cache_flush
#endif

		.word	0x66015261		@ FA526
		.word	0xff01fff1
		W(b)	__fa526_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__fa526_cache_flush

		@ These match on the architecture ID

		.word	0x00020000		@ ARMv4T
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x00050000		@ ARMv5TE
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x00060000		@ ARMv5TEJ
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

		.word	0x0007b000		@ ARMv6
		.word	0x000ff000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv6_mmu_cache_flush

		.word	0x000f0000		@ new CPU Id
		.word	0x000f0000
		W(b)	__armv7_mmu_cache_on
		W(b)	__armv7_mmu_cache_off
		W(b)	__armv7_mmu_cache_flush

		.word	0			@ unrecognised type
		.word	0
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		.size	proc_types, . - proc_types

		/*
		 * If you get a "non-constant expression in ".if" statement"
		 * error from the assembler somewhere in this file, check
		 * that you have not accidentally written a "b" instruction
		 * where you should have written W(b).  This is an alignment
		 * check for the cache-method jump table above - a useful
		 * trick to borrow when writing assembly.
		 */
		.if (. - proc_types) % PROC_ENTRY_SIZE != 0
		.error "The size of one or more proc_types entries is wrong."
		.endif
/*
 * Turn off the Cache and MMU.  ARMv3 does not support reading
 * the control register, but ARMv4 does.
 *
 * On exit,
 *  r0, r1, r2, r3, r9, r12 corrupted
 * This routine must preserve:
 *  r4, r7, r8
 */
		.align	5
cache_off:	mov	r3, #12			@ cache_off function offset
		b	call_cache_fn

__armv4_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
		mov	pc, lr

__armv3_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

__armv4_mmu_cache_off:
#ifdef CONFIG_MMU
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
#endif
		mov	pc, lr

__armv7_mmu_cache_off:
		mrc	p15, 0, r0, c1, c0	@ read system control register (SCTLR)
#ifdef CONFIG_MMU
		bic	r0, r0, #0x000d		@ clear MMU and cache enable bits
#else
		bic	r0, r0, #0x000c		@ clear cache enable bits
#endif
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r12, lr			@ save lr in r12
		bl	__armv7_mmu_cache_flush
		mov	r0, #0
#ifdef CONFIG_MMU
		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
#endif
		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
		mcr	p15, 0, r0, c7, c10, 4	@ DSB
		mcr	p15, 0, r0, c7, c5, 4	@ ISB (ensure the above completes before return)
		mov	pc, r12

__arm6_mmu_cache_off:
		mov	r0, #0x00000030		@ ARM6 control reg.
		b	__armv3_mmu_cache_off

__arm7_mmu_cache_off:
		mov	r0, #0x00000070		@ ARM7 control reg.
		b	__armv3_mmu_cache_off

__armv3_mmu_cache_off:
		mcr	p15, 0, r0, c1, c0, 0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, lr
/*
 * Clean and flush the cache to maintain consistency.
 *
 * On exit,
 *  r1, r2, r3, r9, r10, r11, r12 corrupted
 * This routine must preserve:
 *  r4, r6, r7, r8
 */
		.align	5
cache_clean_flush:
		mov	r3, #16			@ cache_flush function offset
		b	call_cache_fn

__armv4_mpu_cache_flush:
		mov	r2, #1
		mov	r3, #0
		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
		mov	r1, #7 << 5		@ 8 segments
1:		orr	r3, r1, #63 << 26	@ 64 entries
2:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
		subs	r3, r3, #1 << 26
		bcs	2b			@ entries 63 to 0
		subs	r1, r1, #1 << 5
		bcs	1b			@ segments 7 to 0

		teq	r2, #0
		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
		mov	pc, lr
__fa526_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv6_mmu_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr
__armv7_mmu_cache_flush:
		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
		mov	r10, #0
		beq	hierarchical
		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
		b	iflush
hierarchical:
		mcr	p15, 0, r10, c7, c10, 5	@ DMB
		stmfd	sp!, {r0-r7, r9-r11}
		mrc	p15, 1, r0, c0, c0, 1	@ read clidr
		ands	r3, r0, #0x7000000	@ extract loc from clidr
		mov	r3, r3, lsr #23		@ left align loc bit field
		beq	finished		@ if loc is 0, then no need to clean
		mov	r10, #0			@ start clean at cache level 0
loop1:
		add	r2, r10, r10, lsr #1	@ work out 3x current cache level
		mov	r1, r0, lsr r2		@ extract cache type bits from clidr
		and	r1, r1, #7		@ mask of the bits for current cache only
		cmp	r1, #2			@ see what cache we have at this level
		blt	skip			@ skip if no cache, or just i-cache
		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
		mcr	p15, 0, r10, c7, c5, 4	@ isb to sych the new cssr&csidr
		mrc	p15, 1, r1, c0, c0, 0	@ read the new csidr
		and	r2, r1, #7		@ extract the length of the cache lines
		add	r2, r2, #4		@ add 4 (line length offset)
		ldr	r4, =0x3ff
		ands	r4, r4, r1, lsr #3	@ find maximum number on the way size
		clz	r5, r4			@ find bit position of way size increment
		ldr	r7, =0x7fff
		ands	r7, r7, r1, lsr #13	@ extract max number of the index size
loop2:
		mov	r9, r4			@ create working copy of max way size
loop3:
 ARM(		orr	r11, r10, r9, lsl r5	) @ factor way and cache number into r11
 ARM(		orr	r11, r11, r7, lsl r2	) @ factor index number into r11
 THUMB(		lsl	r6, r9, r5		)
 THUMB(		orr	r11, r10, r6		) @ factor way and cache number into r11
 THUMB(		lsl	r6, r7, r2		)
 THUMB(		orr	r11, r11, r6		) @ factor index number into r11
		mcr	p15, 0, r11, c7, c14, 2	@ clean & invalidate by set/way
		subs	r9, r9, #1		@ decrement the way
		bge	loop3
		subs	r7, r7, #1		@ decrement the index
		bge	loop2
skip:
		add	r10, r10, #2		@ increment cache number
		cmp	r3, r10
		bgt	loop1
finished:
		ldmfd	sp!, {r0-r7, r9-r11}
		mov	r10, #0			@ switch back to cache level 0
		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
iflush:
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 4	@ ISB
		mov	pc, lr

__armv5tej_mmu_cache_flush:
1:		mrc	p15, 0, r15, c7, c14, 3	@ test,clean,invalidate D cache
		bne	1b
		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
		mov	pc, lr
__armv4_mmu_cache_flush:
		mov	r2, #64*1024		@ default: 32K dcache size (*2)
		mov	r11, #32		@ default: 32 byte line size
		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
		teq	r3, r9			@ cache ID register present?
		beq	no_cache_id
		mov	r1, r3, lsr #18
		and	r1, r1, #7
		mov	r2, #1024
		mov	r2, r2, lsl r1		@ base dcache size *2
		tst	r3, #1 << 14		@ test M bit
		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
		mov	r3, r3, lsr #12
		and	r3, r3, #3
		mov	r11, #8
		mov	r11, r11, lsl r3	@ cache line size in bytes
no_cache_id:
		mov	r1, pc
		bic	r1, r1, #63		@ align to longest cache line
		add	r2, r1, r2
1:
 ARM(		ldr	r3, [r1], r11	)	@ s/w flush D cache
 THUMB(		ldr     r3, [r1]	)	@ s/w flush D cache
 THUMB(		add     r1, r1, r11	)
		teq	r1, r2
		bne	1b

		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv3_mmu_cache_flush:
__armv3_mpu_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr
/*
 * Various debugging routines for printing hex characters and
 * memory, which again must be relocatable.
 */
#ifdef DEBUG
		.align	2
		.type	phexbuf,#object
phexbuf:	.space	12
		.size	phexbuf, . - phexbuf

@ phex corrupts {r0, r1, r2, r3}
phex:		adr	r3, phexbuf
		mov	r2, #0
		strb	r2, [r3, r1]		@ NUL-terminate the buffer
1:		subs	r1, r1, #1
		movmi	r0, r3
		bmi	puts			@ all digits emitted: print the buffer
		and	r2, r0, #15
		mov	r0, r0, lsr #4
		cmp	r2, #10
		addge	r2, r2, #7		@ skip from '9'+1 to 'A'
		add	r2, r2, #'0'
		strb	r2, [r3, r1]
		b	1b

@ puts corrupts {r0, r1, r2, r3}
puts:		loadsp	r3, r1
1:		ldrb	r2, [r0], #1
		teq	r2, #0
		moveq	pc, lr			@ NUL terminator: done
2:		writeb	r2, r3
		mov	r1, #0x00020000		@ simple busy-wait delay
3:		subs	r1, r1, #1
		bne	3b
		teq	r2, #'\n'
		moveq	r2, #'\r'		@ follow LF with CR
		beq	2b
		teq	r0, #0
		bne	1b
		mov	pc, lr

@ putc corrupts {r0, r1, r2, r3}
putc:
		mov	r2, r0
		mov	r0, #0
		loadsp	r3, r1
		b	2b			@ reuse puts' single-char path

@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
memdump:	mov	r12, r0
		mov	r10, lr
		mov	r11, #0
2:		mov	r0, r11, lsl #2		@ print the word address
		add	r0, r0, r12
		mov	r1, #8
		bl	phex
		mov	r0, #':'
		bl	putc
1:		mov	r0, #' '
		bl	putc
		ldr	r0, [r12, r11, lsl #2]	@ print the word value
		mov	r1, #8
		bl	phex
		and	r0, r11, #7
		teq	r0, #3
		moveq	r0, #' '		@ extra space mid-row
		bleq	putc
		and	r0, r11, #7
		add	r11, r11, #1
		teq	r0, #7
		bne	1b
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64		@ dump 64 words (256 bytes)
		blt	2b
		mov	pc, r10
#endif
.ltorg
reloc_code_end:
.align
.section ".stack", "aw", %nobits
.L_user_stack:.space4096
.L_user_stack_end: