前言
这个模块是关于汇编语言编程的,要求设置寄存器rdi的值为0x1337。在x86-64架构的Linux系统中,rdi是第一个参数寄存器。
为了将0x1337这个值放入rdi寄存器,你可以使用以下的汇编代码:
assembly
mov $0x1337, %rdi
这条指令将立即数0x1337移动到rdi寄存器中。
如果你需要将这条指令转换为字节码形式(即机器码),你可以使用任何支持x86-64汇编的汇编器,如NASM或GNU Assembler (GAS)。下面的步骤以NASM为例(注意NASM使用Intel语法,操作数顺序为"目的, 源",即写作 mov rdi, 0x1337):
- 创建一个名为set_rdi.s的汇编源文件,其中包含以下内容:
assembly
; set_rdi.s -- NASM (Intel syntax), Linux x86-64.
; Loads 0x1337 into rdi, then leaves through sys_exit so the process ends cleanly.
section .text
global _start

_start:
        mov     rdi, 0x1337             ; the value the exercise asks for
        mov     eax, 60                 ; syscall number: sys_exit
        xor     edi, edi                ; exit status 0 (this overwrites the 0x1337 above)
        syscall
- 使用NASM将汇编源文件汇编为目标文件(生成set_rdi.o):
bash
nasm -f elf64 set_rdi.s
- 链接生成的目标文件以创建可执行文件:
bash
ld -m elf_x86_64 -o set_rdi set_rdi.o
现在,set_rdi可执行文件包含了将0x1337设置到rdi寄存器的机器码。你可以使用objdump工具来查看这些机器码:
bash
objdump -d ./set_rdi
这将显示反汇编的输出,其中包含了mov $0x1337, %rdi指令的机器码表示。注意,实际的机器码可能会因为不同的系统和编译器选项而有所不同。
1. 寄存器赋值: level 1~level 2
from pwn import *

context.update(arch="amd64")

# Start the challenge, assemble the snippet, send the raw bytes, then attach the terminal.
io = process("/challenge/run")
shellcode = asm("""
mov rdi,0x1337 # Write your assembly code here
""")
io.write(shellcode)
io.interactive() # print(io.readallS())
# Load immediate values into registers; mov accepts a full 64-bit immediate
# when the destination is a register.
mov rax, 0x1337
mov r12, 0xCAFED00D1337BEEF
mov rsp, 0x31337
2. 加减乘除(模):level 3~level 6
# Addition: rdi = rdi + 0x331337
        add     rdi, 0x331337

# Multiply-add: rax = rdi * rsi + rdx
# Two-operand imul is a signed multiply: rdi = rdi * rsi.
# The one-operand forms (imul x / mul x) multiply rax by x and leave the
# product in rdx:rax.
        imul    rdi, rsi
        add     rdi, rdx
        mov     rax, rdi

# Quotient: rax = rdi / rsi (unsigned)
# div takes a single operand and divides rdx:rax by it, leaving the quotient
# in rax and the remainder in rdx, so rdx must be cleared first.
        xor     edx, edx
        mov     rax, rdi
        div     rsi

# Remainder: rax = rdi % rsi
        xor     edx, edx
        mov     rax, rdi
        div     rsi
        mov     rax, rdx                # div leaves the remainder in rdx
3. 通用数据寄存器高低位:level 7 ~ level 8
通用数据寄存器:rax,rbx,rcx,rdx
以rax寄存器为例:
MSB 【high】                          LSB 【low】
+----------------------------------------+
|                   rax                  | 64 bit
+--------------------+-------------------+
                     |        eax        | 32 bit
                     +---------+---------+
                               |   ax    | 16 bit
                               +----+----+
                               | ah | al | 8 bit each
                               +----+----+
mov ah, 0x42
仅使用 mov 指令完成模运算
# Modulo by a power of two using only mov: the low byte of rdi equals rdi % 256
# and the low word of rsi equals rsi % 65536.
# Writes to al/bx leave the upper bits of rax/rbx untouched -- presumably they
# are expected to be zero beforehand (TODO confirm against the challenge setup).
mov rcx,rdi
mov al,cl # operand sizes must match; `mov rax,cl` is rejected by the assembler
mov rcx,rsi
mov bx,cx
4. 逻辑运算:level 9 ~ level 11
# reg2的数据表示移动位数【这里可以是立即数,如:shl rdi,24】
shl reg1, reg2 <=> Shift reg1 left by the amount in reg2
shr reg1, reg2 <=> Shift reg1 right by the amount in reg2
rdi = | B7 | B6 | B5 | B4 | B3 | B2 | B1 | B0 |
Set rax to the value of B4
# Isolate byte B4 of rdi (bytes are numbered B7..B0 from most to least significant).
# Shifting left by 3*8 = 24 bits discards B7..B5:
# | B4 | B3 | B2 | B1 | B0 | 0 | 0 | 0 |
# Shifting right by 7*8 = 56 bits then leaves only B4 in the low byte:
# | 0 | 0 | 0 | 0 | 0 | 0 | 0 | B4 |
# Finally copy the result into rax.
shl rdi,24
shr rdi,56
mov rax ,rdi
# rax = rdi AND rsi without using mov: AND into rdi, zero rax, then OR rdi into it.
and rdi,rsi
xor rax,rax
or rax,rdi
Implement the following logic:
if x is even then # x 是 偶数,y = 1;反之 y=0
y = 1
else
y = 0
where:
x = rdi
y = rax
# rax = 1 if rdi is even, else 0 (no mov, no branches).
and rdi,0x1 # keep only the lowest bit: 0 for even, 1 for odd
xor rdi,1 # flip it: 1 for even, 0 for odd
xor rax,rax # clear rax
or rax,rdi # copy the result into rax
5. 内存<–>寄存器操作 :level 12~level 14
In x86 we can access the thing at a memory location, called dereferencing, like so:
mov rax, [some_address] <=> Moves the thing at 'some_address' into rax
This also works with things in registers:
mov rax, [rdi] <=> Moves the thing stored at the address of what rdi holds to rax
This works the same for writing:
mov [rax], rdi <=> Moves rdi to the address of what rax holds.
So if rax was 0xdeadbeef, then rdi would get stored at the address 0xdeadbeef:
[0xdeadbeef] = rdi
Note: memory is linear, and in x86, it goes from 0 - 0xffffffffffffffff (yes, huge).
# Load the value stored at a memory address into rax:
mov rax,[0x404000] # mov rax, qword ptr [0x404000]
# Store a register value to a memory address:
mov [0x404000],rax # mov qword ptr [0x404000],rax
# Load the value at the address into rax, then add 0x1337 to the value in memory.
# rax is used for the update and restored to the original value afterwards.
mov rax,[0x404000]
add rax,0x1337
mov [0x404000],rax
sub rax,0x1337
# [Alternative using a second register, so rax keeps the original value]:
mov rax,[0x404000]
mov rbx,rax
add rbx,0x1337
mov [0x404000],rbx
6. 内存操作:level 15~level 16
Recall the following:
The breakdown of the names of memory sizes:
Quad Word = 8 Bytes = 64 bits
Double Word = 4 bytes = 32 bits
Word = 2 bytes = 16 bits
Byte = 1 byte = 8 bits
In x86_64, you can access each of these sizes when dereferencing an address, just like using
bigger or smaller register accesses:
mov al, [address] <=> moves the least significant byte from address to rax
mov ax, [address] <=> moves the least significant word from address to rax
mov eax, [address] <=> moves the least significant double word from address to rax
mov rax, [address] <=> moves the full quad word from address to rax
# Size-qualified loads in GAS / pwntools Intel syntax need the `ptr` keyword.
# A bare `byte` / `word` / `dword` / `qword` (without `ptr`) is treated as the
# constant 1 / 2 / 4 / 8 and added to the address, so `byte [0x403fff]`
# assembles as `byte ptr [0x404000]`. Subtracting the size from the address
# only appears to work because of that; write `ptr` and use the real address.

# Size implied by the destination register:
        mov     al,  [0x404000]
        mov     bx,  [0x404000]
        mov     ecx, [0x404000]
        mov     rdx, [0x404000]

# Same loads with the access size spelled out:
        mov     al,  byte ptr  [0x404000]
        mov     bx,  word ptr  [0x404000]
        mov     ecx, dword ptr [0x404000]
        mov     rdx, qword ptr [0x404000]
# 结果
---------------- CODE ----------------
0x400000: mov al, byte ptr [0x404000]
0x400007: mov bx, word ptr [0x404000]
0x40000f: mov ecx, dword ptr [0x404000]
0x400016: mov rdx, qword ptr [0x404000]
--------------------------------------
7. 寄存器地址赋值:level 17~level 18
Set [rdi] = 0xdeadbeef00001337
Set [rsi] = 0xc0ffee0000
# Store two constants wider than 32 bits to [rdi] and [rsi].
# Each value is loaded into rax first and then stored from the register.
mov rax,0xdeadbeef00001337
mov [rdi],rax
mov rax, 0xc0ffee0000
mov [rsi],rax
[0x404190] = 0xd5729
[0x404198] = 0xe37d2
rdi = 0x404190 # task:取出 rdi 与 rdi+8 指向地址的数据,求和后存入 rsi 指向的地址
rsi = 0x4047a8
# [rsi] = [rdi] + [rdi+8]: load both qwords, add them, store the sum through rsi.
mov rax, [rdi+8]
mov rbx,[rdi]
add rbx,rax
mov [rsi],rbx
8. 栈调用:level 19~level 21
# Subtract rdi from the value on top of the stack: pop it, adjust, push it back.
pop rax
sub rax,rdi
push rax
# Alternative: modify the top-of-stack slot in place through [rsp].
mov rax,[rsp]
sub rax,rdi
mov [rsp],rax
# Swap rdi and rsi using the stack: the value pushed last is popped first.
push rdi
push rsi
pop rdi
pop rsi
# Average of the four qwords at the top of the stack, pushed as a new top entry.
        mov     rax, [rsp]
        add     rax, [rsp+8]
        add     rax, [rsp+16]
        add     rax, [rsp+24]
        xor     edx, edx                # div divides rdx:rax, so clear the high half first
        mov     rdi, 4
        div     rdi                     # rax = sum / 4
        push    rax
---------------- CODE ----------------
0x400000: mov rax, qword ptr [rsp]
0x400004: add rax, qword ptr [rsp + 8]
0x400009: add rax, qword ptr [rsp + 0x10]
0x40000e: add rax, qword ptr [rsp + 0x18]
0x400013: mov rdi, 4
0x40001a: div rdi
0x40001d: push rax
--------------------------------------
# Same average, loading each value into its own register first.
# div divides the 128-bit value rdx:rax, so rdx must be zero when it executes.
# rdx is used here to hold the fourth value; if it is still loaded at the div,
# the quotient is wrong. That is the cause of the bad result, not stack
# alignment. rdx is therefore cleared after the additions.
        mov     rax, [rsp]
        mov     rbx, [rsp+8]
        mov     rcx, [rsp+16]
        mov     rdx, [rsp+24]
        add     rax, rbx
        add     rax, rcx
        add     rax, rdx
        xor     edx, edx                # high half of the dividend must be 0
        mov     rdi, 4
        div     rdi                     # rax = sum / 4
        push    rax
---------------- CODE ----------------
0x400000: mov rax, qword ptr [rsp]
0x400004: mov rbx, qword ptr [rsp + 8]
0x400009: mov rcx, qword ptr [rsp + 0x10]
0x40000e: mov rdx, qword ptr [rsp + 0x18]
0x400013: mov rdi, 4
0x40001a: add rax, rbx
0x40001d: add rax, rcx
0x400020: add rax, rdx
0x400023: div rdi
0x400026: push rax
--------------------------------------
然而,还有另一种更有趣的除法方法。
除法使用shr
我们知道,字节中的每一位都对应 2 的某次幂。
+---------------------------------------------------------------+
|   1   |   0   |   0   |   0   |   0   |   0   |   0   |   0   |
| (2^7) | (2^6) | (2^5) | (2^4) | (2^3) | (2^2) | (2^1) | (2^0) |
+---------------------------------------------------------------+
上面字节的值是 1x(2^7),等于 128。
如果我们向右移动 2 位,我们会得到以下结果。
+---------------------------------------------------------------+
|   0   |   0   |   1   |   0   |   0   |   0   |   0   |   0   |
| (2^7) | (2^6) | (2^5) | (2^4) | (2^3) | (2^2) | (2^1) | (2^0) |
+---------------------------------------------------------------+
字节的值现在是 1x(2^5),即 32。因此,我们基本上在不使用 div 指令的情况下将数字除以 4。
现在,我们只需要对 rax 中存储的总和做同样的事情来找到平均值。
shr rax, 2
接下来,我们必须使用 push 指令将平均值复制到堆栈上。
push rax
堆栈将如下所示:
         +-------------------------+
RSP+0x20 | Quad Word A             | <------ rbp
         +-------------------------+
RSP+0x18 | Quad Word B             |
         +-------------------------+
RSP+0x10 | Quad Word C             |
         +-------------------------+
RSP+0x08 | Quad Word D             |
         +-------------------------+
RSP      | Average                 | <------ rsp
         +-------------------------+
9. 程序跳转指令: level 22 ~ level 24
# To jump to an absolute address, load the address into a register and jmp through it.
mov r12,0x403000
jmp r12
---------------- CODE ----------------
0x400071: mov r12, 0x403000
0x400078: jmp r12
--------------------------------------
相对跳转到当前位置之后 0x51 字节处的地址,并在该地址将 rax 赋值为 0x1。
汇编代码文件后缀:.S .asm; 后期编译会进行预处理,.s文件属于中间文件,不进行预处理。
# asm.S -- GAS, Intel syntax.
# Jumps over 0x51 bytes of nop padding, then sets rax = 1.
# _start is defined exactly once; a second definition is an assembler error.
.intel_syntax noprefix
.global _start

_start:
        jmp     next                    # relative jump across the padding
        .rept   0x51
        nop
        .endr
next:
        mov     rax, 1
as -o asm.o asm.S && objcopy -O binary --only-section=.text ./asm.o ./asm.bin && cat ./asm.bin | /challenge/run
# asm.S -- GAS, Intel syntax.
# Jumps 0x51 bytes forward; the code there loads the qword on top of the stack
# into rdi and jumps to 0x403000.
# _start is defined exactly once; a second definition is an assembler error.
.intel_syntax noprefix
.global _start

_start:
        jmp     next                    # relative jump across the padding
        .rept   0x51
        nop
        .endr
next:
        mov     rdi, [rsp]              # the value stored at rsp; `mov rdi, rsp` would copy the address itself
        mov     rax, 0x403000           # an absolute target goes through a register
        jmp     rax
10. 分支循环语句: level 25~level 28
Using the above knowledge, implement the following:
if [x] is 0x7f454c46:
y = [x+4] + [x+8] + [x+12]
else if [x] is 0x00005A4D:
y = [x+4] - [x+8] - [x+12]
else:
y = [x+4] * [x+8] * [x+12]
where:
x = rdi, y = rax.
# Three-way branch on the dword at [rdi]; the result is left in eax.
#   [rdi] == 0x7f454c46 : eax = [rdi+4] + [rdi+8] + [rdi+12]
#   [rdi] == 0x00005A4D : eax = [rdi+4] - [rdi+8] - [rdi+12]
#   otherwise           : eax = [rdi+4] * [rdi+8] * [rdi+12]
# Every arm ends by jumping to (or falling into) the common exit, so no arm
# runs on into the next one.
        mov     eax, [rdi]
        cmp     eax, 0x7f454c46
        je      sum_arm
        cmp     eax, 0x00005A4D
        je      diff_arm

        mov     eax, [rdi+4]            # default arm: product
        imul    eax, [rdi+8]
        imul    eax, [rdi+12]
        jmp     finish

sum_arm:
        mov     eax, [rdi+4]
        add     eax, [rdi+8]
        add     eax, [rdi+12]
        jmp     finish

diff_arm:
        mov     eax, [rdi+4]
        sub     eax, [rdi+8]
        sub     eax, [rdi+12]

finish:
        and     eax, eax                # leaves the value unchanged
# Same three-way branch, with the three operands preloaded into registers.
# ebx = [rdi+4] (accumulator), ecx = [rdi+8], edx = [rdi+12]; result copied to eax.
        mov     ebx, [rdi+4]
        mov     ecx, [rdi+8]
        mov     edx, [rdi+12]

        mov     eax, [rdi]
        cmp     eax, 0x7f454c46
        je      sum_arm
        cmp     eax, 0x00005A4D
        je      diff_arm

        imul    ebx, ecx                # default arm: product
        imul    ebx, edx
        jmp     finish

sum_arm:
        add     ebx, ecx
        add     ebx, edx
        jmp     finish

diff_arm:
        sub     ebx, ecx
        sub     ebx, edx

finish:
        mov     eax, ebx
level 26:基地址跳转,指令优化 (⭐)
In the above example, the jump table could look like:
[0x1337] = address of do_thing_0
[0x1337+0x8] = address of do_thing_1
[0x1337+0x10] = address of do_thing_2
[0x1337+0x18] = address of do_default_thing
Using the jump table, we can greatly reduce the amount of cmps we use.
Now all we need to check is if `number` is greater than 2.
If it is, always do:
jmp [0x1337+0x18]
Otherwise:
jmp [jump_table_address + number * 8]
Using the above knowledge, implement the following logic:
if rdi is 0:
jmp 0x403019
else if rdi is 1:
jmp 0x4030c9
else if rdi is 2:
jmp 0x4031d2
else if rdi is 3:
jmp 0x403287
else:
jmp 0x403378
Please do the above with the following constraints:
Assume rdi will NOT be negative
Use no more than 1 cmp instruction
Use no more than 3 jumps (of any variant)
We will provide you with the number to 'switch' on in rdi.
We will provide you with a jump table base address in rsi.
Here is an example table:
[0x40405b] = 0x403019 (addrs will change)
[0x404063] = 0x4030c9
[0x40406b] = 0x4031d2 # ==
[0x404073] = 0x403287
[0x40407b] = 0x403378 # ==
# Switch on rdi through a jump table: rsi = table base (0x40405b in the example,
# so [rsi] = 0x403019). Entries 0..3 are the cases; entry 4 is the default.
# Entry offsets from the base in the example table:
#   case 0:  0x40405b - 0x40405b = 0x00 = 0*8
#   case 1:  0x404063 - 0x40405b = 0x08 = 1*8
#   case 2:  0x40406b - 0x40405b = 0x10 = 2*8
#   case 3:  0x404073 - 0x40405b = 0x18 = 3*8
#   default: 0x40407b - 0x40405b = 0x20 = 4*8
        mov     rax, rdi
        and     rax, 0xfffffffffffffffc # clear the low two bits: the result is 0 only for rdi = 0..3
        je      in_range                # je tests the zero flag set by the AND; no cmp is needed
        jmp     [rsi + 32]              # rdi >= 4: take the default entry
in_range:
        jmp     [rsi + rdi*8]           # rdi = 0..3: take the matching entry
As an example, a for-loop can be used to compute the sum of the numbers 1 to n:
sum = 0
i = 1
while i <= n:
sum += i
i += 1
Please compute the average of n consecutive quad words, where:
rdi = memory address of the 1st quad word
rsi = n (amount to loop for)
rax = average computed
We will now set the following in preparation for your code:
[0x4042f0:0x404498] = {n qwords}
rdi = 0x4042f0
rsi = 53
# rax = average of n consecutive qwords.
# In:  rdi = address of the first qword, rsi = n
# Out: rax = sum / n (unsigned); 0 when n == 0
# Uses rcx as the index and clobbers rdx.
        xor     eax, eax                # running sum
        xor     ecx, ecx                # index i = 0
        test    rsi, rsi
        jz      avg_done                # n == 0: nothing to sum, and avoids dividing by zero
sum_next:
        add     rax, [rdi + rcx*8]
        inc     rcx
        cmp     rcx, rsi
        jb      sum_next                # continue while i < n; `<=` would add one qword past the end
        xor     edx, edx                # div divides rdx:rax, so clear the high half
        div     rsi
avg_done:
As an example, say we had a location in memory with adjacent numbers and we wanted
to get the average of all the numbers until we find one bigger or equal to 0xff:
average = 0
i = 0
while x[i] < 0xff:
average += x[i]
i += 1
average /= i
Using the above knowledge, please perform the following:
Count the consecutive non-zero bytes in a contiguous region of memory, where:
rdi = memory address of the 1st byte
rax = number of consecutive non-zero bytes
Additionally, if rdi = 0, then set rax = 0 (we will check)!
An example test-case, let:
rdi = 0x1000
[0x1000] = 0x41
[0x1001] = 0x42
[0x1002] = 0x43
[0x1003] = 0x00
then: rax = 3 should be set
We will now run multiple tests on your code, here is an example run:
(data) [0x404000] = {10 random bytes},
rdi = 0x404000
# rax = number of consecutive non-zero bytes starting at rdi.
# In:  rdi = address of the first byte (may be 0)
# Out: rax = count; 0 when rdi == 0
# rax is both the running count and the byte index.
        xor     eax, eax
        test    rdi, rdi
        jz      count_done              # null pointer: return 0 without touching memory
count_next:
        cmp     byte ptr [rdi + rax], 0 # test exactly one byte; a qword compare only stops on 8 zero bytes
        je      count_done
        inc     rax
        jmp     count_next
count_done:
11. 程序栈调用:level 29~level 30
level 29(⭐)
Functions use the instructions "call" and "ret".
The "call" instruction pushes the memory address of the next instruction onto
the stack and then jumps to the value stored in the first argument.
Let's use the following instructions as an example:
0x1021 mov rax, 0x400000
0x1028 call rax
0x102a mov [rsi], rax
1. call pushes 0x102a, the address of the next instruction, onto the stack.
2. call jumps to 0x400000, the value stored in rax.
The "ret" instruction is the opposite of "call".
ret pops the top value off of the stack and jumps to it.
Let's use the following instructions and stack as an example:
Stack ADDR VALUE
0x103f mov rax, rdx RSP + 0x8 0xdeadbeef
0x1042 ret RSP + 0x0 0x0000102a
Here, ret will jump to 0x102a
Please implement the following logic:
str_lower(src_addr):
i = 0
if src_addr != 0:
while [src_addr] != 0x00:
if [src_addr] <= 0x5a:
[src_addr] = foo([src_addr])
i += 1
src_addr += 1
return i
foo is provided at 0x403000.
foo takes a single argument as a value and returns a value.
All functions (foo and str_lower) must follow the Linux amd64 calling convention (also known as System V AMD64 ABI):
https://en.wikipedia.org/wiki/X86_calling_conventions#System_V_AMD64_ABI
Therefore, your function str_lower should look for src_addr in rdi and place the function return in rax.
An important note is that src_addr is an address in memory (where the string is located) and [src_addr] refers to the byte that exists at src_addr.
Therefore, the function foo accepts a byte as its first argument and returns a byte.
We will now run multiple tests on your code, here is an example run:
(data) [0x404000] = {10 random bytes},
rdi = 0x404000
# str_lower(src_addr) -- System V AMD64 ABI, GAS Intel syntax.
# In:  rdi = src_addr (may be 0)
# Out: rax = number of bytes that were passed through foo
# For every byte <= 0x5a up to the terminating 0x00, replaces the byte with
# foo(byte); foo is at 0x403000, takes the byte in rdi and returns it in al.
#
# The cursor and the counter must survive the call to foo, so they live in
# callee-saved registers (rbx, r12), which are saved and restored here.
# Caller-saved registers such as rsi/rdx may be overwritten by foo.
        xor     eax, eax                # i = 0
        test    rdi, rdi
        jz      finish                  # src_addr == 0: return 0
        push    rbx
        push    r12
        sub     rsp, 8                  # two pushes + 8 keep rsp 16-byte aligned at the call
        mov     rbx, rdi                # rbx = cursor
        xor     r12d, r12d              # r12 = count
scan:
        movzx   edi, byte ptr [rbx]     # argument for foo: the byte value, zero-extended
        test    edi, edi
        jz      done                    # terminating 0x00
        cmp     edi, 0x5a
        ja      next                    # only bytes <= 0x5a are converted
        mov     eax, 0x403000
        call    rax                     # al = foo(byte)
        mov     [rbx], al
        inc     r12
next:
        inc     rbx
        jmp     scan
done:
        mov     rax, r12                # return value
        add     rsp, 8
        pop     r12
        pop     rbx
finish:
        ret
level 30(⭐)
# most_common_byte(src_addr, size) -- System V AMD64 ABI, GAS Intel syntax.
# In:  rdi = src_addr, rsi = size in bytes
# Out: rax = byte value with the highest count (lowest value wins ties; 0 if size == 0)
# Stack: a zeroed table of 256 dword counters, released before returning.
# Only caller-saved registers (rax, rcx, rdx, r8) are used as scratch; rbp is
# saved and restored as the frame pointer.
        push    rbp
        mov     rbp, rsp
        sub     rsp, 0x400              # 256 counters * 4 bytes, at [rsp .. rbp)

        xor     eax, eax
        xor     ecx, ecx
clear_counts:                           # the table must start at zero
        mov     dword ptr [rsp + rcx*4], eax
        inc     ecx
        cmp     ecx, 0x100
        jb      clear_counts

        xor     ecx, ecx                # i = 0
count_bytes:
        cmp     rcx, rsi
        jae     find_max                # stop when i >= size (unsigned)
        movzx   edx, byte ptr [rdi + rcx]
        inc     dword ptr [rsp + rdx*4] # counts[byte] += 1
        inc     rcx
        jmp     count_bytes

find_max:
        xor     eax, eax                # best byte so far
        xor     r8d, r8d                # best count so far
        xor     ecx, ecx                # b = 0
scan_counts:
        mov     edx, dword ptr [rsp + rcx*4]
        cmp     edx, r8d
        jbe     scan_next               # only a strictly larger count replaces the best (unsigned)
        mov     r8d, edx
        mov     eax, ecx
scan_next:
        inc     ecx
        cmp     ecx, 0x100
        jb      scan_counts

        mov     rsp, rbp                # release the counter table
        pop     rbp
        ret
参考wp
pwncollege通关笔记:3.Assembly Refresher(从0开始学习pwn) - FreeBuf网络安全行业门户
Assembly Crash Course | Write-ups