最全IDA Python 使用总结_idapython,今年最新整理的《高频Python面试题集合》

学好 Python 不论是就业还是做副业赚钱都不错,但要学会 Python 还是要有一个学习规划。最后大家分享一份全套的 Python 学习资料,给那些想学习 Python 的小伙伴们一点帮助!

一、Python所有方向的学习路线

Python所有方向路线就是把Python常用的技术点做整理,形成各个领域的知识点汇总,它的用处就在于,你可以按照上面的知识点去找对应的学习资源,保证自己学得较为全面。

二、学习软件

工欲善其事必先利其器。学习Python常用的开发软件都在这里了,给大家节省了很多时间。

三、全套PDF电子书

书籍的好处就在于权威和体系健全,刚开始学习的时候你可以只看视频或者听某个人讲课,但等你学完之后,你觉得你掌握了,这时候建议还是得去看一下书籍,看权威技术书籍也是每个程序员必经之路。

四、入门学习视频

我们在看视频学习的时候,不能光动眼动脑不动手,比较科学的学习方法是在理解之后运用它们,这时候练手项目就很适合了。

五、实战案例

光学理论是没用的,要学会跟着一起敲,要动手实操,才能将自己的所学运用到实际当中去,这时候可以搞点实战案例来学习。

六、面试资料

我们学习Python必然是为了找到高薪的工作,下面这些面试题是来自阿里、腾讯、字节等一线互联网大厂最新的面试资料,并且有阿里大佬给出了权威的解答,刷完这一套面试资料相信大家都能找到满意的工作。

网上学习资料一大堆,但如果学到的知识不成体系,遇到问题时只是浅尝辄止,不再深入研究,那么很难做到真正的技术提升。

需要这份系统化学习资料的朋友,可以戳这里获取

一个人可以走的很快,但一群人才能走的更远!不论你是正从事IT行业的老鸟或是对IT行业感兴趣的新人,都欢迎加入我们的的圈子(技术交流、学习资源、职场吐槽、大厂内推、面试辅导),让我们一起学习成长!

        new_code_ea += len(new_code)

在完成代码去混淆之后需要对代码进行重定位,重定位的时候需要注意 jmp 指令长度的变化。



ea = new_code_start
while ea < new_code_ea:
    idc.create_insn(ea)
    mnem = idc.print_insn_mnem(ea)

    if mnem == 'call':
        call_target, need_handle = reloc.get(get_operand_value(ea, 0))
        assert need_handle == False
        ida_bytes.patch_bytes(ea, asm('call 0x%x' % (call_target), ea))
    elif mnem[0] == 'j' and idc.get_operand_type(ea, 0) != idc.o_reg:
        jcc_target, need_handle = reloc.get(get_operand_value(ea, 0))
        assert need_handle == False
        ida_bytes.patch_bytes(ea, asm('%s 0x%x' % (mnem, jcc_target), ea).ljust(idc.get_item_size(ea), b'\x90'))
    elif mnem == 'pushf':
        ida_bytes.patch_bytes(ea, b'\x90' \* 9)
        ea += 9
        continue
    ea += get_item_size(ea)

最后去混淆后的 switch 不能被 ida 正常识别出来,具体原因是前面获取返回地址 eip 的函数被 patch 成了 `mov reg, xxx` 指令,导致其与编译器默认编译出的汇编不同(程序开启了 PIE,直接访问跳转表的地址 ida 不能正确识别),因此需要将这里的代码重新 patch 回去。


同时为了不影响原本程序中的数据,这里我将修复的跳转表放到了其他位置。另外还有两个字符串全局变量也移动到了正确位置。



new_jmp_table = (0xA6000 - 0x2D54, 0xA6000)

# 移动并修复跳转表
for eax in range(0x20):
    jmp_target = (ida_bytes.get_dword(jmp_table[0] + eax \* 4) + jmp_table[1]) & 0xFFFFFFFF
    new_jmp_target, need_handle = reloc.get(jmp_target)
    assert need_handle == False
    ida_bytes.patch_dword(new_jmp_table[0] + eax \* 4, (new_jmp_target - new_jmp_table[1]) & 0xFFFFFFFF)

need_patch_addr = 0x963D7
ida_bytes.patch_bytes(need_patch_addr, asm('call 0x900;add ebx, 0x%x' % (new_jmp_table[1] - (need_patch_addr + 5)), need_patch_addr))  # 修复指令
ida_bytes.patch_bytes(new_jmp_table[1] - 0x2d7a, ida_bytes.get_bytes(jmp_table[1] - 0x2d7a, 0x26))  # 复制字符串到正确位置

最终去混淆脚本如下:



from queue import *
import ida_bytes
from idc import *
import idc
from keystone import *
from capstone import *

asmer = Ks(KS_ARCH_X86, KS_MODE_32)
disasmer = Cs(CS_ARCH_X86, CS_MODE_32)

def disasm(machine_code, addr=0):
l = “”
for i in disasmer.disasm(machine_code, addr):
l += “{:8s} {};\n”.format(i.mnemonic, i.op_str)
return l.strip(‘\n’)

def asm(asm_code, addr=0):
l = b’’
for i in asmer.asm(asm_code, addr)[0]:
l += bytes([i])
return l

def print_asm(ea):
print(disasm(idc.get_bytes(ea, idc.get_item_size(ea)), ea))

class RelocDSU:

def \_\_init\_\_(self):
    self.reloc = {}

def get(self, ea):
    if ea not in self.reloc:
        if idc.print_insn_mnem(ea) == 'jmp' and idc.get_operand_type(ea, 0) != idc.o_reg:
            jmp_ea = idc.get_operand_value(ea, 0)

            if idc.get_segm_name(jmp_ea) == '.got.plt':
                self.reloc[ea] = ea
                return self.reloc[ea], False

            self.reloc[ea], need_handle = self.get(idc.get_operand_value(ea, 0))
            return self.reloc[ea], need_handle
        else:
            self.reloc[ea] = ea
    if self.reloc[ea] != ea: self.reloc[ea] = self.get(self.reloc[ea])[0]
    return self.reloc[ea], idc.get_segm_name(self.reloc[ea]) == '.text'

def merge(self, ea, reloc_ea):
    self.reloc[self.get(ea)[0]] = self.get(reloc_ea)[0]

reloc = RelocDSU()

class Block:
def __init__(self, start_ea, end_ea, imm, reg, call_target):
self.start_ea = start_ea
self.end_ea = end_ea
self.imm = imm
self.reg = reg
self.call_target = call_target

def mov_code(ea, new_code_ea):
return asm(disasm(idc.get_bytes(ea, idc.get_item_size(ea)), ea), new_code_ea)

def get_real_code(block, new_code_ea):
ea = block.call_target
while True:
if idc.print_insn_mnem(ea) == ‘cmp’:
reg = idc.print_operand(ea, 0)
imm = idc.get_operand_value(ea, 1)
if reg == block.reg and imm == block.imm:
ea += idc.get_item_size(ea)
break
ea += idc.get_item_size(ea)

# 在 cmp 判断找到对应位置后会依次执行 jnz,popa,popf 三条指令
assert idc.print_insn_mnem(ea) == 'jnz'
ea += idc.get_item_size(ea)

assert idc.print_insn_mnem(ea) == 'popa'
ea += idc.get_item_size(ea)

assert idc.print_insn_mnem(ea) == 'popf'
ea += idc.get_item_size(ea)

if idc.print_insn_mnem(ea) == 'pushf':  # 第一种特殊情况,实际是 ret 指令。
    return True, asm('ret')

new_code = b''
while True:
    if idc.print_insn_mnem(ea) == 'jmp':  # 第二种特殊情况,跳转过去可能还会有几条实际功能指令。
        jmp_ea = idc.get_operand_value(ea, 0)
        if idc.print_insn_mnem(jmp_ea) == 'pushf':
            break
        ea = jmp_ea
    else:
        code = mov_code(ea, new_code_ea)
        new_code += code
        new_code_ea += len(code)
        ea += get_item_size(ea)
return False, new_code

def get_block(start_ea):
global imm, reg, call_target
mnem_list = [‘pushf’, ‘pusha’, ‘mov’, ‘call’, ‘pop’]
ea = start_ea
for i in range(5):
mnem = idc.print_insn_mnem(ea)
assert mnem == mnem_list[i]
if mnem == ‘mov’:
imm = idc.get_operand_value(ea, 1)
reg = idc.print_operand(ea, 0)
elif mnem == ‘call’:
call_target = idc.get_operand_value(ea, 0)
ea += idc.get_item_size(ea)
return Block(start_ea, ea, imm, reg, call_target)

def handle_one_branch(branch_address, new_code_ea):
new_code = b’’
ea = branch_address
while True:
try:
block = get_block(ea)
is_ret, real_code = get_real_code(block, new_code_ea)
reloc.merge(ea, new_code_ea)
ea = block.end_ea
new_code_ea += len(real_code)
new_code += real_code
if is_ret: break
except:
get_eip_func = {0x900: ‘ebx’, 0x435c: ‘eax’}
if idc.print_insn_mnem(ea) == ‘call’ and get_operand_value(ea, 0) in get_eip_func:
reloc.merge(ea, new_code_ea)
real_code = asm(‘mov %s, 0x%x’ % (get_eip_func[get_operand_value(ea, 0)], ea + 5), new_code_ea)
else:
if idc.print_insn_mnem(ea) == ‘jmp’ and idc.get_operand_type(ea, 0) != idc.o_reg:
reloc.merge(new_code_ea, ea)
else:
reloc.merge(ea, new_code_ea)
real_code = mov_code(ea, new_code_ea)

        new_code += real_code
        if real_code == asm('ret'): break
        new_code_ea += len(real_code)
        if idc.print_insn_mnem(ea) == 'jmp' and idc.get_operand_type(ea, 0) != idc.o_reg:  # jmp reg is a swtich
            jmp_ea = idc.get_operand_value(ea, 0)
            if reloc.get(jmp_ea)[1] == False: break  # 跳回之前的代码说明是个循环
            ea = reloc.get(jmp_ea)[0]
        else:
            ea += get_item_size(ea)
return new_code

def solve():
entry_point = 0x48F4
new_code_start = 0x96150
new_code_ea = new_code_start

jmp_table = (0x892ac, 0x8c000)  # [0x8c000 + (eax>>2) - 0x2d54] + 0x8c000

for _ in range(0x10000): idc.del_items(new_code_ea + _)
ida_bytes.patch_bytes(new_code_ea, 0x10000 \* b'\x90')

func_queue = Queue()
func_queue.put(entry_point)

while not func_queue.empty():
    func_address = func_queue.get()
    if reloc.get(func_address)[1] == False: continue
    reloc.merge(func_address, new_code_ea)
    branch_queue = Queue()
    branch_queue.put(func_address)
    if func_address == 0x4148:  # 特判 0x4148 地址处的函数,读取跳转表。
        assert new_code_ea == 0x963d0
        for eax in range(0x20):
            jmp_target = (ida_bytes.get_dword(jmp_table[0] + eax \* 4) + jmp_table[1]) & 0xFFFFFFFF
            new_jmp_target, need_handle = reloc.get(jmp_target)
            if need_handle: branch_queue.put(jmp_target)

    while not branch_queue.empty():
        branch_address = branch_queue.get()
        new_code = handle_one_branch(branch_address, new_code_ea)
        ida_bytes.patch_bytes(new_code_ea, new_code)

        # 当前 branch 去完混淆之后需要遍历代码找到 call 和 jmp 指令从而找到其他的 function 和 branch 。
        ea = new_code_ea
        while ea < new_code_ea + len(new_code):
            idc.create_insn(ea)
            if idc.print_insn_mnem(ea) == 'call':
                call_target, need_handle = reloc.get(get_operand_value(ea, 0))
                if need_handle: func_queue.put(call_target)
            elif idc.print_insn_mnem(ea)[0] == 'j' and idc.get_operand_type(ea, 0) != idc.o_reg:
                jcc_target, need_handle = reloc.get(get_operand_value(ea, 0))
                if need_handle == True:
                    branch_queue.put(jcc_target)
            ea += get_item_size(ea)
        new_code_ea += len(new_code)

ea = new_code_start
while ea < new_code_ea:
    idc.create_insn(ea)
    mnem = idc.print_insn_mnem(ea)

    if mnem == 'call':
        call_target, need_handle = reloc.get(get_operand_value(ea, 0))
        assert need_handle == False
        ida_bytes.patch_bytes(ea, asm('call 0x%x' % (call_target), ea))
    elif mnem[0] == 'j' and idc.get_operand_type(ea, 0) != idc.o_reg:
        jcc_target, need_handle = reloc.get(get_operand_value(ea, 0))
        assert need_handle == False
        ida_bytes.patch_bytes(ea, asm('%s 0x%x' % (mnem, jcc_target), ea).ljust(idc.get_item_size(ea), b'\x90'))
    elif mnem == 'pushf':
        ida_bytes.patch_bytes(ea, b'\x90' \* 9)
        ea += 9
        continue
    ea += get_item_size(ea)

new_jmp_table = (0xA6000 - 0x2D54, 0xA6000)

# 移动并修复跳转表
for eax in range(0x20):
    jmp_target = (ida_bytes.get_dword(jmp_table[0] + eax \* 4) + jmp_table[1]) & 0xFFFFFFFF
    new_jmp_target, need_handle = reloc.get(jmp_target)
    assert need_handle == False
    ida_bytes.patch_dword(new_jmp_table[0] + eax \* 4, (new_jmp_target - new_jmp_table[1]) & 0xFFFFFFFF)

need_patch_addr = 0x963D7
ida_bytes.patch_bytes(need_patch_addr, asm('call 0x900;add ebx, 0x%x' % (new_jmp_table[1] - (need_patch_addr + 5)), need_patch_addr))  # 修复指令
ida_bytes.patch_bytes(new_jmp_table[1] - 0x2d7a, ida_bytes.get_bytes(jmp_table[1] - 0x2d7a, 0x26))  # 复制字符串到正确位置

for _ in range(0x10000): idc.del_items(new_code_ea + _)
idc.jumpto(new_code_start)
ida_funcs.add_func(new_code_start)

print("finish")

solve()


### 例题:SUSCTF2022 tttree


[附件下载链接]( )


首先将 `0x140010074` ,`0x140017EFA` ,`140018C67` 起始处的数据转换为汇编。


观察汇编,发现很多代码块之间相互跳转,因此先按照 `retn` 划分代码块。通过对代码块的观察,发现这些代码块按照 `call $+5;pop rax`(即 `E8 00 00 00 00 58` ) 的出现次数可以分为三种:


* 出现 0 次:  
 ![在这里插入图片描述](https://img-blog.csdnimg.cn/67406817fb8245f3b205089974dffd72.png#)  
 本质上是 `其它操作` + `retn` 。
* 出现 1 次:  
 ![在这里插入图片描述](https://img-blog.csdnimg.cn/b2851e0bb0ed448aa3cc52d42c3f9524.png)  
 这种代码块本质为 `其它操作` + `jmp target` ,注意 `其它操作` 中可能包含 branch 。
* 出现 2 次:  
 ![在这里插入图片描述](https://img-blog.csdnimg.cn/b5c676861f124cf698e82e321921038e.png)

 这个可以看做 2 个出现 1 次的代码块两个拼在一起,其中前面一个代码块去掉 `retn` 。执行完前面一个代码块后由于没有 `retn` ,因此 `target1` 留在栈中。执行第 2 个代码块跳转到 `target2` 执行 ,在 `target2` 代码块返回时会返回到 `target1` 。因此这种代码块本质上相当于 `其它操作` + `call target2` 且下一个要执行的代码块为 `target1` 。


我们定义代码块 `Block` 几个关键信息:


* `start_addr`:代码块的起始地址。
* `asm_list`:代码块的有效汇编,由于汇编指令可能包含 `[rip + xxx]` ,因此需要记录汇编指令的地址以便后续修正。
* `direct_next`:执行完此代码块后接下来要执行的代码块地址。
* `branch_list`:代码块中的所有条件跳转语句跳到的地址。
* `call_target`:代码块调用函数地址。



class Block:
def __init__(self, start_ea, asm_list, direct_next, branch_list, call_target):
self.start_ea = start_ea
self.asm_list = asm_list
self.direct_next = direct_next
self.branch_list = branch_list
self.call_target = call_target

def \_\_str\_\_(self):
    return 'start\_ea: 0x%x\ndirect\_next: 0x%x\ncall\_target: 0x%x\nbranch\_list: %s\nasm\_list:\n%s\n' % (
        0 if self.start_ea == None else self.start_ea,
        0 if self.direct_next == None else self.direct_next,
        0 if self.call_target == None else self.call_target,
        str([hex(x) for x in self.branch_list]),
        str('\n'.join([hex(addr) + ' ' + asm for addr, asm in self.asm_list]))
    )

`get_block` 函数可以获取给定地址处的代码块并提取相关信息。代码块中可能有 `push xxx;pop xxx;` 这样的无意义指令,可以通过栈模拟来去除。



def get_block(start_ea):
ea = start_ea
stack = []
asm_list = []
branch_list = []
call_target = None
direct_next = None

while True:
    idc.create_insn(ea)
    mnem = idc.print_insn_mnem(ea)

    # 处理混淆中跳转的情况
    if mnem == 'pushfq':
        ea += idc.get_item_size(ea)

        assert idc.get_bytes(ea, idc.get_item_size(ea)) == b'\xE8\x00\x00\x00\x00'
        ea += idc.get_item_size(ea)
        jmp_base = ea

        assert idc.print_insn_mnem(ea) == 'pop' and idc.get_operand_type(ea, 0) == o_reg
        reg = idc.print_operand(ea, 0)
        ea += idc.get_item_size(ea)

        assert idc.print_insn_mnem(ea) == 'add' and idc.print_operand(ea, 0) == reg
        assert idc.get_operand_type(ea, 1) == o_imm

        jmp_target = (jmp_base + idc.get_operand_value(ea, 1)) & 0xFFFFFFFFFFFFFFFF
        ea += idc.get_item_size(ea)

        assert idc.get_bytes(ea, idc.get_item_size(ea)) == asm('mov [rsp + 0x10], %s' % reg, ea)
        ea += idc.get_item_size(ea)

        assert idc.print_insn_mnem(ea) == 'popfq'
        ea += idc.get_item_size(ea)

        assert idc.print_insn_mnem(ea) == 'pop' and idc.print_operand(ea, 0) == reg
        assert len(stack) != 0 and stack[-1][0] == 'push' and stack[-1][1] == reg
        stack.pop()
        asm_list.pop()

        assert len(stack) != 0 and stack[-1][0] == 'push' and stack[-1][1] == reg
        stack.pop()
        asm_list.pop()

        ea += idc.get_item_size(ea)

        if idc.print_insn_mnem(ea) == 'retn':
            if direct_next == None:
                direct_next = jmp_target
            elif call_target == None:
                call_target = jmp_target
                asm_list.append((0, 'call 0x%x' % (call_target)))
            else:
                print("🤔🤔🤔🤔🤔🤔")
                assert False
            break
        else:
            assert call_target == None and direct_next == None
            direct_next = jmp_target
            continue

    if mnem == 'push':
        stack.append((mnem, idc.print_operand(ea, 0)))
    elif mnem == 'pop':
        if len(stack) != 0 and stack[-1][0] == 'push' and stack[-1][1] == idc.print_operand(ea, 0):
            stack.pop()
            asm_list.pop()
            ea += idc.get_item_size(ea)
            continue
        else:
            stack.clear()
    else:
        stack.clear()

    asm_list.append((ea, disasm(idc.get_bytes(ea, idc.get_item_size(ea)), ea)))

    if mnem == 'retn': break
    if mnem[0] == 'j' and mnem != 'jmp' and idc.get_operand_type(ea, 0) != o_reg:
        branch_list.append(idc.get_operand_value(ea, 0))

    if mnem == 'jmp':
        if idc.get_segm_name(idc.get_operand_value(ea, 0)) not in ['.text', '.aaa']:
            break
        else:
            ea = idc.get_operand_value(ea, 0)
    else:
        ea += idc.get_item_size(ea)

return Block(start_ea, asm_list, direct_next, branch_list, call_target)

能够获取代码块信息之后就可以 bfs 函数以及函数中的所有分支,提取出汇编代码并写入 `newcode` 段。这里需要注意以下几点:


* 涉及 rip 的汇编指令不能只是简单把指令中的 `rip` 替换为对应的具体数值,因为有的指令立即数的长度被限制在 4 字节,直接替换成数值会溢出。一个比较好的解决方法是将 `rip` 替换为 `rip + (指令原本地址 - 指令当前地址)` 。这样借助 rip 寄存器扩大访问范围并且代码移动的距离不会超过 0x100000000 因此可以保证正确性。
* 如果 `block.direct_next` 对应的代码已经被去混淆了需要加上一条 jmp 指令跳转到已经去混淆的代码。
* 有的汇编指令 keystone 不支持汇编,比如 `bnd ret` ,需要特判。



while not func_queue.empty():
    func_address = func_queue.get()
    if reloc.get(func_address)[1] == False: continue
    branch_queue = Queue()
    branch_queue.put(func_address)

    while not branch_queue.empty():
        branch_address = branch_queue.get()
        ea = branch_address

        while True:
            block = get_block(ea)
            reloc.merge(ea, new_code_ea)

            for addr, insn in block.asm_list:
                insn = insn.replace('rip', 'rip - 0x%x' % (new_code_ea - addr))
                if insn == 'bnd ret ;':
                    code = b'\xF2\xC3'
                else:
                    code = asm(insn, new_code_ea)
                ida_bytes.patch_bytes(new_code_ea, code)
                if addr != 0: reloc.merge(addr, new_code_ea)
                new_code_ea += len(code)

            if block.call_target != None:
                call_target, need_handle = reloc.get(block.call_target)
                if need_handle: func_queue.put(call_target)

            for branch_address in block.branch_list:
                jcc_target, need_handle = reloc.get(branch_address)
                if need_handle: branch_queue.put(jcc_target)

            if block.direct_next == None: break

            next_target, need_handle = reloc.get(block.direct_next)
            if need_handle == False:
                code = asm('jmp 0x%x' % (next_target), new_code_ea)
                ida_bytes.patch_bytes(new_code_ea, code)
                new_code_ea += len(code)
                break
            else:
                ea = block.direct_next

最后对代码进行重定位,需要注意的是代码块中的有效指令中也可能有 call 指令,这里 call 调用的是一个类似 plt 表的结构,会直接跳转到导入表中的函数地址表指向的函数,需要特判这种情况。



ea = new_code_start
while ea < new_code_ea:
    assert idc.create_insn(ea) != 0
    mnem = idc.print_insn_mnem(ea)

    if mnem == 'call':
        call_target, need_handle = reloc.get(get_operand_value(ea, 0))
        if need_handle == True:
            if idc.print_insn_mnem(call_target) == 'jmp' and idc.get_segm_name(idc.get_operand_value(call_target, 0)) == '.idata':
                ea += get_item_size(ea)
                continue
            else:
                assert False
        ida_bytes.patch_bytes(ea, asm('call 0x%x' % (call_target), ea).ljust(idc.get_item_size(ea), b'\x90'))
    elif mnem[0] == 'j' and idc.get_operand_type(ea, 0) != idc.o_reg:
        jcc_target, need_handle = reloc.get(get_operand_value(ea, 0))
        assert need_handle == False
        ida_bytes.patch_bytes(ea, asm('%s 0x%x' % (mnem, jcc_target), ea).ljust(idc.get_item_size(ea), b'\x90'))

    ea += get_item_size(ea)

最后完整代码:



from queue import *
from idc import *
import idc
from keystone import *
from capstone import *

asmer = Ks(KS_ARCH_X86, KS_MODE_64)
disasmer = Cs(CS_ARCH_X86, CS_MODE_64)

def disasm(machine_code, addr=0):
l = “”
for i in disasmer.disasm(machine_code, addr):
l += “{:8s} {};\n”.format(i.mnemonic, i.op_str)
return l.strip(‘\n’)

def asm(asm_code, addr=0):
l = b’’
for i in asmer.asm(asm_code, addr)[0]:
l += bytes([i])
return l

class RelocDSU:

def \_\_init\_\_(self):
    self.reloc = {}

def get(self, ea):
    if ea not in self.reloc:
        if idc.print_insn_mnem(ea) == 'jmp' and idc.get_operand_type(ea, 0) != idc.o_reg:
            jmp_ea = idc.get_operand_value(ea, 0)

            if idc.get_segm_name(jmp_ea) == '.idata':
                self.reloc[ea] = ea
                return self.reloc[ea], False

            self.reloc[ea], need_handle = self.get(idc.get_operand_value(ea, 0))
            return self.reloc[ea], need_handle
        else:
            self.reloc[ea] = ea
    if self.reloc[ea] != ea: self.reloc[ea] = self.get(self.reloc[ea])[0]
    return self.reloc[ea], idc.get_segm_name(self.reloc[ea]) in ['.text', '.aaa']

def merge(self, ea, reloc_ea):
    # print((hex(ea), hex(reloc\_ea)))
    self.reloc[self.get(ea)[0]] = self.get(reloc_ea)[0]

reloc = RelocDSU()

class Block:
def __init__(self, start_ea, asm_list, direct_next, branch_list, call_target):
self.start_ea = start_ea
self.asm_list = asm_list
self.direct_next = direct_next
self.branch_list = branch_list
self.call_target = call_target

def \_\_str\_\_(self):
    return 'start\_ea: 0x%x\ndirect\_next: 0x%x\ncall\_target: 0x%x\nbranch\_list: %s\nasm\_list:\n%s\n' % (
        0 if self.start_ea == None else self.start_ea,
        0 if self.direct_next == None else self.direct_next,
        0 if self.call_target == None else self.call_target,
        str([hex(x) for x in self.branch_list]),
        str('\n'.join([hex(addr) + ' ' + asm for addr, asm in self.asm_list]))
    )

def get_block(start_ea):
ea = start_ea
stack = []
asm_list = []
branch_list = []
call_target = None
direct_next = None

while True:
    idc.create_insn(ea)
    mnem = idc.print_insn_mnem(ea)

    # 处理混淆中跳转的情况
    if mnem == 'pushfq':
        ea += idc.get_item_size(ea)

        assert idc.get_bytes(ea, idc.get_item_size(ea)) == b'\xE8\x00\x00\x00\x00'
        ea += idc.get_item_size(ea)
        jmp_base = ea

        assert idc.print_insn_mnem(ea) == 'pop' and idc.get_operand_type(ea, 0) == o_reg
        reg = idc.print_operand(ea, 0)
        ea += idc.get_item_size(ea)

        assert idc.print_insn_mnem(ea) == 'add' and idc.print_operand(ea, 0) == reg
        assert idc.get_operand_type(ea, 1) == o_imm

        jmp_target = (jmp_base + idc.get_operand_value(ea, 1)) & 0xFFFFFFFFFFFFFFFF
        ea += idc.get_item_size(ea)

        assert idc.get_bytes(ea, idc.get_item_size(ea)) == asm('mov [rsp + 0x10], %s' % reg, ea)
        ea += idc.get_item_size(ea)

        assert idc.print_insn_mnem(ea) == 'popfq'
        ea += idc.get_item_size(ea)

        assert idc.print_insn_mnem(ea) == 'pop' and idc.print_operand(ea, 0) == reg
        assert len(stack) != 0 and stack[-1][0] == 'push' and stack[-1][1] == reg
        stack.pop()
        asm_list.pop()

        assert len(stack) != 0 and stack[-1][0] == 'push' and stack[-1][1] == reg
        stack.pop()
        asm_list.pop()

        ea += idc.get_item_size(ea)

        if idc.print_insn_mnem(ea) == 'retn':
            if direct_next == None:
                direct_next = jmp_target
            elif call_target == None:
                call_target = jmp_target
                asm_list.append((0, 'call 0x%x' % (call_target)))
            else:
                print("🤔🤔🤔🤔🤔🤔")
                assert False
            break
        else:
            assert call_target == None and direct_next == None
            direct_next = jmp_target
            continue

    if mnem == 'push':
        stack.append((mnem, idc.print_operand(ea, 0)))
    elif mnem == 'pop':
        if len(stack) != 0 and stack[-1][0] == 'push' and stack[-1][1] == idc.print_operand(ea, 0):
            stack.pop()
            asm_list.pop()
            ea += idc.get_item_size(ea)
            continue
        else:
            stack.clear()
    else:
        stack.clear()

    asm_list.append((ea, disasm(idc.get_bytes(ea, idc.get_item_size(ea)), ea)))

    if mnem == 'retn': break
    if mnem[0] == 'j' and mnem != 'jmp' and idc.get_operand_type(ea, 0) != o_reg:
        branch_list.append(idc.get_operand_value(ea, 0))

    if mnem == 'jmp':
        if idc.get_segm_name(idc.get_operand_value(ea, 0)) not in ['.text', '.aaa']:
            break
        else:
            ea = idc.get_operand_value(ea, 0)
    else:
        ea += idc.get_item_size(ea)

return Block(start_ea, asm_list, direct_next, branch_list, call_target)

entry_point = 0x1400133B7
new_code_start = 0x14001D000

def solve():
for i in range(0x10000):
idc.set_name(new_code_start + i, ‘’)
idc.del_items(new_code_start + i)
ida_bytes.patch_bytes(new_code_start, b’\x90’ * 0x10000)

func_queue = Queue()
func_queue.put(entry_point)
new_code_ea = new_code_start

while not func_queue.empty():
    func_address = func_queue.get()
    if reloc.get(func_address)[1] == False: continue
    branch_queue = Queue()
    branch_queue.put(func_address)

    while not branch_queue.empty():
        branch_address = branch_queue.get()
        ea = branch_address

        while True:
            block = get_block(ea)
            reloc.merge(ea, new_code_ea)

            for addr, insn in block.asm_list:
                insn = insn.replace('rip', 'rip - 0x%x' % (new_code_ea - addr))
                if insn == 'bnd ret ;':
                    code = b'\xF2\xC3'
                else:
                    code = asm(insn, new_code_ea)
                ida_bytes.patch_bytes(new_code_ea, code)
                if addr != 0: reloc.merge(addr, new_code_ea)
                new_code_ea += len(code)

            if block.call_target != None:
                call_target, need_handle = reloc.get(block.call_target)
                if need_handle: func_queue.put(call_target)

            for branch_address in block.branch_list:
                jcc_target, need_handle = reloc.get(branch_address)
                if need_handle: branch_queue.put(jcc_target)

            if block.direct_next == None: break

            next_target, need_handle = reloc.get(block.direct_next)
            if need_handle == False:
                code = asm('jmp 0x%x' % (next_target), new_code_ea)
                ida_bytes.patch_bytes(new_code_ea, code)
                new_code_ea += len(code)
                break
            else:
                ea = block.direct_next

ea = new_code_start
while ea < new_code_ea:
    assert idc.create_insn(ea) != 0
    mnem = idc.print_insn_mnem(ea)

    if mnem == 'call':
        call_target, need_handle = reloc.get(get_operand_value(ea, 0))
        if need_handle == True:
            if idc.print_insn_mnem(call_target) == 'jmp' and idc.get_segm_name(idc.get_operand_value(call_target, 0)) == '.idata':
                ea += get_item_size(ea)
                continue
            else:
                assert False
        ida_bytes.patch_bytes(ea, asm('call 0x%x' % (call_target), ea).ljust(idc.get_item_size(ea), b'\x90'))
    elif mnem[0] == 'j' and idc.get_operand_type(ea, 0) != idc.o_reg:
        jcc_target, need_handle = reloc.get(get_operand_value(ea, 0))
        assert need_handle == False
        ida_bytes.patch_bytes(ea, asm('%s 0x%x' % (mnem, jcc_target), ea).ljust(idc.get_item_size(ea), b'\x90'))

    ea += get_item_size(ea)

for i in range(0x10000): idc.del_items(new_code_start + i)
idc.jumpto(new_code_start)
idc.add_func(new_code_start)

print("finish")

solve()


## 去花指令


花指令目前没有太好的去除办法,但是同一题目中花指令种类和变化都是有限的,也就是说我们可以将题目中所有花指令的类型总结出来,然后分别编写相应的查找和处理规则。


### 例题:看雪CTF2019 圆圈舞DancingCircle


[附件下载链接]( )


用IDA打开DancingCircle,按G输入 `0x401f58` 跳转至核心函数,发现有大量花指令。因此需要借助 ida python 脚本正则表达式匹配去除。  
 分析汇编代码,发现花指令有如下几类:


#### call 花指令


* call + pop  
 例如 0x00401F9B 处的花指令![在这里插入图片描述](https://img-blog.csdnimg.cn/980a9de034224729a7226d206aba6e15.png)  
 另外还有 push eax + call + pop eax + pop eax 类型的。
* call + add esp, 4  
 例如 0x00401F62 处的花指令  
 ![在这里插入图片描述](https://img-blog.csdnimg.cn/3d3ab5e1c4f24bc8b36d6bedcff4e178.png)
* call + add [esp], 6 + retn  
 例如 0x00401FA3 处的花指令  
 ![在这里插入图片描述](https://img-blog.csdnimg.cn/6b206805804d4c6fafad6958943079f9.png)


#### jx + jnx 花指令


例如 0x00402D67 处的花指令  
 ![在这里插入图片描述](https://img-blog.csdnimg.cn/3670c332b3e54cc39b388d9560121fd0.png)  
 这类花指令可以做如下检测:


* 两个跳转指令的第一个字节相差 1 且较小的那个是偶数。
* 前一个跳转的立即数比后一个多 2 。


#### fake jmp 花指令


例如 0x00401FB2 这处花指令:  
 ![在这里插入图片描述](https://img-blog.csdnimg.cn/cec5fa38b64543fea2bda6825663c47f.png)  
 这里有很多跳转,但分析后发现这些跳转都可以忽略。由于这一类花指令比较单一,因此直接匹配特征即可。


#### stx + jx 花指令


例如 0x0040261F 和 0x004026D7 两处花指令:  
 ![在这里插入图片描述](https://img-blog.csdnimg.cn/f307e7fcce59484f872c5355af853232.png)


![在这里插入图片描述](https://img-blog.csdnimg.cn/c827787340ef47da89a8aa0f03cc7f44.png)  
 此类花指令本质是通过设置标志寄存器的值使得满足后面的条件跳转。由于此类指令较少,直接匹配特征即可。



import regex as re
from idc import *
import idc
from keystone import *
from capstone import *

asmer = Ks(KS_ARCH_X86, KS_MODE_32)
disasmer = Cs(CS_ARCH_X86, CS_MODE_32)

def disasm(machine_code, addr=0):
l = “”
for i in disasmer.disasm(machine_code, addr):
l += “{:8s} {};\n”.format(i.mnemonic, i.op_str)
return l.strip(‘\n’)

def asm(asm_code, addr=0):
l = b’’
for i in asmer.asm(asm_code, addr)[0]:
l += bytes([i])
return l

def check_call_to_jmp(call_insn_addr):
call_target = idc.get_operand_value(call_insn_addr, 0)
if call_target not in range(start_ea, end_ea): return None
idc.create_insn(call_target)
if ida_bytes.get_bytes(call_target, idc.get_item_size(call_target)) == asm(‘add esp, 4’, call_target):
return call_target + idc.get_item_size(call_target)
if idc.print_insn_mnem(call_target) == ‘pop’:
return call_target + idc.get_item_size(call_target)
insn = disasm(ida_bytes.get_bytes(call_target, idc.get_item_size(call_target)), call_target)
if ‘[esp],’ in insn and (‘add’ in insn or ‘sub’ in insn) and idc.get_operand_type(call_target, 1) == o_imm:
idc.create_insn(call_target + idc.get_item_size(call_target))
if idc.print_insn_mnem(call_target + idc.get_item_size(call_target)) == ‘retn’:
return (call_insn_addr + 5 + (1 if idc.print_insn_mnem(call_target) == ‘add’ else -1) * idc.get_operand_value(call_target, 1)) & 0xFFFFFFFF
return None

def check_jcc_to_jmp(jcc_insn_addr):
code1 = ida_bytes.get_bytes(jcc_insn_addr, idc.get_item_size(jcc_insn_addr))
next_insn_addr = jcc_insn_addr + idc.get_item_size(jcc_insn_addr)
idc.create_insn(next_insn_addr)
code2 = ida_bytes.get_bytes(next_insn_addr, idc.get_item_size(next_insn_addr))
if abs(code1[0] - code2[0]) == 1 and min(code1[0], code2[0]) % 2 == 0 and idc.get_operand_value(jcc_insn_addr, 0) == idc.get_operand_value(next_insn_addr, 0):
return idc.get_operand_value(jcc_insn_addr, 0)
code = ida_bytes.get_bytes(jcc_insn_addr, 12)

print("bbbbb")
pattern_list = [
    re.compile(rb"(?s)\x7C\x03\xEB\x03.\x74\xFB"),
    re.compile(rb"(?s)\xEB\x07.\xEB\x01.\xEB\x04.\xEB\xF8."),
    re.compile(rb"(?s)\xEB\x01.")
]

for pattern in pattern_list:
    match = re.match(pattern, code)
    if match != None and match.span()[1] != 0:
        return jcc_insn_addr + match.span()[1]

return None

st_mnem_map = {‘clc’: [‘jnb’], ‘stc’: [‘jb’]}

def check_st_to_jmp(st_insn_addr):
st_mnem = idc.print_insn_mnem(st_insn_addr)
next_insn_addr = st_insn_addr + idc.get_item_size(st_insn_addr)
idc.create_insn(next_insn_addr)
if idc.print_insn_mnem(next_insn_addr) in st_mnem_map[st_mnem]:
return idc.get_operand_value(next_insn_addr, 0)
return None

start_ea = 0x401000
end_ea = 0x4B9CD0

一、Python所有方向的学习路线

Python所有方向的技术点做的整理,形成各个领域的知识点汇总,它的用处就在于,你可以按照下面的知识点去找对应的学习资源,保证自己学得较为全面。

img
img

二、Python必备开发工具

工具都帮大家整理好了,安装就可直接上手!img

三、最新Python学习笔记

当我学到一定基础,有自己的理解能力的时候,会去阅读一些前辈整理的书籍或者手写的笔记资料,这些笔记详细记载了他们对一些技术点的理解,这些理解是比较独到,可以学到不一样的思路。

img

四、Python视频合集

观看全面零基础学习视频,看视频学习是最快捷也是最有效果的方式,跟着视频中老师的思路,从基础到深入,还是很容易入门的。

img

五、实战案例

纸上得来终觉浅,要学会跟着视频一起敲,要动手实操,才能将自己的所学运用到实际当中去,这时候可以搞点实战案例来学习。img

六、面试宝典

在这里插入图片描述

在这里插入图片描述

简历模板在这里插入图片描述

网上学习资料一大堆,但如果学到的知识不成体系,遇到问题时只是浅尝辄止,不再深入研究,那么很难做到真正的技术提升。

需要这份系统化学习资料的朋友,可以戳这里获取

一个人可以走的很快,但一群人才能走的更远!不论你是正从事IT行业的老鸟或是对IT行业感兴趣的新人,都欢迎加入我们的的圈子(技术交流、学习资源、职场吐槽、大厂内推、面试辅导),让我们一起学习成长!

  • 17
    点赞
  • 27
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值