参考:https://www.52pojie.cn/thread-586130-1-1.html
本文中的代码示例来源于vgk.sys(1.0.0.3),主要展示调试/简化vm代码的结果。
调试中的相关数据:
1,ida中静态ImageBase=0x140000000
2,WinDbg中动态ImageBase=0x2bf70d00000
从文件中抽出的某个函数的开头部分,代码出现的先后顺序就是其逻辑上的执行顺序。
第一个跳转地址(push rdi retn)计算,未去混淆的代码片段如下:
.stub0:00000001405BC2AD push 2B6CD1BFh
.stub0:00000001405BC2B2 call sub_1405bc2b2
.stub0:00000001405716F3 sub_1405bc2b2 proc near
.stub0:00000001405716F3 var_210 = dword ptr -210h ;局部变量
.stub0:00000001405716F3 arg_0 = qword ptr 8 ;入栈的参数
.stub0:00000001405716F3 push r9
.stub0:00000001405716F5 jmp loc_1404CCEC7
.stub0:00000001405716F5 sub_1405bc2b2 endp
.stub0:00000001404CCEC7 loc_1404CCEC7:
.stub0:00000001404CCEC7 push r11
.stub0:00000001404CCEC9 mov r9d, 70D45E67h
.stub0:00000001404CCECF movsxd r9, r11d
.stub0:00000001404CCED2 jmp loc_1404D640D
.stub0:00000001404D640D loc_1404D640D:
.stub0:00000001404D640D pushfq
.stub0:00000001404D640E push rbp
.stub0:00000001404D640F movsx r11, di
.stub0:00000001404D6413 movzx r11w, r10b
.stub0:00000001404D6418 movzx r11, r9w
.stub0:00000001404D641C push r14
.stub0:00000001404D641E push r13
.stub0:00000001404D6420 movsx r9, ax
.stub0:00000001404D6424 inc r11b
.stub0:00000001404D6427 push r12
.stub0:00000001404D6429 movsx r11d, r12w
.stub0:00000001404D642D push r15
.stub0:00000001404D642F push rbx
.stub0:00000001404D6430 mov r11b, sil
.stub0:00000001404D6433 push rdx
.stub0:00000001404D6434 movsx r9, r12w
.stub0:00000001404D6438 mov r9b, 4Fh ; 'O'
.stub0:00000001404D643B push rcx
.stub0:00000001404D643C not bl
.stub0:00000001404D643E movsx ecx, r8w
.stub0:00000001404D6442 push r8
.stub0:00000001404D6444 movzx cx, dil
.stub0:00000001404D6449 movsxd rbx, r15d
.stub0:00000001404D644C push rax
.stub0:00000001404D644D xchg r8b, r11b
.stub0:00000001404D6450 push rdi
.stub0:00000001404D6451 push r10
.stub0:00000001404D6453 push rsi
.stub0:00000001404D6454 mov r9, 0
.stub0:00000001404D645E movsxd rax, r11d
.stub0:00000001404D6461 movsx ecx, r11w
.stub0:00000001404D6465 mov esi, 520669DDh
.stub0:00000001404D646A push r9
.stub0:00000001404D646C lahf
.stub0:00000001404D646D cdqe
.stub0:00000001404D646F cwde
.stub0:00000001404D6470 mov rsi, [rsp+88h+arg_0]
.stub0:00000001404D6478 movsx r8, dx
.stub0:00000001404D647C cbw
.stub0:00000001404D647E bswap esi
.stub0:00000001404D6480 dec esi
.stub0:00000001404D6482 movsx bx, r10b
.stub0:00000001404D6487 neg esi
.stub0:00000001404D6489 sets r11b
.stub0:00000001404D648D cdqe
.stub0:00000001404D648F bswap di
.stub0:00000001404D6492 inc esi
.stub0:00000001404D6494 lea rsi, [rsi+r9]
.stub0:00000001404D6498 mov rax, 100000000h
.stub0:00000001404D64A2 and r11b, r14b
.stub0:00000001404D64A5 add rsi, rax
.stub0:00000001404D64A8 or bh, dl
.stub0:00000001404D64AA shld r11w, r9w, 82h
.stub0:00000001404D64B0 shl bl, cl
.stub0:00000001404D64B2 mov rbx, rsp
.stub0:00000001404D64B5 sub rsp, 180h
.stub0:00000001404D64BC mov r11w, r8w
.stub0:00000001404D64C0 rcr r11w, cl
.stub0:00000001404D64C4 and rsp, 0FFFFFFFFFFFFFFF0h ;
.stub0:00000001404D64CB rcr di, 74h
.stub0:00000001404D64CF bsr r8, r15
.stub0:00000001404D64D3 rol r11, 0BEh
.stub0:00000001404D64D7
.stub0:00000001404D64D7 loc_1404D64D7:
.stub0:00000001404D64D7 mov r11, rsi
.stub0:00000001404D64DA ror ch, cl
.stub0:00000001404D64DC bt r8d, r14d
.stub0:00000001404D64E0 bt r8d, 9
.stub0:00000001404D64E5 mov r8, 0
.stub0:00000001404D64EF xchg dil, cl
.stub0:00000001404D64F2 movsx ecx, sp
.stub0:00000001404D64F5 sal ecx, 87h
.stub0:00000001404D64F8 sub r11, r8
.stub0:00000001404D64FB bt di, cx
.stub0:00000001404D64FF xor rdi, r13
.stub0:00000001404D6502 sar cl, cl
.stub0:00000001404D6504
.stub0:00000001404D6504 loc_1404D6504:
.stub0:00000001404D6504 lea rdi, loc_1404D6504
.stub0:00000001404D650B sal cl, cl
.stub0:00000001404D650D sub rsi, 4
.stub0:00000001404D6514 dec ch
.stub0:00000001404D6516 mov ecx, [rsi]
.stub0:00000001404D6518 jmp loc_1404A11A6
.stub0:00000001404A11A6 loc_1404A11A6:
.stub0:00000001404A11A6 xor ecx, r11d
.stub0:00000001404A11A9 clc
.stub0:00000001404A11AA stc
.stub0:00000001404A11AB rol ecx, 3
.stub0:00000001404A11AE dec ecx
.stub0:00000001404A11B0 ror ecx, 1
.stub0:00000001404A11B2 jmp loc_140580F7F
.stub0:0000000140580F7F loc_140580F7F:
.stub0:0000000140580F7F inc ecx
.stub0:0000000140580F81 rol ecx, 1
.stub0:0000000140580F83 neg ecx
.stub0:0000000140580F85 dec ecx
.stub0:0000000140580F87 push r11
.stub0:0000000140580F89 sub r11b, 0A5h
.stub0:0000000140580F8D xor [rsp+210h+var_210], ecx
.stub0:0000000140580F90 xadd r11b, r11b
.stub0:0000000140580F94 inc r11b
.stub0:0000000140580F97 xor r11b, 0E1h
.stub0:0000000140580F9B pop r11
.stub0:0000000140580F9D jmp sub_1404ECD69
.stub0:00000001404ECD69 sub_1404ECD69 proc near
.stub0:00000001404ECD69 movsxd rcx, ecx
.stub0:00000001404ECD6C test ecx, eax
.stub0:00000001404ECD6E test sp, 1898h
.stub0:00000001404ECD73 add rdi, rcx
.stub0:00000001404ECD76 push rdi
.stub0:00000001404ECD77 retn
'mov r9,0',如下图所示:
经过人工简化去掉混淆后,如下:
push r9
push r11
pushfq ;保存标志寄存器数据
push rbp
push r14
push r13
push r12
push r15
push rbx
push rdx
push rcx
push r8
push rax
push rdi
push r10
push rsi
;保存通用寄存器及标志寄存器数据
;开始工作
;下面这条指令很奇怪,
;ida 反汇编显示的是一个被标红的'0'
;WinDbg 显示的是一个有效的地址(但是又不是ImageBase)
;PE文件的静态ImageBase=0x140000000
;此次动态调试的ImageBase=0x2bf70d00000
;0x2be30d00000=0x2bf70d00000-0x140000000
mov r9,0 ;mov r9,2be30d00000h
push r9 ;r9=0x2be30d00000h
mov rsi,[rsp+88h+arg_0] ; 即 [rsp+0x90]:此时共 17 次 push(17*8=0x88),[rsp+0x88] 是 call 压入的返回地址,[rsp+0x90] 就是最开始的'0x2b6cd1bf'参数
; 后续的数据计算都是围绕 'rsi' 展开的
;rsi=2b6cd1bfh
bswap esi ;按字节逆序(字节序翻转),esi=0xbfd16c2b
dec esi ;esi=0xbfd16c2a
neg esi ;esi=0x402e93d6
inc esi ;esi=0x402e93d7
lea rsi,[rsi+r9] ;rsi=0x2be30d00000+0x402e93d7=0x2be70fe93d7
mov rax,100000000h
add rsi,rax ;rsi=0x2bf70fe93d7
mov rbx,rsp ;将栈指针保存到rbx ,供下一阶段使用
sub rsp,180h ;开辟栈空间
and rsp,0fffffffffffffff0h ;可能会使 栈指针减去8
mov r11,rsi ;r11=rsi=0x2bf70fe93d7
mov r8,0 ;情况同上面的 'mov r9,0',r8=0x2be30d00000
sub r11,r8 ;r11=0x1402e93d7,
;r11 相当于基于静态的ImageBase得到的地址
loc_1404d6504:
lea rdi,loc_1404d6504 ;取当前指令的地址,
sub rsi,4 ;rsi=0x2bf70fe93d7,
;相当于基于动态加载的ImageBase得到的地址
;rsi=rsi-4=0x2bf70fe93d3
mov ecx,[rsi] ;取加密后的跳转偏移
;ecx=0x3fd04eb4
xor ecx,r11d ;ecx=(0x3fd04eb4)^(0x402e93d7)=0x7ffedd63
rol ecx,3 ;ecx=0xfff6eb1b
dec ecx ;ecx=0xfff6eb1a
ror ecx,1 ;ecx=0x7ffb758d
inc ecx ;ecx=0x7ffb758e
rol ecx,1 ;ecx=0xfff6eb1c
neg ecx ;ecx=0x914e4
dec ecx ;ecx=0x914e3
push r11 ;将数据入栈,r11=0x1402e93d7
xor [rsp+210h+var_210],ecx ;然后在栈中处理数据
;[rsp+210+var_210]=0x140278734
pop r11 ; r11=0x140278734
add rdi,rcx ;静态中的地址;
;rdi=0x1404d6504+0x914e3=0x1405679e7
push rdi
retn ;jmp to rdi
加密的跳转偏移,在ida中如下图所示:
在WinDbg中,如下图所示:
最后 ‘push rdi retn ’ 的截图,如下:
第二个 'push rdi retn' 跳转地址计算,未去混淆的代码片段如下:
sub_1405679E7 proc near
.stub0:00000001405679E7 var_8 = dword ptr -8
.stub0:00000001405679E7 mov rdx, [rbx]
.stub0:00000001405679EA sar ebp, 3Dh
.stub0:00000001405679ED movsx eax, dx
.stub0:00000001405679F0 not bpl
.stub0:00000001405679F3 add rbx, 8
.stub0:00000001405679FA sub rsi, 1
.stub0:0000000140567A01 sal rbp, 58h
.stub0:0000000140567A05 cmp r14, r13
.stub0:0000000140567A08 cmc
.stub0:0000000140567A09 movzx eax, byte ptr [rsi]
.stub0:0000000140567A0C xadd bpl, bpl
.stub0:0000000140567A10 test dl, 21h
.stub0:0000000140567A13 xor al, r11b
.stub0:0000000140567A16 sub al, 0FEh
.stub0:0000000140567A18 rcr bp, cl
.stub0:0000000140567A1B rol al, 1
.stub0:0000000140567A1D sal bpl, 0A3h
.stub0:0000000140567A21 dec bpl
.stub0:0000000140567A24 shld rbp, rdx, 1
.stub0:0000000140567A29 not al
.stub0:0000000140567A2B neg al
.stub0:0000000140567A2D shld bp, r11w, 0A7h
.stub0:0000000140567A33 sal bp, 0FAh
.stub0:0000000140567A37 sbb bpl, sil
.stub0:0000000140567A3A xor r11b, al
.stub0:0000000140567A3D btr bp, r10w
.stub0:0000000140567A42 sub bpl, bl
.stub0:0000000140567A45 shr bpl, cl
.stub0:0000000140567A48 mov [rsp+rax+0], rdx
.stub0:0000000140567A4C movzx ebp, r14w
.stub0:0000000140567A50 rcr bp, 3
.stub0:0000000140567A54 sub rsi, 4
.stub0:0000000140567A5B shl ebp, 0Ch
.stub0:0000000140567A5E xadd bp, bp
.stub0:0000000140567A62 mov ebp, [rsi]
.stub0:0000000140567A64 test r13w, 27Ah
.stub0:0000000140567A6A cmc
.stub0:0000000140567A6B stc
.stub0:0000000140567A6C xor ebp, r11d
.stub0:0000000140567A6F test r10b, 81h
.stub0:0000000140567A73 cmp rsp, rbx
.stub0:0000000140567A76 bswap ebp
.stub0:0000000140567A78 clc
.stub0:0000000140567A79 test sil, 33h
.stub0:0000000140567A7D cmp dh, 9Bh
.stub0:0000000140567A80 neg ebp
.stub0:0000000140567A82 clc
.stub0:0000000140567A83 rol ebp, 3
.stub0:0000000140567A86 test r10b, 88h
.stub0:0000000140567A8A bswap ebp
.stub0:0000000140567A8C clc
.stub0:0000000140567A8D cmc
.stub0:0000000140567A8E test sil, bpl
.stub0:0000000140567A91 push r11
.stub0:0000000140567A93 cmp dx, ax
.stub0:0000000140567A96 clc
.stub0:0000000140567A97 xor [rsp+8+var_8], ebp
.stub0:0000000140567A9A pop r11
.stub0:0000000140567A9C test edx, r12d
.stub0:0000000140567A9F movsxd rbp, ebp
.stub0:0000000140567AA2 add rdi, rbp
.stub0:0000000140567AA5 jmp loc_1404B495A
.stub0:00000001404B495A loc_1404B495A:
.stub0:00000001404B495A push rdi
.stub0:00000001404B495B retn
经过人工简化去掉混淆后,如下:
mov rdx,[rbx] ;rdx=0x2be30d00000
add rbx,8 ;rbx=0xc4bf4ff530
sub rsi,1 ;rsi=rsi-1=0x2bf70fe93d2
movzx eax,byte ptr [rsi] ;eax=0x85
;r11=0x140278734
xor al,r11b ;al=(0x85)^(0x34)=0xb1
sub al,0feh ;al=0xb3
rol al,1 ;al=0x67
not al ;al=0x98
neg al ;al=0x68
xor r11b,al ;r11b=(0x34)^(0x68)=0x5c
mov [rsp+rax+0],rdx
sub rsi,4 ;rsi=0x2bf70fe93ce
mov ebp,[rsi] ;ebp=0xe0782db0
xor ebp,r11d ;ebp=(0xe0782db0)^(0x4027875c)=0xa05faaec
bswap ebp ;ebp=0xecaa5fa0
neg ebp ;ebp=0x1355a060
rol ebp,3 ;ebp=0x9aad0300
bswap ebp ;ebp=0x3ad9a
push r11 ;r11=0x14027875c
xor [rsp+8+var_8],ebp
pop r11 ;r11=(0x4027875c)^(0x3ad9a)=0x140242ac6
movsxd rbp,ebp
add rdi,rbp ;静态中的地址
;rdi=0x1405679e7+0x3ad9a=0x1405a2781
push rdi
retn
第三个 'push rdi retn' 跳转地址计算,未去混淆的代码片段如下:
sub_1405A2781 proc near
.stub0:00000001405A2781 var_8 = dword ptr -8
.stub0:00000001405A2781 mov rdx, [rbx]
.stub0:00000001405A2784 shld ax, r14w, 0F3h
.stub0:00000001405A278A add rbx, 8
.stub0:00000001405A2791 rcr ebp, 0CBh
.stub0:00000001405A2794 sub rsi, 1
.stub0:00000001405A279B bt rax, r11
.stub0:00000001405A279F btc eax, 0
.stub0:00000001405A27A3 movzx eax, byte ptr [rsi]
.stub0:00000001405A27A6 xor al, r11b
.stub0:00000001405A27A9 bsf bp, r11w
.stub0:00000001405A27AE sar bp, 27h
.stub0:00000001405A27B2 sub al, 0FEh
.stub0:00000001405A27B4 bts bp, r14w
.stub0:00000001405A27B9 not bpl
.stub0:00000001405A27BC rol ebp, 2Bh
.stub0:00000001405A27BF rol al, 1
.stub0:00000001405A27C1 btc bp, r11w
.stub0:00000001405A27C6 or bpl, 0BDh
.stub0:00000001405A27CA btr ebp, esp
.stub0:00000001405A27CD not al
.stub0:00000001405A27CF xor bpl, 0D7h
.stub0:00000001405A27D3 neg al
.stub0:00000001405A27D5 add bpl, cl
.stub0:00000001405A27D8 shrd bp, dx, 0A2h
.stub0:00000001405A27DD inc bp
.stub0:00000001405A27E0 xor r11b, al
.stub0:00000001405A27E3 bsf ebp, ecx
.stub0:00000001405A27E6 movzx bp, al
.stub0:00000001405A27EA and bpl, sil
.stub0:00000001405A27ED mov [rsp+rax+0], rdx
.stub0:00000001405A27F1 sub rsi, 4
.stub0:00000001405A27F8 mov ebp, [rsi]
.stub0:00000001405A27FA xor ebp, r11d
.stub0:00000001405A27FD test ebp, 6C203A32h
.stub0:00000001405A2803 test r10b, r11b
.stub0:00000001405A2806 cmc
.stub0:00000001405A2807 bswap ebp
.stub0:00000001405A2809 neg ebp
.stub0:00000001405A280B rol ebp, 3
.stub0:00000001405A280E stc
.stub0:00000001405A280F bswap ebp
.stub0:00000001405A2811 push r11
.stub0:00000001405A2813 inc r11w
.stub0:00000001405A2817 movsx r11w, r14b
.stub0:00000001405A281C xor [rsp+8+var_8], ebp
.stub0:00000001405A281F pop r11
.stub0:00000001405A2821 cmp r11w, 5A7Fh
.stub0:00000001405A2827 cmp cl, spl
.stub0:00000001405A282A movsxd rbp, ebp
.stub0:00000001405A282D test r9d, r11d
.stub0:00000001405A2830 cmp rdi, 5EC80D81h
.stub0:00000001405A2837 add rdi, rbp
.stub0:00000001405A283A jmp loc_1405B1EF1
loc_1405B1EF1:
.stub0:00000001405B1EF1 push rdi
.stub0:00000001405B1EF2 retn
经过人工简化去掉混淆后,如下:
sub_1405a2781
mov rdx,[rbx] ;rdx=8
add rbx,8 ;rbx=c4bf4ff538h
sub rsi,1 ;rsi=0x2bf70fe93cd
movzx eax,byte ptr [rsi] ;eax=0xb
xor al,r11b ;r11=0x140242ac6
;al=0xcd
sub al,0feh ;al=0xcf
rol al,1 ;al=0x9f
not al ;al=0x60
neg al ;al=0xa0
xor r11b,al ;r11b=(0xc6)^(0xa0)=0x66
mov [rsp+rax+0],rdx ;rsp=0xc4bf4ff3a0 数据转移
;;; 下面开始计算跳转偏移
sub rsi,4 ;rsi=0x2bf70fe93c9
mov ebp,[rsi] ;ebp=0x21c4ac78
xor ebp,r11d ;ebp=(0x21c4ac78)^(0x40242a66)=0x61e0861e
bswap ebp ;ebp=0x1e86e061
neg ebp ;ebp=0xe1791f9f
rol ebp,3 ;ebp=0xbc8fcff
bswap ebp ;ebp=0xfffcc80b
push r11 ;r11=0x140242a66
xor [rsp+8+var_8],ebp
pop r11 ;r11=0x1bfd8e26d
movsxd rbp,ebp
add rdi,rbp ;rdi 的值没有被修改过,
;直接沿用上一阶段的 rdi
;movsxd 符号扩展后 rbp=0xfffffffffffcc80b(即 -0x337f5)
;rdi=0x1405a2781-0x337f5=0x14056ef8c
jmp loc_1405b1ef1
loc_1405b1ef1:
push rdi
retn
对比第二次和第三次原始的代码片段,抛开“废”代码,其逻辑(简化后代码)是一样的:
1, 以 'rsi' 为数据源进行操作,提取经过处理的偏移;
2,总共有2个偏移(rax,栈数据偏移;rbp,跳转偏移),一个偏移用来保存[rbx] 中的数据到栈中;一个用来计算下一阶段跳转偏移。
通过上述三个阶段分析可以发现:
1,从最开始的 参数(通过push 入栈)处理,阶段之间数据是一环扣一环的(前一阶段的计算结果直接影响下一阶段)。
2,阶段性的处理逻辑目前是一样的。
从目前的分析结果可以发现:handler5的任务就是通过传入的参数,从数据段中解密出逻辑和数据。被VMProtect 保护的文件,在ida中的整体结构,如下图:
被保护的代码最开始应该在左边的部分,经过VMProtect 处理后,其原本的逻辑和数据就变成了中间这一大段的数据;这些数据由后面的红色部分进行解析。
!!!@@@###$$$%%%^^^&&&***((()))---___===+++
(2021/12/21)
简化规则:
1,(由于该样本中都是跳到绝对地址)cmp,test,cmc等指令没有产生实质性的影响,直接用nop替换。
2,产生过影响,但是被后续运算给覆盖了。例如,下图简化前的'pop rdi'指令前面的'sub dil,58h'和'shl dil,cl'。
写了代码,对上面的混淆代码进行简化;用nop指令替换冗余的指令,效果对比如下:
简化前:
简化后:
(to be continued!)