方法1
; Method 1: clear each of the four registers by loading an immediate zero.
; The four instructions are independent of one another; mov does not modify
; the flags.
mov eax, 0
mov ebx, 0
mov ecx, 0
mov edx, 0
方法2
; Method 2: clear each of the four registers with the xor-with-itself idiom.
; Every instruction names the same register as source and destination, so each
; register is zeroed without depending on any other register.
; Unlike mov, xor rewrites the arithmetic flags.
xor eax, eax
xor ebx, ebx
xor ecx, ecx
xor edx, edx
方法3
Hmm... yes: here the stall is partially offset by the fact that the source operand is not modified (work, or a branch, that the CPU avoids), so in this specific case the difference is minimised.
; Method 3 (two zero sources): zero two registers with xor, then copy each
; zero into one of the two remaining registers.
xor eax, eax                            ; eax = 0
xor ecx, ecx                            ; ecx = 0
mov ebx, eax                            ; ebx = 0, copied from eax
mov edx, ecx                            ; edx = 0, copied from ecx (not from eax)
Technically, the "result" stall makes this slower (but never by enough to justify inserting a useless instruction - not even a nop!). A stall on the source argument is also possible (when r+i/r), but not here. So you don't need to change the source register in the last line; by reusing eax you get the benefit of an unchanged source (there is no dependency/alteration here):
; Method 3 (single zero source): zero eax and ecx with xor, then copy eax
; into both ebx and edx. eax is only read, never modified, by the two movs.
xor eax, eax                            ; eax = 0
xor ecx, ecx                            ; ecx = 0
mov ebx, eax                            ; ebx = 0, copied from eax
mov edx, eax                            ; edx = 0, also copied from eax
方法4
; Method 4: load the general registers from a block of eight dwords with a
; single popad, by temporarily pointing esp at that block.
; popad loads edi, esi, ebp, ebx, edx, ecx and eax and discards the dword in
; the esp slot (offset 12), so edi, esi and ebp are overwritten as well.
include /masm32/include/masm32rt.inc
.data?
Null8 dd 8 dup (?)                      ; 8 dwords, uninitialised in the source; the code relies on them reading as zero - confirm they are zero-filled at load
.code
start:
mov Null8[12], esp ; save esp in the one slot popad skips, so it never reaches a register
mov esp, offset Null8 ; point esp at the start of the 8-dword block
popad                                   ; pop the block into the registers; esp ends at Null8+32
mov esp, Null8[12] ; restore the original stack pointer
getkey                                  ; masm32 macro; presumably waits for a key press - confirm
exit ; short form of invoke ExitProcess, 0
end start
方法5
对于AMD的CPU
; Method 5: execute cpuid and rely on the values it writes back into
; eax/ebx/ecx/edx. Which registers end up zero depends on the CPU (see the
; notes that follow this snippet).
; NOTE(review): only AL is loaded here, but cpuid takes its function number
; from the whole of EAX - presumably the upper 24 bits are expected to be zero
; already; confirm.
mov al,99                               ; low byte of the cpuid function number = 99
cpuid
说明:
1、对于Intel Celeron M,ebx,ecx, edx会被置0
2、On Intel 64 processors, CPUID clears the high 32 bits of the RAX/RBX/RCX/RDX registers in all modes.
(Source: Intel manual, Vol. 2A, p. 3-180, "CPUID - CPU Identification".)
I think this translates to: the high 32 bits of those registers are trashed,
i.e. overwritten with zeros.
方法6
; Method 6: a deliberately roundabout sequence built around cmpxchg8b.
; A 64-bit integer zero is stored just below the stack pointer. cmpxchg8b then
; compares it with edx:eax; when they differ it loads the zero into edx:eax and
; clears ZF, which lets the two cmov instructions copy the zero into ecx and ebx.
; NOTE(review): if edx:eax is already zero on entry, cmpxchg8b sets ZF, neither
; cmov fires, and ecx keeps the value popped from the stack while ebx is left
; unchanged - confirm whether that case matters.
; NOTE(review): the scratch qword lives at [esp-8], below the stack pointer -
; confirm nothing can overwrite it between the fistp and the cmpxchg8b.
fldz                                    ; push +0.0 onto the x87 register stack
.xmm                                    ; assembler directive; presumably needed for the prefetchnta/pause mnemonics - confirm
prefetchnta qword ptr es:[esp-8]        ; cache hint only; changes no register or memory value
push cs
verw word ptr es:[esp]                  ; affects only ZF, which cmpxchg8b rewrites below
pop ecx                                 ; ecx = the dword pushed by "push cs"
fistp qword ptr es:[esp-8]              ; store the x87 zero as a 64-bit integer and pop the x87 stack
pause                                   ; spin-wait hint; changes no register value
lock cmpxchg8b qword ptr es:[esp-8]     ; if [esp-8] != edx:eax: edx:eax = [esp-8] (zero), ZF = 0
cmovne ecx,eax                          ; ZF = 0 -> ecx = eax
cmovnz ebx,ecx                          ; ZF = 0 -> ebx = ecx (same condition as cmovne)
方法7
; Method 7: build a 32-byte block of zeros on the stack and pop it into the
; registers with popad. popad also loads edi, esi and ebp, so they are zeroed
; too.
; NOTE(review): rep stosd fills upward from edi only if the direction flag is
; clear - confirm that holds wherever this sequence is used.
sub esp,32                              ; reserve room for 8 dwords
mov edi,esp                             ; edi -> start of the reserved block
xor eax,eax                             ; fill value = 0
mov ecx,8                               ; number of dwords to store
rep stosd                               ; zero the block
popad                                   ; load the zeros; esp returns to its value before the sub
方法8
; Method 8: same idea as filling a 32-byte stack block with zeros and popping
; it with popad, but the constant 32 is loaded once via push/pop and reused as
; both the reservation size and the byte count. popad also overwrites edi, esi
; and ebp.
; NOTE(review): rep stosb fills upward from edi only if the direction flag is
; clear - confirm that holds wherever this sequence is used.
push 32
pop ecx                                 ; ecx = 32
sub esp, ecx                            ; reserve 32 bytes
mov edi, esp                            ; edi -> start of the reserved block
xor eax, eax                            ; fill value = 0 (al is what stosb stores)
rep stosb                               ; store 32 zero bytes
popad                                   ; load the zeros; esp returns to its value before the sub
方法9
; Method 9: swap esp with a pointer to a static block of zeros, popad the
; block, then pop the saved stack pointer back. popad also overwrites edi, esi
; and ebp.
; NOTE(review): after the sequence the ninth dword holds the old esp instead of
; the address of zer, so it appears to work only once unless that dword is
; re-initialised - confirm.
.data
zer dd 0,0,0,0,0,0,0,0, zer             ; 8 zero dwords, then a 9th dword holding the address of zer
.code
xchg esp, [zer+8*4]                     ; esp = address of zer; the 9th dword now holds the old esp
popad                                   ; load the 8 zeros; esp now points at the 9th dword
pop esp                                 ; reload the saved stack pointer from the 9th dword
00401004 87 25 E4 E4 42 00 xchg esp, dword ptr ds:[42E4E4h]
0040100A 61 popad
0040100B 5C pop esp ; just 8 bytes
测试代码:
Code:
; «««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««
; Timing harness: measures four instruction sequences, each expanded 100 times
; inline, between counter_begin / counter_end (macros from timers.asm), and
; prints the value counter_end leaves in eax after each measurement.
include /masm32/include/masm32rt.inc
.686
include /masm32/macros/timers.asm
; «««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««
.data
.code
; «««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««
start:
; «««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««
invoke Sleep, 4000                      ; wait 4000 ms before measuring; presumably to let the system settle - confirm
; Test 1: two xor-zeroed registers, each copied into one other register.
counter_begin 1000, HIGH_PRIORITY_CLASS ; presumably 1000 timing iterations at high process priority - confirm against timers.asm
REPEAT 100
xor eax, eax
xor ecx, ecx
mov ebx, eax                            ; copy from eax
mov edx, ecx                            ; copy from ecx
ENDM
counter_end
print ustr$(eax),13,10                  ; print eax as an unsigned number, then CR LF
; Test 2: one xor-zeroed register feeding the other three instructions.
counter_begin 1000, HIGH_PRIORITY_CLASS
REPEAT 100
xor eax, eax
xor ebx, eax                            ; ebx ^= eax: reads the eax written by the previous instruction
mov ecx, eax
mov edx, eax
ENDM
counter_end
print ustr$(eax),13,10,13,10            ; result followed by an extra blank line
; Test 3: same shape as test 1, with "add r, 1" in place of the xor zeroing,
; so eax and ecx each depend on their own previous value.
counter_begin 1000, HIGH_PRIORITY_CLASS
REPEAT 100
add eax, 1
add ecx, 1
mov ebx, eax
mov edx, ecx
ENDM
counter_end
print ustr$(eax),13,10
; Test 4: same shape as test 2, with "add eax, 1" in place of the xor zeroing.
counter_begin 1000, HIGH_PRIORITY_CLASS
REPEAT 100
add eax, 1
xor ebx, eax                            ; ebx ^= eax: reads the eax written by the previous instruction
mov ecx, eax
mov edx, eax
ENDM
counter_end
print ustr$(eax),13,10,13,10
inkey "Press any key to exit..."
exit
; «««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««
end start
Running on my P3, I would expect to see a much larger effect from 100 or more stalls (the four numbers below are the program's four outputs, in order):
206
208
207
205
以上源自MASM32官方论坛的精彩讨论贴:http://www.masm32.com/board/index.php?topic=11138.15