#ifndef __TIMING__INCLUDED__IOSA_4__
#define __TIMING__INCLUDED__IOSA_4__
#include <crtdefs.h>
#include <stdio.h>
#include <windows.h>
#include <mmsystem.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * CRT assertion-failure reporter, declared here by hand with C linkage so the
 * ENFORCE macro below can call it directly. Arguments are the wide-character
 * message text, the wide-character source file name and the source line.
 */
_CRTIMP void __cdecl _wassert(_In_z_ const wchar_t* _Message, _In_z_ const wchar_t* _File, _In_ unsigned _Line);
#ifdef __cplusplus
}
#endif
/*
 * ENFORCE(expr): evaluate expr exactly once; when it is false, call _wassert
 * with the stringified expression, the current file and the current line.
 * The definition is not wrapped in any NDEBUG test in this header, so the
 * expression (including any side effects such as API calls) is always
 * evaluated. The expansion is a (void) expression; use it as a statement.
 */
#define ENFORCE(_Expression) (void)((!!(_Expression)) || (_wassert(_CRT_WIDE(#_Expression),_CRT_WIDE(__FILE__),__LINE__),0))
/*
 * File-scope state shared by the routines below. Being `static` in a header,
 * every translation unit that includes this file gets its own copy.
 *   UisHnack     - scratch value: TSleep stores the wait length in TSC ticks
 *                  here, and LoadSpecifyCpuFreq uses it as an output buffer
 *                  for QueryPerformanceCounter.
 *   CpuFreq      - TSC tick count measured by LoadSpecifyCpuFreq over one
 *                  QueryPerformanceFrequency worth of counter ticks.
 *   MilliCpuFreq - CpuFreq / 1000; multiplied by a millisecond count in TSleep.
 */
static unsigned __int64 UisHnack, CpuFreq, MilliCpuFreq;
/*
 * Read the processor time-stamp counter.
 *
 * The function is `naked`: its entire body is the two instructions below, with
 * no compiler-generated prologue or epilogue. RDTSC leaves the counter in
 * EDX:EAX and the function returns immediately, so that register pair is the
 * 64-bit return value seen by the caller.
 */
__declspec(naked) static unsigned __int64 LoadTSC ( void ) {
__asm rdtsc
__asm ret
}
__declspec(naked) static void TSleep ( unsigned int dwMilliseconds ) {
static unsigned __int64 tStart;
__asm {
rdtsc
mov dword ptr [tStart], eax
mov dword ptr [tStart+4], edx
mov eax, 4[esp]
mov dword ptr [UisHnack+4], 0
mov dword ptr [UisHnack], eax
}
UisHnack *= MilliCpuFreq;
while ( LoadTSC () - tStart <= UisHnack );
__asm ret
}
/*
 * Measure how many TSC ticks elapse during one second of
 * QueryPerformanceCounter time on the given core.
 *
 * Steps:
 *   1. Pin the current thread and the whole process to core CpuCoreIndex and
 *      raise the priority class / thread priority for the measurement.
 *   2. Measure the cost of RDTSC itself (timeToRDTSC), the cost of one
 *      QueryPerformanceCounter call in TSC ticks (timeToQPF) and in counter
 *      ticks (QsecTCnt).
 *   3. Spin on QueryPerformanceCounter until more than `pfq` counter ticks
 *      (one second) have passed, bracketing the spin with RDTSC.
 *   4. Subtract the measurement overheads and, when it can be estimated, the
 *      overshoot of the spin loop.
 *
 * Side effects: stores the result in CpuFreq and result/1000 in MilliCpuFreq,
 * overwrites UisHnack, and leaves the thread and process affinity set to the
 * selected core. The priority class and thread priority are restored before
 * returning. Any failing API call or check triggers ENFORCE.
 *
 * CpuCoreIndex: zero-based core index; must be < 4.
 * Returns: the corrected TSC tick count for the one-second interval.
 */
static unsigned __int64 LoadSpecifyCpuFreq ( unsigned int CpuCoreIndex ) {
    unsigned __int64 timeStart64, cpu_fm64, timeToRDTSC, timeToQPF, Q_Error, QsecTCnt, pfq, lpQFCount;
    DWORD OldPriClassFlags;
    int OldPriThreadFlags; /* GetThreadPriority returns a signed value */
    HANDLE CurProcessHandle = GetCurrentProcess();
    HANDLE CurThreadHandle = GetCurrentThread ();

    OldPriClassFlags = GetPriorityClass ( CurProcessHandle );
    OldPriThreadFlags = GetThreadPriority( CurThreadHandle );

    ENFORCE ( CpuCoreIndex < 4 );
    ENFORCE ( SetThreadAffinityMask ( CurThreadHandle, ( (unsigned int) 1 << CpuCoreIndex ) ) );
    ENFORCE ( SetProcessAffinityMask ( CurProcessHandle, ( (unsigned int) 1 << CpuCoreIndex ) ) );
    ENFORCE ( QueryPerformanceFrequency ( ( (LARGE_INTEGER*) &pfq) ) );
    ENFORCE ( SetPriorityClass ( CurProcessHandle, REALTIME_PRIORITY_CLASS ) );
    ENFORCE ( SetThreadPriority( CurThreadHandle, THREAD_PRIORITY_HIGHEST ) );

    __asm {
        /* timeToRDTSC = TSC delta between two back-to-back RDTSC reads. */
        rdtsc
        mov ebx, eax
        mov ecx, edx
        rdtsc
        /* Low dword first, then the high dword with the borrow it produced.
           (The two instructions used to be in the opposite order, so the
           borrow came from whatever the carry flag happened to hold.) */
        sub eax, ebx
        sbb edx, ecx
        mov dword ptr [timeToRDTSC], eax
        mov dword ptr [timeToRDTSC+4], edx

        /* timeToQPF = TSC delta across one QueryPerformanceCounter call. */
        rdtsc
        mov dword ptr [timeToQPF], eax
        mov dword ptr [timeToQPF+4], edx
        push offset UisHnack
        call dword ptr [QueryPerformanceCounter]
        rdtsc
        sub eax, dword ptr [timeToQPF]
        sbb edx, dword ptr [timeToQPF+4]
        mov dword ptr [timeToQPF], eax
        mov dword ptr [timeToQPF+4], edx

        /* QsecTCnt = counter ticks between two consecutive readings. */
        push offset UisHnack
        call dword ptr [QueryPerformanceCounter]
        lea eax, QsecTCnt
        push eax
        call dword ptr [QueryPerformanceCounter]
        mov eax, dword ptr [UisHnack]       /* low dword of first reading  */
        mov ebx, dword ptr [UisHnack+4]     /* high dword of first reading */
        sub dword ptr [QsecTCnt], eax
        sbb dword ptr [QsecTCnt+4], ebx
    }

    /* Start of the one-second window: counter reading, then TSC reading. */
    QueryPerformanceCounter((LARGE_INTEGER*)&lpQFCount);
    timeStart64 = lpQFCount;
    __asm {
        rdtsc
        mov dword ptr [cpu_fm64], eax
        mov dword ptr [cpu_fm64+4], edx
    }

    /* Poll until more than pfq counter ticks have elapsed. Q_Error keeps the
       elapsed count seen by the last test, so Q_Error - pfq is the overshoot. */
    while ((Q_Error = (lpQFCount - timeStart64)) <= pfq )
    {
        QueryPerformanceCounter((LARGE_INTEGER*)&lpQFCount);
    }

    /* cpu_fm64 = TSC ticks elapsed over the window. */
    __asm {
        rdtsc
        sub eax, dword ptr [cpu_fm64]
        sbb edx, dword ptr [cpu_fm64+4]
        mov dword ptr [cpu_fm64], eax
        mov dword ptr [cpu_fm64+4], edx
    }

    /* Remove the fixed overhead of one counter call and one RDTSC. */
    cpu_fm64 = cpu_fm64 - timeToQPF - timeToRDTSC;

    /* Convert the overshoot from counter ticks to TSC ticks:
       (overshoot / ticks-per-call) * TSC-ticks-per-call.
       QsecTCnt is zero when a counter call completes within a single counter
       tick; the division would then produce inf/NaN and an undefined
       conversion back to an integer, so the correction is skipped. */
    if ( QsecTCnt != 0 ) {
        cpu_fm64 -= (unsigned __int64)( (double) ( Q_Error - pfq ) / (double) QsecTCnt * ( double ) timeToQPF );
    }

    CpuFreq = cpu_fm64;
    MilliCpuFreq = CpuFreq / 1000;

    /* Sanity check: an RDTSC pair costing this much means the measurement was
       disturbed. */
    ENFORCE ( timeToRDTSC < 800 );
    ENFORCE ( SetPriorityClass ( CurProcessHandle, OldPriClassFlags ) );
    ENFORCE ( SetThreadPriority( CurThreadHandle, OldPriThreadFlags ) );
    return cpu_fm64;
}
/*
 * Calibrate the TSC on core CpuCoreIndex (see LoadSpecifyCpuFreq, which also
 * updates CpuFreq / MilliCpuFreq) and divide the measured tick count by
 * FrameTicks.
 *
 * FrameTicks:   divisor; must be non-zero. Zero is rejected with ENFORCE
 *               before the calibration runs, instead of faulting in the
 *               division afterwards.
 * CpuCoreIndex: forwarded unchanged to LoadSpecifyCpuFreq.
 * Returns: measured TSC ticks divided by FrameTicks.
 */
static unsigned __int64 SetFrame ( unsigned int FrameTicks, unsigned int CpuCoreIndex ) {
    ENFORCE ( FrameTicks != 0 );
    return ( LoadSpecifyCpuFreq ( CpuCoreIndex ) / ( unsigned __int64 ) FrameTicks );
}
#endif
; Assembler setup: 32-bit flat-model code with the MMX/SSE instruction sets
; enabled (the procedure below uses cvtsi2sd / divsd / mulsd / cvtsd2si).
.686 ; create 32 bit code
.mmx
.xmm
.model flat, stdcall ; 32 bit memory model
option casemap :none ; case sensitive
; macros: short aliases for the operand-size overrides
byt equ byte ptr
wot equ word ptr
dwot equ dword ptr
; External procedures called from this file. The @N suffix on each Windows API
; name is the number of argument bytes the call pushes.
extrn exit:proc ; C runtime exit, not a Windows API
extrn MessageBoxA@16:proc
extrn GetPriorityClass@4:proc
extrn SetPriorityClass@8:proc
extrn GetThreadPriority@4:proc
extrn SetThreadPriority@8:proc
extrn SetThreadAffinityMask@8:proc
extrn SetProcessAffinityMask@8:proc
extrn QueryPerformanceCounter@4:proc
extrn QueryPerformanceFrequency@4:proc
; Uninitialized working storage for __load_main_cpu_ticks. Every variable is
; two dwords (one 64-bit slot); several are only ever accessed through their
; low dword.
.data?
; TSC low dword taken just before the timed QueryPerformanceCounter call
__rdtsc_t dd 2 dup(?)
; output of QueryPerformanceFrequency
__per_freq dd 2 dup(?)
; QueryPerformanceCounter output; ends up holding the reading at loop start
__time_temp1 dd 2 dup(?)
; QueryPerformanceCounter output; the latest reading inside the wait loop
__time_temp2 dd 2 dup(?)
; full 64-bit TSC value taken at the start of the measured interval
__time_temp3 dd 2 dup(?)
; not referenced by the code in this file
__time_temp4 dd 2 dup(?)
; low dword of the TSC delta between two back-to-back rdtsc instructions
__rdtsc_clock dd 2 dup(?)
; low dword: TSC ticks spent in one QueryPerformanceCounter call, minus __rdtsc_clock
__per_counter_clock dd 2 dup(?)
; low dword: counter ticks between two consecutive QueryPerformanceCounter readings
__per_counter_clock_plck dd 2 dup(?)
.code
; ---------------------------------------------------------------------------
; __load_main_cpu_ticks
;
; Estimates how many TSC ticks elapse while QueryPerformanceCounter advances
; by one QueryPerformanceFrequency worth of ticks.
;
; In : nothing.
; Out: EDX:EAX = corrected TSC tick count. Clobbers ECX, XMM0-XMM2.
;
; Side effects (none are undone before returning):
;   - thread affinity mask of pseudo-handle -2 set to 1
;   - process affinity mask of pseudo-handle -1 set to 1
;   - process priority class set to 256, thread priority set to 2
; On any API failure control goes to __exit, which shows an empty message box
; and calls exit(-1).
;
; Fixes relative to the previous revision:
;   - SetThreadPriority failure is a zero return, not -1.
;   - QueryPerformanceFrequency failure is now detected.
;   - The overshoot correction is skipped when __per_counter_clock_plck is 0,
;     which used to divide by zero and corrupt the result via cvtsd2si.
; ---------------------------------------------------------------------------
__load_main_cpu_ticks proc C
option prologue:none, epilogue:none
    ; SetThreadAffinityMask(-2, 1)
    push 1
    push -2
    call SetThreadAffinityMask@8
    cmp eax, 0
    je __exit
    ; SetProcessAffinityMask(-1, 1)
    push 1
    push -1
    call SetProcessAffinityMask@8
    cmp eax, 0
    je __exit
    ; SetPriorityClass(-1, 256)
    push 256
    push -1
    call SetPriorityClass@8
    cmp eax, 0
    je __exit
    ; SetThreadPriority(-2, 2); the API reports failure by returning zero
    push 2
    push -2
    call SetThreadPriority@8
    cmp eax, 0
    je __exit
    ; QueryPerformanceFrequency(&__per_freq)
    push offset __per_freq
    call QueryPerformanceFrequency@4
    cmp eax, 0
    je __exit
    ; main: short countdown delay before taking any measurement
    mov eax, 35600
wait_some:
    dec eax
    jne wait_some
    ; __rdtsc_clock = low-dword TSC delta of two back-to-back rdtsc
    rdtsc
    mov ecx, eax                            ; low 32 bits of first reading
    rdtsc
    sub eax, ecx                            ; low 32-bit difference
    mov dwot[__rdtsc_clock], eax
    ; __per_counter_clock = TSC ticks for one counter call, minus rdtsc cost
    rdtsc
    mov dwot[__rdtsc_t], eax
    push offset __time_temp1
    call QueryPerformanceCounter@4
    rdtsc
    sub eax, dwot[__rdtsc_t]
    sub eax, dwot[__rdtsc_clock]
    mov dwot[__per_counter_clock], eax
    ; __per_counter_clock_plck = counter ticks between two consecutive readings
    push offset __time_temp1
    call QueryPerformanceCounter@4
    push offset __time_temp2
    call QueryPerformanceCounter@4
    mov eax, dwot[__time_temp2]
    sub eax, dwot[__time_temp1]
    mov dwot[__per_counter_clock_plck], eax
    ; start of the measured interval: TSC first, then the counter
    rdtsc
    mov dwot[__time_temp3], eax
    mov dwot[__time_temp3+4], edx
    push offset __time_temp1
    call QueryPerformanceCounter@4
    ; latest reading := start reading, so the first pass sees zero elapsed
    mov eax, dwot[__time_temp1]
    mov edx, dwot[__time_temp1+4]
    mov dwot[__time_temp2], eax
    mov dwot[__time_temp2+4], edx
align 16
main_loop:
    ; EDX:EAX = elapsed counter ticks (latest - start)
    mov eax, dwot[__time_temp2]
    mov edx, dwot[__time_temp2+4]
    sub eax, dwot[__time_temp1]
    sbb edx, dwot[__time_temp1+4]
    ; 64-bit unsigned compare against __per_freq, high dword first
    cmp edx, dwot[__per_freq+4]
    ja __out_step
    jb __in_step
    ; high dwords equal: EAX becomes the overshoot in counter ticks
    sub eax, dwot[__per_freq]
    jae __out_step
__in_step:
    push offset __time_temp2
    call QueryPerformanceCounter@4
    jmp main_loop
align 16
__out_step:
    ; keep the overshoot (EAX) as a double before rdtsc overwrites EAX
    cvtsi2sd xmm0, eax
    ; EDX:EAX = TSC ticks elapsed since the start reading
    rdtsc
    sub eax, dwot[__time_temp3]
    sbb edx, dwot[__time_temp3+4]
    ; remove the cost of rdtsc itself
    sub eax, dwot[__rdtsc_clock]
    sbb edx, dwot[__rdtsc_clock+4]
    ; remove twice the TSC cost of one counter call
    mov ecx, dwot[__per_counter_clock]
    shl ecx, 1
    sub eax, ecx
    sbb edx, 0
    ; Overshoot correction needs a non-zero divisor. Two consecutive counter
    ; readings can be identical, in which case the correction is skipped.
    cmp dwot[__per_counter_clock_plck], 0
    je __ret_ticks
    ; ECX = overshoot * (TSC ticks per call / counter ticks per call)
    cvtsi2sd xmm2, dwot[__per_counter_clock]
    cvtsi2sd xmm1, dwot[__per_counter_clock_plck]
    divsd xmm2, xmm1
    mulsd xmm2, xmm0
    cvtsd2si ecx, xmm2
    sub eax, ecx
    sbb edx, 0
__ret_ticks:
    ret
__exit:
    ; MessageBoxA(0, 0, 0, 0), then exit(-1)
    push 0
    push 0
    push 0
    push 0
    call MessageBoxA@16
    push -1
    call exit
    add esp, 4
    ret
__load_main_cpu_ticks endp
end