;@echo off
;goto make
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;
; DFA3_A_Mnem - assembly implementation of a state machine (DFA); an extended, modified version of Tiny
; Purpose:
;
; Written by 问风 (wenfengmtd@163.com)
; Debugging is a fun job in OllyDbg v1.09
;
;
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
.386
.model flat, stdcall
option casemap:none
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; I N C L U D E F I L E S
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
include /masm32/include/windows.inc
include /masm32/include/kernel32.inc
include /masm32/include/user32.inc
include /masm32/include/MASM32.INC
include /masm32/include/shell32.inc
include /masm32/macros/mymacros.asm
includelib /masm32/lib/kernel32.lib
includelib /masm32/lib/user32.lib
includelib /masm32/lib/MASM32.LIB
includelib /masm32/lib/shell32.lib
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; U S E R D E F I N E D E Q U A T E S
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;define the C-style type names (char, integer, long, ...) through the CTypeD macro
CTypeD
;size in bytes of the buffer filled from the source file, and the maximum token length
BUFLEN equ 256
MAXTOKENLEN equ 40
;conditional-assembly flag: when TRUE, getToken prints every token it scans
TraceScan equ TRUE
;define the token types (prefix T); the enum macro numbers them 0, 1, 2, ... in the order listed.
;Lines are continued with a trailing backslash, MASM's line-continuation character.
enum TType, TENDFILE, TERROR, TIF, TTHEN, TELSE, TEND, TREPEAT, TUNTIL, TREAD, TWRITE, TID, \
TNUM, TASSIGN, TEQ, TLT, TPLUS, TMINUS, TTIMES, TOVER, TMOD, TLPAREN, TRPAREN, TSEMI, TWHILE, TDO, \
TNONE
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; U S E R D E F I N E D M A C R O S
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;for the macros see mymacros.asm; some of them come from the MASM32 macro library
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; D A T A
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
.data
;column table of the DFA: maps every character code to its character class
;(every entry starts as class 17; FillTab overwrites the entries for recognised characters)
Classify char 256 dup(17)
;keyword detection by hashing: bucket = (s[1] + s[2]) mod 19, s being the token text.
;TNONE marks an empty bucket, i.e. the token can only be an identifier.
hashTab byte TWHILE, TEND, TNONE, TNONE, TREPEAT, TNONE, TNONE, TIF, TREAD, TNONE, TWRITE, \
TNONE, TNONE, TNONE, TELSE, TTHEN, TDO, TUNTIL, TNONE
; some message strings
Accpect char "Accpect!",0dh, 0ah, 0
Error char "Error!",0dh, 0ah, 0
;the buffer that holds the bytes read from the source file
lpbuffer byte BUFLEN dup(0), 0
;the source file name
filename char "test.tny",0
;holds the text of the token being scanned
tokenString char MAXTOKENLEN dup (0), 0
;message shown when the source file cannot be opened
ErrorMsg char "Could not open the file", 0dh, 0ah, 0
;a CR LF pair, for ending an output line
CRLF char 0dh, 0ah, 0
;scratch buffers for converting a 32-bit binary number to ASCII: the digits come out
;in reverse order in ASCVALUE1 and are copied in reading order into ASCVALUE2
ASCVALUE1 char 20 dup(0), 0
ASCVALUE2 char 20 dup(0), 0
;handle of the source file
hScoureFile dword ?
;current line number
lineno dword 0
;number of valid bytes in lpbuffer
bufferSize dword 0
;the keywords
reservedWord1 char 'while', 0
reservedWord2 char 'end', 0
reservedWord3 char 'repeat', 0
reservedWord4 char 'if', 0
reservedWord5 char 'read', 0
reservedWord6 char 'write', 0
reservedWord7 char 'else', 0
reservedWord8 char 'then', 0
reservedWord9 char 'do', 0
reservedWord0 char 'until', 0
;keyword table, indexed by the same hash as hashTab: the token is a keyword only if it
;compares equal to the string stored in its bucket; 0 marks an empty bucket.
reservedWordTab dword offset reservedWord1,offset reservedWord2,0,0,offset reservedWord3,0,0,offset reservedWord4, \
offset reservedWord5, 0, offset reservedWord6, 0,0,0,offset reservedWord7,offset reservedWord8,offset reservedWord9, \
offset reservedWord0, 0
;Original author's note: strings generated by chr$ live on the stack, and placing other
;definitions after them causes access errors.
;NOTE(review): the chr$ macro shown at the end of this listing emits its string into .data - confirm.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; C O D E
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
.code
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; P R O C E D U R E S
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; getNextChar
; Fetches the next character of the source file.
; In:   esi = address of the next unread byte in lpbuffer
;       edi = address in tokenString where the fetched character is stored
; Out:  eax = the character, zero-extended; 0 means end of file
;       esi, edi advanced by one (at end of file only edi is advanced and nothing is stored)
; Uses: ecx is preserved; edx may be changed by the ReadFile call made through fread
; lpbuffer is refilled from hScoureFile once esi has moved bufferSize bytes past its start.
; lineno counts lines: it becomes 1 when the first buffer is read and is then incremented
; for every carriage return (0dh) fetched.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
getNextChar PROC uses ecx
        mov     ecx, esi
        sub     ecx, offset lpbuffer            ; ecx = bytes already consumed (plain SUB: the
                                                ; caller's carry flag must not leak into the offset)
        cmp     ecx, bufferSize
        jb      @read                           ; unread data left in the buffer

        ; Buffer exhausted. A refill is a fixed-size chunk, not a line, so it must not
        ; advance the line counter - except for the very first read, which starts line 1.
        cmp     lineno, 0
        jne     @refill
        inc     lineno
@refill:
        mov     bufferSize, fread(hScoureFile, addr lpbuffer, BUFLEN)
        test    eax, eax
        jz      @EOF                            ; nothing more to read: end of file
        mov     esi, offset lpbuffer
        jmp     @read

@EOF:   xor     eax, eax                        ; 0 is the end-of-file marker
        inc     edi                             ; keep edi in step; callers undo it with dec edi
        ret

@read:  movzx   eax, byte ptr [esi]
        mov     [edi], al                       ; record the character in the token text
        cmp     al, 0dh
        jne     noEndLine
        inc     lineno                          ; a carriage return ends the current line
noEndLine:
        inc     esi
        inc     edi
        ret
getNextChar ENDP
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; ungetNextChar
; Pushes the most recently fetched character back so the next getNextChar returns it again.
; In/Out: esi = read pointer into lpbuffer, moved back by one byte
; getNextChar increments lineno whenever it fetches a carriage return (0dh); if the
; character being pushed back is one, that increment is undone here, otherwise the
; same line end would be counted a second time when it is fetched again.
; Changes the flags; no other register is touched.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
ungetNextChar PROC
        dec     esi
        cmp     byte ptr [esi], 0dh
        jne     ungetDone
        dec     lineno                          ; the CR will be counted when it is re-read
ungetDone:
        ret
ungetNextChar ENDP
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; FillTab
; Fills Classify, the column index of the DFA table: Classify[c] is the character class
; of character code c. Entries not written here keep their assembled default.
;   class 0  : '0'..'9'            class 9  : '-'
;   class 1  : 'A'..'Z', 'a'..'z'  class 10 : '*'
;   class 2  : ':'                 class 11 : '/'
;   class 3  : space, TAB, CR, LF  class 12 : '%'
;   class 4  : '{'                 class 13 : '('
;   class 5  : NUL (end of file)   class 14 : ')'
;   class 6  : '='                 class 15 : ';'
;   class 7  : '<'                 class 16 : '}'
;   class 8  : '+'
; The rows of the DFA are the jump tables inside getToken.
; In: nothing.  Out: nothing.  All registers are preserved (esi included, since the rest
; of the file uses it as the global read pointer); only the flags change.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
        .data
; (character code, class) pairs for the characters that are classified one by one
FillTabSingles label byte
        byte    ':', 2
        byte    ' ', 3, 09h, 3, 0dh, 3, 0ah, 3
        byte    '{', 4
        byte    0, 5
        byte    '=', 6
        byte    '<', 7
        byte    '+', 8
        byte    '-', 9
        byte    '*', 10
        byte    '/', 11
        byte    '%', 12
        byte    '(', 13
        byte    ')', 14
        byte    ';', 15
        byte    '}', 16
FillTabSinglesEnd label byte
        .code
FillTab proc uses ebx ecx esi
        ; digits -> class 0
        mov     esi, '0'
digit_: mov     byte ptr Classify[esi], 0
        inc     esi
        cmp     esi, '9'
        jbe     digit_

        ; upper-case letters -> class 1
        mov     esi, 'A'
ualpha_:
        mov     byte ptr Classify[esi], 1
        inc     esi
        cmp     esi, 'Z'
        jbe     ualpha_

        ; lower-case letters -> class 1
        mov     esi, 'a'
lalpha_:
        mov     byte ptr Classify[esi], 1
        inc     esi
        cmp     esi, 'z'
        jbe     lalpha_

        ; individually classified characters, taken from the pair table
        mov     esi, offset FillTabSingles
single_:
        movzx   ebx, byte ptr [esi]             ; ebx = character code
        mov     cl, [esi+1]                     ; cl  = its class
        mov     byte ptr Classify[ebx], cl
        add     esi, 2
        cmp     esi, offset FillTabSinglesEnd
        jb      single_
        ret
FillTab endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; hashLookup
; Decides whether the token in tokenString is a keyword or a plain identifier.
; In:   edi = address of the token's terminating NUL inside tokenString
;             (getToken stores the NUL before calling)
; Out:  cl  = token type of the keyword, or TID when the token is not a keyword
; Uses: eax, ebx, edx, edi, esi are preserved; the rest of ecx is scratch
; The bucket is (s[1] + s[2]) mod HASH_BUCKETS. The token is then compared with the
; keyword stored in that bucket INCLUDING the terminating NUL, so that a mere prefix
; of a keyword (for example "whi") is not mistaken for the keyword itself.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
HASH_BUCKETS equ 19                             ; number of entries in hashTab / reservedWordTab
hashLookup proc uses eax ebx edx edi esi
        mov     ebx, edi
        mov     edi, offset tokenString
        sub     ebx, edi                        ; ebx = token length
        cmp     ebx, 2
        jb      hid                             ; every keyword has at least two characters;
                                                ; also avoids hashing the stale byte at s[2]
        movzx   eax, byte ptr [edi+1]
        movzx   edx, byte ptr [edi+2]           ; for a two-character token this is its NUL
        add     eax, edx
        xor     edx, edx
        mov     ecx, HASH_BUCKETS
        div     ecx                             ; edx = bucket index
        mov     esi, reservedWordTab[edx*4]
        test    esi, esi
        jz      hid                             ; empty bucket: cannot be a keyword
        lea     ecx, [ebx+1]                    ; compare the characters plus the NUL
        cld
        repe    cmpsb
        jne     hid
        mov     cl, hashTab[edx]
        ret
hid:    mov     cl, TID
        ret
hashLookup endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; BTOACS
; Prints an unsigned 32-bit number in decimal, followed by ": ", through StdOut.
; In:   eax = the value to print
; Uses: ecx, esi, edx, edi are preserved; eax is not
; The low-order digits are peeled off first and parked in ASCVALUE1 (so they are in
; reverse order); the text is then assembled in reading order in ASCVALUE2.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
BTOACS proc uses ecx esi edx edi
        lea     esi, ASCVALUE1                  ; esi -> next free slot for a parked digit
        lea     edi, ASCVALUE2                  ; edi -> next free slot of the final text
        mov     ecx, 10

btoPeel:                                        ; while value >= 10: park value mod 10
        cmp     eax, ecx
        jb      btoLead
        xor     edx, edx
        div     ecx
        or      dl, 30h                         ; digit -> ASCII
        mov     [esi], dl
        inc     esi
        jmp     btoPeel

btoLead:                                        ; what remains is the leading digit
        or      al, 30h
        mov     [edi], al
        inc     edi

btoCopy:                                        ; append the parked digits, last parked first
        cmp     esi, offset ASCVALUE1
        je      btoEmit
        dec     esi
        mov     al, [esi]
        mov     [edi], al
        inc     edi
        jmp     btoCopy

btoEmit:
        mov     byte ptr [edi], ':'
        mov     byte ptr [edi+1], ' '
        mov     byte ptr [edi+2], 0
        invoke  StdOut, addr ASCVALUE2
        ret
BTOACS endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; printfToken
; Prints a description of a token followed by CR LF, like printToken in the C version of
; Tiny, but dispatching through a jump table instead of a switch statement.
; In:   cl = token type (TENDFILE .. TDO); tokenString = text of the token
; Uses: ebx, ecx are preserved; StdOut is called, so eax and edx are not
; A token type beyond the end of the table is reported through the ERROR branch.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
printfToken proc uses ebx ecx
        movzx   ebx, cl
        cmp     ebx, TDO                        ; TDO is the last token type with a table entry
        ja      PErrorCode
        jmp     TokenCodeTab[ebx*4]
; one entry per token type, in the order the TType enum declares them
TokenCodeTab dword PENDFILECode, PErrorCode, PReservedWordCode, PReservedWordCode, PReservedWordCode, \
        PReservedWordCode, PReservedWordCode, PReservedWordCode, PReservedWordCode, \
        PReservedWordCode, PIDCode, PNUMCode, PASSIGNCode, PEQCode, PLTCode, PPLUSCode, PMINUSCode, \
        PTIMESCode, POVERCode, PMODCode, PLPARENCode, PRPARENCode, PSEMICode, \
        PReservedWordCode, PReservedWordCode

; --- tokens printed as "<label><token text>" ---------------------------------------------
PErrorCode:
        mov     ebx, chr$('ERROR: ')
        jmp     PWithText
PReservedWordCode:
        mov     ebx, chr$('reserved word: ')
        jmp     PWithText
PNUMCode:
        mov     ebx, chr$('NUM, val=')
        jmp     PWithText
PIDCode:
        mov     ebx, chr$('ID, name=')
        jmp     PWithText

; --- tokens printed as a fixed text --------------------------------------------------------
PASSIGNCode:
        mov     ebx, chr$(':=')
        jmp     PFixed
PLTCode:
        mov     ebx, chr$('<')
        jmp     PFixed
PEQCode:
        mov     ebx, chr$('=')
        jmp     PFixed
PLPARENCode:
        mov     ebx, chr$(40)                   ; '(' given by its code, as in the original call
        jmp     PFixed
PRPARENCode:
        mov     ebx, chr$(41)                   ; ')'
        jmp     PFixed
PSEMICode:
        mov     ebx, chr$(';')
        jmp     PFixed
PPLUSCode:
        mov     ebx, chr$('+')
        jmp     PFixed
PMINUSCode:
        mov     ebx, chr$('-')
        jmp     PFixed
PTIMESCode:
        mov     ebx, chr$('*')
        jmp     PFixed
POVERCode:
        mov     ebx, chr$('/')
        jmp     PFixed
PMODCode:
        mov     ebx, chr$('%')
        jmp     PFixed
PENDFILECode:
        mov     ebx, chr$('EOF')
        jmp     PFixed

; --- shared tails: ebx = address of the text to print first --------------------------------
PWithText:
        invoke  StdOut, ebx
        invoke  StdOut, addr tokenString
        jmp     PNewLine
PFixed:
        invoke  StdOut, ebx
PNewLine:
        invoke  StdOut, addr CRLF
        ret
printfToken endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; getToken
; Scans the next token. The DFA is table driven: Classify supplies the column (the
; character class), and the jump table State0Tab supplies the row of the start state.
; In:   esi = read pointer into lpbuffer (maintained by getNextChar / ungetNextChar)
;       edi = offset tokenString (the caller resets it before every call)
; Out:  cl  = token type; tokenString = NUL-terminated text of the token
; Uses: ebx, eax are preserved; edx may be changed by the routines called
; Character classes (set up by FillTab): 0 digit, 1 letter, 2 ':', 3 white space,
; 4 '{', 5 NUL / end of file, 6 '=', 7 '<', 8 '+', 9 '-', 10 '*', 11 '/', 12 '%',
; 13 '(', 14 ')', 15 ';', 16 '}', 17 anything else.
; States: 0 start, 1 inside a { } comment, 2 after ':', 3 in a number, 4 in an
; identifier, 5 done.
; Tokens longer than MAXTOKENLEN characters are scanned to their end but only the first
; MAXTOKENLEN characters are kept, so tokenString cannot overflow.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
getToken proc uses ebx eax
        xor     eax, eax
        xor     ecx, ecx
        lea     ebx, Classify                   ; xlat translates through ebx
State0: invoke  getNextChar
        xlat    Classify                        ; al = class of the character
        mov     cl, TokenTab[eax]               ; token type if this character is a whole token
        cmp     al, 3
        jb      S0next
        cmp     al, 5
        ja      S0next
        dec     edi                             ; white space, '{' and EOF are not token text
S0next: jmp     State0Tab[eax*4]
; token type produced directly by a character of each class (TNONE: more states follow)
TokenTab TType TNONE, TNONE, TNONE, TNONE, TNONE, TENDFILE, TEQ, TLT, \
        TPLUS, TMINUS, TTIMES, TOVER, TMOD, TLPAREN, TRPAREN, TSEMI, \
        TERROR, TERROR
; next state out of State0 for each class
State0Tab dword State3, State4, State2, State0, State1, State5, State5, State5, \
        State5, State5, State5, State5, State5, State5, State5, State5, \
        State5, State5

; --- inside a comment: discard everything up to '}' ----------------------------------------
State1: invoke  getNextChar
        dec     edi                             ; comment text is never kept
        xlat
        cmp     al, 16
        je      State0                          ; '}' closes the comment
        cmp     al, 5
        jne     State1
        mov     cl, TENDFILE                    ; end of file inside a comment: stop here
        jmp     State5                          ; instead of looping forever

; --- after ':' : only ":=" is valid --------------------------------------------------------
State2: invoke  getNextChar
        xlat
        cmp     al, 6
        jne     S2_NEQ
        mov     cl, TASSIGN
        jmp     State5
S2_NEQ: invoke  ungetNextChar
        dec     edi
        mov     cl, TERROR
        jmp     State5

; --- number: digits only ---------------------------------------------------------------------
State3: invoke  getNextChar
        xlat
        cmp     al, 0
        jne     S3done
        cmp     edi, offset tokenString + MAXTOKENLEN
        jbe     State3
        dec     edi                             ; token text is full: drop the extra digit
        jmp     State3
S3done: invoke  ungetNextChar                   ; the look-ahead character belongs to the next token
        dec     edi
        mov     cl, TNUM
        jmp     State5

; --- identifier or keyword: letters only -----------------------------------------------------
State4: invoke  getNextChar
        xlat
        cmp     al, 1
        jne     S4done
        cmp     edi, offset tokenString + MAXTOKENLEN
        jbe     State4
        dec     edi                             ; token text is full: drop the extra letter
        jmp     State4
S4done: invoke  ungetNextChar
        dec     edi
        mov     cl, TID
        ; falls through into State5

; --- done: terminate the text, resolve keywords, optionally trace ----------------------------
State5:
        mov     byte ptr [edi], 0
        .if cl == TID
        invoke  hashLookup                      ; turns TID into a keyword type where it applies
        .endif
if TraceScan
        push    ecx                             ; StdOut does not preserve ecx
        invoke  StdOut, chr$(09h)
        mov     eax, lineno
        invoke  BTOACS                          ; prints "<line number>: "
        pop     ecx
        invoke  printfToken
endif
        ret
getToken endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; start
; Program entry point: builds the character-class table, opens the source file and
; scans it token by token until the end-of-file token is returned, then closes the
; file and exits. fopen_r itself ends the process if the file cannot be opened.
; esi is the global read pointer and edi the token-text pointer used by getToken.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
start proc
        invoke  FillTab
        mov     esi, offset lpbuffer            ; bufferSize is 0, so the first fetch fills the buffer
        mov     hScoureFile, fopen_r(filename)
test_:  mov     edi, offset tokenString         ; every token starts with an empty text
        invoke  getToken
        cmp     cl, TENDFILE
        jne     test_
        invoke  CloseHandle, hScoureFile        ; release the file before leaving
        invoke  ExitProcess, 0
start endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
end start
:make
rem Build step of the .bat/.asm polyglot: assemble this very file, link it, clean up.
rem Tool paths need backslashes: cmd does not run a command whose path starts with "/".
set drv=DFA
\masm32\bin\ml /c /coff %drv%.bat
\masm32\bin\Link /SUBSYSTEM:CONSOLE %drv%.obj
del %drv%.obj
echo.
pause
rem Stop here so cmd does not try to execute whatever text follows the build script.
goto :eof
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;Macro definitions (presumably the contents of mymacros.asm, which the program includes)
comment * -----------------------------------------------------------------
Preprocessor code for high level language simulation in MASM32
Updated 22th 6 2006
---------------------------------------------------------------- *
;; reparg - macro function that lets a macro accept either a quoted string or any
;; other operand. If the argument starts with a double quote, the string is emitted
;; into .data (NUL-terminated) and "ADDR <label>" is returned; otherwise the argument
;; is returned unchanged.
reparg MACRO arg
LOCAL nustr
quot SUBSTR <arg>,1,1
IFIDN quot,<"> ;; if 1st char = "
.data
nustr db arg,0 ;; write arg to .DATA section
.code
EXITM <ADDR nustr> ;; append name to ADDR operator
ELSE
EXITM <arg> ;; else return arg
ENDIF
ENDM
; -------------------------------------------------------------------------
; open an existing file with read-only access and return the file handle
; -------------------------------------------------------------------------
;; fopen_r - macro function: opens an existing file for reading (GENERIC_READ,
;; OPEN_EXISTING, no sharing) and returns the handle in eax.
;; filename : label of a NUL-terminated file name
;; If the file cannot be opened a message box is shown and the process ends with
;; exit code 1, so that a calling script can tell the failure from a normal run.
fopen_r MACRO filename
    LOCAL ErrorMsg
    .data
    ErrorMsg db "Could not open the file", 0dh, 0ah, 0
    .code
    invoke CreateFile, addr filename, GENERIC_READ, NULL, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL
    .if eax == INVALID_HANDLE_VALUE
        invoke MessageBox, NULL, addr ErrorMsg, NULL, MB_OK OR MB_ICONEXCLAMATION
        invoke ExitProcess, 1                   ;; non-zero: the program did not do its job
    .endif
    EXITM <eax>                                 ;; return file handle
ENDM
; ------------------------------------------------
; read data from an open file into a memory buffer
; ------------------------------------------------
;; fread - macro function: reads up to bcnt bytes from hFile into buffer through
;; ReadFile and returns in eax the number of bytes actually read.
;; Each expansion reserves its own dword in .data? to receive that count.
fread MACRO hFile,buffer,bcnt
    LOCAL bytesRead
    .data?
    bytesRead dd ?
    .code
    invoke ReadFile, hFile, buffer, bcnt, ADDR bytesRead, NULL
    mov eax, bytesRead
    EXITM <eax>                                 ;; return bytes read
ENDM
; ------------------------------------------------
; define an enum: enumname becomes a byte-sized type name and the members are numbered from 0
; ------------------------------------------------
;; enum - declares enumname as a byte-sized type and defines every name in parmlist
;; as a numeric constant: the first is 0, each following one is one higher.
enum MACRO enumname,parmlist:VARARG
    LOCAL ordinal
    enumname typedef byte
    ordinal = 0
    FOR member, <parmlist>
        member = ordinal
        ordinal = ordinal + 1
    ENDM
ENDM
; ------------------------------------------------
; define C type
; ------------------------------------------------
;; CTypeD - declares C-flavoured names for the basic MASM data types, grouped by width.
CTypeD MACRO
    ;; 8-bit
    char typedef byte
    ;; 16-bit
    integer typedef word
    unsigned typedef word
    ushort typedef word
    ;; 32-bit
    long typedef dword
    ulong typedef dword
ENDM
;; chr$ - macro function: places the given text, NUL-terminated, in .data under a
;; unique label and returns "OFFSET <label>" for use as an operand.
chr$ MACRO any_text:VARARG
    LOCAL strLabel
    .data
    strLabel db any_text,0
    .code
    EXITM <OFFSET strLabel>
ENDM
;; len - macro function: calls szLen on lpString and returns eax.
;; lpString goes through reparg first, so a quoted string literal is accepted as well.
;; NOTE(review): szLen is not defined in this listing - presumably the MASM32 library
;; routine that returns a string's length; confirm.
len MACRO lpString
invoke szLen,reparg(lpString)
EXITM <eax>
ENDM
;; ustr$ - macro function: converts DDvalue to text with crt__ultoa in base 10, using a
;; 20-byte buffer reserved in .data by each expansion, and returns "OFFSET <buffer>".
;; NOTE(review): no include at the top of the program is visibly the one that declares
;; crt__ultoa - confirm it is available before using this macro.
ustr$ MACRO DDvalue ;; unsigned integer from string
LOCAL rvstring
.data
rvstring db 20 dup (0)
align 4
.code
;; invoke dwtoa,DDvalue,ADDR rvstring
invoke crt__ultoa,DDvalue,ADDR rvstring,10
EXITM <OFFSET rvstring>
ENDM
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;Test input file; file name test.tny
{ Sample program
In Tiny language –
Computes factorial
}
read x{ input an integer }
if 0 < x then { don’t compute if x <= 0 }
fact := 1;
while 0 < x do
x := x%10 ;
fact := fact +1
end
write fact { output the D-bit of x }
end
end