; 上次只是一个简单的词法分析程序,经过几天努力,整个编译器终于完成了!源代码分析及文档应该会在下周发布……(待续)。顺便一提,背景音乐是一首名为"安静的音乐"的曲子(我想原名应该不是这个),是网友"鱼在水在哭"推荐给我的。这首音乐陪伴着我写完了整个编译器。其间我曾想过放弃,是这首音乐把我拉回了现实:原来音乐真的可以抚平心灵的创伤,是它让我不再沉沦。希望每一位朋友也喜欢这首音乐。
;@echo off
;goto make
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;
; Tiny - Tiny编译器的汇编版本,只有17KB,比原来39.KB,少左好多,不过其实还可以精简,没扩充,而且减少好多
; 中间变量,全部放入寄存器中
; 作用:
;
; Written by 问风 (wenfengmtd@163.com)
; Debug is fun job's in OLLyDBG v1.09
;
;
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
.386
.model flat, stdcall
option casemap:none
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; I N C L U D E F I L E S
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
include /masm32/include/windows.inc
include /masm32/include/kernel32.inc
include /masm32/include/user32.inc
include /masm32/include/MASM32.INC
include /masm32/include/shell32.inc
include /masm32/macros/mymacros.asm
include /masm32/include/comdlg32.inc
includelib /masm32/lib/kernel32.lib
includelib /masm32/lib/user32.lib
includelib /masm32/lib/MASM32.LIB
includelib /masm32/lib/shell32.lib
includelib /masm32/lib/comdlg32.lib
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; U S E R D E F I N E D E Q U A T E S
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; Invoke the CTypeD macro (defined outside this file; presumably it declares
; the C-style type aliases such as `char` used below -- TODO confirm).
CTypeD
; Size of the source read buffer and the maximum identifier length.
BUFLEN equ 256
MAXTOKENLEN equ 40
; Number of child links in every syntax-tree node.
MAXCHILDREN equ 3
; Conditional-assembly switches.
TraceScan equ FALSE
NO_PARSE equ FALSE
TraceAnalyze equ FALSE
ANALYZE equ TRUE
TraceCode equ FALSE
TraceParse equ FALSE
; Token types (T prefix). The order matters: printToken and hashLookup index
; tables with these values. MASM continues a line with a trailing backslash.
enum TType, TENDFILE, TERROR, TIF, TTHEN, TELSE, TEND, TREPEAT, TUNTIL, TREAD, TWRITE, TID, \
TNUM, TASSIGN, TEQ, TLT, TPLUS, TMINUS, TTIMES, TOVER, TMOD, TLPAREN, TRPAREN, TSEMI, TWHILE, TDO,\
TNONE
; Node kinds.
enum NKind, StmtK, ExpK, NNONE
; Statement kinds.
enum SKind, IfK, RepeatK, AssignK, ReadK, WriteK, SNONE
; Expression kinds.
enum EKind, OpK, ConstK, IdK, ENONE
; Expression types used for type checking.
enum EType, Void, Integer, Boolean, ETNONE
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; U S E R D E F I N E D M A C R O S
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;the Macros to see the mymacros, here some come from the MASM 9.0 macro
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; D A T A
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
.data
; Character-class table (one class code per byte value); every entry starts
; as class 17 and FillTab overwrites the entries for recognised characters.
Classify char 256 dup(17)
; Keyword hash table: slot = (s[1] + s[2]) mod 19 of the lexeme (see
; hashLookup). TNONE marks a slot that holds no keyword.
; MASM continues a line with a trailing backslash.
hashTab TType TWHILE, TEND, TNONE, TNONE, TREPEAT, TNONE, TNONE, TIF, TREAD, TNONE, TWRITE, \
TNONE, TNONE, TNONE, TELSE, TTHEN, TDO, TUNTIL, TNONE
; Status messages.
Accpect char "Accpect!",0dh, 0ah, 0
Error char "Error!",0dh, 0ah, 0
; Buffer that receives raw bytes read from the source file.
lpbuffer byte BUFLEN dup(0), 0
; File-name buffers and the pieces used to build default names.
filename char 20 dup(0)
filename1 char 20 dup(0)
testfile char "test", 0
TNY char ".tny", 0
TM char ".tm", 0
filenameMsg char "Could not file the file, use the defaule filenaem: test.tny, test.tm", 0
; Lexeme of the token currently being scanned.
tokenString char MAXTOKENLEN dup (0), 0
; Message for a source file that could not be opened.
ErrorMsg char "Could not open the file", 0dh, 0ah, 0
; CR LF pair used to end output lines.
CRLF char 0dh, 0ah, 0
; Scratch buffers for BTOACS: ASCVALUE1 collects the low digits in reverse
; order, ASCVALUE2 receives the digits in printing order.
ASCVALUE1 char 20 dup(0), 0
ASCVALUE2 char 20 dup(0), 0
; File handles and a scratch dword.
hScoureFile dword ?
hcodeFile dword ?
lpNumber dword ?
; Current source line number.
lineno dword 1
; Number of valid bytes currently held in lpbuffer.
bufferSize dword 0
; Keyword spellings.
reservedWord1 char 'while', 0
reservedWord2 char 'end', 0
reservedWord3 char 'repeat', 0
reservedWord4 char 'if', 0
reservedWord5 char 'read', 0
reservedWord6 char 'write', 0
reservedWord7 char 'else', 0
reservedWord8 char 'then', 0
reservedWord9 char 'do', 0
reservedWord0 char 'until', 0
; Keyword spelling per hash slot (parallel to hashTab); 0 = empty slot.
; A lexeme is a keyword only if it compares equal to the spelling in its slot.
reservedWordTab dword offset reservedWord1,offset reservedWord2,0,0,offset reservedWord3,0,0,offset reservedWord4,\
offset reservedWord5, 0, offset reservedWord6, 0,0,0,offset reservedWord7,offset reservedWord8,offset reservedWord9,\
offset reservedWord0, 0
; Original author's note (translated): strings produced by chr$ live on the
; stack; putting other definitions after them causes access errors.
; NOTE(review): the placement of chr$ strings depends on the macro definition,
; which is outside this file -- confirm.
;
; Syntax-tree node. One node is allocated per statement or expression by
; newStmtNode / newExpNode.
treeNode struct
child dword MAXCHILDREN dup(NULL) ; pointers to the child nodes
sibling dword NULL ; next statement in the same statement sequence
lineno dword ? ; source line recorded when the node was created
nodekind NKind ? ; StmtK or ExpK
kind byte ? ; an SKind value for statements, an EKind value for expressions
attr dword ? ; operator token, constant value, or pointer to a name string
exptype EType ETNONE ; expression type; newExpNode sets it to Void
treeNode ends
; General-purpose scratch buffer (not referenced in the visible part of the file).
szBuffer byte 4096 dup (?)
; Prefix of the out-of-memory message; the line number is printed after it.
allocMsg char 'Out of memory error at line', 0
; NOTE(review): ArgNum and ItemBuffer are not used in the visible part of the
; file; presumably they serve command-line parsing elsewhere -- confirm.
ArgNum equ 1
ItemBuffer dword 20 dup(0)
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; C O D E
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
.code
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; P R O D U R E
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; getNextChar
; Returns the next source character, refilling lpbuffer from hScoureFile when
; every buffered byte has been consumed.
; In:   esi = read cursor inside lpbuffer
;       edi = write cursor inside tokenString
; Out:  eax = the character, zero-extended, or 0 when nothing more can be read
;       esi is advanced past the character; the character is stored at [edi]
;       and edi is advanced (at end of file edi is advanced but nothing is
;       stored); lineno is incremented whenever a line feed (0ah) is returned.
; ecx is preserved.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
getNextChar PROC uses ecx
    mov     ecx, esi
    sub     ecx, offset lpbuffer        ; bytes consumed so far; plain SUB so the
                                        ; result never depends on the caller's carry flag
    cmp     ecx, bufferSize
    jb      @read                       ; unread bytes remain in the buffer
    mov     bufferSize, fread(hScoureFile, addr lpbuffer, BUFLEN)
    cmp     eax, 0
    je      @EOF                        ; nothing read: report end of file
    mov     esi, offset lpbuffer        ; restart at the head of the fresh buffer
    jmp     @read
@EOF:
    mov     eax, 0
    inc     edi                         ; callers undo this with DEC EDI, as for a real character
    ret
@read:
    mov     al, [esi]
    mov     [edi], al                   ; append to the lexeme being collected
    cmp     al, 0ah
    jne     noEndLine
    inc     lineno                      ; count the line when its line feed is consumed
noEndLine:
    inc     esi
    inc     edi
    and     eax, 0FFh                   ; return the character zero-extended
    ret
getNextChar ENDP
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; ungetNextChar
; Pushes the most recently read character back by moving the read cursor
; (esi) one byte backwards. If that character is a line feed, lineno is
; decremented as well, because getNextChar will count the same line feed
; again when it is re-read.
; In/Out: esi = read cursor inside lpbuffer
; Flags are modified.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
ungetNextChar PROC
    dec     esi
    cmp     byte ptr [esi], 0ah
    jne     ungetDone
    dec     lineno                      ; undo the count made when the line feed was first read
ungetDone:
    ret
ungetNextChar ENDP
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; FillTab
; Fills the character-class table Classify, which getToken uses (via XLAT) as
; the column index of its jump-table driven scanner.
; Class codes written here:
;    0 digit      1 letter     2 ':'        3 blank/TAB/CR/LF   4 '{'
;    5 NUL        6 '='        7 '<'        8 '+'               9 '-'
;   10 '*'       11 '/'       12 '%'       13 '('              14 ')'
;   15 ';'       16 '}'
; Every other byte keeps the value 17 that Classify is initialised with.
; ebx and ecx are preserved; esi is used as the index register and is left
; holding '}' on return.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
FillTab proc uses ebx ecx
    lea     ebx, Classify
    assume  ebx: ptr byte

    ; '0'..'9' -> class 0
    mov     esi, 48
    .repeat
        mov     [ebx+esi], 0
        inc     esi
    .until esi > 57

    ; 'A'..'Z' -> class 1
    mov     esi, 65
    .repeat
        mov     [ebx+esi], 1
        inc     esi
    .until esi > 90

    ; 'a'..'z' -> class 1
    mov     esi, 97
    .repeat
        mov     [ebx+esi], 1
        inc     esi
    .until esi > 122

    ; single characters, in the same order as the original stores
    mov     esi, ':'
    mov     [ebx+esi], 2
    mov     esi, ' '
    mov     [ebx+esi], 3
    mov     esi, 09h
    mov     [ebx+esi], 3
    mov     esi, 0dh
    mov     [ebx+esi], 3
    mov     esi, 0ah
    mov     [ebx+esi], 3
    mov     esi, '{'
    mov     [ebx+esi], 4
    mov     esi, '='
    mov     [ebx+esi], 6
    mov     esi, 0
    mov     [ebx+esi], 5
    mov     esi, '<'
    mov     [ebx+esi], 7
    mov     esi, '+'
    mov     [ebx+esi], 8
    mov     esi, '-'
    mov     [ebx+esi], 9
    mov     esi, '*'
    mov     [ebx+esi], 10
    mov     esi, '/'
    mov     [ebx+esi], 11
    mov     esi, '%'
    mov     [ebx+esi], 12
    mov     esi, '('
    mov     [ebx+esi], 13
    mov     esi, ')'
    mov     [ebx+esi], 14
    mov     esi, ';'
    mov     [ebx+esi], 15
    mov     esi, '}'
    mov     [ebx+esi], 16

    assume  ebx: nothing
    ret
FillTab endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; hashLookup
; Decides whether the lexeme in tokenString is a keyword or an identifier.
; The slot is (s[1] + s[2]) mod 19, where s is tokenString; the lexeme is a
; keyword only if it is identical to the spelling stored in that slot of
; reservedWordTab. The comparison includes the terminating NUL, so a lexeme
; that is merely a prefix of a keyword (e.g. "whi") stays an identifier.
; In:   edi = address of the NUL that terminates the lexeme in tokenString
; Out:  ecx = token type from hashTab for a keyword, otherwise TID
; eax, ebx, edx, edi and esi are preserved.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
hashLookup proc uses eax ebx edx edi esi
    xor     eax, eax
    xor     edx, edx
    mov     ecx, edi
    mov     edi, offset tokenString
    sub     ecx, edi                    ; ecx = lexeme length
    inc     ecx                         ; compare the terminating NUL too
    mov     al, [edi+1]
    add     al, [edi+2]                 ; 8-bit sum, as the table layout expects
    mov     ebx, 19
    div     ebx                         ; edx = slot number
    mov     esi, reservedWordTab[edx*4]
    cmp     esi, 0
    je      hid                         ; empty slot: cannot be a keyword
    cld
    repe    cmpsb                       ; stops at the first differing byte
    jne     hid
    movzx   ecx, byte ptr hashTab[edx]
    ret
hid:
    mov     ecx, TID
    ret
hashLookup endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; BTOACS
; Prints the unsigned value in eax in decimal, followed by ": ", via StdOut.
; The low-order digits are produced first and parked in ASCVALUE1 (reversed);
; the text is then assembled in printing order in ASCVALUE2.
; In:   eax = value to print
; ecx, esi, edx and edi are preserved; eax is not.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
BTOACS proc uses ecx esi edx edi
    mov     ecx, 10
    lea     esi, ASCVALUE1
    lea     edi, ASCVALUE2
btoaSplit:
    cmp     eax, ecx
    jb      btoaLead                    ; one digit left: it is the leading digit
    xor     edx, edx
    div     ecx
    or      dl, 30h
    mov     [esi], dl                   ; park the low digit
    inc     esi
    jmp     btoaSplit
btoaLead:
    or      al, 30h
    mov     [edi], al
    inc     edi
    mov     ecx, esi
    sub     ecx, offset ASCVALUE1       ; number of parked digits
    jz      btoaEmit
btoaUnwind:
    dec     esi                         ; walk the parked digits backwards
    mov     al, [esi]
    mov     [edi], al
    inc     edi
    dec     ecx
    jnz     btoaUnwind
btoaEmit:
    mov     byte ptr [edi], ':'
    mov     byte ptr [edi+1], ' '
    mov     byte ptr [edi+2], 0
    invoke  StdOut, addr ASCVALUE2
    ret
BTOACS endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; printToken
; Prints a description of a token followed by CR LF. A jump table indexed by
; the token type takes the place of the switch statement of the C version.
; In:   ecx = token type; tokenString = lexeme (printed for errors, keywords,
;       numbers and identifiers)
; A token type above TDO (the last type with a table entry) prints
; "Unknown token" instead of jumping through memory beyond the table.
; eax, ebx and ecx are preserved, so callers holding a tree pointer in eax
; keep it across the call.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
printToken proc uses eax ebx ecx
    cmp     ecx, TDO
    ja      PUnknownCode                ; unsigned compare also rejects negative values
    mov     ebx, ecx
    jmp     TokenCodeTab[ebx*4]
TokenCodeTab dword PENDFILECode,PErrorCode, PReservedWordCode, PReservedWordCode, PReservedWordCode, \
PReservedWordCode, PReservedWordCode, PReservedWordCode, PReservedWordCode, \
PReservedWordCode, PIDCode, PNUMCode, PASSIGNCode, PEQCode, PLTCode, PPLUSCode, PMINUSCode, \
PTIMESCode, POVERCode, PMODCode, PLPARENCode, PRPARENCode, PSEMICode, \
PReservedWordCode, PReservedWordCode
PUnknownCode:
    invoke  StdOut, chr$('Unknown token')
    jmp     PLineEnd
PASSIGNCode:
    invoke  StdOut, chr$(':=')
    jmp     PLineEnd
PLTCode:
    invoke  StdOut, chr$('<')
    jmp     PLineEnd
PEQCode:
    invoke  StdOut, chr$('=')
    jmp     PLineEnd
PLPARENCode:
    invoke  StdOut, chr$(40)
    jmp     PLineEnd
PRPARENCode:
    invoke  StdOut, chr$(41)
    jmp     PLineEnd
PSEMICode:
    invoke  StdOut, chr$(';')
    jmp     PLineEnd
PPLUSCode:
    invoke  StdOut, chr$('+')
    jmp     PLineEnd
PMINUSCode:
    invoke  StdOut, chr$('-')
    jmp     PLineEnd
PTIMESCode:
    invoke  StdOut, chr$('*')
    jmp     PLineEnd
POVERCode:
    invoke  StdOut, chr$('/')
    jmp     PLineEnd
PMODCode:
    invoke  StdOut, chr$('%')
    jmp     PLineEnd
PENDFILECode:
    invoke  StdOut, chr$('EOF')
    jmp     PLineEnd
PErrorCode :
    invoke  StdOut, chr$('ERROR: ')
    jmp     PLexeme
PReservedWordCode :
    invoke  StdOut, chr$('reserved word: ')
    jmp     PLexeme
PNUMCode:
    invoke  StdOut, chr$('NUM, val=')
    jmp     PLexeme
PIDCode:
    invoke  StdOut, chr$('ID, name=')
PLexeme:                                ; shared tail: lexeme, then end of line
    invoke  StdOut, addr tokenString
PLineEnd:                               ; shared tail: end of line
    invoke  StdOut, addr CRLF
    ret
printToken endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; getToken
; Scans the next token. The scanner is a small state machine: the class of
; the first significant character (from Classify) selects the next state
; through State0Tab and a provisional token type through TokenTab.
;   State0  start: skips white space, dispatches on the character class
;   State1  inside a { ... } comment
;   State2  after ':' (expects '=')
;   State3  inside a number
;   State4  inside an identifier or keyword
;   State5  done: terminates the lexeme and classifies identifiers
; In:   esi = read cursor (see getNextChar)
; Out:  ecx = token type; tokenString = NUL-terminated lexeme
;       esi and edi are updated; eax and ebx are preserved.
; A comment that is still open when the input ends yields TENDFILE instead of
; scanning forever.
; NOTE(review): nothing limits the lexeme to MAXTOKENLEN bytes here or in
; getNextChar, so an over-long token writes past tokenString.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
getToken proc uses ebx eax
    xor     eax, eax
    xor     ecx, ecx
    mov     edi, offset tokenString
    lea     ebx, Classify               ; XLAT translates through ebx
State0:
    invoke  getNextChar
    xlat    Classify                    ; al = character class
    mov     cl, TokenTab[eax]           ; provisional token type for this class
    cmp     al,3
    jb      S0next
    cmp     al,5
    ja      S0next
    dec     edi                         ; classes 3..5 (blank, '{', NUL/EOF) are not part of a lexeme
S0next:
    jmp     State0Tab[eax*4]
TokenTab TType TNONE, TNONE, TNONE, TNONE, TNONE, TENDFILE, TEQ, TLT, \
TPLUS, TMINUS, TTIMES, TOVER, TMOD, TLPAREN,TRPAREN,TSEMI, \
TERROR, TERROR
State0Tab dword State3, State4, State2, State0, State1, State5, State5, State5,\
State5, State5, State5, State5, State5, State5, State5, State5,\
State5, State5
State1:
    invoke  getNextChar
    dec     edi                         ; comment text is discarded
    xlat
    cmp     al, 16
    je      State0                      ; '}' closes the comment
    cmp     al, 5
    jne     State1
    mov     cl, TENDFILE                ; input ended inside the comment
    jmp     State5
State2:
    invoke  getNextChar
    xlat
    cmp     al, 6
    jne     S2_NEQ
    mov     cl, TASSIGN
    jmp     State5
S2_NEQ:
    invoke  ungetNextChar               ; a lone ':' is an error token
    dec     edi
    mov     cl, TERROR
    jmp     State5
State3:
    invoke  getNextChar
    xlat
    cmp     al, 0
    je      State3
    invoke  ungetNextChar               ; first non-digit belongs to the next token
    dec     edi
    mov     cl, TNUM
    jmp     State5
State4:
    invoke  getNextChar
    xlat
    cmp     al, 1
    je      State4
    invoke  ungetNextChar               ; first non-letter belongs to the next token
    dec     edi
    mov     cl, TID
    ; falls through into State5
State5:
    assume  edi : ptr byte
    mov     [edi], 0                    ; terminate the lexeme
    .if cl == TID
        invoke  hashLookup              ; keyword or identifier?
    .endif
if TraceScan
    push    ecx
    invoke  StdOut, chr$(09h)
    mov     eax, lineno
    invoke  BTOACS
    pop     ecx
    invoke  printToken
endif
    assume  edi : nothing
    ret
getToken endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; newStmtNode
; Allocates and initialises a statement node for the syntax tree.
; In:   kind = statement kind (an SKind value); only its low byte is stored
; Out:  eax = pointer to the new node, or NULL if the allocation failed (an
;       out-of-memory message with the current line number is printed)
; The node gets NULL children and sibling, the current lineno, the given kind
; and nodekind StmtK. ecx and edx are preserved.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
newStmtNode proc uses ecx edx, kind
    mov     eax, halloc(sizeof(treeNode))
    cmp     eax, NULL
    jne     @alloc
    invoke  StdOut, addr allocMsg
    mov     eax, lineno
    invoke  BTOACS
    invoke  StdOut, addr CRLF
    xor     eax, eax                    ; StdOut overwrote eax; callers test for NULL
    ret
@alloc:
    assume  eax: ptr treeNode
    mov     ecx, 0
    .while ecx < MAXCHILDREN
        mov     [eax].child[ecx*4], NULL
        inc     ecx
    .endw
    mov     [eax].sibling, NULL
    mov     edx, lineno
    mov     [eax].lineno, edx
    mov     edx, kind
    mov     [eax].kind, dl
    mov     [eax].nodekind, StmtK
    assume  eax: nothing
    ret
newStmtNode endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; newExpNode
; Allocates and initialises an expression node for the syntax tree.
; In:   kind = expression kind (an EKind value); only its low byte is stored
; Out:  eax = pointer to the new node, or NULL if the allocation failed (an
;       out-of-memory message with the current line number is printed)
; The node gets NULL children and sibling, the current lineno, the given kind,
; nodekind ExpK and exptype Void. ecx and edx are preserved.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
newExpNode proc uses ecx edx, kind
    mov     eax, halloc(sizeof(treeNode))
    cmp     eax, NULL
    jne     @alloc
    invoke  StdOut, addr allocMsg
    mov     eax, lineno
    invoke  BTOACS
    invoke  StdOut, addr CRLF
    xor     eax, eax                    ; StdOut overwrote eax; callers test for NULL
    ret
@alloc:
    assume  eax: ptr treeNode
    mov     ecx, 0
    .while ecx < MAXCHILDREN
        mov     [eax].child[ecx*4], NULL
        inc     ecx
    .endw
    mov     [eax].sibling, NULL
    mov     edx, lineno
    mov     [eax].lineno, edx
    mov     edx, kind
    mov     [eax].kind, dl
    mov     [eax].nodekind, ExpK
    mov     [eax].exptype, Void
    assume  eax: nothing
    ret
newExpNode endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; copyString
; Allocates a new copy of a NUL-terminated string.
; In:   s = address of the source string, or NULL
; Out:  eax = address of the copy (including its terminating NUL); NULL if s
;       is NULL or the allocation failed (an out-of-memory message with the
;       current line number is printed in the latter case)
; ecx, edi and esi are preserved.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
copyString proc uses ecx edi esi,s
    local   cbCopy: dword               ; bytes to copy: length plus the NUL
    .if s == NULL
        mov     eax, NULL
        ret
    .endif
    mov     ecx, len(s)
    inc     ecx                         ; room for the terminator
    mov     cbCopy, ecx
    mov     eax, ecx
    mov     eax, halloc(eax)
    .if eax == NULL
        invoke  StdOut, addr allocMsg
        mov     eax, lineno
        invoke  BTOACS
        invoke  StdOut, addr CRLF
        xor     eax, eax                ; StdOut overwrote eax; report failure as NULL
    .else
        cld
        mov     ecx, cbCopy
        mov     esi, s
        mov     edi, eax
        rep     movsb                   ; copies the text and its NUL; eax still points at the copy
    .endif
    ret
copyString endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; syntaxError
; Prints a syntax-error report for the current line:
;     <CR LF>>>> Syntax error at line N: <message>
; BTOACS already writes ": " after the number, so no further separator is
; printed before the message.
; In:   message = address of a NUL-terminated message
; eax and ecx are preserved.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
syntaxError proc uses eax ecx, message
    invoke  StdOut, addr CRLF
    invoke  StdOut, chr$(">>> ")
    invoke  StdOut, chr$("Syntax error at line ")
    mov     eax, lineno
    invoke  BTOACS                      ; prints "N: "
    invoke  StdOut, message
    ret
syntaxError endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; match
; Consumes the current token if it is the expected one; otherwise reports a
; syntax error, prints the offending token and leaves it in place.
; In:   ecx = current token type; expected = token type required here
; Out:  ecx = next token type after a successful match, unchanged otherwise
; eax is preserved on both paths, so a caller may hold a tree pointer in eax
; across the call.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
match proc uses eax, expected
    .if ecx == expected
        invoke  getToken
    .else
        invoke  syntaxError, chr$("unexpected token -> ")
        invoke  printToken
        push    ecx                     ; StdOut does not preserve ecx
        invoke  StdOut, chr$(" ")
        pop     ecx
    .endif
    ret
match endp
; Forward declarations for the mutually recursive parser procedures, so that
; INVOKE can reference a procedure before its definition appears.
statement proto
stmt_sequence proto
if_stmt proto
repeat_stmt proto
assign_stmt proto
read_stmt proto
write_stmt proto
exp proto
simple_exp proto
term proto
factor proto
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; exp
; Parses  simple_exp [ ('<' | '=') simple_exp ].
; In:   ecx = current token type
; Out:  eax = pointer to the expression subtree; ecx = token after it
; For a comparison an OpK node is built with the left operand in child 0, the
; operator token in attr and the right operand in child 1. ebx is preserved.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
exp proc uses ebx
    invoke  simple_exp
    mov     ebx, eax                    ; ebx = tree built so far
    cmp     ecx, TLT
    je      exp_relop
    cmp     ecx, TEQ
    jne     exp_done
exp_relop:
    invoke  newExpNode, OpK
    assume  eax: ptr treeNode
    cmp     eax, NULL
    je      exp_skip_op                 ; no node: keep the left operand as the tree
    mov     [eax].child[0], ebx
    mov     [eax].attr, ecx
    mov     ebx, eax
exp_skip_op:
    assume  eax: nothing
    invoke  match, ecx                  ; consume the operator
    cmp     ebx, NULL
    je      exp_done
    invoke  simple_exp
    assume  ebx: ptr treeNode
    mov     [ebx].child[4], eax         ; byte offset 4 = second child
    assume  ebx: nothing
exp_done:
    mov     eax, ebx
    ret
exp endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; simple_exp
; Parses  term { ('+' | '-') term }  into a left-leaning chain of OpK nodes.
; In:   ecx = current token type
; Out:  eax = pointer to the subtree; ecx = token after it
; The operator is consumed only after its node was allocated, so the loop
; re-tests the same token when newExpNode returns NULL. ebx is preserved.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
simple_exp proc uses ebx
    assume  eax: ptr treeNode
    assume  ebx: ptr treeNode
    invoke  term
    mov     ebx, eax                    ; ebx = tree built so far
sexp_test:
    cmp     ecx, TPLUS
    je      sexp_op
    cmp     ecx, TMINUS
    jne     sexp_done
sexp_op:
    invoke  newExpNode, OpK
    cmp     eax, NULL
    je      sexp_test
    mov     [eax].child[0], ebx         ; left operand
    mov     [eax].attr, ecx             ; operator token
    mov     ebx, eax
    invoke  match, ecx
    invoke  term
    mov     [ebx].child[4], eax         ; right operand (byte offset 4 = second child)
    jmp     sexp_test
sexp_done:
    assume  eax: nothing
    assume  ebx: nothing
    mov     eax, ebx
    ret
simple_exp endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; term
; Parses  factor { ('*' | '/') factor }  into a left-leaning chain of OpK nodes.
; In:   ecx = current token type
; Out:  eax = pointer to the subtree; ecx = token after it
; The operator is consumed only after its node was allocated, so the loop
; re-tests the same token when newExpNode returns NULL. ebx is preserved.
; NOTE(review): TMOD is produced by the scanner but is not accepted here.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
term proc uses ebx
    assume  eax: ptr treeNode
    assume  ebx: ptr treeNode
    invoke  factor
    mov     ebx, eax                    ; ebx = tree built so far
term_test:
    cmp     ecx, TTIMES
    je      term_op
    cmp     ecx, TOVER
    jne     term_done
term_op:
    invoke  newExpNode, OpK
    cmp     eax, NULL
    je      term_test
    mov     [eax].child[0], ebx         ; left operand
    mov     [eax].attr, ecx             ; operator token
    mov     ebx, eax
    invoke  match, ecx
    invoke  factor
    mov     [ebx].child[4], eax         ; right operand (byte offset 4 = second child)
    jmp     term_test
term_done:
    assume  eax: nothing
    assume  ebx: nothing
    mov     eax, ebx
    ret
term endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; factor
; Parses  NUM | ID | '(' exp ')'.
; In:   ecx = current token type; tokenString = its lexeme
; Out:  eax = pointer to the subtree, or NULL when the token cannot start a
;       factor (an error is reported and the token is skipped);
;       ecx = token after the factor
; ConstK nodes hold the numeric value in attr, IdK nodes a private copy of the
; name. ebx is preserved.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
factor proc uses ebx
    assume  eax: ptr treeNode
    mov     eax, NULL
    .if ecx == TNUM
        invoke  newExpNode, ConstK
        .if eax != NULL
            push    eax
            push    ecx                 ; atol does not preserve ecx
            invoke  atol, addr tokenString
            mov     ebx,eax
            pop     ecx
            pop     eax
            mov     [eax].attr, ebx
        .endif
        invoke  match, TNUM
    .elseif ecx == TID
        invoke  newExpNode, IdK
        .if eax != NULL
            push    eax
            invoke  copyString, addr tokenString
            mov     ebx, eax
            pop     eax
            mov     [eax].attr, ebx
        .endif
        invoke  match, TID
    .elseif ecx == TLPAREN
        invoke  match, TLPAREN
        invoke  exp
        mov     ebx, eax                ; keep the subtree safe while ')' is checked
        invoke  match, TRPAREN
        mov     eax, ebx
    .else
        invoke  syntaxError, chr$("unexpected token -> ")
        invoke  printToken
        invoke  getToken
        xor     eax, eax                ; no subtree: the printing calls may have overwritten eax
    .endif
    assume  eax: nothing
    ret
factor endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; if_stmt
; Parses  if exp then stmt_sequence [ else stmt_sequence ] end.
; In:   ecx = current token type
; Out:  eax = pointer to the IfK node (child 0 = condition, child 1 = then
;       part, child 2 = else part), or NULL if the node was not allocated;
;       ecx = token after the statement
; ebx is preserved.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
if_stmt proc uses ebx
    invoke  newStmtNode, IfK
    mov     ebx, eax                    ; ebx = the if node (may be NULL)
    invoke  match, TIF
    assume  ebx: ptr treeNode
    cmp     ebx, NULL
    je      ifs_then
    invoke  exp
    mov     [ebx].child[0], eax
ifs_then:
    invoke  match, TTHEN
    cmp     ebx, NULL
    je      ifs_else
    invoke  stmt_sequence
    mov     [ebx].child[4], eax
ifs_else:
    cmp     ecx, TELSE
    jne     ifs_end
    invoke  match , TELSE
    cmp     ebx, NULL
    je      ifs_end
    invoke  stmt_sequence
    mov     [ebx].child[8], eax
ifs_end:
    invoke  match, TEND
    assume  ebx: nothing
    mov     eax, ebx
    ret
if_stmt endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; repeat_stmt
; Parses  repeat stmt_sequence until exp.
; In:   ecx = current token type
; Out:  eax = pointer to the RepeatK node (child 0 = body, child 1 = exit
;       condition), or NULL if the node was not allocated;
;       ecx = token after the statement
; ebx is preserved.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
repeat_stmt proc uses ebx
    invoke  newStmtNode, RepeatK
    mov     ebx, eax                    ; ebx = the repeat node (may be NULL)
    invoke  match, TREPEAT
    assume  ebx: ptr treeNode
    cmp     ebx, NULL
    je      rpt_until
    invoke  stmt_sequence
    mov     [ebx].child[0], eax
rpt_until:
    invoke  match, TUNTIL
    cmp     ebx, NULL
    je      rpt_done
    invoke  exp
    mov     [ebx].child[4], eax
rpt_done:
    assume  ebx: nothing
    mov     eax, ebx
    ret
repeat_stmt endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; assign_stmt
; Parses  ID := exp.
; In:   ecx = current token type; tokenString = its lexeme
; Out:  eax = pointer to the AssignK node (attr = copy of the target name,
;       child 0 = value expression), or NULL if the node was not allocated;
;       ecx = token after the statement
; ebx is preserved.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
assign_stmt proc uses ebx
    invoke  newStmtNode, AssignK
    mov     ebx, eax                    ; ebx = the assignment node (may be NULL)
    assume  ebx: ptr treeNode
    cmp     ebx, NULL
    je      asg_target
    cmp     ecx, TID
    jne     asg_target
    invoke  copyString, addr tokenString
    mov     [ebx].attr, eax             ; remember the target name
asg_target:
    invoke  match, TID
    invoke  match, TASSIGN
    cmp     ebx, NULL
    je      asg_done
    invoke  exp
    mov     [ebx].child[0], eax
asg_done:
    assume  ebx: nothing
    mov     eax, ebx
    ret
assign_stmt endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; read_stmt
; Parses  read ID.
; In:   ecx = current token type
; Out:  eax = pointer to the ReadK node (attr = copy of the variable name),
;       or NULL if the node was not allocated; ecx = token after the statement
; ebx is preserved.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
read_stmt proc uses ebx
    invoke  newStmtNode, ReadK
    mov     ebx, eax                    ; ebx = the read node (may be NULL)
    invoke  match, TREAD
    assume  ebx: ptr treeNode
    cmp     ebx, NULL
    je      rd_name
    cmp     ecx, TID
    jne     rd_name
    invoke  copyString, addr tokenString
    mov     [ebx].attr, eax             ; remember the variable name
rd_name:
    invoke  match, TID
    assume  ebx: nothing
    mov     eax, ebx
    ret
read_stmt endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; write_stmt
; Parses  write exp.
; In:   ecx = current token type
; Out:  eax = pointer to the WriteK node (child 0 = expression), or NULL if
;       the node was not allocated; ecx = token after the statement
; ebx is preserved.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
write_stmt proc uses ebx
    invoke  newStmtNode, WriteK
    mov     ebx, eax                    ; ebx = the write node (may be NULL)
    invoke  match, TWRITE
    assume  ebx: ptr treeNode
    cmp     ebx, NULL
    je      wr_done
    invoke  exp
    mov     [ebx].child[0], eax
wr_done:
    assume  ebx: nothing
    mov     eax, ebx
    ret
write_stmt endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; statement
; Dispatches on the current token to the matching statement parser.
; In:   ecx = current token type
; Out:  eax = pointer to the statement node, or NULL when the token cannot
;       start a statement (an error is reported and the token is skipped);
;       ecx = token after the statement
; ebx is preserved.
; NOTE(review): TWHILE and TDO are produced by the scanner but no statement
; form accepts them here.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
statement proc uses ebx
    mov     eax, NULL
    .if ecx == TIF
        invoke  if_stmt
    .elseif ecx == TREPEAT
        invoke  repeat_stmt
    .elseif ecx == TID
        invoke  assign_stmt
    .elseif ecx == TREAD
        invoke  read_stmt
    .elseif ecx == TWRITE
        invoke  write_stmt
    .else
        invoke  syntaxError, chr$("unexpected token -> ")
        invoke  printToken
        invoke  getToken
        xor     eax, eax                ; no node: the printing calls may have overwritten eax
    .endif
    ret
statement endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; stmt_sequence
; Parses  statement { ';' statement }  and links the statements through their
; sibling fields. The sequence ends at TENDFILE, TEND, TELSE or TUNTIL.
; In:   ecx = current token type
; Out:  eax = pointer to the first statement of the sequence (NULL if none);
;       ecx = the token that ended the sequence
; ebx and edx are preserved.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
stmt_sequence proc uses ebx edx
    local   t: dword                    ; head of the list
    invoke  statement
    mov     t, eax
    mov     ebx, eax                    ; ebx = last statement linked so far
seq_test:
    cmp     ecx, TENDFILE
    je      seq_done
    cmp     ecx, TEND
    je      seq_done
    cmp     ecx, TELSE
    je      seq_done
    cmp     ecx, TUNTIL
    je      seq_done
    invoke  match, TSEMI
    invoke  statement                   ; statement saves and restores ebx itself
    mov     edx, eax
    cmp     edx, NULL
    je      seq_test                    ; nothing parsed: nothing to link
    cmp     ebx, NULL
    jne     seq_append
    mov     ebx, edx                    ; first statement of the sequence
    mov     t, edx
    jmp     seq_test
seq_append:
    assume  ebx : ptr treeNode
    mov     [ebx].sibling, edx
    assume  ebx : nothing
    mov     ebx, edx
    jmp     seq_test
seq_done:
    mov     eax, t
    ret
stmt_sequence endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; parse
; Parses the whole program: reads the first token, parses a statement
; sequence and reports an error if input remains after it.
; Out:  eax = pointer to the syntax tree (also when the trailing-input error
;       is reported); ecx = current token type
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
parse proc
    invoke  getToken
    invoke  stmt_sequence
    .if ecx != TENDFILE
        invoke  syntaxError, chr$("Code ends before file ")
        push    eax                     ; StdOut overwrites eax (the tree) and ecx
        push    ecx
        invoke  StdOut, addr CRLF
        pop     ecx
        pop     eax
    .endif
    ret
parse endp
; printSpace
; Writes edx copies of the indentation string to standard output.
; In:   edx = repeat count (unsigned)
; eax, ecx and edx are preserved.
printSpace proc uses ecx edx eax
    xor     ecx, ecx
    jmp     psp_test
psp_emit:
    push    ecx                         ; StdOut does not preserve ecx/edx
    push    edx
    invoke  StdOut, chr$(" ")
    pop     edx
    pop     ecx
    inc     ecx
psp_test:
    cmp     ecx, edx
    jb      psp_emit
    ret
printSpace endp
; printTree
; Prints a syntax tree to standard output, one node per line, indenting each
; nesting level by two more units than its parent.
; In:   eax = pointer to the first node of a sibling list (may be NULL)
;       edx = current indentation; 2 is added on entry and removed on exit
; For every node in the sibling list the node itself is printed according to
; nodekind/kind, then printTree is called for each of its MAXCHILDREN children.
; ecx is preserved (uses); eax is NULL on return when the list was walked.
printTree proc uses ecx
assume eax: ptr treeNode
add edx, 2
.while eax != NULL
invoke printSpace
.if [eax].nodekind == StmtK
; eax/edx/ecx are saved because the StdOut calls below do not preserve them
push eax
push edx
push ecx
.if [eax].kind == IfK
invoke StdOut, addr reservedWord4
invoke StdOut, addr CRLF
.elseif [eax].kind == RepeatK
invoke StdOut, addr reservedWord3
invoke StdOut, addr CRLF
.elseif [eax].kind == AssignK
push eax
invoke StdOut, chr$("Assgin to: ")
pop eax
; attr holds the pointer to the target name
invoke StdOut, [eax].attr
invoke StdOut, addr CRLF
.elseif [eax].kind == ReadK
push eax
invoke StdOut, chr$("Read: ")
pop eax
; attr holds the pointer to the variable name
invoke StdOut, [eax].attr
invoke StdOut, addr CRLF
.elseif [eax].kind == WriteK
invoke StdOut, addr reservedWord6
invoke StdOut, addr CRLF
.else
invoke StdOut, chr$("Unknow ExpNode")
invoke StdOut, addr CRLF
.endif
pop ecx
pop edx
pop eax
.elseif [eax].nodekind == ExpK
push eax
push edx
push ecx
.if [eax].kind == OpK
push eax
invoke StdOut, chr$("Op: ")
pop eax
; attr holds the operator's token type; printToken prints it and ends the line
mov ecx, [eax].attr
invoke printToken
.elseif [eax].kind == ConstK
push eax
invoke StdOut, chr$("const: ")
pop eax
push eax
; attr holds the constant's value; BTOACS prints it followed by ": "
mov eax, [eax].attr
invoke BTOACS
pop eax
invoke StdOut, addr CRLF
.elseif [eax].kind == IdK
push eax
invoke StdOut, chr$("Id: ")
pop eax
; attr holds the pointer to the identifier's name
invoke StdOut, [eax].attr
invoke StdOut, addr CRLF
.else
invoke StdOut, chr$("Unknow ExpNode")
invoke StdOut, addr CRLF
.endif
; NOTE: the saved ecx is popped into eax here and then replaced by the saved
; eax two pops later; ecx itself is not restored, but it is reset to 0 below
pop eax
pop edx
pop eax
.else
push eax
push edx
push ecx
invoke StdOut, chr$("Unknow node kind")
invoke StdOut, addr CRLF
; same pop pattern as above: ecx is not restored, eax and edx are
pop eax
pop edx
pop eax
.endif
; print every child one level deeper; eax is saved around each recursive call
mov ecx, 0
.while ecx < MAXCHILDREN
push eax
mov eax, [eax].child[ecx*4]
invoke printTree
inc ecx
pop eax
.endw
; continue with the next statement of the sequence
mov eax, [eax].sibling
.endw
sub edx, 2
assume eax: nothing
ret
printTree endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; symtab
; Symbol table implementation for the TINY compiler (allows only one symbol table).
; The symbol table is implemented as a chained hash table.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
.data
; Number of buckets in the hash table; also the modulus used by hash.
tabSize equ 211
; Left shift applied to the running hash value for each character.
SHIFT equ 4
; One source line on which a variable is referenced.
LineListRec struct
lineno dword ? ; source line number
next dword ? ; pointer to the next LineListRec, NULL at the end of the list
LineListRec ends
; One variable in a bucket's chain.
BucketListRec struct
varName dword ? ; pointer to the variable's name
lines dword ? ; head pointer of this variable's LineListRec list
memloc dword ? ; memory location assigned to the variable
next dword ? ; pointer to the next BucketListRec in the same bucket
BucketListRec ends
; The table itself: tabSize bucket heads, each a pointer to a BucketListRec.
hashTable dword tabSize dup (NULL)
.code
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; hash
; Computes the bucket index of a name. For every character the running value
; is shifted left by SHIFT bits, the character is added to its low byte (an
; 8-bit add; a carry out of the low byte is dropped) and the result is reduced
; modulo tabSize.
; In:   varName = address of a NUL-terminated name
; Out:  eax = bucket index in the range 0 .. tabSize-1
; ebx, edx and ecx are preserved.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
hash proc uses ebx edx ecx,varName
    mov     ebx, varName                ; ebx walks the name
    assume  ebx : ptr byte
    mov     ecx, tabSize
    xor     eax, eax
hash_next:
    cmp     [ebx], 0
    je      hash_done
    shl     eax, SHIFT
    add     al, [ebx]
    xor     edx, edx
    div     ecx
    mov     eax, edx                    ; keep the remainder as the running value
    inc     ebx
    jmp     hash_next
hash_done:
    ret
hash endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; st_insert
; Records a reference to a variable in the symbol table.
; In:   varName = address of the NUL-terminated name (the pointer itself is
;                 stored, the text is not copied)
;       line_no = source line of the reference
;       loc     = memory location; stored only when the name is inserted for
;                 the first time, otherwise ignored
; A new name gets a bucket record at the head of its chain with a one-element
; line list; a known name gets line_no appended to its line list.
; If an allocation fails, an out-of-memory message is printed and the table is
; left unchanged (a bucket record already allocated in that call is not freed).
; eax, ebx, ecx and edx are preserved.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
st_insert proc uses eax ebx ecx edx, varName, line_no, loc
    local   bucketIdx: dword            ; hash value of varName
    invoke  hash, varName
    mov     bucketIdx, eax
    mov     ebx, hashTable[eax*4]       ; head of the bucket's chain
    assume  ebx: ptr BucketListRec
@begin:
    or      ebx, ebx
    jz      @end
    invoke  szCmp, varName, [ebx].varName
    or      eax, eax
    jnz     @end                        ; non-zero result: the names match
    mov     ebx, [ebx].next
    jmp     @begin
@end:
    .if ebx == NULL
        ; name not in the table yet: build a bucket record and its first line record
        mov     eax, halloc(sizeof(BucketListRec))
        test    eax, eax
        jz      st_nomem
        mov     ebx, eax
        mov     eax, varName
        mov     [ebx].varName, eax
        mov     eax, loc
        mov     [ebx].memloc, eax
        mov     eax, halloc(sizeof(LineListRec))
        test    eax, eax
        jz      st_nomem
        mov     [ebx].lines, eax
        mov     ecx, bucketIdx
        mov     eax, hashTable[ecx*4]
        mov     [ebx].next, eax         ; link in front of the existing chain
        mov     hashTable[ecx*4], ebx
        mov     ebx, [ebx].lines
        assume  ebx: ptr LineListRec
        mov     eax, line_no
        mov     [ebx].lineno, eax
        mov     [ebx].next, NULL
    .else
        ; known name: append the line number to the end of its line list
        assume  ebx: ptr BucketListRec
        mov     ebx, [ebx].lines
        assume  ebx: ptr LineListRec    ; declared before the loop so its condition reads LineListRec.next
        .while [ebx].next != NULL
            mov     ebx, [ebx].next
        .endw
        mov     eax, halloc(sizeof(LineListRec))
        test    eax, eax
        jz      st_nomem
        mov     [ebx].next, eax
        mov     ebx, eax
        mov     eax, line_no
        mov     [ebx].lineno, eax
        mov     [ebx].next, NULL
    .endif
    ret
st_nomem:
    invoke  StdOut, addr allocMsg
    mov     eax, line_no
    invoke  BTOACS
    invoke  StdOut, addr CRLF
    ret
st_insert endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; st_lookup
; Looks a variable up in the symbol table.
; In:   varName = address of the NUL-terminated name
; Out:  ebx = memory location of the variable, or -1 if it is not in the table
; eax and edx are preserved.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
st_lookup proc uses eax edx, varName
    invoke  hash, varName
    mov     ebx, hashTable[eax*4]       ; head of the bucket's chain
    assume  ebx: ptr BucketListRec
    .while ebx != NULL
        invoke  szCmp, varName, [ebx].varName
        .break .if eax != 0             ; non-zero result: the names match
        mov     ebx, [ebx].next
    .endw
    cmp     ebx, NULL
    jne     stl_found
    mov     ebx, -1
    ret
stl_found:
    mov     ebx, [ebx].memloc
    ret
st_lookup endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; printSymTab
; Prints a listing of the symbol table: for every bucket record its name,
; its memory location and all line numbers recorded for it.
; Takes no arguments. eax, ebx, ecx and edx are saved and restored.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
printSymTab proc uses eax ebx ecx edx
    invoke StdOut, chr$("Variable Name Location Line Numbers")
    invoke StdOut, addr CRLF
    invoke StdOut, chr$("------------- -------- ------------")
    invoke StdOut, addr CRLF

    xor ecx, ecx                        ; ecx = index of the current hash bucket
    .while ecx < tabSize
        mov ebx, hashTable[ecx*4]       ; NULL for an empty bucket, so the loop is skipped
        assume ebx: ptr BucketListRec
        assume edx: ptr LineListRec
        .while ebx != NULL
            push ecx                    ; the output calls below may change ecx
            invoke StdOut, [ebx].varName
            invoke StdOut, chr$(" ")
            mov eax, [ebx].memloc
            invoke BTOACS               ; prints the number held in eax

            mov edx, [ebx].lines
            .while edx != NULL
                mov eax, [edx].lineno
                ; NOTE(review): BTOACS is defined outside this section; edx is
                ; saved here in case it does not preserve it - confirm.
                push edx
                invoke BTOACS
                pop edx
                mov edx, [edx].next
            .endw

            invoke StdOut, addr CRLF
            pop ecx
            mov ebx, [ebx].next
        .endw
        inc ecx
    .endw
    ret
printSymTab endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; traverse
; Generic syntax tree traversal. On entry eax points to the tree (may be NULL).
; For every node of the sibling chain starting at eax:
;   preProc is called (preorder), each of the MAXCHILDREN children is
;   traversed recursively, then postProc is called (postorder).
; Both callbacks receive the node pointer in eax.
; eax and ecx are restored on exit.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
traverse proc uses ecx, preProc, postProc
    assume eax: ptr treeNode
    push eax                            ; give the caller its node pointer back on exit
    .while eax != NULL
        call preProc
        mov ecx, 0                      ; ecx = child index
        .while ecx < MAXCHILDREN
            push eax
            mov eax, [eax].child[ecx*4]
            invoke traverse, preProc, postProc
            pop eax
            inc ecx
        .endw
        call postProc
        mov eax, [eax].sibling          ; continue with the next sibling
    .endw
    pop eax
    ret
traverse endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; nullProc
; Do-nothing callback: returns immediately without touching any register.
; It is passed to traverse as preProc or as postProc to obtain a
; postorder-only or a preorder-only traversal.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
nullProc proc
ret
nullProc endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; insertNode
; traverse callback that enters identifiers into the symbol table.
;   eax = current tree node
;   edx = counter for variable memory locations; incremented each time a
;         name is inserted for the first time
; Only assignment and read statements and identifier expressions carry a
; name; every other node is ignored. ebx is overwritten by st_lookup.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
insertNode proc
    assume eax: ptr treeNode
    .if [eax].nodekind == StmtK
        .if [eax].kind == AssignK || [eax].kind == ReadK
            jmp @record
        .endif
    .elseif [eax].nodekind == ExpK
        .if [eax].kind == IdK
            jmp @record
        .endif
    .endif
    ret                                 ; node does not name a variable

@record:
    invoke st_lookup, [eax].attr        ; ebx = location, or -1 if unknown
    .if ebx == -1
        ; first occurrence: assign the next free memory location
        invoke st_insert, [eax].attr, [eax].lineno, edx
        inc edx
    .else
        ; already known: only the line number is recorded, the location is ignored
        invoke st_insert, [eax].attr, [eax].lineno, 0
    .endif
    ret
insertNode endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; buildSymtab
; Builds the symbol table by a preorder traversal of the syntax tree.
;   eax = root of the syntax tree (restored on exit)
; edx is reset to zero and then used as the memory location counter by
; insertNode, so it is not preserved. When TraceAnalyze is set at assembly
; time the finished table is printed.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
buildSymtab proc uses eax
    mov edx, 0                          ; first variable gets memory location 0
    invoke traverse, offset insertNode, offset nullProc
if TraceAnalyze
    invoke StdOut, addr CRLF
    invoke StdOut, chr$("Symbol table: ")
    invoke StdOut, addr CRLF
    invoke StdOut, addr CRLF
    invoke printSymTab
endif
    ret
buildSymtab endp
.code
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; typeError
; Reports a type error and sets the global Error flag.
;   eax     = tree node the error refers to (its lineno is printed)
;   message = pointer to the zero-terminated error text
; eax, ecx and edx are saved and restored.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
typeError proc uses eax edx ecx ,message
    assume eax: ptr treeNode
    push eax                            ; StdOut overwrites eax
    invoke StdOut, chr$("Type error at line ")
    pop eax
    mov eax, [eax].lineno
    invoke BTOACS                       ; prints the number held in eax
    ; message already holds the string pointer; "addr message" would pass the
    ; address of the stack slot instead of the text.
    invoke StdOut, message
    mov Error, TRUE
    ret
typeError endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; checkNode
; traverse callback that type-checks a single tree node.
;   eax = current tree node
; Expression nodes get their exptype set (Boolean for TEQ / TLT operators,
; Integer otherwise); operator operands must both be Integer.
; Statement nodes have the type of their relevant child checked; errors
; are reported through typeError with eax pointing at that child.
; eax, ebx and ecx are saved and restored.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
checkNode proc uses eax ebx ecx
    assume eax: ptr treeNode
    assume ebx: ptr treeNode
    assume ecx: ptr treeNode

    .if [eax].nodekind == ExpK
        .if [eax].kind == OpK
            mov ecx, [eax].child        ; ecx = left operand
            mov ebx, [eax].child[4]     ; ebx = right operand
            .if [ebx].exptype != Integer || [ecx].exptype != Integer
                invoke typeError, chr$("Op applied to non-integer")
            .endif
            .if [eax].attr == TEQ || [eax].attr == TLT
                mov [eax].exptype, Boolean
            .else
                mov [eax].exptype, Integer
            .endif
        .elseif [eax].kind == ConstK || [eax].kind == IdK
            mov [eax].exptype, Integer
        .endif

    .elseif [eax].nodekind == StmtK
        ; eax is redirected to the child being checked so that typeError
        ; reports the child's line; the "uses" list restores eax at ret.
        .if [eax].kind == AssignK
            mov eax, [eax].child
            .if [eax].exptype != Integer
                invoke typeError, chr$("assignment of non-integer value")
            .endif
        .elseif [eax].kind == WriteK
            mov eax, [eax].child
            .if [eax].exptype != Integer
                invoke typeError, chr$("write of non-integer value")
            .endif
        .elseif [eax].kind == IfK
            mov eax, [eax].child
            .if [eax].exptype == Integer
                invoke typeError, chr$("if test is not Boolean")
            .endif
        .elseif [eax].kind == RepeatK
            mov eax, [eax].child[4]
            .if [eax].exptype == Integer
                invoke typeError, chr$("repeat test is not Boolean")
            .endif
        .endif
    .endif
    ret
checkNode endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; typeCheck
; Type-checks the whole syntax tree: checkNode is applied to every node in
; postorder. eax = root of the syntax tree.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
typeCheck proc
    invoke traverse, offset nullProc, offset checkNode
    ret
typeCheck endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; Code emitting utilities for the TINY compiler and interface to the TM machine
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; TM register numbers, passed as register operands to emitRO / emitRM.
; pc = program counter
pc equ 7
; mp = "memory pointer"; used as the base register for temporary storage
mp equ 6
; gp = "global pointer"; used as the base register for (global) variable storage
gp equ 5
; ac0 = accumulator
ac0 equ 0
; ac1 = second accumulator
ac1 equ 1
; Two x86 registers carry the emitter state between the emit* procedures:
;   esi = TM location number of the next instruction to be emitted
;   edi = highest TM location emitted so far; used in conjunction with
;         emitSkip, emitBackup and emitRestore
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; emitComment
; Writes the comment line "* <text>" to the code file, but only when
; TraceCode is enabled; otherwise nothing is written.
;   c0 = pointer to the zero-terminated comment text
; All general registers are preserved (pushad / popad).
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
.data
szFmtWord1 byte "* %s", 0dh, 0ah, 0
.code
emitComment proc c0
    pushad
if TraceCode
    ; format into the shared buffer, then write exactly that many bytes
    invoke wsprintf, addr szBuffer, addr szFmtWord1, c0
    invoke lstrlen, addr szBuffer
    mov ecx, eax
    invoke WriteFile, hcodeFile, addr szBuffer, ecx, addr lpNumber, NULL
endif
    popad
    ret
emitComment endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; emitRO
; Emits one register-only TM instruction at location esi, then advances esi
; and raises the high-water mark edi if needed.
;   op1 = pointer to the opcode mnemonic
;   r1  = target register
;   s1  = 1st source register
;   t1  = 2nd source register
;   c1  = pointer to a comment, appended only when TraceCode is enabled
; eax and ecx are saved and restored.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
.data
szFmtWord2 byte "%3d: %5s %d, %d, %d ", 0
szFmtWord3 byte 09h, "%s", 0
.code
emitRO proc uses eax ecx, op1, r1, s1, t1, c1
    ; instruction text
    invoke wsprintf, addr szBuffer, addr szFmtWord2, esi, op1, r1, s1, t1
    invoke lstrlen, addr szBuffer
    mov ecx, eax
    invoke WriteFile, hcodeFile, addr szBuffer, ecx, addr lpNumber, NULL
if TraceCode
    ; optional trailing comment
    invoke wsprintf, addr szBuffer, addr szFmtWord3, c1
    invoke lstrlen, addr szBuffer
    mov ecx, eax
    invoke WriteFile, hcodeFile, addr szBuffer, ecx, addr lpNumber, NULL
endif
    ; line terminator
    invoke WriteFile, hcodeFile, addr CRLF, 2, addr lpNumber, NULL

    inc esi                             ; next emit location
    .if esi > edi
        mov edi, esi                    ; new highest location
    .endif
    ret
emitRO endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; emitRM
; Emits one register-to-memory TM instruction at location esi, then advances
; esi and raises the high-water mark edi if needed.
;   op2 = pointer to the opcode mnemonic
;   r2  = target register
;   d2  = the offset
;   s2  = the base register
;   c2  = pointer to a comment, appended only when TraceCode is enabled
; eax and ecx are saved and restored.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
.data
szFmtWord4 byte "%3d: %5s %d, %d(%d) ", 0
.code
emitRM proc uses eax ecx, op2, r2, d2, s2, c2
    ; instruction text
    invoke wsprintf, addr szBuffer, addr szFmtWord4, esi, op2, r2, d2, s2
    invoke lstrlen, addr szBuffer
    mov ecx, eax
    invoke WriteFile, hcodeFile, addr szBuffer, ecx, addr lpNumber, NULL
if TraceCode
    ; optional trailing comment
    invoke wsprintf, addr szBuffer, addr szFmtWord3, c2
    invoke lstrlen, addr szBuffer
    mov ecx, eax
    invoke WriteFile, hcodeFile, addr szBuffer, ecx, addr lpNumber, NULL
endif
    ; line terminator
    invoke WriteFile, hcodeFile, addr CRLF, 2, addr lpNumber, NULL

    inc esi                             ; next emit location
    .if esi > edi
        mov edi, esi                    ; new highest location
    .endif
    ret
emitRM endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; emitSkip
; Reserves "howmany" code locations for a later backpatch.
; Returns in eax the code position before the skip; esi is advanced by
; howmany and the high-water mark edi follows it when it is overtaken.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
emitSkip proc howmany
    mov eax, esi                        ; return value: position before skipping
    add esi, howmany
    cmp esi, edi
    jbe @F                              ; still at or below the highest location
    mov edi, esi
@@:
    ret
emitSkip endp
.data
szMsg1 byte "BUG in emitBackup", 0
.code
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; emitBackup
; Moves the current emit location esi back to loc, a previously skipped
; location. If loc is not below the highest emitted location (edi), a
; "BUG in emitBackup" comment is emitted through emitComment.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
emitBackup proc loc
    mov esi, loc
    .if esi >= edi                      ; loc was never skipped over
        invoke emitComment, addr szMsg1
    .endif
    ret
emitBackup endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; emitRestore
; Sets the current emit location (esi) to the highest location reached so
; far (edi), i.e. the first location that has not been emitted yet.
; Used after a backpatch done with emitBackup.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
emitRestore proc
mov esi, edi
ret
emitRestore endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; emitRM_Abs
; Emits a register-to-memory TM instruction whose target is given as an
; absolute code location; the reference is converted to a pc-relative
; offset a3 - (esi + 1) with pc as the base register.
;   op3 = pointer to the opcode mnemonic
;   r3  = target register
;   a3  = the absolute location in memory
;   c2  = pointer to a comment, appended only when TraceCode is TRUE
; esi is advanced and edi raised like in emitRM; eax and ecx are saved and
; restored by the "uses" list.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
emitRM_Abs proc uses eax ecx,op3, r3, a3, c2
    ; No pushad here: it was never matched by a popad, which made the
    ; epilogue restore eax / ecx from the wrong stack slots.
    mov eax, a3
    sub eax, esi
    dec eax                             ; eax = a3 - (esi + 1)
    invoke wsprintf, addr szBuffer, addr szFmtWord4, esi, op3, r3, eax, pc
    inc esi
    invoke lstrlen, addr szBuffer
    mov ecx, eax
    invoke WriteFile, hcodeFile, addr szBuffer, ecx, addr lpNumber, NULL
    .if TraceCode
        invoke wsprintf, addr szBuffer, addr szFmtWord3, c2
        invoke lstrlen, addr szBuffer
        mov ecx, eax
        invoke WriteFile, hcodeFile, addr szBuffer, ecx, addr lpNumber, NULL
    .endif
    invoke WriteFile, hcodeFile, addr CRLF, 2, addr lpNumber, NULL
    .if edi < esi
        mov edi, esi                    ; new highest location
    .endif
    ret
emitRM_Abs endp
; tempOffset stores the memory offset for temporaries.
; It is decremented each time a temp is stored, and incremented when it is
; loaded again (see the OpK case of genExp).
.data
tempOffset dword 0
; Read-only strings used by the code generator: szMsgNN are comment texts,
; the remaining names (JEQ, LDA, IST, IIN, IOUT) are opcode mnemonics that
; are passed as the "op" argument of emitRO / emitRM / emitRM_Abs.
.const
szMsg2 byte "-> if", 0
szMsg3 byte "if: jump to else belongs here", 0
JEQ byte "JEQ", 0
szMsg4 byte "if: jump to else ", 0
szMsg5 byte "<- if",0
szMsg6 byte "-> repeat", 0
szMsg7 byte "<- repeat", 0
LDA byte "LDA",0
szMsg8 byte "if: jump to end", 0
szMsg9 byte "repeat: jump after body comes back here", 0
szMsg10 byte "repeat: jump back to body", 0
szMsg11 byte "->assign", 0
szMsg12 byte "<-assign", 0
IST byte "ST", 0
szMsg13 byte "assign: store value", 0
IIN byte "IN", 0
szMsg14 byte "read: integer value", 0
szMsg15 byte "read: store value", 0
IOUT byte "OUT", 0
szMsg16 byte "write ac", 0
.code
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; genStmt
; Generates TM code for one statement node.
;   eax = statement node on entry
; The node pointer is kept in ebx while children are handed to cGen in eax.
; esi / edi (emit location / highest location) are updated by the emit*
; calls; eax and ebx are saved and restored.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
genStmt proc uses eax ebx
    local savedLoc1
    local savedLoc2
    local currentLoc
    assume ebx: ptr treeNode
    mov ebx, eax                        ; ebx = statement node
    .if [ebx].kind == IfK
        .if TraceCode
            invoke emitComment, addr szMsg2
        .endif
        ; test expression
        mov eax, [ebx].child
        call cGen
        invoke emitSkip, 1              ; room for the jump to the else part
        mov savedLoc1, eax
        invoke emitComment, addr szMsg3
        ; then part
        mov eax, [ebx].child[4]
        call cGen
        invoke emitSkip, 1              ; room for the jump to the end
        mov savedLoc2, eax
        invoke emitComment, chr$("if: jump to end belongs here")
        ; backpatch the jump to the else part
        invoke emitSkip, 0
        mov currentLoc, eax
        invoke emitBackup, savedLoc1
        invoke emitRM_Abs, addr JEQ, ac0, currentLoc, addr szMsg4
        invoke emitRestore
        ; else part (a NULL child makes cGen emit nothing)
        mov eax, [ebx].child[8]
        call cGen
        ; backpatch the jump to the end
        invoke emitSkip, 0
        mov currentLoc, eax
        invoke emitBackup, savedLoc2
        invoke emitRM_Abs, addr LDA, pc, currentLoc, addr szMsg8
        invoke emitRestore
        .if TraceCode
            invoke emitComment, addr szMsg5
        .endif
    .elseif [ebx].kind == RepeatK
        .if TraceCode
            invoke emitComment, addr szMsg6
        .endif
        invoke emitSkip, 0              ; remember where the body starts
        mov savedLoc1, eax
        invoke emitComment, addr szMsg9
        ; body
        mov eax, [ebx].child
        call cGen
        ; test
        mov eax, [ebx].child[4]
        call cGen
        invoke emitRM_Abs, addr JEQ, ac0, savedLoc1, addr szMsg10
        .if TraceCode
            invoke emitComment, addr szMsg7
        .endif
    .elseif [ebx].kind == AssignK
        .if TraceCode
            invoke emitComment, addr szMsg11
        .endif
        ; value to assign
        mov eax, [ebx].child
        call cGen
        ; st_lookup replaces ebx with the variable's memory location
        invoke st_lookup, [ebx].attr
        invoke emitRM, addr IST, ac0, ebx, gp, addr szMsg13
        .if TraceCode
            invoke emitComment, addr szMsg12
        .endif
    .elseif [ebx].kind == ReadK
        invoke emitRO, addr IIN, ac0, 0, 0, addr szMsg14
        ; st_lookup replaces ebx with the variable's memory location
        invoke st_lookup, [ebx].attr
        invoke emitRM, addr IST, ac0, ebx, gp, addr szMsg15
    .elseif [ebx].kind == WriteK
        ; expression to write
        mov eax, [ebx].child
        call cGen
        invoke emitRO, addr IOUT, ac0, 0, 0, addr szMsg16
    .endif
    ret
genStmt endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; genExp
; Generates TM code for one expression node; the value ends up in ac0.
;   eax = expression node on entry
; For an operator the left operand is computed first and parked in a
; temporary at tempOffset(mp) while the right operand is computed; it is
; then reloaded into ac1. Comparisons leave 1 (true) or 0 (false) in ac0.
; eax and ebx are saved and restored.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
.const
szMsg17 byte "-> Const", 0
LDC byte "LDC", 0
szMsg18 byte "load const", 0
szMsg19 byte "<- Const", 0
LD byte "LD", 0
szMsg20 byte "-> Id", 0
szMsg21 byte "<- ID", 0
szMsg22 byte "load id value", 0
szMsg23 byte "-> Op", 0
szMsg24 byte "<- Op", 0
szMsg25 byte "op: push left", 0
szMsg26 byte "op: load left", 0
IADD byte "ADD", 0
ISUB byte "SUB", 0
IIMUL byte "MUL", 0
IIDIV byte "DIV", 0
szMsg27 byte "op +", 0
szMsg28 byte "op -", 0
szMsg29 byte "op *", 0
szMsg30 byte "op /", 0
szMsg31 byte "op <", 0
JLT byte "JLT", 0
szMsg32 byte "br if true", 0
szMsg33 byte "false case", 0
szMsg34 byte "unconditional jmp", 0
szMsg35 byte "true case", 0
szMsg36 byte "op ==", 0
.code
genExp proc uses eax ebx
    assume ebx: ptr treeNode
    mov ebx, eax                        ; ebx = expression node

    .if [ebx].kind == ConstK
if TraceCode
        invoke emitComment, addr szMsg17
endif
        invoke emitRM, addr LDC, ac0, [ebx].attr, 0, addr szMsg18
if TraceCode
        invoke emitComment, addr szMsg19
endif

    .elseif [ebx].kind == IdK
if TraceCode
        invoke emitComment, addr szMsg20
endif
        ; st_lookup replaces ebx with the variable's memory location
        invoke st_lookup, [ebx].attr
        invoke emitRM, addr LD, ac0, ebx, gp, addr szMsg22
if TraceCode
        invoke emitComment, addr szMsg21
endif

    .elseif [ebx].kind == OpK
if TraceCode
        invoke emitComment, addr szMsg23
endif
        ; left operand -> ac0, then push it to a temporary
        mov eax, [ebx].child
        call cGen
        invoke emitRM, addr IST, ac0, tempOffset, mp, addr szMsg25
        dec tempOffset
        ; right operand -> ac0
        mov eax, [ebx].child[4]
        call cGen
        ; pop the left operand into ac1
        inc tempOffset
        invoke emitRM, addr LD, ac1, tempOffset, mp, addr szMsg26

        .if [ebx].attr == TPLUS
            invoke emitRO, addr IADD, ac0, ac1, ac0, addr szMsg27
        .elseif [ebx].attr == TMINUS
            invoke emitRO, addr ISUB, ac0, ac1, ac0, addr szMsg28
        .elseif [ebx].attr == TTIMES
            invoke emitRO, addr IIMUL, ac0, ac1, ac0, addr szMsg29
        .elseif [ebx].attr == TOVER
            invoke emitRO, addr IIDIV, ac0, ac1, ac0, addr szMsg30
        .elseif [ebx].attr == TLT
            ; left - right, then select 1 or 0 with a short jump sequence
            invoke emitRO, addr ISUB, ac0, ac1, ac0, addr szMsg31
            invoke emitRM, addr JLT, ac0, 2, pc, addr szMsg32
            invoke emitRM, addr LDC, ac0, 0, ac0, addr szMsg33
            invoke emitRM, addr LDA, pc, 1, pc, addr szMsg34
            invoke emitRM, addr LDC, ac0, 1, ac0, addr szMsg35
        .elseif [ebx].attr == TEQ
            ; same sequence as for "<" but branching on equality
            invoke emitRO, addr ISUB, ac0, ac1, ac0, addr szMsg36
            invoke emitRM, addr JEQ, ac0, 2, pc, addr szMsg32
            invoke emitRM, addr LDC, ac0, 0, ac0, addr szMsg33
            invoke emitRM, addr LDA, pc, 1, pc, addr szMsg34
            invoke emitRM, addr LDC, ac0, 1, ac0, addr szMsg35
        .endif
if TraceCode
        invoke emitComment, addr szMsg24
endif
    .endif
    ret
genExp endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; cGen
; Generates code for a chain of sibling nodes.
;   eax = first tree node (may be NULL)
; Each node is dispatched to genStmt or genExp according to its nodekind,
; then the walk continues with its sibling. eax is NULL on return.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
cGen proc
    assume eax: ptr treeNode
    .while eax != NULL
        .if [eax].nodekind == StmtK
            invoke genStmt
        .elseif [eax].nodekind == ExpK
            invoke genExp
        .endif
        mov eax, [eax].sibling
    .endw
    ret
cGen endp
.const
szFmtWord5 byte "File: %s",0
szMsg37 byte "TINY Compilation to TM code", 0
szMsg38 byte "Standard prelude", 0
szMsg39 byte "load maxaddress from location 0", 0
szMsg40 byte "clear location 0", 0
szMsg41 byte "End of standard prelude", 0
szMsg42 byte "End of execution", 0
HALT byte "HALT", 0
non_char byte 0
.code
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; codeGen
; Entry point of the code generator: writes the standard prelude, the code
; for the whole syntax tree and a final HALT to the code file.
;   eax = root of the syntax tree
; eax is NULL on return (left so by cGen).
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
codeGen proc
    ; Build the "File: ..." comment text first; eax (the tree) must survive wsprintf.
    push eax
    invoke wsprintf, addr ASCVALUE1, addr szFmtWord5, addr filename1
    pop eax

    invoke emitComment, addr szMsg37
    invoke emitComment, addr ASCVALUE1

    ; standard prelude
    invoke emitComment, addr szMsg38
    invoke emitRM, addr LD, mp, 0, ac0, addr szMsg39
    invoke emitRM, addr IST, ac0, 0, ac0, addr szMsg40
    invoke emitComment, addr szMsg41

    ; code for the program itself
    call cGen

    ; finish
    invoke emitComment, addr szMsg42
    invoke emitRO, addr HALT, 0, 0, 0, addr non_char
    ret
codeGen endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; start
; Program entry point. Derives the source and code file names from the
; command line (or falls back to the built-in test file), then runs the
; phases selected at assembly time: scan only (NO_PARSE) or parse,
; optional analysis (ANALYZE) and code generation.
; The syntax tree pointer returned by parse in eax is kept in the local
; syntaxTree and reloaded before every phase, because the calls in between
; overwrite eax. No code file is produced once Error has been set.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
start proc
    local syntaxTree : ptr treeNode

    ; ---- file names ---------------------------------------------------------------------------
    invoke ArgClC, ArgNum, addr ItemBuffer
    invoke GetCL, ArgNum, addr filename
    .if eax == 1
        invoke InString,1,addr filename,chr$(".")
        ; InString returns 0 when "." is absent and a negative value on error
        ; (e.g. a one-character name); both are treated as "no extension" so
        ; that a negative value is never used as a copy count.
        .if sdword ptr eax <= 0
            invoke lstrcpy, addr filename1, addr filename
            invoke lstrcat, addr filename, addr TNY
            invoke lstrcat, addr filename1, addr TM
        .else
            ; copy the name up to and including the "." into filename1
            ; NOTE(review): TM is appended to the part that already ends in
            ; "."; if TM itself starts with "." the name gets two dots - confirm.
            cld
            mov ecx, eax
            mov esi, offset filename
            mov edi, offset filename1
            rep movsb
            mov byte ptr [edi], 0       ; terminate explicitly before lstrcat
            invoke lstrcat, addr filename1, addr TM
        .endif
    .else
        invoke StdOut, addr filenameMsg
        invoke lstrcpy, addr filename, addr testfile
        invoke lstrcpy, addr filename1, addr testfile
        invoke lstrcat, addr filename, addr TNY
        invoke lstrcat, addr filename1, addr TM
    .endif

    ; ---- scanning / parsing -------------------------------------------------------------------
    invoke FillTab
    mov esi, offset lpbuffer
    mov hScoureFile, fopen_r(filename)
    mov syntaxTree, NULL                ; stays NULL when no tree is built
if NO_PARSE
    push ecx
test_:
    invoke getToken
    cmp ecx, TENDFILE
    jne test_
    pop ecx
else
    invoke parse
    mov syntaxTree, eax
    .if TraceParse
        xor edx, edx
        invoke printTree                ; will modify the eax value
    .endif
endif

    ; ---- semantic analysis --------------------------------------------------------------------
if ANALYZE
    .if Error != TRUE
        invoke StdOut, addr CRLF
        invoke StdOut, chr$("Building Symbol Table..")
        invoke StdOut, addr CRLF
        mov eax, syntaxTree
        invoke buildSymtab
        invoke StdOut, addr CRLF
        invoke StdOut, chr$("Checking Types...")
        invoke StdOut, addr CRLF
        mov eax, syntaxTree
        invoke typeCheck
        invoke StdOut, addr CRLF
        invoke StdOut, chr$("Type Checking Finished")
        invoke StdOut, addr CRLF
    .endif
endif

    ; ---- code generation ----------------------------------------------------------------------
    .if Error != TRUE
        ; CREATE_ALWAYS truncates an existing file so no stale output survives
        invoke CreateFile, addr filename1, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0
        .if eax == INVALID_HANDLE_VALUE
            invoke MessageBox, NULL, addr ErrorMsg, NULL, MB_OK OR MB_ICONEXCLAMATION
            invoke ExitProcess, 0
        .endif
        mov hcodeFile, eax
        mov eax, syntaxTree
        xor esi, esi                    ; emit location starts at 0
        xor edi, edi                    ; highest emitted location starts at 0
        invoke codeGen
        invoke CloseHandle, hcodeFile
    .endif

    invoke CloseHandle, hScoureFile
    invoke ExitProcess,0
start endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
end start
:make
rem Assemble and link this file; it is both the batch script and the MASM source.
set drv=Tiny
\masm32\bin\ml /c /coff %drv%.bat
rem Do not link when assembling failed.
if errorlevel 1 goto done
\masm32\bin\Link /SUBSYSTEM:CONSOLE %drv%.obj
del %drv%.obj
:done
echo.
pause
以上为源代码
comment * -----------------------------------------------------------------
Preprocessor code for high level language simulation in MASM32
Updated 22th 6 2006
---------------------------------------------------------------- *
; -------------------------------------------------------------------------
; reparg: argument helper for the other macros.
; If arg starts with a double quote it is written to the .data section as a
; zero-terminated string and "ADDR <label>" is returned; any other argument
; is returned unchanged.
; -------------------------------------------------------------------------
reparg MACRO arg
LOCAL nustr
quot SUBSTR <arg>,1,1
IFIDN quot,<"> ;; if 1st char = "
.data
nustr db arg,0 ;; write arg to .DATA section
.code
EXITM <ADDR nustr> ;; append name to ADDR operator
ELSE
EXITM <arg> ;; else return arg
ENDIF
ENDM
; -------------------------------------------------------------------------
; fopen_r: open an existing file for reading (GENERIC_READ, OPEN_EXISTING)
; and return the file handle in eax.
; If CreateFile fails a message box is shown and the process exits.
; filename is the name of a buffer holding the path (it is passed with addr).
; -------------------------------------------------------------------------
fopen_r MACRO filename
LOCAL ErrorMsg
.data
ErrorMsg db "Could not open the file", 0dh, 0ah, 0
.code
invoke CreateFile, addr filename, GENERIC_READ ,
NULL,NULL,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,NULL
.if eax == INVALID_HANDLE_VALUE
invoke MessageBox, NULL, addr ErrorMsg, NULL, MB_OK OR MB_ICONEXCLAMATION
invoke ExitProcess, 0
.endif
EXITM <eax> ;; return file handle
ENDM
; ------------------------------------------------
; fread: read up to bcnt bytes from the open file hFile into buffer.
; The byte count stored by ReadFile is returned in eax.
; ------------------------------------------------
fread MACRO hFile,buffer,bcnt
LOCAL var
.data?
var dd ?
.code
invoke ReadFile,hFile,buffer,bcnt,ADDR var,NULL
mov eax, var
EXITM <eax> ;; return bytes read
ENDM
; ------------------------------------------------
; enum: define an enumeration.
; enumname becomes a typedef of byte; the names in parmlist are assigned
; the consecutive values 0, 1, 2, ... in the order given.
; ------------------------------------------------
enum MACRO enumname,parmlist:VARARG
LOCAL count
count = 0
enumname typedef byte
FOR parm, <parmlist>
parm = count
count = count + 1
ENDM
ENDM
; ------------------------------------------------
; CTypeD: define C-like type names.
; integer, unsigned and ushort are 16-bit (word); long and ulong are
; 32-bit (dword); char is 8-bit (byte).
; ------------------------------------------------
CTypeD MACRO
integer typedef word
unsigned typedef word
ushort typedef word
long typedef dword
ulong typedef dword
char typedef byte
ENDM
; ------------------------------------------------
; chr$: place any_text in the .data section as a zero-terminated string
; and return "OFFSET <label>" so it can be used directly as an operand.
; Every use of the macro adds a new copy of the text to .data.
; ------------------------------------------------
chr$ MACRO any_text:VARARG
LOCAL txtname
.data
txtname db any_text,0
.code
EXITM <OFFSET txtname>
ENDM
; ------------------------------------------------
; len: call szLen on lpString (a quoted literal is allowed, see reparg)
; and return its result plus one in eax.
; ------------------------------------------------
len MACRO lpString
invoke szLen,reparg(lpString)
inc eax
EXITM <eax>
ENDM
; ------------------------------------------------
; ustr$: convert DDvalue to text with crt__ultoa (radix 10) in a private
; 20-byte buffer and return "OFFSET <buffer>".
; NOTE(review): crt__ultoa must be declared by an include that is not
; visible in this file - confirm before using the macro.
; ------------------------------------------------
ustr$ MACRO DDvalue ;; unsigned integer to decimal string
LOCAL rvstring
.data
rvstring db 20 dup (0)
align 4
.code
;; invoke dwtoa,DDvalue,ADDR rvstring
invoke crt__ultoa,DDvalue,ADDR rvstring,10
EXITM <OFFSET rvstring>
ENDM
; ------------------------------------------------
; rv: call a function and use its return value as an operand.
; Builds "invoke FuncName, args..." (each argument is passed through
; reparg, so quoted literals are allowed), emits it, and returns eax.
; ------------------------------------------------
rv MACRO FuncName:REQ,args:VARARG
arg equ <invoke FuncName> ;; construct invoke and function name
FOR var,<args> ;; loop through all arguments
arg CATSTR arg,<,reparg(var)> ;; replace quotes and append arg
ENDM
arg ;; write the invoke macro
EXITM <eax> ;; EAX as the return value
ENDM
; ------------------------------------------------
; halloc: allocate bytecount bytes with HeapAlloc (flags 0) from the heap
; returned by GetProcessHeap; the HeapAlloc result is returned in eax.
; The result is not checked here.
; ------------------------------------------------
halloc MACRO bytecount
EXITM <rv(HeapAlloc,rv(GetProcessHeap),0,bytecount)>
ENDM
; ******************************************************
; BASIC style conversion from string to 32 bit integer
; sval: call atol on lpstring (a quoted literal is allowed, see reparg)
; and return its result in eax.
; ******************************************************
sval MACRO lpstring ; string to signed 32 bit integer
invoke atol, reparg(lpstring)
EXITM <eax>
ENDM
; ------------------------------------------------
; strcmp: compare two strings with szCmp and return its result in eax.
; The compiler's callers treat a non-zero result as "strings match".
; ------------------------------------------------
strcmp MACRO str1, str2
invoke szCmp, str1, str2
EXITM <eax>
ENDM
以上为用到的宏
{ Sample program
In Tiny language –
Computes factorial
}
read x; {input an integer}
if 0 < x then {don't compute if x <= 0}
fact := 1;
repeat
fact := fact * x;
x := x-1
until x = 0;
write fact { output factorial of x }
end
以上为测试文件
0: LD 6, 0(0)
1: ST 0, 0(0)
2: IN 0, 0, 0
3: ST 0, 0(5)
4: LDC 0, 0(0)
5: ST 0, 0(6)
6: LD 0, 0(5)
7: LD 1, 0(6)
8: SUB 0, 1, 0
9: JLT 0, 2(7)
10: LDC 0, 0(0)
11: LDA 7, 1(7)
12: LDC 0, 1(0)
14: LDC 0, 1(0)
15: ST 0, 1(5)
16: LD 0, 1(5)
17: ST 0, 0(6)
18: LD 0, 0(5)
19: LD 1, 0(6)
20: MUL 0, 1, 0
21: ST 0, 1(5)
22: LD 0, 0(5)
23: ST 0, 0(6)
24: LDC 0, 1(0)
25: LD 1, 0(6)
26: SUB 0, 1, 0
27: ST 0, 0(5)
28: LD 0, 0(5)
29: ST 0, 0(6)
30: LDC 0, 0(0)
31: LD 1, 0(6)
32: SUB 0, 1, 0
33: JEQ 0, 2(7)
34: LDC 0, 0(0)
35: LDA 7, 1(7)
36: LDC 0, 1(0)
37: JEQ 0, -22(7)
38: LD 0, 1(5)
39: OUT 0, 0, 0
13: JEQ 0, 27(7)
40: LDA 7, 0(7)
41: HALT 0, 0, 0
以上为编译器生成的TM代码
文件清单:
Tiny.bat-----------为自编译源代码文件(放在MASM9.0目录下双击就可以编译出tiny.exe文件)
mymacros.asm-------一些在源代码中用到的宏,为了正常编译需要把它放入MASM32中的macros文件夹,也可以修改Tiny.bat中的include
test.tny-------------是测试文件,也是省略命令行参数时使用的默认测试文件
code.tm--------------是生成的TM代码
编译后生成的exe文件仅为17KB,比原来用C编译的版本少了20多KB;而且如果在编写代码时注意少用chr$之类的宏,文件应该还可以更小.
追求速度与空间效率,汇编是最好的选择!
编写工具:
MASM v9.0
OllyDbg v1.09