用汇编写及扩充Tiny语言编译器(1)词法分析

;@echo off
;goto make

;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;
;  DFA3_A_Mnem - 状态机的汇编实现,Tiny的扩充改版
;  作用:
;
;  Written by 问风 (wenfengmtd@163.com)
;  Debug is fun job's in OLLyDBG v1.09

;
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

.386
.model flat, stdcall
option casemap:none

;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;                                  I N C L U D E   F I L E S                                       
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

include /masm32/include/windows.inc
include /masm32/include/kernel32.inc
include /masm32/include/user32.inc
include /masm32/include/MASM32.INC
include /masm32/include/shell32.inc
include /masm32/macros/mymacros.asm

includelib /masm32/lib/kernel32.lib
includelib /masm32/lib/user32.lib
includelib /masm32/lib/MASM32.LIB
includelib /masm32/lib/shell32.lib
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;                           U S E R   D E F I N E D   E Q U A T E S                                
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

; Declare the C-like type aliases (char, integer, long, ...) by expanding the CTypeD macro.
CTypeD

; BUFLEN      - number of bytes requested from the source file per read
; MAXTOKENLEN - capacity of tokenString, not counting its terminating NUL
BUFLEN  equ 256
MAXTOKENLEN equ 40

; Conditional-assembly switch: when TRUE, getToken prints every token it scans.
TraceScan equ TRUE

; Token type codes, numbered from 0 in the order listed; every name carries a T prefix.
; printfToken indexes its jump table with these values, so the order here must match it.
; MASM continues a line with a trailing backslash (a forward slash is just an operator).
enum TType, TENDFILE, TERROR, TIF, TTHEN, TELSE, TEND, TREPEAT, TUNTIL, TREAD, TWRITE, TID, \
     TNUM, TASSIGN, TEQ, TLT, TPLUS, TMINUS, TTIMES, TOVER, TMOD, TLPAREN, TRPAREN, TSEMI, TWHILE, TDO, \
     TNONE

;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;                           U S E R   D E F I N E D   M A C R O S                                  
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;the macros are defined in mymacros.asm; some of them come from the MASM 9.0 macro set

;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;                                          D A T A                                               
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
.data

; Character-class table: Classify[c] is the DFA input column for byte value c.
; Every entry starts as 17 (the "any other character" class); FillTab sets the real classes.
Classify char 256 dup(17) 
; Keyword lookup by hash: index = (s[1]+s[2]) mod 19, where s is the token text.
; TNONE marks a slot that no keyword hashes to.
; Each table row is its own directive, so no line continuation is needed.
hashTab  byte TWHILE, TEND, TNONE, TNONE, TREPEAT, TNONE, TNONE, TIF, TREAD, TNONE, TWRITE
         byte TNONE, TNONE, TNONE, TELSE, TTHEN, TDO, TUNTIL, TNONE
; Message strings
Accpect  char "Accpect!",0dh, 0ah, 0
Error  char "Error!",0dh, 0ah, 0
; Buffer receiving raw bytes from the source file (plus one spare NUL byte)
lpbuffer byte BUFLEN dup(0), 0
; Name of the source file to scan
filename char "test.tny",0
; Text of the token currently being scanned (MAXTOKENLEN characters plus NUL)
tokenString char MAXTOKENLEN dup (0), 0
; Shown when the source file cannot be opened
ErrorMsg char  "Could not open the file", 0dh, 0ah, 0
; CR LF pair, handy for ending an output line
CRLF  char 0dh, 0ah, 0
; Scratch buffers for BTOACS: the decimal digits are produced least-significant first in
; ASCVALUE1 and then copied in reading order into ASCVALUE2.
ASCVALUE1 char 20 dup(0), 0
ASCVALUE2 char 20 dup(0), 0
; Handle of the source file
hScoureFile dword ?
; Current line number
lineno  dword 0
; Number of valid bytes currently held in lpbuffer
bufferSize dword 0
; Keyword spellings
reservedWord1 char 'while', 0
reservedWord2 char 'end',  0
reservedWord3 char 'repeat', 0
reservedWord4 char 'if',  0
reservedWord5 char 'read',  0
reservedWord6 char 'write', 0
reservedWord7 char 'else',  0
reservedWord8 char 'then',  0
reservedWord9 char 'do',  0
reservedWord0 char 'until', 0
; Keyword spelling table, parallel to hashTab: slot i points at the keyword whose hash is i,
; or holds 0 when no keyword hashes there. A token is a keyword only if it compares equal.
reservedWordTab dword offset reservedWord1, offset reservedWord2, 0, 0, offset reservedWord3, 0, 0, offset reservedWord4
                dword offset reservedWord5, 0, offset reservedWord6, 0, 0, 0, offset reservedWord7, offset reservedWord8
                dword offset reservedWord9, offset reservedWord0, 0
   ; Original author's note: strings produced by chr$ live on the stack, so other definitions
   ; placed after them are accessed incorrectly.
   ; NOTE(review): the chr$ macro shipped with this program emits its strings into .data,
   ; not onto the stack - confirm what this note refers to.

 

 


;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;                                          C O D E                                                 
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

.code

;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;                                       P R O C E D U R E S                                        
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;                                       getNextChar    
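;esi points at the next unread character in lpbuffer
;edi points at the next free byte of tokenString (the character read is also stored there)
;ecx is used internally as the offset of esi inside lpbuffer; once it reaches bufferSize
;    the buffer is refilled from the source file
;the character is returned in eax; 0 is the end-of-file flag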
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; getNextChar - fetch the next source character.
;   In:    esi = read position inside lpbuffer
;          edi = write position inside tokenString
;   Out:   eax = character, zero-extended; 0 means end of file
;          [edi] = character (nothing is stored at end of file); edi advanced by one
;          esi advanced by one (unchanged at end of file)
;   Keeps: ecx
;   Side effects: refills lpbuffer/bufferSize from hScoureFile when the buffer is used up;
;          lineno becomes 1 on the very first read and is bumped for every CR returned.
getNextChar PROC uses ecx
        mov     ecx, esi
        sub     ecx, offset lpbuffer            ; ecx = bytes already consumed (SUB, not SBB:
                                                ; the carry flag is undefined on entry)
        cmp     ecx, bufferSize
        jb      gncFetch                        ; unread data left in the buffer

        ; Buffer exhausted. A refill is a fixed-size chunk, not a source line, so only the
        ; very first read may move lineno (0 -> 1); later line changes are counted on CR.
        cmp     lineno, 0
        jne     gncRefill
        inc     lineno
gncRefill:
        mov     bufferSize, fread(hScoureFile, addr lpbuffer, BUFLEN)
        test    eax, eax
        jz      gncEof                          ; nothing more to read
        mov     esi, offset lpbuffer

gncFetch:
        movzx   eax, byte ptr [esi]
        mov     [edi], al                       ; record the character in the token text
        cmp     al, 0dh
        jne     gncAdvance
        inc     lineno                          ; CR ends a source line
gncAdvance:
        inc     esi
        inc     edi
        ret

gncEof:
        xor     eax, eax                        ; 0 = EOF flag (its class in Classify is 5)
        inc     edi                             ; callers undo this exactly as for a real char
        ret
getNextChar ENDP

;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;                                       ungetNextChar   
; because the read position lives in esi, pushing a character back is done by stepping esi back one byte
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; ungetNextChar - push the most recently read character back.
;   In/Out: esi = read position inside lpbuffer, moved back by one byte.
;   getNextChar bumps lineno whenever it returns a CR. If the character pushed back is a
;   CR, that bump is undone here; otherwise the token that ended just before the CR would
;   be reported on the following line, and the CR would be counted again when re-read.
ungetNextChar PROC
        dec     esi
        cmp     byte ptr [esi], 0dh
        jne     ungetDone
        dec     lineno
ungetDone:
        ret
ungetNextChar ENDP

;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;                                       fillTab    
; Fills the column selector of the DFA table (Classify). The rows of the DFA are not stored
; as data: they are the State labels inside the getToken procedure, reached through a jump
; table. Jump tables are a common assembly-language idiom that high-level languages do not expose directly.
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; FillTab - initialise the character-class table Classify.
;   Classify is pre-filled with 17 ("other"); this routine assigns the remaining classes:
;     0 digit    1 letter   2 ':'     3 blank/TAB/CR/LF   4 '{'    5 NUL (EOF flag)
;     6 '='      7 '<'      8 '+'     9 '-'              10 '*'   11 '/'
;    12 '%'     13 '('     14 ')'    15 ';'              16 '}'
;   Keeps: all registers. esi is the only register used, and it is saved because esi is
;   callee-saved under stdcall and doubles as the scanner's read pointer elsewhere.
FillTab proc uses esi
        mov     esi, '0'
ftDigit:
        mov     byte ptr Classify[esi], 0
        inc     esi
        cmp     esi, '9'
        jbe     ftDigit

        mov     esi, 'A'
ftUpper:
        mov     byte ptr Classify[esi], 1
        inc     esi
        cmp     esi, 'Z'
        jbe     ftUpper

        mov     esi, 'a'
ftLower:
        mov     byte ptr Classify[esi], 1
        inc     esi
        cmp     esi, 'z'
        jbe     ftLower

        ; characters that need no loop
        mov     byte ptr Classify[':'], 2
        mov     byte ptr Classify[' '], 3       ; white space: blank, TAB, CR, LF
        mov     byte ptr Classify[09h], 3
        mov     byte ptr Classify[0dh], 3
        mov     byte ptr Classify[0ah], 3
        mov     byte ptr Classify['{'], 4       ; comment start
        mov     byte ptr Classify[0], 5         ; getNextChar returns 0 at end of file
        mov     byte ptr Classify['='], 6
        mov     byte ptr Classify['<'], 7
        mov     byte ptr Classify['+'], 8
        mov     byte ptr Classify['-'], 9
        mov     byte ptr Classify['*'], 10
        mov     byte ptr Classify['/'], 11
        mov     byte ptr Classify['%'], 12
        mov     byte ptr Classify[28h], 13      ; left parenthesis
        mov     byte ptr Classify[29h], 14      ; right parenthesis
        mov     byte ptr Classify[';'], 15
        mov     byte ptr Classify['}'], 16      ; comment end
        ret
FillTab endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;                                       hashLookup
; edx holds the hash value (s[1]+s[2]) mod 19
; cmpsb is then used to check whether the token really is the keyword stored in that slot.
; If the reservedWordTab slot is 0, no keyword hashes there and the token is an identifier (see hashTab).
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; hashLookup - decide whether the text in tokenString is a keyword or an identifier.
;   In:    edi = address of the NUL that terminates the token inside tokenString
;   Out:   cl  = the keyword's token code from hashTab, or TID
;   Keeps: eax, ebx, edx, esi, edi (the upper bits of ecx are not meaningful on return)
;   The hash (s[1]+s[2]) mod 19 selects one candidate keyword. The candidate is accepted
;   only if it equals the token INCLUDING the terminating NUL, so neither a prefix of a
;   keyword ("whi") nor a longer word ("whilex") is mistaken for the keyword.
hashLookup proc uses eax ebx edx edi esi
        mov     ecx, edi
        mov     edi, offset tokenString
        sub     ecx, edi                        ; ecx = token length
        inc     ecx                             ; ... plus the terminator

        movzx   eax, byte ptr [edi+1]
        add     al, [edi+2]                     ; 8-bit sum, as the table was built for
        xor     edx, edx
        mov     ebx, 19
        div     ebx                             ; edx = hash slot

        mov     esi, reservedWordTab[edx*4]
        test    esi, esi
        jz      hlIdentifier                    ; empty slot: cannot be a keyword
        cld
        repe    cmpsb                           ; stops at the first differing byte
        jne     hlIdentifier
        mov     cl, hashTab[edx]
        ret
hlIdentifier:
        mov     cl, TID
        ret
hashLookup endp

;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;     BTOACS 
; eax save the coming change value                                                         
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; BTOACS - print an unsigned 32-bit number in decimal, followed by ": ".
;   In:    eax = value to print
;   Keeps: ecx, edx, esi, edi (eax is modified)
;   The low-order digits are peeled off into ASCVALUE1 (least significant first); the
;   leading digit goes straight to ASCVALUE2, and the peeled digits are then appended to
;   it in reverse so the text reads normally. The result is written with StdOut.
BTOACS proc uses ecx esi edx edi
        mov     ecx, 10
        lea     esi, ASCVALUE1
        lea     edi, ASCVALUE2
        jmp     btoCheck

btoPeel:
        xor     edx, edx
        div     ecx                             ; eax = eax / 10, edx = digit
        or      dl, 30h
        mov     [esi], dl
        inc     esi
btoCheck:
        cmp     eax, ecx
        jae     btoPeel                         ; more than one digit still left in eax

        or      al, 30h                         ; leading digit
        mov     [edi], al
        inc     edi

        mov     ecx, esi
        sub     ecx, offset ASCVALUE1           ; number of peeled digits
        jz      btoEmit
btoCopy:
        dec     esi
        mov     al, [esi]
        mov     [edi], al
        inc     edi
        dec     ecx
        jnz     btoCopy

btoEmit:
        mov     byte ptr [edi], ':'
        mov     byte ptr [edi+1], ' '
        mov     byte ptr [edi+2], 0
        invoke  StdOut, addr ASCVALUE2
        ret
BTOACS endp


;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;                                       printfToken  
; same as printToken in the C version of Tiny, but a jump table is used instead of the switch statement
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; printfToken - print a description of one token followed by CR LF.
;   In:    cl = token code (TENDFILE .. TDO); tokenString = text of the token
;   Keeps: ebx, ecx
;   The token code indexes TokenCodeTab, a jump table that takes the place of a switch
;   statement. Each case only picks its text and then joins one of the shared tails.
;   A code outside the table (TNONE or above) is reported as an error rather than being
;   used as a jump index.
printfToken proc uses ebx ecx

        movzx   ebx, cl
        cmp     ebx, TNONE
        jae     PErrorCode
        jmp     TokenCodeTab[ebx*4]

; One entry per token code, in enum order. Each row is its own directive, so no line
; continuation is needed.
TokenCodeTab dword PENDFILECode, PErrorCode
             dword PReservedWordCode, PReservedWordCode, PReservedWordCode, PReservedWordCode
             dword PReservedWordCode, PReservedWordCode, PReservedWordCode, PReservedWordCode
             dword PIDCode, PNUMCode, PASSIGNCode, PEQCode, PLTCode, PPLUSCode, PMINUSCode
             dword PTIMESCode, POVERCode, PMODCode, PLPARENCode, PRPARENCode, PSEMICode
             dword PReservedWordCode, PReservedWordCode

; ---- cases that print a label followed by the token text ----
PErrorCode:
        mov     ebx, chr$('ERROR: ')
        jmp     PrintLabelAndText
PReservedWordCode:
        mov     ebx, chr$('reserved word: ')
        jmp     PrintLabelAndText
PNUMCode:
        mov     ebx, chr$('NUM, val=')
        jmp     PrintLabelAndText
PIDCode:
        mov     ebx, chr$('ID, name=')
        jmp     PrintLabelAndText

; ---- cases that print a fixed text only ----
PASSIGNCode:
        mov     ebx, chr$(':=')
        jmp     PrintLabelOnly
PLTCode:
        mov     ebx, chr$('<')
        jmp     PrintLabelOnly
PEQCode:
        mov     ebx, chr$('=')
        jmp     PrintLabelOnly
PLPARENCode:
        mov     ebx, chr$(40)                   ; character code of the left parenthesis
        jmp     PrintLabelOnly
PRPARENCode:
        mov     ebx, chr$(41)                   ; character code of the right parenthesis
        jmp     PrintLabelOnly
PSEMICode:
        mov     ebx, chr$(';')
        jmp     PrintLabelOnly
PPLUSCode:
        mov     ebx, chr$('+')
        jmp     PrintLabelOnly
PMINUSCode:
        mov     ebx, chr$('-')
        jmp     PrintLabelOnly
PTIMESCode:
        mov     ebx, chr$('*')
        jmp     PrintLabelOnly
POVERCode:
        mov     ebx, chr$('/')
        jmp     PrintLabelOnly
PMODCode:
        mov     ebx, chr$('%')
        jmp     PrintLabelOnly
PENDFILECode:
        mov     ebx, chr$('EOF')
        jmp     PrintLabelOnly

; ---- shared tails ----
PrintLabelAndText:
        invoke  StdOut, ebx
        invoke  StdOut, addr tokenString
        jmp     PrintLineEnd
PrintLabelOnly:
        invoke  StdOut, ebx
PrintLineEnd:
        invoke  StdOut, addr CRLF
        ret
printfToken endp

;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;                                      getToken 
; the getToken procedure; in assembly language, using tables instead of many conditional
; statements is the more natural choice
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

; getToken - scan the next token from the source file (table-driven DFA).
;   In:    esi = read position in lpbuffer, edi = offset tokenString
;   Out:   cl  = token code; tokenString = NUL-terminated token text
;   Keeps: eax, ebx
;   Each character is mapped to a class through Classify (xlat); State0Tab then selects
;   the next state for that class and TokenTab supplies the token code of the
;   single-character tokens. Character classes are the ones assigned by FillTab:
;     0 digit, 1 letter, 2 ':', 3 white space, 4 '{', 5 end of file, 6 '=', ... 16 '}', 17 other
;   Token text that does not fit into tokenString is dropped (the token is truncated to
;   MAXTOKENLEN characters) instead of being written past the end of the buffer.
getToken proc uses ebx eax
        xor     eax, eax
        xor     ecx, ecx
        lea     ebx, Classify                   ; xlat base

; ---- State0: start of a token ----
State0:
        invoke  getNextChar
        xlat    Classify                        ; al = class of the character
        mov     cl, TokenTab[eax]
        cmp     al, 3
        jb      S0next
        cmp     al, 5
        ja      S0next
        dec     edi                             ; classes 3..5 are not part of any token text
S0next:
        jmp     State0Tab[eax*4]

; Token code per character class (TNONE: decided by a later state). Each table row is its
; own directive, so no line continuation is needed.
TokenTab  TType  TNONE, TNONE, TNONE, TNONE, TNONE, TENDFILE, TEQ, TLT
          TType  TPLUS, TMINUS, TTIMES, TOVER, TMOD, TLPAREN, TRPAREN, TSEMI
          TType  TERROR, TERROR

; Next state per character class
State0Tab dword State3, State4, State2, State0, State1, State5, State5, State5
          dword State5, State5, State5, State5, State5, State5, State5, State5
          dword State5, State5

; ---- State1: inside a { comment } ----
State1:
        invoke  getNextChar
        dec     edi                             ; comment text is never stored
        xlat
        cmp     al, 16                          ; '}' closes the comment
        je      State0
        cmp     al, 5                           ; end of file inside the comment:
        jne     State1                          ; report EOF instead of looping forever
        mov     cl, TENDFILE
        jmp     State5

; ---- State2: ':' seen, '=' expected ----
State2:
        invoke  getNextChar
        xlat
        cmp     al, 6
        jne     S2_NEQ
        mov     cl, TASSIGN
        jmp     State5
S2_NEQ:
        invoke  ungetNextChar
        dec     edi
        mov     cl, TERROR
        jmp     State5

; ---- State3: inside a number ----
State3:
        invoke  getNextChar
        xlat
        test    al, al                          ; class 0 = digit
        jnz     S3done
        cmp     edi, offset tokenString + MAXTOKENLEN
        jbe     State3
        dec     edi                             ; buffer full: drop this digit
        jmp     State3
S3done:
        invoke  ungetNextChar
        dec     edi
        mov     cl, TNUM
        jmp     State5

; ---- State4: inside an identifier or keyword ----
State4:
        invoke  getNextChar
        xlat
        cmp     al, 1                           ; class 1 = letter
        jne     S4done
        cmp     edi, offset tokenString + MAXTOKENLEN
        jbe     State4
        dec     edi                             ; buffer full: drop this letter
        jmp     State4
S4done:
        invoke  ungetNextChar
        dec     edi
        mov     cl, TID
        ; falls through into State5

; ---- State5: token complete ----
State5:
        mov     byte ptr [edi], 0               ; terminate the token text
        .if cl == TID
                invoke  hashLookup              ; keyword or plain identifier?
        .endif
        if TraceScan
                push    ecx                     ; the calls below do not keep ecx
                invoke  StdOut, chr$(09h)
                mov     eax, lineno
                invoke  BTOACS
                pop     ecx
                invoke  printfToken
        endif
        ret
getToken endp
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;                                       start                                               
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

; start - program entry point.
;   Builds the character-class table, opens the source file (the fopen_r macro reports the
;   failure and exits if it cannot be opened), then calls getToken until it returns
;   TENDFILE. The file handle is closed before the process exits.
;   Register roles for the scanner: esi = read position in lpbuffer,
;   edi = write position in tokenString (reset before every token).
start proc
        invoke  FillTab
        mov     esi, offset lpbuffer
        mov     hScoureFile, fopen_r(filename)
scanNext:
        mov     edi, offset tokenString
        invoke  getToken
        cmp     cl, TENDFILE
        jne     scanNext
        invoke  CloseHandle, hScoureFile
        invoke  ExitProcess, 0
start endp

;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
;                                                                                                  
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

end start


:make
rem Build step of the .bat/.asm polyglot: the assembler stops reading at "end start",
rem and cmd.exe jumps here through the "goto make" line at the top of the file.

set drv=DFA

rem Tool paths use backslashes; a leading "/" is taken by cmd.exe as a switch character.
\masm32\bin\ml  /c /coff %drv%.bat
if errorlevel 1 goto finish
\masm32\bin\Link /SUBSYSTEM:CONSOLE %drv%.obj

del %drv%.obj

:finish
echo.
pause

 

 

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;宏定义

comment * -----------------------------------------------------------------
        Preprocessor code for high level language simulation in MASM32

                          Updated 22th 6 2006
         ---------------------------------------------------------------- *

 ; reparg - macro function that lets a quoted string literal be passed where an address
 ; is expected.
 ;   arg begins with a double quote: the literal is emitted into .data with a terminating 0
 ;                                   and <ADDR label> is returned
 ;   otherwise:                      arg is returned unchanged
 ; Note: the helper text macro "quot" is not declared LOCAL, so it remains defined after
 ; the expansion.
 reparg MACRO arg
      LOCAL nustr
        quot SUBSTR <arg>,1,1
      IFIDN quot,<">            ;; if 1st char = "
        .data
          nustr db arg,0        ;; write arg to .DATA section
        .code
        EXITM <ADDR nustr>      ;; append name to ADDR operator
      ELSE
        EXITM <arg>             ;; else return arg
      ENDIF
    ENDM

  ; -------------------------------------------------------------------------
  ; open an existing file with read-only access and return the file handle
  ; -------------------------------------------------------------------------
    fopen_r MACRO filename
    LOCAL ErrorMsg
    .data
 ErrorMsg db "Could not open the file", 0dh, 0ah, 0
    .code
      invoke CreateFile, addr filename, GENERIC_READ ,
                        NULL,NULL,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,NULL
      .if eax == INVALID_HANDLE_VALUE
  invoke MessageBox, NULL, addr ErrorMsg, NULL, MB_OK OR MB_ICONEXCLAMATION
  invoke ExitProcess, 0
      .endif
      EXITM <eax>       ;; return file handle
    ENDM


  ; ------------------------------------------------
  ; read data from an open file into a memory buffer
  ; ------------------------------------------------
    fread MACRO hFile,buffer,bcnt
      LOCAL var
      .data?
        var dd ?
      .code
      invoke ReadFile,hFile,buffer,bcnt,ADDR var,NULL
      mov eax, var
      EXITM <eax>       ;; return bytes read
    ENDM

 ; ------------------------------------------------
  ; define an enum type: enumname becomes the type name, the remaining arguments its members
  ; ------------------------------------------------
    enum MACRO enumname,parmlist:VARARG
 LOCAL count
 count = 0
 enumname typedef byte
 FOR parm, <parmlist>
  parm = count
  count = count + 1
 ENDM
    ENDM

; ------------------------------------------------
  ; define C type
  ; ------------------------------------------------
    CTypeD MACRO
 integer typedef word
 unsigned typedef word
 ushort typedef word
 long typedef dword
 ulong typedef dword
 char typedef byte
    ENDM

    chr$ MACRO any_text:VARARG
        LOCAL txtname
        .data
          txtname db any_text,0
        .code
        EXITM <OFFSET txtname>
      ENDM

      ; len - macro function: call szLen on lpString and return <eax>.
      ;   lpString is passed through reparg, so a quoted literal may be given directly.
      ;   Presumably szLen leaves the string length in eax - confirm against its documentation.
      len MACRO lpString
        invoke szLen,reparg(lpString)
        EXITM <eax>
      ENDM

      ustr$ MACRO DDvalue   ;; unsigned integer from string
        LOCAL rvstring
        .data
          rvstring db 20 dup (0)
        align 4
        .code
        ;; invoke dwtoa,DDvalue,ADDR rvstring
        invoke crt__ultoa,DDvalue,ADDR rvstring,10
        EXITM <OFFSET rvstring>
      ENDM

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;测试文件 文件名test.tny

{ Sample program
  In Tiny language –
  Computes factorial
}
read x{ input an integer }
if 0 < x then { don’t compute if x <= 0 }
 fact := 1;
 while 0 < x do
  x := x%10 ;
  fact := fact +1
  end
  write fact { output the D-bit of x }
 end
end

评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值