TINY编译器的词法分析程序在scan.c文件中。
程序目的是扫描程序,输出每行每个token的type和相关信息。
1. 代码分析
代码主要涉及的类型是TokenType,定义如下:
<span style="font-family:Microsoft YaHei;font-size:14px;">typedef enum
/* Token kinds; this is the return type of getToken. */
/* book-keeping tokens: ENDFILE is returned at end of input,
 * ERROR for input that does not form a valid token */
{ENDFILE,ERROR,
/* reserved words (spellings are listed in the reservedWords table) */
IF,THEN,ELSE,END,REPEAT,UNTIL,READ,WRITE,
/* multicharacter tokens: ID is a run of letters, NUM is a run of digits */
ID,NUM,
/* special symbols, in order:  :=  =  <  +  -  *  /  (  )  ; */
ASSIGN,EQ,LT,PLUS,MINUS,TIMES,OVER,LPAREN,RPAREN,SEMI
} TokenType;</span>
程序的核心函数是:TokenType getToken(void),实现了词法分析的状态机,这个函数逐行扫描源程序,输出token type。
这个函数的辅助函数有:
static int getNextChar(void) :获取下一个字符,如果当前行已经到最后,返回下一行的第一个字符。
static void ungetNextChar(void): 回退一个字符,第一个字符不会调用这个函数。
static TokenType reservedLookup (char * s):从保留token中查找扫描得到的token是否是保留字。保留字如下:
<span style="font-family:Microsoft YaHei;font-size:14px;">static struct
/* Reserved-word table: each entry pairs a keyword's spelling with the
 * token type the scanner reports for it. */
{ char* str; /* keyword text */
TokenType tok; /* token type for that keyword */
} reservedWords[MAXRESERVED]
/* MAXRESERVED is defined outside this excerpt; eight entries are
 * initialized below, one per reserved word of the language. */
= {{"if",IF},{"then",THEN},{"else",ELSE},{"end",END},
{"repeat",REPEAT},{"until",UNTIL},{"read",READ},
{"write",WRITE}};</span>
其他变量意义如下:
char tokenString[MAXTOKENLEN+1]; token字符串,最多支持40个字节
static char lineBuf[BUFLEN]; 读取每行源代码的buffer
static int linepos = 0; 每行扫描到的位置,和lineBuf配合使用
static int bufsize = 0; 表示lineBuf的长度
static int EOF_flag = FALSE; 源代码文件终止标记
最后附词法分析核心函数getToken的源代码:
<span style="font-family:Microsoft YaHei;font-size:14px;">/****************************************/
/* the primary function of the scanner */
/****************************************/
/* function getToken returns the
* next token in source file
*/
TokenType getToken(void)
{ /* index for storing into tokenString */
int tokenStringIndex = 0;
/* holds current token to be returned */
TokenType currentToken;
/* current state - always begins at START */
StateType state = START;
/* flag to indicate save to tokenString */
int save;
while (state != DONE)
{ int c = getNextChar();
save = TRUE;
switch (state)
{ case START:
if (isdigit(c))
state = INNUM;
else if (isalpha(c))
state = INID;
else if (c == ':')
state = INASSIGN;
else if ((c == ' ') || (c == '\t') || (c == '\n'))
save = FALSE;
else if (c == '{')
{ save = FALSE;
state = INCOMMENT;
}
else
{ state = DONE;
switch (c)
{ case EOF:
save = FALSE;
currentToken = ENDFILE;
break;
case '=':
currentToken = EQ;
break;
case '<':
currentToken = LT;
break;
case '+':
currentToken = PLUS;
break;
case '-':
currentToken = MINUS;
break;
case '*':
currentToken = TIMES;
break;
case '/':
currentToken = OVER;
break;
case '(':
currentToken = LPAREN;
break;
case ')':
currentToken = RPAREN;
break;
case ';':
currentToken = SEMI;
break;
default:
currentToken = ERROR;
break;
}
}
break;
case INCOMMENT:
save = FALSE;
if (c == EOF)
{ state = DONE;
currentToken = ENDFILE;
}
else if (c == '}') state = START;
break;
case INASSIGN:
state = DONE;
if (c == '=')
currentToken = ASSIGN;
else
{ /* backup in the input */
ungetNextChar();
save = FALSE;
currentToken = ERROR;
}
break;
case INNUM:
if (!isdigit(c))
{ /* backup in the input */
ungetNextChar();
save = FALSE;
state = DONE;
currentToken = NUM;
}
break;
case INID:
if (!isalpha(c))
{ /* backup in the input */
ungetNextChar();
save = FALSE;
state = DONE;
currentToken = ID;
}
break;
case DONE:
default: /* should never happen */
fprintf(listing,"Scanner Bug: state= %d\n",state);
state = DONE;
currentToken = ERROR;
break;
}
if ((save) && (tokenStringIndex <= MAXTOKENLEN))
tokenString[tokenStringIndex++] = (char) c;
if (state == DONE)
{ tokenString[tokenStringIndex] = '\0';
if (currentToken == ID)
currentToken = reservedLookup(tokenString);
}
}
if (TraceScan) {
fprintf(listing,"\t%d: ",lineno);
printToken(currentToken,tokenString);
}
return currentToken;
} /* end getToken */
</span>
2. 程序运行结果
打开程序中的如下开关:
#define NO_PARSE TRUE
int EchoSource = TRUE;
int TraceScan = TRUE;
运行程序后,结果如下:
附:所使用的TINY源代码如下:
{ Sample program
in TINY language -
computes factorial
}
read x; { input an integer }
if 0 < x then { don't compute if x <= 0 }
fact := 1;
repeat
fact := fact * x;
x := x - 1
until x = 0;
write fact { output factorial of x }
end