词法分析
词法规则
< identifier > ::= < letter >|< identifier >< letter >|< identifier >< digit >
< number > ::= < digit >|< number >< digit >
< letter > ::= a|b|…|z|A|B|…|Z
< digit > ::= 1|2|…|9|0
< singleword > ::= +|-|*|/|=|(|)|{|}|:|,|;|<|>|!
< doubleword > ::= >=|<=|!=|==|&&| ||
< comment_first > ::= /*
< comment_last > ::= */
保留字表: "if", "else", "for", "while", "do", "int", "read", "write", "call", "function"
源程序
Lexer.h
#include "stdio.h"
#include "ctype.h"
#include "stdlib.h"
#include "string.h"
/* Number of entries in keyword[]; midsearch() uses it as the table size. */
#define keywordSum 10

/*
 * Reserved words of the language.
 * The entries are kept in ascending strcmp() order because midsearch()
 * binary-searches this table.
 */
char *keyword[keywordSum] = { "call" , "do" , "else" , "for" , "function" , "if" , "int" , "read" , "while" , "write" };

/* Characters that TESTscan() always emits as a one-character token. */
char singleword[50 ] = "+-*(){};,:\"\'" ;

/* Characters that form a token on their own or together with a following '='. */
char doubleword[10 ] = "><=!" ;

/*
 * Paths of the scanner's input and output files.
 * Both are initialized here, so these are definitions; the `extern`
 * keyword is omitted because combining it with an initializer only
 * produces a compiler warning.
 */
char Scanin[300 ] = "F:\\Lexer\\in.txt" ;
char Scanout[300 ] = "F:\\Lexer\\out.txt" ;

/* Stream handles used by TESTscan(); only declared here, defined elsewhere. */
extern FILE *fin, *fout;

/* Looks word_temp up in a sorted table of keywordSum strings: 0 = found, 1 = not found. */
int midsearch(char **word_str, char word_temp[]);
/*
 * Lexical scanner: reads the source file named by Scanin and writes one
 * token per line to the file named by Scanout.
 *
 * Output line formats (fields are separated by a tab):
 *   reserved word       ->  <word>
 *   identifier          ->  ID    <name>
 *   unsigned integer    ->  NUM   <digits>
 *   operator/delimiter  ->  <symbol>  <symbol>
 *   illegal character   ->  ERROR <char>
 * Text between slash-star and star-slash is treated as a comment and
 * produces no output. Identifiers and numbers longer than 39 characters
 * are truncated to their first 39 characters.
 *
 * Returns 0 on success, 1 if the input file cannot be opened, 2 if the
 * output file cannot be created, and 3 if at least one illegal character
 * was encountered (scanning still continues to the end of the input).
 */
int TESTscan(){
    enum { TOKEN_CAP = 40 };    /* size of the token buffer, terminator included */
    char token[TOKEN_CAP];
    int ch;                     /* int, not char, so EOF stays distinct from every byte value */
    int es = 0 , j;

    if ( ( fin = fopen( Scanin, "r" ) ) == NULL ){
        printf ("\n打开词法分析输入文件出错!\n" );
        return (1 );
    }
    if ( ( fout = fopen( Scanout, "w" ) ) == NULL ){
        printf ("\n创建词法分析输出文件出错!\n" );
        fclose(fin);            /* do not leak the already-opened input stream */
        return (2 );
    }

    ch = getc(fin);
    for (;;){
        /* Skip blanks between tokens. */
        while ( ch == ' ' || ch == '\n' || ch == '\t' || ch == '\r' )
            ch = getc(fin);
        /* Trailing whitespace must end the scan, not be reported as an illegal character. */
        if ( ch == EOF )
            break;

        if ( isalpha (ch) ){
            /* Identifier or reserved word: letter followed by letters/digits. */
            j = 0;
            do {
                if ( j < TOKEN_CAP - 1 )    /* keep consuming, but never write past the buffer */
                    token[j++] = (char)ch;
                ch = getc(fin);
            }while ( isalnum (ch) );
            token[j] = '\0';
            if ( midsearch( keyword, token ) == 0 )
                fprintf ( fout, "%s\n" , token );
            else
                fprintf ( fout, "%s\t%s\n" , "ID" , token );
        }else if ( isdigit (ch) ){
            /* Unsigned integer literal. */
            j = 0;
            do {
                if ( j < TOKEN_CAP - 1 )
                    token[j++] = (char)ch;
                ch = getc(fin);
            }while ( isdigit (ch) );
            token[j] = '\0';
            fprintf ( fout, "%s\t%s\n" , "NUM" , token );
        }else if ( ch != '\0' && strchr ( singleword, ch ) != NULL ){
            /* The '\0' guard is needed because strchr() also matches the string terminator. */
            token[0] = (char)ch;
            token[1] = '\0';
            ch = getc(fin);
            fprintf ( fout, "%s\t%s\n" , token, token );
        }else if ( ch != '\0' && strchr ( doubleword, ch ) != NULL ){
            /* One of > < = !, optionally followed by '='. */
            token[0] = (char)ch;
            ch = getc(fin);
            if ( ch == '=' ){
                token[1] = '=';
                token[2] = '\0';
                ch = getc(fin);
            }else
                token[1] = '\0';
            fprintf ( fout, "%s\t%s\n" , token, token );
        }else if ( ch == '/' ){
            ch = getc(fin);
            if ( ch == '*' ){
                /* Comment: discard input up to and including the closing star-slash, or to EOF. */
                int prev, cur = getc(fin);
                do {
                    prev = cur;
                    cur = getc(fin);
                }while ( cur != EOF && !( prev == '*' && cur == '/' ) );
                ch = ( cur == EOF ) ? EOF : getc(fin);
            }else {
                /* Plain division sign; ch already holds the character after it. */
                token[0] = '/';
                token[1] = '\0';
                fprintf ( fout, "%s\t%s\n" , token, token );
            }
        }else if ( ch == '&' || ch == '|' ){
            /* & or |, doubled into && or || when the same character follows. */
            int first = ch;
            token[0] = (char)first;
            ch = getc(fin);
            if ( ch == first ){
                token[1] = (char)first;
                token[2] = '\0';
                ch = getc(fin);
            }else
                token[1] = '\0';
            fprintf ( fout, "%s\t%s\n" , token, token );
        }else {
            /* Anything else is illegal: report it, remember the failure, keep scanning. */
            token[0] = (char)ch;
            token[1] = '\0';
            ch = getc(fin);
            es = 3;
            fprintf ( fout, "%s\t%s\n" , "ERROR" , token );
        }
    }

    fclose(fin);
    fclose(fout);
    return (es);
}
/*
 * Binary search for word_temp in a table of keywordSum strings.
 *
 * word_str must hold keywordSum entries sorted in ascending strcmp() order.
 * Returns 0 when word_temp is present in the table and 1 when it is not.
 */
int midsearch(char **word_str, char word_temp[]){
    int low = 0;
    /* Last valid index: starting at keywordSum would read one element past the
       table for any word that sorts after the final entry. */
    int high = keywordSum - 1;

    while ( low <= high ){
        int mid = low + ( high - low ) / 2;
        int cmp = strcmp ( word_str[mid], word_temp );

        if ( cmp == 0 )
            return 0;
        if ( cmp > 0 )
            high = mid - 1;     /* table entry is larger: continue in the lower half */
        else
            low = mid + 1;      /* table entry is smaller: continue in the upper half */
    }
    return 1;
}
main.cpp
#include <stdio.h>   /* FILE and printf are used in this file */

/* Scanner entry point: returns 0 on success and a positive error code otherwise. */
extern int TESTscan();

/* The Scanin/Scanout path buffers are defined in Lexer.h, so they are not defined again here. */

/* Input and output streams opened and closed by TESTscan(); Lexer.h declares them extern. */
FILE *fin , *fout ;
/*
 * Program entry point: runs the scanner once and prints whether lexical
 * analysis succeeded or, on failure, the error code TESTscan() returned.
 * The process exit status is always 0.
 */
int main(){
    int status = TESTscan();

    if ( status <= 0 ){
        printf ("词法分析成功!\n" );
    }else {
        printf ("词法分析出错,编译停止!,错误代码:%d \n" , status);
    }
    return 0 ;
}
测试文件
in.txt
@#int main(){
int a = 1 ;
int b34 = 3 ;
if ( a == 1 && b != 0 || c >= 1 )
printf ("hello" );
write();
//there is nothing
printf ("world!" );
}
out.txt
ERROR @
ERROR #
int
ID main
( (
) )
{ {
int
ID a
NUM 1
; ;
int
ID b34
NUM 3
; ;
if
( (
ID a
== ==
NUM 1
&& &&
ID b
!= !=
NUM 0
|| ||
ID c
>= >=
NUM 1
) )
ID printf
( (
" "
ID hello
" "
) )
; ;
write
( (
) )
; ;
/ /
/ /
ID there
ID is
ID nothing
ID printf
( (
" "
ID world
! !
" "
) )
; ;
} }