这是笔者按照课本中的思路实现的词法分析程序,具体细节和意义请参考
《编译技术》张莉等著.–北京:高等教育出版社,2016.9.ISBN: 978-7-04-046317-0
注:本程序使用文件读入的方法进行字符的读取,每次识别一个词,返回值为该词的类型,类型码在注释中给出。
代码如下:
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
#define BEGINSY 1 //begin
#define ENDSY 2 //end
#define IFSY 3 //if
#define THENSY 4 //then
#define ELSE 5 //else
#define IDSY 20 //标识符
#define INTSY 21 //整常数
#define PLUSSY 22 //+
#define MINUSSY 23 //-
#define STARSY 24 // *
#define DIVISY 25 // /
#define LPARSY 26 //(
#define RPARSY 27 //)
#define COMMASY 28 //,
#define SEMISY 29 //;
#define COLONSY 30 //:
#define ASSIGNSY 31 //:=
#define EQUSY 32 //=
*/
/* Most recently read input character. Declared as int rather than char so it
 * can hold every value fgetc() returns, including EOF, without EOF colliding
 * with a real 0xFF byte (or never matching where plain char is unsigned). */
int CHAR ;

/* Text of the token currently being assembled; cleared to all '\0' by
 * clearToken() and extended one character at a time by catToken(). */
char TOKEN[50] ;

/* Token type codes. The variable SYMBOL holds the code of the token most
 * recently recognized by getsym(). */
enum SYMBOL{
    BEGINSY = 1 ,   /* BEGIN */
    ENDSY = 2 ,     /* END */
    IFSY = 3 ,      /* IF */
    THENSY = 4 ,    /* THEN */
    ELSE = 5 ,      /* ELSE */
    IDSY = 20 ,     /* identifier */
    INTSY = 21 ,    /* integer constant */
    PLUSSY = 22 ,   /* + */
    MINUSSY = 23 ,  /* - */
    STARSY = 24 ,   /* * */
    DIVISY = 25 ,   /* / */
    LPARSY = 26 ,   /* ( */
    RPARSY = 27 ,   /* ) */
    COMMASY = 28 ,  /* , */
    SEMISY = 29 ,   /* ; */
    COLONSY = 30 ,  /* : */
    ASSIGNSY = 31 , /* := */
    EQUSY = 32      /* = */
} SYMBOL ;

/* Value of the integer constant most recently recognized by getsym(). */
int NUM ;
/* Returns 10 raised to the power x.
 * For x <= 0 the result is 1. */
int topow(int x){
    int power = 1 ;
    while(x > 0){
        power *= 10 ;
        x-- ;
    }
    return power ;
}
/* Resets the global TOKEN buffer by writing '\0' into all 50 of its slots. */
void clearToken(){
    char *slot ;
    for(slot = TOKEN ; slot < TOKEN + 50 ; slot++){
        *slot = '\0' ;
    }
}
void retract(FILE* f){
if(!feof(f)){
fseek(f, -1, SEEK_CUR) ;
}
}
/* Appends the current character (global CHAR) to the end of the text held in
 * the global TOKEN buffer.
 *
 * The last slot of TOKEN is reserved for the '\0' terminator. When the buffer
 * is already full the character is dropped instead of being written past the
 * end of the array, so over-long lexemes are truncated. */
void catToken(){
    const int limit = (int)sizeof TOKEN - 1 ;
    int i = 0 ;
    while(i < limit && TOKEN[i] != '\0'){
        i++ ;
    }
    if(i >= limit){
        return ;
    }
    TOKEN[i] = CHAR ;
}
/* Converts the decimal digit string held in the global TOKEN buffer to an int.
 *
 * Returns the numeric value, or INT_MAX when the digit string denotes a value
 * too large for int (the result saturates rather than overflowing, since
 * signed overflow is undefined behaviour). An empty TOKEN yields 0.
 *
 * The digits are accumulated left to right in a single pass, so no separate
 * power-of-ten computation is needed per digit. */
int transNum(){
    int value = 0 ;
    int i ;
    for(i = 0 ; i < (int)sizeof TOKEN && TOKEN[i] != '\0' ; i++){
        int digit = TOKEN[i] - '0' ;
        /* value * 10 + digit would exceed INT_MAX: clamp. */
        if(value > (INT_MAX - digit) / 10){
            return INT_MAX ;
        }
        value = value * 10 + digit ;
    }
    return value ;
}
/* Case-sensitive string equality check.
 *
 * des, token: NUL-terminated strings to compare.
 * Returns 1 when both strings have the same length and contents, 0 otherwise. */
int compare(char des[], char token[]){
    if(strcmp(des, token) == 0){
        return 1 ;
    }
    return 0 ;
}
/* Looks up the text in the global TOKEN buffer in the reserved-word table.
 *
 * Returns the keyword's type code (BEGINSY, ENDSY, IFSY, THENSY or ELSE) when
 * TOKEN matches one of the upper-case keywords exactly, and 0 otherwise. */
int reserver(){
    static char words[][6] = {"BEGIN", "END", "IF", "THEN", "ELSE"} ;
    static const int codes[] = {BEGINSY, ENDSY, IFSY, THENSY, ELSE} ;
    const int count = (int)(sizeof codes / sizeof codes[0]) ;
    int k ;
    for(k = 0 ; k < count ; k++){
        if(compare(words[k], TOKEN) == 1){
            return codes[k] ;
        }
    }
    return 0 ;
}
/* Returns 1 when the current character (global CHAR) is a space, else 0. */
int isSpace(){
    return CHAR == ' ' ;
}
/* Returns 1 when the current character (global CHAR) is a line feed, else 0. */
int isNewline(){
    return CHAR == '\n' ;
}
/* Returns 1 when the current character (global CHAR) is a horizontal tab, else 0. */
int isTab(){
    return CHAR == '\t' ;
}
/* Returns 1 when the current character (global CHAR) lies in 'a'..'z' or
 * 'A'..'Z', else 0. */
int isLetter(){
    if(CHAR >= 'a' && CHAR <= 'z'){
        return 1 ;
    }
    if(CHAR >= 'A' && CHAR <= 'Z'){
        return 1 ;
    }
    return 0 ;
}
/* Returns 1 when the current character (global CHAR) lies in '0'..'9', else 0. */
int isDigit(){
    return ('0' <= CHAR && CHAR <= '9') ;
}
/* Returns 1 when the current character (global CHAR) is ':', else 0. */
int isColon(){
    return CHAR == ':' ;
}
/* Returns 1 when the current character (global CHAR) is '+', else 0. */
int isPlus(){
    return CHAR == '+' ;
}
/* Returns 1 when the current character (global CHAR) is '-', else 0. */
int isMinus(){
    return CHAR == '-' ;
}
/* Returns 1 when the current character (global CHAR) is '*', else 0. */
int isStar(){
    return CHAR == '*' ;
}
/* Returns 1 when the current character (global CHAR) is '(', else 0. */
int isLpar(){
    return CHAR == '(' ;
}
/* Returns 1 when the current character (global CHAR) is ')', else 0. */
int isRpar(){
    return CHAR == ')' ;
}
/* Returns 1 when the current character (global CHAR) is ',', else 0. */
int isComma(){
    return CHAR == ',' ;
}
/* Returns 1 when the current character (global CHAR) is ';', else 0. */
int isSemi(){
    return CHAR == ';' ;
}
/* Returns 1 when the current character (global CHAR) is '/', else 0. */
int isDivi(){
    return CHAR == '/' ;
}
/* Returns 1 when the current character (global CHAR) is '=', else 0. */
int isEqu(){
    return CHAR == '=' ;
}
/* Reports an unrecognized input by printing a fixed message to standard output. */
void error(){
    static const char message[] = "NOT MATCHED!\n" ;
    printf("%s", message) ;
}
int getsym(FILE* f){
clearToken() ;
while(isSpace() || isNewline() || isTab()){
CHAR = fgetc(f) ;
if(CHAR == EOF){
break ;
}
}
if(isLetter()){
while(isLetter() || isDigit()){
catToken() ;
CHAR = fgetc(f) ;
}
retract(f) ;
int resultValue = reserver() ;
if(resultValue == 0){
SYMBOL = IDSY ;
}
else{
SYMBOL = resultValue ;
}
}
else if(isDigit()){
while(isDigit()){
catToken() ;
CHAR = fgetc(f) ;
}
retract(f) ;
NUM = transNum() ;
SYMBOL = INTSY ;
}
else if(isColon()){
CHAR = fgetc(f) ;
if(isEqu()){
SYMBOL = ASSIGNSY ;
}
else{
retract(f) ;
}
SYMBOL = COLONSY ;
}
else if(isPlus()){
SYMBOL = PLUSSY ;
}
else if(isMinus()){
SYMBOL = MINUSSY ;
}
else if(isStar()){
SYMBOL = STARSY ;
}
else if(isLpar()){
SYMBOL = LPARSY ;
}
else if(isRpar()){
SYMBOL = RPARSY ;
}
else if(isComma()){
SYMBOL = COMMASY ;
}
else if(isSemi()){
SYMBOL = SEMISY ;
}
else if(isDivi()){
CHAR = getchar() ;
if(isStar()){
do{
do{
CHAR = fgetc(f) ;
}while(!isStar()) ;
do{
CHAR = fgetc(f) ;
if(isDivi()){
return 0 ;
}
}while(isStar()) ;
}while(!isStar()) ;
}
else{
retract(f) ;
SYMBOL = DIVISY ;
}
}
else{
error() ;
return 0 ;
}
return SYMBOL ;
}
/* Tokenizes the file "code.txt" in the current directory and prints the type
 * code returned by getsym() for each token, one per line.
 *
 * Returns 0 on success and EXIT_FAILURE when the file cannot be opened. */
int main()
{
    int ch ;
    FILE* f = fopen("code.txt", "r") ;
    if(!f){
        printf("No such file !\n") ;
        return EXIT_FAILURE ;
    }
    /* Read the first character of each token here; getsym() reads the rest.
     * Testing the fgetc() result (rather than feof() alone) also ends the
     * loop on a read error instead of spinning forever. */
    while((ch = fgetc(f)) != EOF){
        CHAR = ch ;
        printf("%d\n", getsym(f)) ;
    }
    fclose(f) ;
    return 0 ;
}
对于样例:
code.txt
BEGIN begin 2333 hhhh + - __
测试结果如下: