Java词法分析器实现篇

最新推荐文章于 2022-09-20 08:27:28 发布

bitsjx

最新推荐文章于 2022-09-20 08:27:28 发布

阅读量5.5k

点赞数 1

分类专栏：C/C++/Java/汇编　文章标签：java path null interface include import

本文链接：https://blog.csdn.net/bitsjx/article/details/5665082

版权

C/C++/Java/汇编专栏收录该内容

12 篇文章 0 订阅

订阅专栏

/*
 * Lexical analyzer (scanner) for Java source text.
 *
 * The program asks for an input path and an output path on stdin, runs a
 * hand-written DFA over the input file and writes one line per recognised
 * token to the output file in the form
 *     <line.indexInLine><lexeme    classCode>    [class description]
 * A per-line token count is printed to stdout while scanning.
 *
 * Input is read through a 128-byte ring buffer that is refilled one half
 * (64 bytes) at a time, so that the DFA can push back a few characters
 * without re-reading the file.
 *
 * Known limitations (kept from the original design):
 *   - The unary/binary decision for '+' and '-' only looks at the single
 *     character immediately before the sign (digit => binary).
 *   - '?' is reported as "?:" if a ':' is found within the next 64 buffered
 *     characters; the ':' itself is then skipped. Nested ?: is not handled.
 *   - scanWord holds at most MAX_LENGTH-1 characters; longer lexemes wrap
 *     around and are reported truncated.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Buffer sizes
#define MAX_PATH 128
#define BUF_SIZE 128
#define HALF_BUF_SIZE 64
#define MAX_LENGTH 128

// Input and output file handles
FILE *inFile, *outFile;

// Input and output file paths
char inputPath[MAX_PATH] = {'\0'};
char outputPath[MAX_PATH] = {'\0'};

// Ring buffer holding the raw input characters
char inputBuf[BUF_SIZE] = {'\0'};

// Lexeme currently being scanned
char scanWord[MAX_LENGTH] = {'\0'};

// Class code of the last emitted token (e.g. "0x104")
char wordValue[6] = {'\0'};

// Human-readable class description of the last emitted token
char wordType[64] = {'\0'};

// Which half of inputBuf Scanner() must fill next (0 = first, 1 = second)
int numberOfBuf = 0;

// Tokens emitted on the current line
int numberOfWords = 0;

// Tokens emitted in total
int numberOfAllWords = 0;

// Current line number (1-based)
int lineNumber = 1;

// Line counter used for the final summary (printed as numberOfLines-1)
int numberOfLines = 1;

// Read position inside inputBuf
int scanCount = 0;

// Write position inside scanWord
int indexOfWord = 0;

// Current DFA state
int state = 0;

// 1 after the first half was filled last, 0 after the second half was filled
// last; prevents a half from being reloaded when the reader backs up to a
// half boundary and reads forward again.
int bufReadFlag = 0;

// Set to 1 once a "? ... :" pair has been detected by FindColon()
int colonTypeFlag = 0;

// Position inside inputBuf at which end-of-file was met, or -1 if the end of
// the input has not been reached yet. EOF cannot be stored in a char buffer
// without colliding with byte 0xFF, so it is tracked out of band.
static int eofIndex = -1;

// Keyword table; the longest entry is "synchronized" (12 characters)
char keyWords[50][13] = {
    "abstract", "boolean", "break", "byte", "case", "catch", "char", "class",
    "const", "continue", "default", "do", "double", "else", "extends", "false",
    "final", "finally", "float", "for", "goto", "if", "implements", "import",
    "instanceof", "int", "interface", "long", "native", "new", "null", "package",
    "private", "protected", "public", "return", "short", "static", "super", "switch",
    "synchronized", "this", "throw", "throws", "transient", "true", "try", "void",
    "volatile", "while"
};

/*
 * Fill one half of inputBuf from inFile.
 *
 * numberOfBuf selects the half (0 = first, 1 = second; any other value is a
 * no-op). scanCount is moved to the start of that half and bufReadFlag is
 * updated. If the file ends inside the half, eofIndex records where and the
 * remainder of the half is zero-filled so no stale data is left behind.
 */
void Scanner(void)
{
    int base;
    int index;
    bool atEnd = false;

    if (numberOfBuf == 0) {
        base = 0;
    } else if (numberOfBuf == 1) {
        base = HALF_BUF_SIZE;
    } else {
        return;
    }

    for (index = 0; index < HALF_BUF_SIZE; index++) {
        // fgetc returns int so that EOF is distinguishable from every byte
        int inChar = atEnd ? EOF : fgetc(inFile);

        if (inChar == EOF) {
            if (!atEnd) {
                eofIndex = base + index;
                atEnd = true;
            }
            inputBuf[base + index] = '\0';
        } else {
            inputBuf[base + index] = (char)inChar;
        }
    }

    scanCount = base;
    bufReadFlag = (numberOfBuf == 0) ? 1 : 0;
}

/*
 * Read the next character from the ring buffer, refilling a half when the
 * reader enters it for the first time.
 *
 * The character is also appended to scanWord, so that when an accepting
 * state is reached scanWord holds the lexeme.
 *
 * Returns the character as a non-negative int, or EOF at end of input.
 */
int ReadChar(void)
{
    int inChar;

    // A half is only loaded if it has not already been loaded for this pass
    if (scanCount == 0 && bufReadFlag == 0) {
        numberOfBuf = 0;
        Scanner();
    }
    if (scanCount == HALF_BUF_SIZE && bufReadFlag == 1) {
        numberOfBuf = 1;
        Scanner();
    }

    if (scanCount == eofIndex) {
        inChar = EOF;
    } else {
        inChar = (unsigned char)inputBuf[scanCount];
    }
    scanCount = (scanCount + 1) % BUF_SIZE;

    scanWord[indexOfWord] = (inChar == EOF) ? '\0' : (char)inChar;
    indexOfWord = (indexOfWord + 1) % MAX_LENGTH;

    return inChar;
}

/*
 * Push back the most recently read character: step the buffer position and
 * the scanWord position back by one and re-terminate scanWord.
 */
void UntRead(void)
{
    scanCount--;
    indexOfWord--;
    if (scanCount < 0) {
        scanCount = BUF_SIZE - 1;
    }
    if (indexOfWord < 0) {
        indexOfWord = 0;
    }
    scanWord[indexOfWord] = '\0';
}

/* Clear scanWord so that the next lexeme starts from an empty buffer. */
void ReInit(void)
{
    memset(scanWord, '\0', sizeof(scanWord));
    indexOfWord = 0;
}

/*
 * Look ahead for the ':' that pairs with a '?'.
 *
 * Scans at most HALF_BUF_SIZE buffered characters starting at the current
 * read position, wrapping around the ring buffer and stopping at end of
 * input. Only suitable for non-nested conditional expressions.
 *
 * Returns true (and sets colonTypeFlag) if a ':' was found.
 */
bool FindColon(void)
{
    int index;

    for (index = 0; index < HALF_BUF_SIZE; index++) {
        // Wrap: scanCount + index may exceed the buffer size
        int pos = (scanCount + index) % BUF_SIZE;

        if (pos == eofIndex) {
            break;
        }
        if (inputBuf[pos] == ':') {
            colonTypeFlag = 1;
            return true;
        }
    }
    return false;
}

/*
 * Emit one token to outFile and reset scanWord.
 *
 * index is the token class (1 = error, 2 = comment, 4 = keyword,
 * 5 = identifier, 6 = boolean, 7 = char, 8 = integer, 9 = float,
 * 10 = string, 11..27 = operators and delimiters). For comments and the
 * conditional operator the lexeme is replaced by a fixed label. An unknown
 * index leaves wordValue/wordType at their previous contents.
 */
void outputWordStream(int index)
{
    static const struct {
        const char *value;
        const char *type;
    } classes[] = {
        /*  0 */ { NULL, NULL },
        /*  1 */ { "0x100", "错误的单词" },
        /*  2 */ { "0x101", "注释" },
        /*  3 */ { NULL, NULL },
        /*  4 */ { "0x103", "关键字" },
        /*  5 */ { "0x104", "标识符" },
        /*  6 */ { "0x105", "布尔型" },
        /*  7 */ { "0x106", "字符型" },
        /*  8 */ { "0x107", "整型" },
        /*  9 */ { "0x108", "浮点型" },
        /* 10 */ { "0x109", "字符串" },
        /* 11 */ { "0x110", "=、+=、-=、*=、/=、%=、&=、^=、|=、>>=、<<=、>>>=" },
        /* 12 */ { "0x111", "?:" },
        /* 13 */ { "0x112", "||" },
        /* 14 */ { "0x113", "&&" },
        /* 15 */ { "0x114", "|" },
        /* 16 */ { "0x115", "^" },
        /* 17 */ { "0x116", "&" },
        /* 18 */ { "0x117", "==、!=" },
        /* 19 */ { "0x118", "<、>、<=、>=" },
        /* 20 */ { "0x119", "<<、>>、>>>" },
        /* 21 */ { "0x11a", "+、-" },
        /* 22 */ { "0x11b", "*、/、%" },
        /* 23 */ { "0x11c", "++、--、+(正)、–(负)、!、~" },
        /* 24 */ { "0x11d", "[]、()、." },
        /* 25 */ { "0x120", "," },
        /* 26 */ { "0x121", "{}" },
        /* 27 */ { "0x122", ";" }
    };
    const int count = (int)(sizeof classes / sizeof classes[0]);

    if (index == 2) {
        strcpy(scanWord, "注释部分");
    } else if (index == 12) {
        strcpy(scanWord, "?:");
    }

    if (index > 0 && index < count && classes[index].value != NULL) {
        strcpy(wordValue, classes[index].value);
        strcpy(wordType, classes[index].type);
    }

    numberOfWords++;
    numberOfAllWords++;

    // Write the token record to the output file
    fprintf(outFile, "<%d.%d><%s\t\t%s>\t\t\t【%s】\n",
            lineNumber, numberOfWords, scanWord, wordValue, wordType);
    ReInit();
}

/*
 * Classify the word in scanWord.
 *
 * Returns 6 for a boolean literal (true/false, upper-case spellings are
 * accepted as well), 4 for a keyword, 5 for an identifier.
 */
int JudgeStringType(void)
{
    const int count = (int)(sizeof keyWords / sizeof keyWords[0]);
    int index;

    if (strcmp(scanWord, "true") == 0 || strcmp(scanWord, "false") == 0 ||
        strcmp(scanWord, "TRUE") == 0 || strcmp(scanWord, "FALSE") == 0) {
        return 6;
    }
    for (index = 0; index < count; index++) {
        if (strcmp(scanWord, keyWords[index]) == 0) {
            return 4;
        }
    }
    return 5;
}

/*
 * Classify the delimiter in scanWord.
 *
 * Returns 26 for braces, 24 for brackets, parentheses and '.', 25 for ','
 * and 27 for anything else (';').
 */
int JudgeDelimeterType(void)
{
    if (strcmp(scanWord, "{") == 0 || strcmp(scanWord, "}") == 0) {
        return 26;
    }
    if (strcmp(scanWord, "[") == 0 || strcmp(scanWord, "]") == 0 ||
        strcmp(scanWord, "(") == 0 || strcmp(scanWord, ")") == 0 ||
        strcmp(scanWord, ".") == 0) {
        return 24;
    }
    if (strcmp(scanWord, ",") == 0) {
        return 25;
    }
    return 27;
}

/* Character-class helpers; explicit ranges keep them locale-independent. */
static bool IsDigit(int c)
{
    return c >= '0' && c <= '9';
}

static bool IsOctalDigit(int c)
{
    return c >= '0' && c <= '7';
}

static bool IsHexDigit(int c)
{
    return IsDigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
}

static bool IsIdentStart(int c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '$' || c == '_';
}

static bool IsIdentPart(int c)
{
    return IsIdentStart(c) || IsDigit(c);
}

/* Transition out of the start state: pick the next state from the first character. */
static int StartState(int c)
{
    if (IsIdentStart(c)) {
        return 1;
    }
    if (c == '0') {
        return 3;
    }
    if (IsDigit(c)) {
        return 20;
    }

    switch (c) {
    case '\'': return 23;
    case '"':  return 26;
    case '{': case '}': case '[': case ']':
    case ',': case '(': case ')': case ';':
        return 40;
    case '.':  return 41;
    case '/':  return 43;
    case '+':  return 51;
    case '-':  return 55;
    case '*':  return 59;
    case '%':  return 62;
    case '?':  return 65;
    case '=':  return 66;
    case '~':  return 69;
    case '>':  return 70;
    case '^':  return 79;
    case '&':  return 82;
    case '|':  return 86;
    case '!':  return 90;
    case '<':  return 93;
    case '\n': return 102;
    case ' ': case '\t': case '\r': case '\f': case '\b':
    case ':': case '@': case '#':
        return 103;
    default:
        return 100;
    }
}

/* Push the look-ahead character back and move to nextState. */
static void Retreat(int nextState)
{
    UntRead();
    state = nextState;
}

/* Accepting state: push the look-ahead back, emit the token, restart. */
static void Accept(int tokenClass)
{
    UntRead();
    outputWordStream(tokenClass);
    state = 0;
}

/*
 * Accepting state for a lone '+' or '-'.
 *
 * Backs up to the character just before the sign: if it is a digit the sign
 * is reported as a binary operator (21), otherwise as a unary one (23).
 * NOTE(review): whitespace, identifiers and ')' before the sign are all
 * treated as "unary"; this heuristic is unchanged from the original.
 */
static void AcceptSign(void)
{
    int prev;

    UntRead();          // the look-ahead character
    UntRead();          // the sign itself
    UntRead();          // the character before the sign
    prev = ReadChar();

    ReInit();
    ReadChar();         // consume the sign again so scanWord holds it
    outputWordStream(IsDigit(prev) ? 21 : 23);
    state = 0;
}

/* Print the per-line summary and advance the line counters. */
static void EndLine(void)
{
    printf("第 %d 行，单词数 %d.\n", lineNumber, numberOfWords);
    lineNumber++;
    numberOfLines++;
    numberOfWords = 0;
}

/*
 * The finite state machine.
 *
 * Each iteration reads one character and performs one transition. Accepting
 * states are entered with the lexeme complete; they read one extra
 * character, push it back and emit the token. Scanning stops when end of
 * input is read in the start state, so a token that ends exactly at end of
 * file is still emitted.
 */
void DFA(void)
{
    int scanCode;

    state = 0;
    for (;;) {
        scanCode = ReadChar();

        if (scanCode == EOF && state == 0) {
            // Last line without a trailing newline still counts as a line
            if (numberOfWords > 0) {
                EndLine();
            }
            break;
        }

        switch (state) {
        case 0:
            state = StartState(scanCode);
            break;

        // Keywords, boolean literals, identifiers
        case 1:
            if (!IsIdentPart(scanCode)) {
                Retreat(2);
            }
            break;
        case 2:
            // scanWord must be restored before it is classified
            UntRead();
            outputWordStream(JudgeStringType());
            state = 0;
            break;

        // Numeric literals
        case 3:     // leading '0'
            if (scanCode == 'X' || scanCode == 'x') {
                state = 4;
            } else if (IsOctalDigit(scanCode)) {
                state = 7;
            } else if (scanCode == '.') {
                state = 10;
            } else {
                Retreat(9);
            }
            break;
        case 4:     // "0x": at least one hex digit required
            if (IsHexDigit(scanCode)) {
                state = 5;
            } else {
                Retreat(100);
            }
            break;
        case 5:
            if (!IsHexDigit(scanCode)) {
                Retreat(6);
            }
            break;
        case 7:
            if (!IsOctalDigit(scanCode)) {
                Retreat(8);
            }
            break;
        case 10:    // just after the decimal point
            if (IsDigit(scanCode)) {
                state = 11;
            } else {
                Retreat(100);
            }
            break;
        case 11:    // fraction digits
            if (IsDigit(scanCode)) {
                state = 11;
            } else if (scanCode == 'F' || scanCode == 'f') {
                state = 12;
            } else if (scanCode == 'E' || scanCode == 'e') {
                state = 14;
            } else {
                Retreat(13);
            }
            break;
        case 14:    // just after the exponent marker
            if (IsDigit(scanCode)) {
                state = 15;
            } else {
                Retreat(100);
            }
            break;
        case 15:
            if (IsDigit(scanCode)) {
                state = 15;
            } else if (scanCode == '.') {
                state = 17;
            } else {
                Retreat(16);
            }
            break;
        case 17:
            if (IsDigit(scanCode)) {
                state = 18;
            } else {
                Retreat(100);
            }
            break;
        case 18:
            if (!IsDigit(scanCode)) {
                Retreat(19);
            }
            break;
        case 20:    // decimal integer
            if (IsDigit(scanCode)) {
                state = 20;
            } else if (scanCode == 'L' || scanCode == 'l') {
                state = 22;
            } else if (scanCode == '.') {
                state = 10;
            } else {
                Retreat(21);
            }
            break;
        case 6: case 8: case 9: case 21: case 22:
            Accept(8);
            break;
        case 12: case 13: case 16: case 19:
            Accept(9);
            break;

        // Character literals
        case 23:    // just after the opening quote
            if (scanCode == EOF || scanCode == '\n') {
                Retreat(100);       // unterminated literal
            } else if (scanCode == '\\') {
                state = 28;
            } else {
                state = 24;
            }
            break;
        case 24:
        case 29:
            if (scanCode == '\'') {
                state = 25;
            } else {
                Retreat(100);
            }
            break;
        case 25:
            Accept(7);
            break;
        case 28:    // escape sequence inside a character literal
            if (scanCode == '\'' || scanCode == '"' || scanCode == '\\' ||
                scanCode == 't' || scanCode == 'n' || scanCode == 'r' ||
                scanCode == 'f' || scanCode == 'b') {
                state = 29;
            } else {
                Retreat(100);
            }
            break;

        // String literals
        case 26:
            if (scanCode == EOF || scanCode == '\n') {
                Retreat(100);       // unterminated string
            } else if (scanCode == '"') {
                state = 27;
            } else if (scanCode == '\\') {
                state = 30;
            }
            break;
        case 27:
            Accept(10);
            break;
        case 30:    // the character after a backslash is always string content
            if (scanCode == EOF || scanCode == '\n') {
                Retreat(100);
            } else {
                state = 26;
            }
            break;

        // Delimiters
        case 40:
            UntRead();
            outputWordStream(JudgeDelimeterType());
            state = 0;
            break;
        case 41:    // '.': member access, or the start of a literal like .5
            if (IsDigit(scanCode)) {
                state = 11;
            } else {
                Retreat(40);
            }
            break;

        // '/', "/=", and comments
        case 43:
            if (scanCode == '=') {
                state = 45;
            } else if (scanCode == '/') {
                state = 46;         // line comment
            } else if (scanCode == '*') {
                state = 48;         // block comment
            } else {
                Retreat(44);
            }
            break;
        case 46:
            // Leave the newline for the start state so the line is counted
            // and summarised in exactly one place.
            if (scanCode == '\n' || scanCode == EOF) {
                Retreat(47);
            }
            break;
        case 48:
            if (scanCode == EOF) {
                Retreat(100);       // unterminated block comment
            } else if (scanCode == '*') {
                state = 49;
            } else if (scanCode == '\n') {
                lineNumber++;
                numberOfLines++;
            }
            break;
        case 49:    // seen '*' inside a block comment
            if (scanCode == EOF) {
                Retreat(100);
            } else if (scanCode == '/') {
                state = 50;
            } else if (scanCode == '*') {
                state = 49;         // "**/" must still close the comment
            } else {
                if (scanCode == '\n') {
                    lineNumber++;
                    numberOfLines++;
                }
                state = 48;
            }
            break;
        case 47: case 50:
            Accept(2);
            break;

        // Operators
        case 51:
            if (scanCode == '+') {
                state = 53;
            } else if (scanCode == '=') {
                state = 54;
            } else {
                Retreat(52);
            }
            break;
        case 55:
            if (scanCode == '-') {
                state = 57;
            } else if (scanCode == '=') {
                state = 58;
            } else {
                Retreat(56);
            }
            break;
        case 52: case 56:
            AcceptSign();
            break;
        case 59:
            if (scanCode == '=') {
                state = 61;
            } else {
                Retreat(60);
            }
            break;
        case 62:
            if (scanCode == '=') {
                state = 64;
            } else {
                Retreat(63);
            }
            break;
        case 65:    // '?': only valid as part of "?:"
            if (scanCode != EOF && FindColon()) {
                Retreat(99);        // keep the character after '?'
            } else {
                Retreat(100);
            }
            break;
        case 66:
            if (scanCode == '=') {
                state = 68;
            } else {
                Retreat(67);
            }
            break;
        case 70:    // '>'
            if (scanCode == '=') {
                state = 72;
            } else if (scanCode == '>') {
                state = 73;
            } else {
                Retreat(71);
            }
            break;
        case 73:    // ">>"
            if (scanCode == '=') {
                state = 75;
            } else if (scanCode == '>') {
                state = 76;
            } else {
                Retreat(74);
            }
            break;
        case 76:    // ">>>"
            if (scanCode == '=') {
                state = 78;
            } else {
                Retreat(77);
            }
            break;
        case 79:    // '^'
            if (scanCode == '=') {
                state = 80;
            } else {
                Retreat(81);
            }
            break;
        case 82:    // '&'
            if (scanCode == '&') {
                state = 84;
            } else if (scanCode == '=') {
                state = 85;
            } else {
                Retreat(83);
            }
            break;
        case 86:    // '|'
            if (scanCode == '|') {
                state = 88;
            } else if (scanCode == '=') {
                state = 89;
            } else {
                Retreat(87);
            }
            break;
        case 90:    // '!'
            if (scanCode == '=') {
                state = 92;
            } else {
                Retreat(91);
            }
            break;
        case 93:    // '<'
            if (scanCode == '=') {
                state = 95;
            } else if (scanCode == '<') {
                state = 96;
            } else {
                Retreat(94);
            }
            break;
        case 96:    // "<<"
            if (scanCode == '=') {
                state = 97;
            } else {
                Retreat(98);
            }
            break;

        // Accepting states for operators, grouped by token class
        case 45: case 54: case 58: case 61: case 64: case 67:
        case 75: case 78: case 80: case 85: case 89: case 97:
            Accept(11);     // assignment operators
            break;
        case 99:
            Accept(12);     // ?:
            break;
        case 88:
            Accept(13);     // ||
            break;
        case 84:
            Accept(14);     // &&
            break;
        case 87:
            Accept(15);     // |
            break;
        case 81:
            Accept(16);     // ^
            break;
        case 83:
            Accept(17);     // &
            break;
        case 68: case 92:
            Accept(18);     // == !=
            break;
        case 71: case 72: case 94: case 95:
            Accept(19);     // < > <= >=
            break;
        case 74: case 77: case 98:
            Accept(20);     // << >> >>>
            break;
        case 44: case 60: case 63:
            Accept(22);     // * / %
            break;
        case 53: case 57: case 69: case 91:
            Accept(23);     // ++ -- ! ~
            break;

        // End of line
        case 102:
            UntRead();
            EndLine();
            ReInit();
            state = 0;
            break;

        // Ignored characters (white space, ':', '@', '#')
        case 103:
            UntRead();
            ReInit();
            state = 0;
            break;

        // Malformed lexeme
        case 100:
            Accept(1);
            break;

        default:
            // Not reachable with the transitions above; recover as an error
            Retreat(100);
            break;
        }
    }
}

/*
 * Read one line from stdin into buf (at most size-1 characters) and strip
 * the line terminator. On end of input buf becomes the empty string.
 */
static void ReadLine(char *buf, size_t size)
{
    if (fgets(buf, (int)size, stdin) == NULL) {
        buf[0] = '\0';
        return;
    }
    buf[strcspn(buf, "\r\n")] = '\0';
}

/*
 * Entry point: ask for the input and output paths, run the scanner and
 * print the totals. Returns EXIT_FAILURE if a file cannot be opened.
 */
int main(void)
{
    // Input file path
    printf("输入文件路径:\n");
    ReadLine(inputPath, sizeof inputPath);

    // Output file path
    printf("输出文件路径:\n");
    ReadLine(outputPath, sizeof outputPath);

    inFile = fopen(inputPath, "r");
    if (NULL == inFile) {
        printf("Open inFile error!\n");
        return EXIT_FAILURE;
    }

    outFile = fopen(outputPath, "w");
    if (NULL == outFile) {
        printf("Open outFile error!\n");
        fclose(inFile);
        return EXIT_FAILURE;
    }

    printf("\n词法分析开始:\n\n");
    DFA();
    printf("\n总行数：%d.\n", numberOfLines - 1);
    printf("总单词数：%d.\n", numberOfAllWords);
    printf("词法分析结果输出到 %s 文件中.\n", outputPath);
    printf("\n词法分析结束!\n");

    fclose(inFile);
    fclose(outFile);
    return 0;
}

贴个图：