要实现词法分析和语法分析的程序,我们首先要定义出程序中出现的每一个字符或字符串的含义。比如 int minNum = 1; 这段代码,我们应该让程序将其切分为 int、minNum、=、1、; 这5部分,它们分别属于关键字、标识符、操作符、数字和分隔符。因此我们也应该先定义出程序所支持的关键字、分隔符等集合,比如我的定义如下:
/// <summary>
/// Lookup tables that map each keyword, delimiter and operator recognised by
/// the lexical analyzer to its integer token code.
/// </summary>
class KeyValueList
{
    /// <summary>Reserved words, with codes 101-114.</summary>
    public Dictionary<string, int> keyWordList = new Dictionary<string, int>
    {
        { "char", 101 }, { "int", 102 }, { "float", 103 }, { "break", 104 }, { "const", 105 },
        // Was misspelled "contiue", so the real keyword was never matched.
        { "return", 106 }, { "void", 107 }, { "continue", 108 }, { "do", 109 }, { "while", 110 },
        { "if", 111 }, { "else", 112 }, { "for", 113 }, { "String", 114 }
    };

    /// <summary>Delimiters, with codes 301-304.</summary>
    public Dictionary<string, int> delimitersList = new Dictionary<string, int>
    {
        { "{", 301 }, { "}", 302 }, { ";", 303 }, { ",", 304 }
    };

    /// <summary>
    /// One- and two-character operators. The codes are kept exactly as originally
    /// assigned; they are not contiguous.
    /// </summary>
    public Dictionary<string, int> OperatorList = new Dictionary<string, int>
    {
        { "(", 201 }, { ")", 202 }, { "[", 203 }, { "]", 204 }, { "!", 205 }, { "*", 206 }, { "/", 207 },
        { "%", 208 }, { "+", 209 }, { "-", 210 }, { "<", 217 }, { "<=", 212 }, { ">=", 214 }, { "==", 215 },
        { "!=", 216 }, { "&&", 221 }, { "||", 220 }, { "=", 211 }, { ">", 218 }, { "<>", 219 }, { "|", 230 }, { "&", 240 }
    };
}
定义好这些集合之后,接下来就是获取界面或者控制台输入的程序段并进行分析。这其实就像匹配正则表达式一样,逐个字符进行匹配即可,我的代码如下:
/// <summary>
/// Scans <c>FileText</c> character by character and splits it into tokens.
/// Recognised tokens are added to <c>lexcialAnlysisResult</c> and problems to
/// <c>lexcialErrorResult</c>, each paired with the 1-based line number on which
/// the token starts. <c>PrintLexcialResult</c> is called once scanning ends.
/// </summary>
/// <remarks>
/// Token codes produced here: keyword / operator / delimiter codes come from
/// <c>keyValueList</c>; 700 identifier, 400 integer, 800 floating-point number,
/// 500 character literal, 600 string literal, 0 unmatched quote, 1000
/// unrecognised symbol.
/// Escape sequences inside character or string literals are not interpreted.
/// NOTE(review): both result collections are dictionaries keyed by CharCode; if
/// CharCode implements value equality, a repeated token would make Add throw —
/// confirm against the CharCode definition.
/// </remarks>
/// <param name="parameter">Not read by this method.</param>
private void LexcialAnalysis(object parameter)
{
    JudgeProgram judgeProgram = new JudgeProgram();
    // The int value is the line number; it is used to highlight the matching
    // analysis result when a piece of code is clicked.
    lexcialAnlysisResult = new Dictionary<CharCode, int>();
    lexcialErrorResult = new Dictionary<CharCode, int>();
    int lineCount = 1;
    String analysisString = this.FileText;
    if (analysisString == null)
    {
        return;
    }

    int length = analysisString.Length;
    for (int i = 0; i < length; i++)
    {
        char ch = analysisString[i];
        string symbol = ch.ToString();
        int code;

        if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') // whitespace
        {
            if (ch == '\n')
            {
                lineCount++;
            }
        }
        else if (judgeProgram.IsLetter(ch)) // keyword or identifier
        {
            int start = i;
            while (i < length
                   && (judgeProgram.IsLetter(analysisString[i]) || judgeProgram.IsDight(analysisString[i])))
            {
                i++;
            }
            string word = analysisString.Substring(start, i - start);
            i--; // the for-loop increment moves on to the first character after the word

            if (judgeProgram.IsKey(word)) // keyword
            {
                lexcialAnlysisResult.Add(new CharCode(keyValueList.keyWordList[word], word), lineCount);
            }
            else // identifier
            {
                lexcialAnlysisResult.Add(new CharCode(700, word), lineCount);
            }
        }
        else if (judgeProgram.IsDight(ch)) // integer or floating-point number
        {
            int start = i;
            // A '.' belongs to the number only when a digit follows it. The bounds
            // check keeps input that ends in '.' from indexing past the end.
            while (i < length
                   && (judgeProgram.IsDight(analysisString[i])
                       || (analysisString[i] == '.'
                           && i + 1 < length
                           && judgeProgram.IsDight(analysisString[i + 1]))))
            {
                i++;
            }
            string number = analysisString.Substring(start, i - start);
            i--;

            int numberCode = number.IndexOf('.') >= 0 ? 800 : 400;
            lexcialAnlysisResult.Add(new CharCode(numberCode, number), lineCount);
        }
        else if (ch == '\'') // character literal
        {
            if (i + 1 < length && analysisString[i + 1] == '\'')
            {
                // Empty literal: two adjacent quotes.
                lexcialAnlysisResult.Add(new CharCode(500, symbol + analysisString[i + 1]), lineCount);
                i++;
            }
            else if (i + 2 < length && analysisString[i + 2] == '\'')
            {
                // Quote, one character, quote. Previously this case could never be
                // reached, so every literal such as 'a' was reported as an error.
                lexcialAnlysisResult.Add(new CharCode(500, analysisString.Substring(i, 3)), lineCount);
                i += 2;
            }
            else
            {
                lexcialErrorResult.Add(new CharCode(0, "无匹配符号error(')"), lineCount);
            }
        }
        else if (ch == '\"') // string literal
        {
            // Ordinal search for the closing quote, without copying the rest of the input.
            int closing = analysisString.IndexOf('\"', i + 1);
            if (closing == -1)
            {
                lexcialErrorResult.Add(new CharCode(0, "无匹配符号error(\")"), lineCount);
            }
            else
            {
                // The token text includes both quotes.
                lexcialAnlysisResult.Add(new CharCode(600, analysisString.Substring(i, closing - i + 1)), lineCount);
                i = closing;
            }
        }
        else if (keyValueList.OperatorList.TryGetValue(symbol, out code)) // operator
        {
            string text = symbol;
            if (i + 1 < length)
            {
                // Prefer a two-character operator when the next character completes one.
                string pair = symbol + analysisString[i + 1];
                int pairCode;
                if (keyValueList.OperatorList.TryGetValue(pair, out pairCode))
                {
                    text = pair;
                    code = pairCode;
                    i++;
                }
            }
            lexcialAnlysisResult.Add(new CharCode(code, text), lineCount);
        }
        else if (keyValueList.delimitersList.TryGetValue(symbol, out code)) // delimiter
        {
            lexcialAnlysisResult.Add(new CharCode(code, symbol), lineCount);
        }
        else
        {
            lexcialErrorResult.Add(new CharCode(1000, "无法识别的符号:" + symbol), lineCount);
        }
    }
    PrintLexcialResult();
}
根据定义的符号集合不一样,对应的词法分析程序也会发生相应的改变,词法分析中应该处理一些异常情况,比如int a = 1a2b3c;这段代码应该报出错误信息,我的程序中只处理了字符和字符串的异常情况。
根据上面的程序,我们可以将对应的代码段进行分析:
// Sample input for the lexical analyzer shown above; it only has to tokenize.
// NOTE(review): `exam` is declared again inside the body and `result` is read
// before it is assigned, so this text is a lexing example rather than a
// program that would compile.
void testProgram(int exam)
{
int a = 1, b = 2, c = 3;
int exam = exam * a * b * c;
int result;
if(result > 60)
{
result = 1;
}
else
{
result = 0;
}
}
可以得到如下类似的结果:
至此,词法分析的任务已经完成。
然后是语法分析,语法分析相较于词法分析需要考虑的东西比较多,异常处理相较于词法分析也更加复杂,我们首先要定义终结符和非终结符的集合,比如一个if-else程序段:
// Sample if/else fragment used as input for the syntax-analysis example.
// The assignments carry no terminating ';' in this sample.
if(a||b){
if(c&&d){
exper = 3
}
}
else{
exper = 2
}
我们使用语法分析中的自上而下分析方法,可以写出对应的文法,然后找出其中的终结符和非终结符,并在程序中定义:
// Non-terminal symbols of the grammar, each mapped to an integer index.
// NOTE(review): the ten indices 0-9 presumably select the rows of
// analyzeTable — confirm against the parser driver.
public Dictionary<string, int> unEndCharSet = new Dictionary<string, int>
{
    { "E", 0 },
    { "X", 1 },
    { "T", 2 },
    { "Y", 3 },
    { "F", 4 },
    { "A", 5 },
    { "B", 6 },
    { "C", 7 },
    { "D", 8 },
    { "G", 9 }
};
// Terminal symbols of the grammar, each mapped to an integer index.
// NOTE(review): the twelve indices 0-11 presumably select the columns of
// analyzeTable — confirm against the parser driver.
public Dictionary<string, int> endCharSet = new Dictionary<string, int>
{
    { "i", 0 },
    { "j", 1 },
    { "+", 2 },
    { "-", 3 },
    { "*", 4 },
    { "/", 5 },
    { "!", 6 },
    { "|", 7 },
    { "&", 8 },
    { "r", 9 },
    { "(", 10 },
    { ")", 11 }
};
然后是分析得到对应的分析表并用一个二维数组存储起来:
// Predictive (LL(1)) parsing table with 10 rows and 12 columns.
// Each cell holds the right-hand side to expand, "null" for the empty
// production, or "" when there is no entry (syntax error).
// NOTE(review): rows are assumed to follow the order E X T Y F A B C D G and
// columns the order i j + - * / ! | & r ( ) — confirm against the symbol
// tables and the parser driver.
//
// Grammar reconstructed from the productions that appear in the table itself
// (NOTE(review): confirm against the intended grammar):
//   E -> T X          X -> | T X  | ε
//   T -> F Y          Y -> & F Y  | ε
//   F -> A r A | j | ! F | ( E )
//   A -> B C          C -> + B C | - B C | ε
//   B -> G D          D -> * G D | / G D | ε
//   G -> i
//
// Corrections relative to the previous table:
//   * Row T under '(' held "TX" (E's production). Expanding T to TX leaves T
//     on top of the stack with the same lookahead, so parsing never ended; it
//     is now "FY".
//   * Row Y under '(' held a stray "TX" and lacked the ε entry under ')', so
//     "( j )" could not be parsed.
//   * Row B held stray "TX" entries under j, ! and '('; B only derives G D,
//     which starts with i.
//   * Rows C and D lacked ε entries under '&' and ')', which can follow an
//     arithmetic expression.
// NOTE(review): there is no end-of-input column; presumably the driver handles
// the end marker itself — confirm.
public string[,] analyzeTable = new string[10, 12]
{
    //  i      j     +      -      *      /      !     |       &       r       (      )
    { "TX",  "TX", "",    "",    "",    "",    "TX", "",     "",     "",     "TX",  ""     }, // E
    { "",    "",   "",    "",    "",    "",    "",   "|TX",  "",     "",     "",    "null" }, // X
    { "FY",  "FY", "",    "",    "",    "",    "FY", "",     "",     "",     "FY",  ""     }, // T
    { "",    "",   "",    "",    "",    "",    "",   "null", "&FY",  "",     "",    "null" }, // Y
    { "ArA", "j",  "",    "",    "",    "",    "!F", "",     "",     "",     "(E)", ""     }, // F
    { "BC",  "",   "",    "",    "",    "",    "",   "",     "",     "",     "",    ""     }, // A
    { "GD",  "",   "",    "",    "",    "",    "",   "",     "",     "",     "",    ""     }, // B
    { "",    "",   "+BC", "-BC", "",    "",    "",   "null", "null", "null", "",    "null" }, // C
    { "",    "",   "null","null","*GD", "/GD", "",   "null", "null", "null", "",    "null" }, // D
    { "i",   "",   "",    "",    "",    "",    "",   "",     "",     "",     "",    ""     }, // G
};
最后,根据词法分析的结果和这个分析表,即可得到语法分析的结果,比如
上面的if程序段
// Same if/else sample as earlier in the article, repeated as the input whose
// analysis results are shown next.
if(a||b){
if(c&&d){
exper = 3
}
}
else{
exper = 2
}
通过词法分析程序,得到如下结果:
词法分析分析出了各个部分所对应的是关键字、分隔符还是操作符等等,然后根据这个结果对应语法分析中对应的模式字符,即可以得到如图的结果:
这个语法分析+词法分析程序就至此完成了。