题目
编译原理作业:设计并实现 C 语言的词法分析程序。要求:单词以二元式形式输出,并输出有词法错误的单词及其所在行号。
思路
先给大家看一下运行结果,太长了,我就放在同一行了
思路其实很简单:对文件里的文本逐个字符地读取,然后根据每个单词的第一个字符分情况处理:
1.当为字母时,循环读取,直到不为字母或数字,然后判断读到的字符串是否为保留字:若为保留字,则在保留字集合中找到对应的代码,存入我们定义好的二元组;否则按标识符处理,同样存入二元组
2.当为数字时,循环读取,直到不为数字,然后将其作为常数存入我们定义好的二元组;若数字后面紧跟着字母(例如 12ab),则报错
3.判断是否为界符:在界符集中查找,找到对应代码后存入我们定义好的二元组
4.判断是否为运算符,在运算符集中判断,同上(这里要注意,运算符有两个字符的也有一个字符的,所以要多加一些情况判断)
注意:其实我的代码有些不完善,标识符集和常数集本来应该把找到的标识符和常数放到里面,方便查找,结果我偷懒,没做,所以大家可以自己完善一下
代码
github地址:https://github.com/DiseaseBeast/Lexical-analyzer
用完点个star谢谢了!
主程序代码
/**
 * Command-line entry point for the lexical analyzer.
 *
 * <p>Usage: {@code java Main [source-file]}. When no argument is supplied the
 * original hard-coded sample program path is used, so existing launch
 * configurations keep working.
 */
public class Main {
    /** Source file analysed when no path is given on the command line. */
    private static final String DEFAULT_PROGRAM = "G:\\eclipse\\LexicalAnalyzer\\program.txt";

    /**
     * Runs the analyzer on the requested source file.
     *
     * @param args optional; {@code args[0]} is the path of the file to analyse
     * @throws IOException propagated from {@link Judge#Start(String)}
     */
    public static void main(String[] args) throws IOException {
        String pathname = (args != null && args.length > 0) ? args[0] : DEFAULT_PROGRAM;
        Judge judge = new Judge();
        judge.Start(pathname);
    }
}
二元组代码
/**
 * Immutable two-element token record: an integer code paired with the text it
 * was produced from. Rendered as {@code <code,text>}.
 */
public class Binary {
    /** Numeric code of the token. */
    private final int code;
    /** Text of the token exactly as passed to the constructor (may be null). */
    private final String lexeme;

    /**
     * Creates a pair.
     *
     * @param x numeric code of the token
     * @param y text of the token
     */
    public Binary(int x, String y) {
        this.code = x;
        this.lexeme = y;
    }

    /**
     * Formats the pair as {@code <code,text>}; a null text prints as "null".
     *
     * @return the bracketed representation
     */
    @Override
    public String toString() {
        return "<" + code + "," + lexeme + ">";
    }
}
实现代码
/**
 * Character-by-character lexical analyzer.
 *
 * <p>Three word lists (reserved words, operators, boundary symbols) are loaded
 * from text files, one entry per line. Tokens are emitted as {@link Binary}
 * pairs whose codes are laid out in consecutive, non-overlapping ranges, where
 * R, O and B are the sizes of the three lists:
 * <ul>
 *   <li>reserved words: 1 .. R (position in the reserved-word list + 1)</li>
 *   <li>operators: R+1 .. R+O</li>
 *   <li>boundary symbols: R+O+1 .. R+O+B</li>
 *   <li>identifiers: R+O+B+1</li>
 *   <li>numeric constants: R+O+B+2</li>
 * </ul>
 * Lexical errors are printed to standard output together with the line number
 * on which they were found.
 */
public class Judge {
    private static final String RESERVE_PATH = "G:\\eclipse\\LexicalAnalyzer\\Reserve.txt";
    private static final String OPERATOR_PATH = "G:\\eclipse\\LexicalAnalyzer\\Operator.txt";
    private static final String BOUNDARY_PATH = "G:\\eclipse\\LexicalAnalyzer\\Boundary.txt";

    private ArrayList<String> Reserve; // reserved words
    private ArrayList<String> Operator; // operators
    private ArrayList<String> Boundary; // boundary symbols (delimiters)
    private HashMap<String, Integer> Symbol; // identifier table (allocated, not populated)
    private HashMap<String, Integer> Digits; // constant table (allocated, not populated)
    private ArrayList<Binary> binaries; // tokens produced by the last scan
    private char character; // current look-ahead character
    File filename;
    // Kept so the class keeps its original package-visible members. It is no longer
    // opened by FileInit: the scan opens (and closes) its own reader, so opening
    // one here only leaked a file handle.
    InputStreamReader reader;

    /**
     * Reads a text file line by line and appends every line to {@code list}.
     * On an I/O failure the stack trace is printed and the lines read so far
     * are kept (best effort).
     *
     * @param filename file to read
     * @param list collection that receives the lines
     */
    private void Filer(File filename, ArrayList<String> list) {
        // try-with-resources closes the reader on every path.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(filename)))) {
            String line;
            while ((line = br.readLine()) != null) {
                list.add(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Remembers the source file that {@link #getChar()} will scan.
     *
     * @param pathname path of the source file
     */
    private void FileInit(String pathname) {
        filename = new File(pathname);
    }

    /**
     * Performs the lexical analysis of {@link #filename}, filling
     * {@link #binaries} and printing lexical errors.
     *
     * <p>The scanner keeps exactly one character of look-ahead in
     * {@link #character}; every scan helper returns with the look-ahead
     * positioned on the first character it did not consume.
     */
    private void getChar() {
        Symbol = new HashMap<>();
        Digits = new HashMap<>();
        binaries = new ArrayList<>();
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(filename)))) {
            int line = 1;
            int num = advance(br);
            while (num != -1) {
                if (character == ' ') {
                    num = advance(br);
                } else if (isLetter(character)) {
                    num = scanWord(br);
                } else if (isDigit(character)) {
                    num = scanNumber(br, line);
                } else if (isOperator("" + character) || character == '!') {
                    // '!' is accepted as an operator start even when only "!=" is listed.
                    num = scanOperator(br, line);
                } else {
                    if (isBoundary(character)) {
                        int code = Reserve.size() + Operator.size()
                                + Boundary.indexOf("" + character) + 1;
                        binaries.add(new Binary(code, "" + character));
                    }
                    // Token scanners never consume a newline, so each one is counted here once.
                    if (character == '\n') {
                        line++;
                    }
                    num = advance(br);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Reads the next character into {@link #character}; returns -1 at end of input. */
    private int advance(BufferedReader br) throws IOException {
        int num = br.read();
        character = (char) num;
        return num;
    }

    /** Code shared by all identifiers: the first value after the three list ranges. */
    private int identifierCode() {
        return Reserve.size() + Operator.size() + Boundary.size() + 1;
    }

    /** Prints a lexical error together with the line it occurred on. */
    private void reportError(int line, String lexeme) {
        System.out.println("第" + line + "行: " + lexeme + "有错误");
    }

    /** Scans a letter followed by letters/digits and emits a reserved word or identifier. */
    private int scanWord(BufferedReader br) throws IOException {
        StringBuilder word = new StringBuilder();
        int num;
        do {
            word.append(character);
            num = advance(br);
        } while (isLetter(character) || isDigit(character));
        String lexeme = word.toString();
        int code = isReserve(lexeme) ? Reserve.indexOf(lexeme) + 1 : identifierCode();
        binaries.add(new Binary(code, lexeme));
        return num;
    }

    /**
     * Scans a run of digits and emits a constant. If a letter follows directly
     * (e.g. {@code 12ab}) the whole alphanumeric run is consumed and reported
     * as a single error instead.
     */
    private int scanNumber(BufferedReader br, int line) throws IOException {
        StringBuilder digits = new StringBuilder();
        int num;
        do {
            digits.append(character);
            num = advance(br);
        } while (isDigit(character));
        if (isLetter(character)) {
            do {
                digits.append(character);
                num = advance(br);
            } while (isLetter(character) || isDigit(character));
            reportError(line, digits.toString());
        } else {
            binaries.add(new Binary(identifierCode() + 1, digits.toString()));
        }
        return num;
    }

    /**
     * Scans the longest operator starting at the current character: the lexeme
     * is extended while appending the look-ahead still yields a listed
     * operator, and every character that became part of it is consumed.
     * Adjacent operators that do not combine (e.g. {@code =-}) are therefore
     * emitted as separate tokens. A start character that is not an operator on
     * its own (a lone {@code !} missing from the list) is reported as an error.
     */
    private int scanOperator(BufferedReader br, int line) throws IOException {
        StringBuilder op = new StringBuilder().append(character);
        int num = advance(br);
        while (num != -1 && isOperator(op.toString() + character)) {
            op.append(character);
            num = advance(br);
        }
        String lexeme = op.toString();
        if (isOperator(lexeme)) {
            // Offset by the reserved-word count so operator codes do not collide with them.
            binaries.add(new Binary(Reserve.size() + Operator.indexOf(lexeme) + 1, lexeme));
        } else {
            reportError(line, lexeme);
        }
        return num;
    }

    /**
     * Tells whether the character is an ASCII decimal digit.
     *
     * @param charact character to test
     * @return true for '0'..'9'
     */
    private boolean isDigit(char charact) {
        return charact >= '0' && charact <= '9';
    }

    /**
     * Tells whether the character is an ASCII letter.
     *
     * @param charact character to test
     * @return true for 'A'..'Z' and 'a'..'z'
     */
    private boolean isLetter(char charact) {
        return (charact >= 'A' && charact <= 'Z') || (charact >= 'a' && charact <= 'z');
    }

    /**
     * Tells whether the character is listed as a boundary symbol.
     *
     * @param charact character to test
     * @return true if the boundary list contains it
     */
    private boolean isBoundary(char charact) {
        return Boundary.contains("" + charact);
    }

    /**
     * Tells whether the string is listed as an operator.
     *
     * @param string candidate operator
     * @return true if the operator list contains it
     */
    private boolean isOperator(String string) {
        return Operator.contains(string);
    }

    /**
     * Tells whether the string is listed as a reserved word.
     *
     * @param string candidate word
     * @return true if the reserved-word list contains it
     */
    private boolean isReserve(String string) {
        return Reserve.contains(string);
    }

    /**
     * Tells whether the string is already present in the identifier table.
     *
     * @param string identifier text
     * @return true if the identifier table has it as a key
     */
    private boolean isSymbol(String string) {
        return Symbol.containsKey(string);
    }

    /**
     * Tells whether the string is already present in the constant table.
     *
     * @param string constant text
     * @return true if the constant table has it as a key
     */
    private boolean isDigits(String string) {
        return Digits.containsKey(string);
    }

    /**
     * Loads the reserved-word, operator and boundary lists from their files.
     */
    private void init() {
        Reserve = new ArrayList<String>();
        Filer(new File(RESERVE_PATH), Reserve);
        Operator = new ArrayList<String>();
        Filer(new File(OPERATOR_PATH), Operator);
        Boundary = new ArrayList<String>();
        Filer(new File(BOUNDARY_PATH), Boundary);
    }

    /**
     * Prints the three loaded lists, each under its own heading.
     */
    private void show() {
        System.out.println("------保留字------");
        for (String reserve : Reserve) {
            System.out.println(reserve);
        }
        System.out.println();
        System.out.println("------运算符------");
        for (String operator : Operator) {
            System.out.println(operator);
        }
        System.out.println();
        System.out.println("------界符------");
        for (String boundary : Boundary) {
            System.out.println(boundary);
        }
        System.out.println();
    }

    /** Prints every token produced by the last scan, one per line. */
    private void prin() {
        for (Binary binary : binaries) {
            System.out.println(binary);
        }
    }

    /**
     * Runs the whole pipeline: load the lists, print them, scan the source
     * file and print the resulting tokens.
     *
     * @param pathname path of the source file to analyse
     * @throws IOException declared for compatibility with existing callers
     */
    public void Start(String pathname) throws IOException {
        init();
        show();
        FileInit(pathname);
        getChar();
        prin();
    }
}