// Simple lexical analyzer: takes a program as input, splits it into individual tokens, and labels each token's type.
package com.parting_soul;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * A small lexical analyzer (tokenizer).
 *
 * <p>The text supplied through {@link #setText(String)} is scanned one character at a time and
 * split into keywords, identifiers, numeric constants, delimiters and operators. Every token is
 * recorded in order of appearance and can be dumped with {@link #printToken()}.
 *
 * <p>Cursor convention shared by {@link #getIdentifier()}, {@link #getIntConstant()} and
 * {@link #getConstant()}: on entry the cursor is on the first character of the token, on return
 * it rests on the LAST character of the token, so that the single {@code mCurrentPos++} at the
 * bottom of the {@link #analyze()} loop moves on to the next unread character.
 *
 * @author parting_soul
 */
public class LexicalAnalyzer {
    /** Single-character operators recognized by {@link #isOperator(char)}. */
    private static final Set<Character> operatorSet = new HashSet<>();
    /** Single-character delimiters recognized by {@link #isLimit(char)}. */
    private static final Set<Character> limitSet = new HashSet<>();
    /** Reserved words; any other letter-led word is reported as an identifier. */
    private static final Set<String> keywordsSet = new HashSet<>();

    static {
        operatorSet.add('+');
        operatorSet.add('-');
        operatorSet.add('*');
        operatorSet.add('/');
        operatorSet.add('=');

        limitSet.add(',');
        limitSet.add(';');
        limitSet.add('(');
        limitSet.add(')');
        limitSet.add('{');
        limitSet.add('}');

        keywordsSet.add("int");
        keywordsSet.add("main");
        keywordsSet.add("boolean");
        keywordsSet.add("double");
        keywordsSet.add("float");
        keywordsSet.add("public");
        keywordsSet.add("return");
        keywordsSet.add("private");
        keywordsSet.add("static");
        keywordsSet.add("String");
    }

    /** The program text being analyzed. */
    private String mText;
    /** Index into {@link #mText} of the character currently being examined. */
    private int mCurrentPos;

    /** Per-category collections, filled by {@link #analyze()}. */
    private final List<Constant<?>> constantList = new ArrayList<>();
    private final List<Identify> identifyList = new ArrayList<>();
    private final List<String> operatorLists = new ArrayList<>();
    private final List<String> limitLists = new ArrayList<>();
    private final List<String> keywordsLists = new ArrayList<>();
    /** Every token in order of appearance; printed by {@link #printToken()}. */
    private final List<Result> lists = new ArrayList<>();

    /** Creates an analyzer with no text and empty token collections. */
    public LexicalAnalyzer() {
    }

    /**
     * @return the text that is (or will be) analyzed; {@code null} until
     *         {@link #setText(String)} has been called
     */
    public String getText() {
        return mText;
    }

    /**
     * Sets the text to analyze. The cursor and the collected tokens are NOT reset; call
     * {@link #init()} first when reusing an analyzer.
     *
     * @param mText the program text
     */
    public void setText(String mText) {
        this.mText = mText;
    }

    /**
     * Resets the analyzer for another run: empties every token collection (including the
     * ordered token list) and moves the cursor back to the start. The text itself is kept.
     */
    public void init() {
        constantList.clear();
        identifyList.clear();
        operatorLists.clear();
        limitLists.clear();
        keywordsLists.clear();
        // The ordered token list must be cleared too, otherwise tokens of a previous run
        // would show up again in the next printToken().
        lists.clear();
        mCurrentPos = 0;
    }

    /**
     * Reads the program from {@code test.txt}, tokenizes it and prints the tokens.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        LexicalAnalyzer analyzer = new LexicalAnalyzer();
        analyzer.setText(intPut());
        analyzer.analyze();
        analyzer.printToken();
    }

    /**
     * Reads the source program from the file {@code test.txt} in the working directory.
     *
     * <p>Lines are joined with {@code '\n'} so that the last token of one line cannot fuse with
     * the first token of the next line. I/O failures are reported on standard error and
     * whatever was read up to that point is returned.
     *
     * @return the file content, or an empty string when nothing could be read
     */
    private static String intPut() {
        StringBuilder result = new StringBuilder();
        try (BufferedReader br = new BufferedReader(new FileReader(new File("test.txt")))) {
            boolean firstLine = true;
            String line;
            while ((line = br.readLine()) != null) {
                if (!firstLine) {
                    result.append('\n');
                }
                result.append(line);
                firstLine = false;
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return result.toString();
    }

    /** Prints the collected tokens grouped by category (debugging aid). */
    private void print() {
        System.out.print("常量:");
        for (Constant<?> constant : constantList) {
            System.out.print(constant + " ");
        }
        System.out.println();
        System.out.print("标识符:");
        for (Identify identify : identifyList) {
            System.out.print(identify + " ");
        }
        System.out.println();
        System.out.print("关键字:");
        for (String s : keywordsLists) {
            System.out.print(s + " ");
        }
        System.out.println();
        System.out.print("界限符:");
        for (String s : limitLists) {
            System.out.print(s + " ");
        }
        System.out.println();
        System.out.print("操作符:");
        for (String s : operatorLists) {
            System.out.print(s + " ");
        }
    }

    /** Prints the analyzed text followed by one line per token, in order of appearance. */
    public void printToken() {
        System.out.println(mText);
        for (Result res : lists) {
            System.out.println(res);
        }
    }

    /**
     * Tokenizes the current text starting at the current cursor position.
     *
     * <p>Whitespace and any character that is not a letter, digit, delimiter or operator
     * (line breaks included) is skipped silently. Does nothing when no text has been set.
     */
    public void analyze() {
        if (mText == null) {
            return;
        }
        while (mCurrentPos < mText.length()) {
            char currentC = mText.charAt(mCurrentPos);
            if (isSpace(currentC)) {
                // Whitespace only separates tokens.
            } else if (isLetter(currentC)) {
                String identify = getIdentifier();
                if (isKeyWords(identify)) {
                    keywordsLists.add(identify);
                    addToken("关键字", identify);
                } else {
                    identifyList.add(new Identify(identify));
                    addToken("标志符", identify);
                }
            } else if (isDigit(currentC)) {
                Constant<?> con = getConstant();
                constantList.add(con);
                addToken("常量", con.value + "");
            } else if (isLimit(currentC)) {
                limitLists.add(currentC + "");
                addToken("界限符", currentC + "");
            } else if (isOperator(currentC)) {
                operatorLists.add(currentC + "");
                addToken("操作符", currentC + "");
            }
            // The token readers leave the cursor on the token's last character, so one
            // increment moves on to the next unread character in every branch.
            mCurrentPos++;
        }
    }

    /** Appends one token to the ordered token list. */
    private void addToken(String descr, String value) {
        Result res = new Result();
        res.descr = descr;
        res.value = value;
        lists.add(res);
    }

    /** Returns whether the word is one of the reserved keywords. */
    private boolean isKeyWords(String identify) {
        return keywordsSet.contains(identify);
    }

    /**
     * Tests for an ASCII decimal digit.
     *
     * @param c the character to test
     * @return {@code true} for {@code '0'}..{@code '9'}
     */
    public boolean isDigit(char c) {
        return c >= '0' && c <= '9';
    }

    /**
     * Tests for an ASCII letter.
     *
     * @param c the character to test
     * @return {@code true} for {@code 'A'}..{@code 'Z'} and {@code 'a'}..{@code 'z'}
     */
    public boolean isLetter(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    }

    /**
     * Tests for horizontal whitespace.
     *
     * @param c the character to test
     * @return {@code true} for a space or a tab
     */
    public boolean isSpace(char c) {
        return c == ' ' || c == '\t';
    }

    /**
     * Reads the identifier (or keyword) that starts at the cursor: a letter or underscore
     * followed by any number of letters and digits.
     *
     * <p>On return the cursor rests on the last character of the identifier. When the cursor is
     * not on a letter or underscore (or is past the end of the text) nothing is consumed.
     *
     * <p>NOTE(review): an underscore is accepted only as the first character here, and
     * {@link #analyze()} never starts an identifier on an underscore, so {@code a_b} is split
     * at the underscore. Confirm whether underscores are meant to be identifier characters.
     *
     * @return the identifier text, or an empty string when there is none at the cursor
     */
    public String getIdentifier() {
        if (atEnd()) {
            return "";
        }
        char first = mText.charAt(mCurrentPos);
        if (!isLetter(first) && first != '_') {
            return "";
        }
        int end = mCurrentPos + 1;
        // Bounds are checked before every read, so an identifier that runs up to the very
        // end of the text (even a single trailing letter) is handled.
        while (end < mText.length()
                && (isDigit(mText.charAt(end)) || isLetter(mText.charAt(end)))) {
            end++;
        }
        String result = mText.substring(mCurrentPos, end);
        mCurrentPos = end - 1;
        return result;
    }

    /**
     * Reads the run of decimal digits that starts at the cursor.
     *
     * <p>On return the cursor rests on the last digit. {@code len} of the result is the number
     * of digits read (leading zeros included).
     *
     * <p>NOTE(review): the value is accumulated in an {@code int}; literals beyond the
     * {@code int} range wrap around silently.
     *
     * @return the integer constant, or {@code null} when the cursor is not on a digit
     */
    public Constant<Integer> getIntConstant() {
        if (atEnd() || !isDigit(mText.charAt(mCurrentPos))) {
            return null;
        }
        int value = 0;
        int digits = 0;
        int index = mCurrentPos;
        while (index < mText.length() && isDigit(mText.charAt(index))) {
            value = value * 10 + charToInt(mText.charAt(index));
            digits++;
            index++;
        }
        Constant<Integer> result = new Constant<>();
        result.value = value;
        result.len = digits;
        mCurrentPos = index - 1;
        return result;
    }

    /**
     * Reads the numeric constant that starts at the cursor: either an integer
     * ({@code digits}) or a real number ({@code digits '.' digits}).
     *
     * <p>A {@code '.'} belongs to the constant only when a digit follows it directly;
     * otherwise the integer part alone is returned and the dot is left unread. On return the
     * cursor rests on the last character of the constant. For a real number {@code len} is
     * the number of digits on both sides of the dot.
     *
     * @return a {@code Constant<Integer>} or {@code Constant<Double>}, or {@code null} when
     *         the cursor is not on a digit
     */
    public Constant<?> getConstant() {
        int start = mCurrentPos;
        Constant<Integer> intPart = getIntConstant();
        if (intPart == null) {
            return null;
        }
        int dot = mCurrentPos + 1;
        int fractionStart = dot + 1;
        boolean hasFraction = fractionStart < mText.length()
                && mText.charAt(dot) == '.'
                && isDigit(mText.charAt(fractionStart));
        if (!hasFraction) {
            return intPart;
        }
        mCurrentPos = fractionStart;
        Constant<Integer> fraction = getIntConstant();
        Constant<Double> real = new Constant<>();
        // Parse the lexeme itself: this avoids both the int overflow of long digit runs and
        // the rounding error of repeatedly dividing the fraction by ten.
        real.value = Double.parseDouble(mText.substring(start, mCurrentPos + 1));
        real.len = intPart.len + fraction.len;
        return real;
    }

    /** Returns whether there is no character left to read at the cursor. */
    private boolean atEnd() {
        return mText == null || mCurrentPos >= mText.length();
    }

    /**
     * Converts a digit character to its numeric value.
     *
     * @param c a character in {@code '0'}..{@code '9'}
     * @return the offset of {@code c} from {@code '0'}
     */
    public int charToInt(char c) {
        return c - '0';
    }

    /**
     * Tests for an operator character.
     *
     * @param c the character to test
     * @return {@code true} for one of {@code + - * / =}
     */
    public boolean isOperator(char c) {
        return operatorSet.contains(c);
    }

    /**
     * Tests for a delimiter character.
     *
     * @param c the character to test
     * @return {@code true} for one of {@code , ; ( ) { }}
     */
    public boolean isLimit(char c) {
        return limitSet.contains(c);
    }

    /**
     * A numeric constant.
     *
     * @param <T> the boxed numeric type of the value
     */
    class Constant<T> {
        /** The numeric value. */
        T value;
        /** Number of digits the constant was written with. */
        int len;

        public Constant() {
            len = 0;
        }

        @Override
        public String toString() {
            return "Constant [value=" + value + ", len=" + len + "]";
        }
    }

    /** An identifier found in the text. */
    class Identify {
        /** The identifier as written in the text. */
        String name;
        /** Never assigned in this class. */
        String id;

        public Identify(String identifier) {
            this.name = identifier;
        }

        @Override
        public String toString() {
            return "Identify [name=" + name + ", id=" + id + "]";
        }
    }

    /** One token: its category label and its text. */
    class Result {
        /** Category label of the token. */
        String descr;
        /** Text of the token. */
        String value;

        @Override
        public String toString() {
            return "Result [descr=" + descr + ", value=" + value + "]";
        }
    }
}