import java.util.ArrayList;
import java.util.List;
/**
 * Hand-written lexer that converts source text into a flat list of {@link Token}s.
 *
 * <p>All scanning state lives in public static fields, so only one piece of source can be
 * lexed at a time and concurrent calls interfere with each other.
 */
public class Lexer {
    /** Tokens produced so far; replaced by a fresh list on every {@link #analyzeFromCode} call. */
    public static List<Token> lexResult;
    /** Zero-based cursor into {@link #currLine}: position of the next unread character. */
    public static int Index;
    /** The line currently being scanned (carriage returns already removed by analyzeFromCode). */
    public static String currLine;

    /**
     * Peeks at the character under the cursor without consuming it.
     *
     * @return the character at {@link #Index} in {@link #currLine}, or {@code null} when the
     *         cursor is outside the line or no line has been set
     */
    public static Character ReadChar() {
        if (currLine != null && Index >= 0 && Index < currLine.length()) {
            return currLine.charAt(Index);
        }
        return null;
    }

    /**
     * Lexes a complete piece of source code, line by line.
     *
     * @param code the source text; lines are separated by {@code \n}, {@code \r} is discarded
     * @return the token list (also stored in {@link #lexResult}), always terminated by an
     *         {@code EOF} token
     * @throws NullPointerException if {@code code} is {@code null}
     * @throws IllegalArgumentException if a malformed number literal is encountered
     */
    public static List<Token> analyzeFromCode(String code) {
        if (code == null) {
            throw new NullPointerException("code");
        }
        lexResult = new ArrayList<>();
        for (String line : code.split("\\n")) {
            currLine = line.replace("\r", "");
            // Every line is scanned from its first character; without this reset the cursor
            // left behind by the previous line would skip or hide the new line's content.
            Index = 0;
            analyzeFromLine();
        }
        lexResult.add(new Token(TokenType.EOF));
        return lexResult;
    }

    /**
     * Scans {@link #currLine} from the current {@link #Index} to the end of the line and appends
     * the recognised tokens to {@link #lexResult}. Whitespace is skipped.
     *
     * @throws IllegalArgumentException if a malformed number literal is encountered
     */
    public static void analyzeFromLine() {
        Character current;
        while ((current = ReadChar()) != null) {
            char c = current;
            if (Character.isWhitespace(c)) {
                Index++;
                continue;
            }
            if (c >= '0' && c <= '9') {
                extractNumberToken();
                continue;
            }
            if (isWordStart(c)) {
                extractWordToken();
                continue;
            }

            // Everything below is at least a one-character token, so consume c up front;
            // two-character operators consume their second character via consumeIf.
            Index++;
            switch (c) {
                case '+':
                    lexResult.add(new Token(TokenType.PLUS));
                    break;
                case '-':
                    lexResult.add(new Token(TokenType.MINUS));
                    break;
                case '*':
                    lexResult.add(new Token(TokenType.MUL));
                    break;
                case '/':
                    lexResult.add(new Token(TokenType.DIV));
                    break;
                case '%':
                    lexResult.add(new Token(TokenType.MODULAR));
                    break;
                case '&':
                    lexResult.add(new Token(TokenType.AND));
                    break;
                case '|':
                    lexResult.add(new Token(TokenType.OR));
                    break;
                case '>':
                    lexResult.add(new Token(TokenType.COMP, consumeIf('=') ? ">=" : ">"));
                    break;
                case '<':
                    lexResult.add(new Token(TokenType.COMP, consumeIf('=') ? "<=" : "<"));
                    break;
                case '!':
                    if (consumeIf('=')) {
                        lexResult.add(new Token(TokenType.COMP, "!="));
                    } else {
                        lexResult.add(new Token(TokenType.NOT));
                    }
                    break;
                case '(':
                    lexResult.add(new Token(TokenType.LPAREN));
                    break;
                case ')':
                    lexResult.add(new Token(TokenType.RPAREN));
                    break;
                case '{':
                    lexResult.add(new Token(TokenType.LCB));
                    break;
                case '}':
                    lexResult.add(new Token(TokenType.RCB));
                    break;
                case '=':
                    if (consumeIf('=')) {
                        // "==" is emitted as a COMP token without text, exactly as before.
                        // NOTE(review): presumably consumers treat a text-less COMP as
                        // equality -- confirm against the parser.
                        lexResult.add(new Token(TokenType.COMP));
                    } else {
                        lexResult.add(new Token(TokenType.ASSIGN));
                    }
                    break;
                case '#':
                    lexResult.add(new Token(TokenType.DF));
                    break;
                default:
                    // Same fallback as the original final branch: an unrecognised character
                    // becomes a one-character ID token.
                    lexResult.add(new Token(TokenType.ID, String.valueOf(c)));
                    break;
            }
        }
    }

    /**
     * Reads a number literal (digits with at most one {@code '.'}) starting at the cursor and
     * appends an {@code INT} token, or a {@code FLOAT} token when a decimal point was seen.
     * The cursor is left on the first character after the literal.
     *
     * @throws IllegalArgumentException if the literal contains a second decimal point or is
     *         immediately followed by an ASCII letter
     */
    public static void extractNumberToken() {
        StringBuilder literal = new StringBuilder();
        boolean seenDot = false;
        Character next = ReadChar();
        while (next != null && ((next >= '0' && next <= '9') || next == '.')) {
            char c = next;
            if (c == '.') {
                if (seenDot) {
                    error("重复的小数点!");
                }
                seenDot = true;
            }
            literal.append(c);
            Index++;
            next = ReadChar();
        }
        if (next != null && ((next >= 'a' && next <= 'z') || (next >= 'A' && next <= 'Z'))) {
            error("非法的字符!");
        }
        TokenType type = seenDot ? TokenType.FLOAT : TokenType.INT;
        lexResult.add(new Token(type, literal.toString()));
    }

    /** Consumes the character under the cursor if it equals {@code expected}; reports whether it did. */
    private static boolean consumeIf(char expected) {
        Character next = ReadChar();
        if (next != null && next == expected) {
            Index++;
            return true;
        }
        return false;
    }

    /** Returns whether {@code c} may begin a keyword or identifier (a letter or underscore). */
    private static boolean isWordStart(char c) {
        return Character.isLetter(c) || c == '_';
    }

    /** Returns whether {@code c} may continue a keyword or identifier (letter, digit or underscore). */
    private static boolean isWordPart(char c) {
        return Character.isLetterOrDigit(c) || c == '_';
    }

    /** Reads the whole word at the cursor and appends the matching keyword, boolean or ID token. */
    private static void extractWordToken() {
        int start = Index;
        Character next = ReadChar();
        while (next != null && isWordPart(next)) {
            Index++;
            next = ReadChar();
        }
        String word = currLine.substring(start, Index);
        switch (word) {
            case "int":
            case "float":
            case "string":
            case "bool":
                lexResult.add(new Token(TokenType.KW));
                break;
            case "true":
            case "false":
                lexResult.add(new Token(TokenType.BOOLV, word));
                break;
            case "print":
                lexResult.add(new Token(TokenType.PRINT));
                break;
            case "fun":
                lexResult.add(new Token(TokenType.FUN));
                break;
            case "if":
                lexResult.add(new Token(TokenType.IF));
                break;
            case "else":
                lexResult.add(new Token(TokenType.ELSE));
                break;
            case "while":
                lexResult.add(new Token(TokenType.WHILE));
                break;
            case "break":
                lexResult.add(new Token(TokenType.BREAK));
                break;
            case "continue":
                lexResult.add(new Token(TokenType.CONTINUE));
                break;
            case "return":
                lexResult.add(new Token(TokenType.RETURN));
                break;
            case "back":
                lexResult.add(new Token(TokenType.BACK));
                break;
            case "readInt":
                lexResult.add(new Token(TokenType.READINT));
                break;
            case "readFloat":
                lexResult.add(new Token(TokenType.READFLOAT));
                break;
            case "readString":
                lexResult.add(new Token(TokenType.READSTRING));
                break;
            case "readBool":
                lexResult.add(new Token(TokenType.READBOOL));
                break;
            default:
                lexResult.add(new Token(TokenType.ID, word));
                break;
        }
    }

    /** Aborts lexing by throwing, adding the cursor column and the offending line to the message. */
    private static void error(String message) {
        throw new IllegalArgumentException(
                message + " (column " + (Index + 1) + ": \"" + currLine + "\")");
    }
}
// Topic: compiler principles -- lexical analysis.
// (Scraped page footer: "latest recommended article published 2024-10-09 11:13:12".)