整体思路:
为了将文件读取和内容处理分开,先将整个文件读取为字符串,然后对该字符串进行解析。各种保留字和符号各自单独为一个类别,其值为本身;标识符、整型数字、浮点型数字各为一个类别,其值为具体值。
先创建TOK类 ,设置词的类别,共32个类别。
package CMMLex;
/**
* Created by think on 2017/10/11.
*/
/**
 * Token-type constants for the CMM lexer.
 *
 * <p>Each token category is identified by an {@code int} code in the range
 * {@code 0 .. TOK_NUM - 1}. The code doubles as the index into
 * {@link #GET_STRS} (symbolic name) and {@link #GET_LOCAL_STRS} (source
 * spelling or a generic description), so the order of both arrays must match
 * the numeric values of the constants below.
 */
public class TOK {
    // ---- Keywords ----
    /** Keyword {@code if}. */
    public static final int IF = 0;
    /** Keyword {@code else}. */
    public static final int ELSE = 1;
    /** Keyword {@code while}. */
    public static final int WHILE = 2;
    /** Keyword {@code read}. */
    public static final int READ = 3;
    /** Keyword {@code write}. */
    public static final int WRITE = 4;
    /** Keyword {@code int}. */
    public static final int INT = 5;
    /** Keyword {@code double}. */
    public static final int DOUBLE = 6;
    /** Keyword {@code true}. */
    public static final int TRUE = 7;
    /** Keyword {@code false}. */
    public static final int FALSE = 8;

    // ---- Operators ----
    /** Operator {@code +}. */
    public static final int PLUS = 9;
    /** Operator {@code -}. */
    public static final int MINUS = 10;
    /** Operator {@code *}. */
    public static final int MUL = 11;
    /** Operator {@code /}. */
    public static final int DIV = 12;
    /** Operator {@code =}. */
    public static final int ASSIGN = 13;
    /** Operator {@code <}. */
    public static final int LT = 14;
    /** Operator {@code <=}. */
    public static final int LQT = 15;
    /** Operator {@code >}. */
    public static final int GT = 16;
    /** Operator {@code >=}. */
    public static final int GQT = 17;
    /** Operator {@code ==}. */
    public static final int EQ = 18;
    /** Operator {@code !=}. */
    public static final int NEQ = 19;

    // ---- Delimiters ----
    /** Delimiter {@code (}. */
    public static final int LPARENT = 20;
    /** Delimiter {@code )}. */
    public static final int RPARENT = 21;
    /** Delimiter {@code ;}. */
    public static final int SEMI = 22;
    /** Delimiter {@code ,}. */
    public static final int COMMA = 23;
    /** Delimiter <code>{</code>. */
    public static final int LBRACE = 24;
    /** Delimiter <code>}</code>. */
    public static final int RBRACE = 25;
    /** Delimiter {@code [}. */
    public static final int LBRACKET = 26;
    /** Delimiter {@code ]}. */
    public static final int RBRACKET = 27;

    // ---- Value-carrying tokens ----
    /** Identifier. */
    public static final int ID = 28;
    /** Integer literal. */
    public static final int LITERAL_INT = 29;
    /** Double (real) literal. */
    public static final int LITERAL_DOUBLE = 30;
    /** End of file. */
    public static final int EOF = 31;

    /** Number of token types; valid type codes are {@code 0 .. TOK_NUM - 1}. */
    public static final int TOK_NUM = 32;

    /** Symbolic name of each token type, indexed by type code. */
    public static final String[] GET_STRS = {
        "IF", "ELSE", "WHILE", "READ", "WRITE", "INT",
        "DOUBLE", "TRUE", "FALSE", "PLUS", "MINUS", "MUL", "DIV", "ASSIGN", "LT", "LQT",
        "GT", "GQT", "EQ", "NEQ", "LPARENT", "RPARENT", "SEMI", "COMMA", "LBRACE",
        "RBRACE", "LBRACKET", "RBRACKET", "ID", "LITERAL_INT", "LITERAL_DOUBLE", "EOF"};

    /** Source spelling (or generic description) of each token type, indexed by type code. */
    public static final String[] GET_LOCAL_STRS = {
        "if", "else", "while", "read", "write", "int",
        "double", "true", "false", "+", "-", "*", "/", "=", "<", "<=", ">", ">=", "==",
        "!=", "(", ")", ";", ",", "{", "}", "[", "]", "identifier", "integer", "double",
        "end_of_file"};

    /**
     * Returns the symbolic name of a token type.
     *
     * @param type token type code
     * @return the entry of {@link #GET_STRS} for {@code type}, or
     *         {@code "undefine"} when {@code type} is outside {@code 0 .. TOK_NUM - 1}
     */
    public static String getTokTypeStr(int type) {
        // ">=" rather than ">": TOK_NUM itself is one past the last valid index.
        if (type < 0 || type >= TOK_NUM) {
            return "undefine";
        }
        return GET_STRS[type];
    }

    /**
     * Returns the source spelling (or generic description) of a token type.
     *
     * @param type token type code
     * @return the entry of {@link #GET_LOCAL_STRS} for {@code type}, or
     *         {@code "undefine"} when {@code type} is outside {@code 0 .. TOK_NUM - 1}
     */
    public static String getTokTypeLocalStr(int type) {
        // ">=" rather than ">": TOK_NUM itself is one past the last valid index.
        if (type < 0 || type >= TOK_NUM) {
            return "undefine";
        }
        return GET_LOCAL_STRS[type];
    }
}
创建Token类,具有类别、值、行号、开始列号和结束列号等属性,并在其中声明全部32个内置的Token对象。
package CMMLex;
/**
* Created by think on 2017/10/12.
*/
public class Token {
/* if */
public static final Token IF = new Token(TOK.IF);
/* else */
public static final Token ELSE = new Token(TOK.ELSE);
/* while */