编译原理——实现简单json解析器

最新推荐文章于 2024-08-08 07:18:18 发布

不要清汤锅

最新推荐文章于 2024-08-08 07:18:18 发布

阅读量1.3k

点赞数

分类专栏：编译原理

本文链接：https://blog.csdn.net/hello_program_world/article/details/80633267

版权

本文详细介绍了编译原理在实现简单JSON解析器中的应用，包括词法分析阶段，如Tok和Token类的设计，词法错误识别；语法分析阶段，通过LR(1)文法构造LALR(1)分析表；文件读取和写入功能的实现；以及如何按路径获取JSON元素值的方法。

摘要由CSDN通过智能技术生成

一、词法分析阶段

1、创建TOK类，记录Token的类型，并提供把每个类型转换为相应字符串值的方法。

package Lex;

/**
 * Created by think on 2017/6/8.
 */
public class TOK {
    /*
     * Token type codes used by the lexer, plus lookup tables that turn a code
     * into either its symbolic name or the literal text it stands for.
     * Each code doubles as an index into CAST_STRS and CAST_LOCAL_STRS.
     */

    // Token type codes
    public static final int OBJB = 0;
    public static final int OBJE = 1;
    public static final int ARRB = 2;
    public static final int ARRE = 3;
    public static final int DESC = 4;
    public static final int SPLIT = 5;
    public static final int STR = 6;
    public static final int NUM = 7;
    public static final int TRUE = 8;
    public static final int FALSE = 9;

    public static final int NULL = 10;
    public static final int BGN = 11;
    public static final int EOF = 12;


    // Number of token types; valid codes are 0 .. TOK_NUM - 1
    public static final int TOK_NUM = 13;

    // Symbolic name of each token type, indexed by type code
    public static final String[] CAST_STRS = { "OBJB","OBJE", "ARRB","ARRE","DESC", "SPLIT",
            "STR", "NUM","TRUE", "FALSE",  "NULL","BGN","EOF" };


    // Literal text of each token type, indexed by type code
    public static final String[] CAST_LOCAL_STRS = { "{", "}", "[", "]",
            ":", ",", "s", "n", "true","false",  "null", "开始", "结束" };

    /**
     * Returns the symbolic name of a token type.
     *
     * @param type a token type code
     * @return the entry of CAST_STRS for that code, or "undefine" when the
     *         code is outside 0 .. TOK_NUM - 1
     */
    public static String castTokTypeStr(int type) {
        // ">=" rather than ">": TOK_NUM itself is one past the last valid index.
        if (type < 0 || type >= TOK_NUM) {
            return "undefine";
        }
        return CAST_STRS[type];
    }

    /**
     * Returns the literal text of a token type.
     *
     * @param type a token type code
     * @return the entry of CAST_LOCAL_STRS for that code, or "undefine" when
     *         the code is outside 0 .. TOK_NUM - 1
     */
    public static String castTokTypeLocalStr(int type) {
        // ">=" rather than ">": TOK_NUM itself is one past the last valid index.
        if (type < 0 || type >= TOK_NUM) {
            return "undefine";
        }
        return CAST_LOCAL_STRS[type];
    }
}

2、创建Token类，作为词法分析的结果，包含type属性和value属性。其中只有NUM类型和STR类型有真正的value值，其余类型只有type。

package Lex;

/**
 * Created by think on 2017/6/8.
 */
public class Token {
    /*
     * One lexical token: a type code from TOK, an optional text value, and the
     * source position (line, begin column, end column) recorded by whoever
     * created it. Only STR and NUM tokens are rendered with their value by
     * toString()/toLocalString(); other types are rendered from the TOK tables.
     */

    // Shared instances for the value-less token types. They are ordinary
    // mutable objects: calling a setter on one changes it for every holder.
    public static final Token DESC = new Token(TOK.DESC);
    public static final Token SPLIT = new Token(TOK.SPLIT);
    public static final Token ARRB= new Token(TOK.ARRB);
    public static final Token OBJB = new Token(TOK.OBJB);
    public static final Token ARRE = new Token(TOK.ARRE);
    public static final Token OBJE = new Token(TOK.OBJE);
    public static final Token FALSE = new Token(TOK.FALSE);
    public static final Token TRUE = new Token(TOK.TRUE);
    public static final Token NULL = new Token(TOK.NULL);
    public static final Token BGN = new Token(TOK.BGN);
    public static final Token EOF = new Token(TOK.EOF);

    // Type code, one of the constants defined in TOK
    private int type;
    // Text value of the token (null when created without one)
    private String value;
    // Line number
    private int lineNum;
    // Begin column
    private int beginCol;
    // End column
    private int endCol;

    /**
     * Creates a token of the given type with no value.
     *
     * @param type a type code from TOK
     */
    public Token(int type) {
        this(type, null);
    }

    /**
     * Creates a token of the given type carrying the given text.
     *
     * @param type  a type code from TOK
     * @param value the token text
     */
    public Token(int type, String value) {
        this.type = type;
        this.value = value;
    }

    public int getType() {
        return type;
    }
    public void setType(int type) {
        this.type = type;
    }
    public String getValue() {
        return value;
    }
    public void setValue(String value) {
        this.value = value;
    }
    public int getLineNum(){
        return lineNum;
    }
    public void setLineNum(int lineNum) {
        this.lineNum = lineNum;
    }
    public int getBeginCol() {
        return beginCol;
    }
    public int getEndCol() {
        return endCol;
    }
    public void setEndCol(int endCol){
        this.endCol=endCol;
    }
    public void setBeginCol(int beginCol) {
        this.beginCol = beginCol;
    }

    /**
     * Converts this token into a Java value.
     *
     * @return Boolean for TRUE/FALSE; for NUM a Double when the text has a
     *         fraction or exponent, otherwise an Integer (or a Long when the
     *         integer does not fit in an int); for STR the value wrapped in
     *         double quotes; null for NULL and every other type
     * @throws NumberFormatException if a NUM token's text is not a parsable number
     */
    public Object getRealValue(){
        switch (type) {
            case TOK.TRUE:
                return Boolean.TRUE;
            case TOK.FALSE:
                return Boolean.FALSE;
            case TOK.NUM:
                return parseNumber(value);
            case TOK.STR:
                return "\"" + value + "\"";
            default:
                // TOK.NULL and all structural tokens have no Java value.
                return null;
        }
    }

    // Parses number text: fraction/exponent forms become Double, integers
    // become Integer when they fit and Long otherwise.
    private static Object parseNumber(String text) {
        boolean floating = text.indexOf('.') >= 0
                || text.indexOf('e') >= 0
                || text.indexOf('E') >= 0;
        if (floating) {
            return Double.parseDouble(text);
        }
        long parsed = Long.parseLong(text);
        if (parsed >= Integer.MIN_VALUE && parsed <= Integer.MAX_VALUE) {
            return (int) parsed;
        }
        return parsed;
    }

    // True for the two token types that are rendered with their text value.
    private boolean carriesValue() {
        return type == TOK.STR || type == TOK.NUM;
    }

    /**
     * Renders the token in square brackets: "[TYPE]" for value-less types and
     * "[TYPE:value]" for STR and NUM.
     */
    @Override
    public String toString() {
        String typeName = TOK.castTokTypeStr(type);
        if (carriesValue()) {
            return "[" + typeName + ":" + value + "]";
        }
        return "[" + typeName + "]";
    }

    /**
     * Renders the token as the text it stands for: the quoted value for STR,
     * the raw value for NUM, and the TOK literal for every other type.
     */
    public String toLocalString() {
        if (type == TOK.STR) {
            return "\"" + value + "\"";
        }
        if (type == TOK.NUM) {
            return value;
        }
        return TOK.castTokTypeLocalStr(type);
    }
}

3、创建JsonLex类进行词法分析。其中next()用于获取下一个Token，nextChar()用于获取下一个字符，revertChar()用于回退一个字符，其余方法负责具体的词法解析；getWriteStr()用于把待解析的JSON字符串按照JSON文件的标准格式输出。

package Lex;
import MyException.UnexpectedException;

/**
 * Created by think on 2017/6/8.
 */
public class JsonLex {
    // 当前行号
    private int lineNum = 0;
    //每个Token的开始位置
    private int startCol = 0;
    // 当前字符游标
    private int cur = -1;
    // 保存当前要解析的字符串
    private String str = null;
    // 保存当前要解析的字符串的长度
    private int len = 0;
    //private int startLine=0;
    //private int lastEndCol;


    public JsonLex(String str) {
        if (str == null)
            throw new NullPointerException("词法解析构造函数不能传递null");
        this.str = str;
        this.len = str.length();
       //this.lastEndCol=0;
        this.startCol = 0;
        this.cur = -1;
        this.lineNum = 0;
    }

    public boolean isLetterUnderline(char c) {
        return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_');
    }

    public boolean isNumLetterUnderline(char c) {
        return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
                || (c >= '0' && c <= '9') || c == '_');
    }
    public boolean isNum(char c) {
        return (c >= '0' && c <= '9');
    }

    public boolean isPunctuation(char c){
        return (c==','||c=='.'||c=='-'||c=='('||c==')');
    }

    public static boolean isSpace(char c) {
        return (c == ' ' || c == '\t' || c == '\n'||c=='\r');
    }

    private void checkEnd() throws UnexpectedException {
        if (cur >= len - 1) {
            throw generateUnexpectedException("未预期的结束，字符串未结束");
        }
    }

    public UnexpectedException generateUnexpectedException(String str) {
        return new UnexpectedException(lineNum,startCol, str);
    }
   //获取下一个字符
    private char nextChar() {
        if (cur >= len - 1) {
            return 0;
        }
        cur++;
        startCol++;
        char c = str.charAt(cur);
        if (c == '\n'||c=='\r') {
            if(str.charAt(cur+1)!='\n'&&str.charAt(cur+1)!='\r'){
            lineNum++;
            //lastEndCol=startCol;
            startCol = 0;
            }
        }
        return c;
    }

   //回退一个字符
    private int revertChar() {
        if (cur <= 0) {
            return 0;
        }
        int rcur = cur--;
        char c = str.charAt(rcur);
        if (c == '\n') {
            lineNum--;
            //startCol=lastEndCol;
        }
        return rcur;
    }

    //解析获取字符串的值
    private String getStrValue(char s) throws UnexpectedException {
        int start = cur;
        char c;
        c=nextChar();
        while (c!=0) {
            if (isNumLetterUnderline(c)||isSpace(c)||isPunctuation(c)) {
                c = nextChar();
            } else if (s == c) {
                return str.substring(start + 1, cur);
            } else {
                throw generateUnexpectedException("字符串，需要\"结尾");

            }
        }

        return null;
    }
    //解析获取数字的值
    private String getNumValue() throws UnexpectedException {
        int start = cur;
        boolean hasPoint=false;
        boolean hasE=false;
        char c;
        while ((c=nextChar())!=0) {
            if(isNum(c)){}
            else if(c=='.'){
              if(hasPoint){
                  throw generateUnexpectedException("数字错误，多余的.");
              }else {
                  hasPoint=true;
              }
          }else if(c=='e'){
              if (hasE){
                  throw generateUnexpectedException("数字错误，多余的e");
              }else{
                  hasE=true;
              }
          }else if (isLetterUnderline(c)||c==' '){
                throw generateUnexpectedException("数字错误，未知标识符");
            }else {
                return str.substring(start, revertChar());
            }


        }
        checkEnd();
        return null;
    }
    //解析获取false true，null类型的Token
    private Token getDefToken() throws UnexpectedException {
        int start = cur;
        char c;
        while ((c = nextChar()) != 0) {
            if(cur==1245){
                System.out.print(1546);
            }
            if (!isNumLetterUnderline(c)) {
                String value = str.substring(start, revertChar());
                if ("true".equals(value)) {
                    return Token.TRUE;
                } else if ("false".equals(value)) {
                    return Token.FALSE;
                } else if ("null".equals(value)) {
                    return Token.NULL;
                } else {
                   throw generateUnexpectedException("错误标识符");
                }
            }
        }
        checkEnd();
        return null;
    }
    //解析获取标点符号的Token
    public Token parseSymbol(char c) {
        switch (c) {
            case '[':
                return Token.ARRB;
            case ']':
                return Token.ARRE;
            case '{':
                return Token.OBJB;
            case '}':
                return Token.OBJE;
            case ',':
                return Token.SPLIT;
            case ':':
                return Token.DESC;
        }
        return null;
    }

    //获取下一个Token
    public Token next() throws UnexpectedException {
        if (lineNum == 0) {
            lineNum = 1;
            return Token.BGN;
        }

        char c;
        while ((c = nextChar()) != 0) {
            if (c == '\"') {
                Token tempToken = new Token(TOK.STR);
                tempToken.setLineNum(lineNum);
                tempToken.setBeginCol(startCol);
                tempToken.setValue(getStrValue(c));
                tempToken.setEndCol(startCol);
                return tempToken;
            } else if (isLetterUnderline(c)) {
                Token tempToken= getDefToken();
                tempToken.setLineNum(lineNum);
                tempToken.setBeginCol(startCol-TOK.castTokTypeStr(tempToken.getType()).length());
                tempToken.setEndCol(startCol);
                return tempToken;
            } else if (isNum(c) || c == '-') {
                Token tempToken = new Token(TOK.NUM);
                tempToken.setLineNum(lineNum);
                tempToken.setBeginCol(startCol);
                tempToken.setValue(getNumValue());
                tempToken.setEndCol(startCol);
                return tempToken;
            } else if (isSpace(c)) {
                continue;
            } else {
                Token tempToken = parseSymbol(c);
                tempToken.setLineNum(lineNum);
                tempToken.setBeginCol(startCol);
                tempToken.setEndCol(startCol);
                return tempToken;
            }
        }
        if (c == 0) {
            return Token.EOF;
        }
        return null;
    }
    //获取待解析json字符串的标准格式
    public String getWriteStr() throws UnexpectedException {
        Token tk;
        String str = "";
        int goTimes = 0;
        this.next();
        while ((tk = this.next()) != Token.EOF) {
            if (tk.toLocalString() == "{" || tk.toLocalString() == "[") {
                str = str + tk.toLocalString();
                goTimes++;
                str = str + "\r" + "\n";
                for (int i = 0; i < goTimes * 4; i++) {
                    str = str + " ";
                }

            } else if (tk.toLocalString() == ",") {
                str = str + tk.toLocalString();
                str = str + "\r" + "\n";
                for (int i = 0; i < goTimes * 4; i++) {
                    str = str + " ";
                }
            } else if (tk.toLocalString() == "}" || tk.toLocalString() == "]") {
                goTimes--;
                str = str + "\r" + "\n";
                for (int i = 0; i < goTimes * 4; i++) {
                    str = str + " ";
                }
                str = str + tk.toLocalString();
            } else {
                str = str + tk.toLocalString();
            }
        }
        return str;
    }

}

4、创建UnexpectedException进行错误抛出。

package MyException;

/**
 * Created by think on 2017/6/8.
 */
public class UnexpectedException extends Exception {

    private Integer lineNum = null;

    private Integer colNum = null;

    private String desc = null;

    private Throwable cause = null;

    public UnexpectedException() {
        super();
    }

    public UnexpectedException(Integer lineNum, Integer colNum, String message) {

        this.colNum = colNum;
        this.lineNum = lineNum;
        this.desc = message;
    }

    public String getMessage()