模板引擎-语法解析-有限状态自动机

在设计MeteorTL(http://www.meteortl.org)模板引擎时,语法树的解析用到了有限状态自动机,这里简单介绍一下。
由于将归约算法滞后单独处理(便于测试),这里只实现有限状态自动机,实现序列化分割。
如果后期实现XSLT解析,可能会改为下推自动机实现。

主控程序:


package org.meteortl.core.engine.parser.automata;

import org.meteortl.core.util.Assert;

/**
*
* Deterministic Finite state Automata(DFA)实现
*
* @author liangfei0201@163.com
*
*/
public class StateAutomata {

// 状态图
private StateMap stateMap;

// 输入字符类型识别器
private TypeResolver typeResolver;

public StateAutomata(StateMap stateMap, TypeResolver typeResolver) {
this.stateMap = stateMap;
this.typeResolver = typeResolver;
}

public void scan(CharProvider charProvider, TokenReceiver tokenReceiver) {
try {

int state = StateMap.BEGIN; // 当前状态
int row = 0; // 所解释的char所在行
int column = 0; // 所解释的char所在列

StringBuffer buffer = new StringBuffer(); // 缓存
int bufferRow = 0; // 缓存开始char所在行
int bufferColumn = 0; // 缓存开始char所在列

while (charProvider.hasNextChar()) {
char ch = charProvider.nextChar(); // 字符输入带
if (ch == '\n') { // 记录位置
row ++;
column = 0;
} else {
column ++;
}
if (buffer.length() == 0) { // 记录缓存位置
bufferRow = row;
bufferColumn = column;
}
buffer.append(ch); // 将内容加入缓存
int type = typeResolver.getType(ch, buffer.toString()); // 获取字符类型
state = stateMap.getNextState(state, type); // 从状态机图中取下一状态
Assert.assertFalse(state == StateMap.ERROR, "表达式串语法错误, 字符:" + ch + " 位置:(" + row + "," + column + ")");
if (state == StateMap.END) { // 正常结束
tokenReceiver.receive(new Token(buffer.toString(), bufferRow, bufferColumn, row, column)); // 接收缓存中的内容
buffer.setLength(0); // 清空缓存
state = StateMap.BEGIN; // 回归到初始状态
} else if (state == StateMap.BREAK) { // 中断型结束
if (buffer.length() > 1) {
// 接收缓存中的内容,不包含最后一个字符
tokenReceiver.receive(new Token(buffer.toString().substring(0, buffer.length() - 1), bufferRow, bufferColumn, row, column));
}
buffer.setLength(0); // 清空缓存
buffer.append(ch); // 保留最后一个字符
state = stateMap.getNextState(StateMap.BEGIN, type); // 回归到初始状态,并立即开始
}
}
if (buffer.length() > 0) {
tokenReceiver.receive(new Token(buffer.toString(), bufferRow, bufferColumn, row, column)); // 接收最后缓存中的内容
}
} catch (Exception e) {
e.printStackTrace();
}
}

}


辅助SPI接口及类:


package org.meteortl.core.engine.parser.automata;

/**
 * State transition map used by the automaton.
 *
 * Besides the transition function it defines the reserved state values.
 *
 * @author liangfei0201@163.com
 */
public interface StateMap {

    /** Start state. */
    int BEGIN = 0;

    /** End state; the last character belongs to the token. */
    int END = -1;

    /** Break state; the last character does not belong to the token. */
    int BREAK = -2;

    /** Error state. */
    int ERROR = -3;

    /**
     * Looks up the next state.
     *
     * @param state current state
     * @param type  type of the input character
     * @return the next state
     */
    int getNextState(int state, int type);

}



package org.meteortl.core.engine.parser.automata;

/**
 * Supplier of the input character tape (iterator pattern).
 *
 * @author liangfei0201@163.com
 */
public interface CharProvider {

    /**
     * Tells whether another character is available.
     *
     * @return true if there is a next character
     */
    boolean hasNextChar();

    /**
     * Fetches the next character.
     *
     * @return the next character
     */
    char nextChar();

}



package org.meteortl.core.engine.parser.automata;

/**
 * An output fragment: a piece of text plus the positions of its first and
 * last character.
 *
 * All data fields are final, so a token cannot change after construction.
 * The only mutable field is the lazily built toString() cache; it is written
 * without synchronization, so concurrent callers may each build the (equal)
 * string once.
 *
 * @author liangfei0201@163.com
 */
public final class Token {

    private final String message;

    private final int beginRow;

    private final int beginColumn;

    private final int endRow;

    private final int endColumn;

    // Lazily built result of toString(); null until first requested.
    private String cachedToString;

    /**
     * @param message     fragment text
     * @param beginRow    row where the fragment begins
     * @param beginColumn column where the fragment begins
     * @param endRow      row where the fragment ends
     * @param endColumn   column where the fragment ends
     */
    public Token(String message, int beginRow, int beginColumn, int endRow, int endColumn) {
        this.message = message;
        this.beginRow = beginRow;
        this.beginColumn = beginColumn;
        this.endRow = endRow;
        this.endColumn = endColumn;
    }

    /**
     * @return row where the fragment begins
     */
    public int getBeginRow() {
        return beginRow;
    }

    /**
     * @return column where the fragment begins
     */
    public int getBeginColumn() {
        return beginColumn;
    }

    /**
     * @return row where the fragment ends
     */
    public int getEndRow() {
        return endRow;
    }

    /**
     * @return column where the fragment ends
     */
    public int getEndColumn() {
        return endColumn;
    }

    /**
     * @return the fragment text
     */
    public String getMessage() {
        return message;
    }

    /**
     * Formats the token as {@code message(beginRow,beginColumn-endRow,endColumn)}.
     *
     * @return the formatted token
     */
    public String toString() {
        String text = cachedToString;
        if (text == null) {
            text = message + "(" + beginRow + "," + beginColumn + "-" + endRow + "," + endColumn + ")";
            cachedToString = text;
        }
        return text;
    }

}



package org.meteortl.core.engine.parser.automata;

/**
 * Receiver of the fragments produced while scanning.
 *
 * @author liangfei0201@163.com
 */
public interface TokenReceiver {

    /**
     * Accepts one fragment.
     *
     * @param token the fragment
     */
    void receive(Token token);

}



package org.meteortl.core.engine.parser.automata;

/**
 * Determines the type of an input character.
 *
 * @author liangfei0201@163.com
 */
public interface TypeResolver {

    /**
     * Classifies an input character.
     *
     * @param ch     the input character
     * @param buffer the current content of the buffer
     * @return the type of the character
     */
    int getType(char ch, String buffer);

}


常用实现:


package org.meteortl.core.engine.parser.automata;

/**
 * State map backed by a two-dimensional array, indexed first by state and
 * then by character type.
 *
 * The array passed to the constructor is stored as-is, not copied.
 *
 * @author liangfei0201@163.com
 */
public class ArrayStateMap implements StateMap {

    private final int[][] transitions;

    /**
     * @param states transition table, read as {@code states[state][type]}
     */
    public ArrayStateMap(int[][] states) {
        transitions = states;
    }

    /**
     * @param state current state, used as the row index
     * @param type  character type, used as the column index
     * @return the table entry for the given state and type
     */
    public int getNextState(int state, int type) {
        int[] row = transitions[state];
        return row[type];
    }

}



package org.meteortl.core.engine.parser.automata;

import java.util.NoSuchElementException;

/**
 * Character tape that reads from a string.
 *
 * @author liangfei0201@163.com
 */
public class StringCharProvider implements CharProvider {

    private String source;

    // Index of the next character to hand out.
    private int index = 0;

    /**
     * @param source the text to read; null behaves like an empty string
     */
    public StringCharProvider(String source) {
        this.source = source;
    }

    /**
     * @return true while unread characters remain
     */
    public boolean hasNextChar() {
        return source != null && index < source.length();
    }

    /**
     * @return the next character of the source
     * @throws NoSuchElementException if no character is left
     */
    public char nextChar() {
        if (hasNextChar()) {
            char ch = source.charAt(index);
            index++;
            return ch;
        }
        throw new NoSuchElementException();
    }

}



package org.meteortl.core.engine.parser.automata;

import java.io.IOException;
import java.io.Reader;
import java.util.NoSuchElementException;

/**
 * Character tape that reads from a {@link Reader}, keeping one character of
 * look-ahead.
 *
 * An IOException while reading is printed via printStackTrace() and then
 * treated like end of input.
 * NOTE(review): this makes a failed read indistinguishable from a normal
 * end of the stream for the caller.
 *
 * @author liangfei0201@163.com
 */
public class ReaderCharProvider implements CharProvider {

    private Reader reader;

    // Look-ahead: the next value returned by the reader, or -1 when exhausted.
    private int next;

    /**
     * Wraps the reader and immediately reads the first character.
     *
     * @param reader the character source
     */
    public ReaderCharProvider(Reader reader) {
        this.reader = reader;
        advance();
    }

    /**
     * @return true while the look-ahead holds a character
     */
    public boolean hasNextChar() {
        return next != -1;
    }

    /**
     * @return the look-ahead character; the look-ahead is refilled afterwards
     * @throws NoSuchElementException if no character is left
     */
    public char nextChar() {
        if (!hasNextChar()) {
            throw new NoSuchElementException();
        }
        char current = (char) next;
        advance();
        return current;
    }

    // Refills the look-ahead; a read failure is reported and ends the input.
    private void advance() {
        try {
            next = reader.read();
        } catch (IOException e) {
            e.printStackTrace();
            next = -1;
        }
    }

}



package org.meteortl.core.engine.parser.automata;

import java.util.ArrayList;
import java.util.List;

/**
 * Receiver that collects every fragment into a list, in arrival order.
 *
 * @author liangfei0201@163.com
 */
public class ListTokenReceiver implements TokenReceiver {

    // Backing list of received tokens.
    private final List tokens = new ArrayList();

    public ListTokenReceiver() {
    }

    /**
     * Appends the token to the list.
     *
     * @param token the fragment to store
     */
    public void receive(Token token) {
        tokens.add(token);
    }

    /**
     * @return the backing list itself (not a copy) holding the received tokens
     */
    public List getTokens() {
        return tokens;
    }

}


下面是Meteor模板alpha0.2.2版的语法解析:
指令解析:

package org.meteortl.core.engine.parser.directive;

import java.io.Reader;
import java.util.List;

/**
 * Splits a template into directive fragments.
 *
 * @author liangfei0201@163.com
 */
public interface DirectiveTokenizer {

    /**
     * Breaks a template down into directive fragments.
     *
     * @param templateProvider reader supplying the template text
     * @return list of Token objects, one per directive fragment
     */
    List tokens(Reader templateProvider);

}



package org.meteortl.core.engine.parser.directive;

import java.io.Reader;
import java.util.List;

import org.meteortl.core.engine.parser.Syntax;
import org.meteortl.core.engine.parser.automata.ArrayStateMap;
import org.meteortl.core.engine.parser.automata.ListTokenReceiver;
import org.meteortl.core.engine.parser.automata.ReaderCharProvider;
import org.meteortl.core.engine.parser.automata.StateAutomata;
import org.meteortl.core.engine.parser.automata.StateMap;
import org.meteortl.core.engine.parser.automata.TypeResolver;

/**
* 使用DFA实现指令分割
* @author liangfei0201@163.com
*
*/
public class DirectiveTokenizerImpl implements DirectiveTokenizer {

// 状态机图
private static final int states[][] = {
/* 0.空格, 1.反斜杠, 2.@符, 3.字母, 4.{, 5.}, 6.其它符号 */
/* 0.起始 */{ 6, 1, 2, 6, 6, 6, 6 },
/* 1.转义 */{ 6, 6, 6, 6, 6, 6, 6 },
/* 2.指令 */{ 2, StateMap.ERROR, StateMap.ERROR, 3, 5, StateMap.ERROR, StateMap.ERROR },
/* 3.名称 */{ 4, StateMap.ERROR, StateMap.BREAK, 3, 5, StateMap.BREAK, StateMap.BREAK },
/* 4.中间 */{ 4, StateMap.ERROR, StateMap.BREAK, StateMap.BREAK, 5, StateMap.BREAK, StateMap.BREAK },
/* 5.表达 */{ 5, 5, 5, 5, StateMap.ERROR, StateMap.END, 5 },
/* 6.文本 */{ 6, 1, StateMap.BREAK, 6, 6, 6, 6 }
};

private StateAutomata stateAutomata;

public DirectiveTokenizerImpl(final Syntax syntax) {
stateAutomata = new StateAutomata(new ArrayStateMap(states), new TypeResolver() {
public int getType(char ch, String buffer) {
if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') return 0;
if (ch == syntax.getEscape()) return 1;
if (ch == syntax.getBegin()) return 2;
if ((ch >= '0' && ch <= '9') || ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) return 3;
if (ch == syntax.getExpressionBegin()) return 4;
if (ch == syntax.getEnd()) return 5;
return 6;
}
});
}

public List tokens(Reader templateProvider) {
ListTokenReceiver listTokenReceiver = new ListTokenReceiver(); // 接收器
stateAutomata.scan(new ReaderCharProvider(templateProvider), listTokenReceiver);
return listTokenReceiver.getTokens();
}

}



package org.meteortl.core.engine.parser.expression;

import java.util.List;

/**
 * Splits an expression string into expression fragments.
 *
 * @author liangfei0201@163.com
 */
public interface ExpressionTokenizer {

    /**
     * Breaks an expression string down into expression fragments.
     *
     * @param expressionText the expression string
     * @return list of Token objects, one per expression fragment
     */
    List tokens(String expressionText);

}



package org.meteortl.core.engine.parser.expression;

import java.util.List;

import org.meteortl.core.engine.parser.automata.ArrayStateMap;
import org.meteortl.core.engine.parser.automata.ListTokenReceiver;
import org.meteortl.core.engine.parser.automata.StateAutomata;
import org.meteortl.core.engine.parser.automata.StateMap;
import org.meteortl.core.engine.parser.automata.StringCharProvider;
import org.meteortl.core.engine.parser.automata.TypeResolver;

/**
*
* 使用状态机实现表达式分割
*
* @author liangfei0201@163.com
*
*/
public class ExpressionTokenizerImpl implements ExpressionTokenizer {

// 状态机图
private static final int states[][] = {
/* 0.空格, 1.字母, 2.数字, 3.点, 4.引号, 5.反斜杠, 6.括号, 7.其它符号 */
/* 0.起始 */{ StateMap.BEGIN, 1, 2, 7, 4, StateMap.ERROR, 6, 7},
/* 1.变量 */{ StateMap.BREAK, 1, 1, StateMap.BREAK, StateMap.ERROR, StateMap.ERROR, StateMap.BREAK, StateMap.BREAK},
/* 2.数字 */{ StateMap.BREAK, StateMap.ERROR, 2, 3, StateMap.ERROR, StateMap.ERROR, StateMap.BREAK, StateMap.BREAK},
/* 3.小数 */{ StateMap.BREAK, StateMap.END, 3, StateMap.BREAK, StateMap.ERROR, StateMap.ERROR, StateMap.BREAK, StateMap.BREAK},
/* 4.字符 */{ 4, 4, 4, 4, StateMap.END, 5, 4, 4},
/* 5.转义 */{ 4, 4, 4, 4, 4, 4, 4, 4},
/* 6.括号 */{ StateMap.BREAK, StateMap.BREAK, StateMap.BREAK, StateMap.BREAK, StateMap.BREAK, StateMap.BREAK, StateMap.BREAK, StateMap.BREAK},
/* 7.操作 */{ StateMap.BREAK, StateMap.BREAK, StateMap.BREAK, 7, StateMap.BREAK, StateMap.ERROR, StateMap.BREAK, 7}
};

private static final StateAutomata stateAutomata = new StateAutomata(new ArrayStateMap(states), new TypeResolver() {
public int getType(char ch, String buffer) {
if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') return 0;
if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) return 1;
if (ch >= '0' && ch <= '9') return 2;
// if (ch == '.') return 3;
if (ch == '\"' || ch == '\'') return 4;
if (ch == '\\') return 5;
if (ch == '(' || ch == ')' || ch == '[' || ch == ']') return 6;
return 7;
}
});

public List tokens(String expressionText) {
ListTokenReceiver listTokenReceiver = new ListTokenReceiver(); // 接收器
stateAutomata.scan(new StringCharProvider(expressionText), listTokenReceiver);
return listTokenReceiver.getTokens();
}

}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值