首先,假如我们自己设计了一门语言,如何才能让它编译运行呢?
第一步,我们要识别源程序中的各个字符串,它们可能是:关键字、字符、数字等。因此,我们要进行词法分析(Lexical Analysis)。比如下列表格:我们要将它们归类,并告诉编译器这些分别是什么。
我们在这里可以使用有限状态自动机的模型:每扫描(scan)一个字符,就判断当前读到的内容是否构成关键字、数字、运算符,或者仅仅是一个字符串。我们在代码里将它们进行分类,为后面的编译服务:
然后,我们就可以动手写一个扫描程序(scanner)了。假设我们的语言如下:
接下来,我们只需针对每一个字符逐个进行判断即可。下面结合代码来讲解:
import java.util.*;
public class Scanner {
/**
 * Checked exception that carries an integer position alongside its message.
 * The position is presumably an offset into the scanned input where the
 * lexical error was detected -- confirm against the code that throws it.
 */
@SuppressWarnings("serial")
public static class LexicalException extends Exception {

    /** Position value supplied when the exception was constructed. */
    int pos;

    /**
     * Creates the exception.
     *
     * @param message detail message, forwarded to {@link Exception}
     * @param pos     position value later reported by {@link #getPos()}
     */
    public LexicalException(String message, int pos) {
        super(message);
        this.pos = pos;
    }

    /**
     * @return the current value of {@code pos}
     */
    public int getPos() {
        return this.pos;
    }
}
/**
 * Kinds of token. The trailing comment on each keyword, operator and
 * separator constant shows the source text that constant stands for.
 * Declaration order is kept exactly as before so ordinals do not change.
 */
public static enum Kind {
    // Identifiers and literals
    IDENTIFIER,
    INTEGER_LITERAL,
    BOOLEAN_LITERAL,
    STRING_LITERAL,

    // Keywords
    KW_x,       // x
    KW_X,       // X
    KW_y,       // y
    KW_Y,       // Y
    KW_r,       // r
    KW_R,       // R
    KW_a,       // a
    KW_A,       // A
    KW_Z,       // Z
    KW_DEF_X,   // DEF_X
    KW_DEF_Y,   // DEF_Y
    KW_SCREEN,  // SCREEN
    KW_cart_x,  // cart_x
    KW_cart_y,  // cart_y
    KW_polar_a, // polar_a
    KW_polar_r, // polar_r
    KW_abs,     // abs
    KW_sin,     // sin
    KW_cos,     // cos
    KW_atan,    // atan
    KW_log,     // log
    KW_image,   // image
    KW_int,     // int
    KW_boolean, // boolean
    KW_url,     // url
    KW_file,    // file

    // Operators
    OP_ASSIGN,  // =
    OP_GT,      // >
    OP_LT,      // <
    OP_EXCL,    // !
    OP_Q,       // ?
    OP_COLON,   // :
    OP_EQ,      // ==
    OP_NEQ,     // !=
    OP_GE,      // >=
    OP_LE,      // <=
    OP_AND,     // &
    OP_OR,      // |
    OP_PLUS,    // +
    OP_MINUS,   // -
    OP_TIMES,   // *
    OP_DIV,     // /
    OP_MOD,     // %
    OP_POWER,   // **
    OP_AT,      // @
    OP_RARROW,  // ->
    OP_LARROW,  // <-

    // Separators
    LPAREN,     // (
    RPAREN,     // )
    LSQUARE,    // [
    RSQUARE,    // ]
    SEMI,       // ;
    COMMA,      // ,

    // End-of-file kind
    EOF;
}
// Lookup table from keyword text to its token kind.
HashMap<String, Kind> map_keywords = new HashMap<>();
// Lookup table from operator character to its token kind.
HashMap<Character, Kind> map_op = new HashMap<>();
/**
 * The five states of the scanner's state machine: the start state, plus one
 * state each for being inside a number, an identifier, a string literal, or
 * a comment.
 */
public static enum State {
    START,
    IN_DIGIT,
    IN_IDENT,
    IN_STRING,
    IN_COMMENT;
}
public class Token {
// Kind of this token; getText() checks it to decide how the text is extracted.
public final Kind kind;
// Start offset of the token's text in the chars array (as used by getText()).
public final int pos;
// Number of characters the token spans in chars; for a string literal this
// includes the enclosing quote characters, which chars2String skips.
public final int length;
// Line of the token -- presumably the source line number; whether it is
// 0- or 1-based is not visible here, confirm against the scanner.
public final int line;
// Position within the line -- presumably a column offset; confirm against the scanner.
public final int pos_in_line;// every token (i.e. every Token object) has these five attributes
/**
 * Creates a token from its five attributes. Each argument is stored
 * unchanged in the final field of the same name.
 *
 * @param kind        kind of the token
 * @param pos         value for the {@code pos} field
 * @param length      value for the {@code length} field
 * @param line        value for the {@code line} field
 * @param pos_in_line value for the {@code pos_in_line} field
 */
public Token(Kind kind, int pos, int length, int line, int pos_in_line) {
    // The superclass (Object) constructor is invoked implicitly.
    this.kind = kind;

    // Location within the line structure.
    this.line = line;
    this.pos_in_line = pos_in_line;

    // Extent of the token text.
    this.pos = pos;
    this.length = length;
}
/**
 * Returns the text of this token.
 *
 * A string literal is passed through {@code chars2String}, which per its
 * own documentation strips the enclosing quotes and converts escape
 * sequences. Every other kind is copied verbatim: {@code length}
 * characters of {@code chars} starting at {@code pos}.
 *
 * @return the token's text
 */
public String getText() {
    final boolean isStringLiteral = (kind == Kind.STRING_LITERAL);
    return isStringLiteral
            ? chars2String(chars, pos, length)
            : String.copyValueOf(chars, pos, length);
}
/**
* To get the text of a StringLiteral, we need to remove the enclosing "
* characters and convert escaped characters to the represented
* character. For example the two characters \ t in the char array
* should be converted to a single tab character in the returned String
*
* @param chars
* @param pos
* @param length
* @return
*/
private String chars2String(char[] chars, int pos, int length) {
StringBuilder sb = new StringBuilder();
for (int i = pos + 1; i < pos + length - 1; ++i) {// omit initial
// and final "
char ch = chars[i];