文法
//首先程序由一个块组成,块中包含可选声明和语句。未消除左递归
program -> block
block -> { decls stmts}
//声明包括类型,id以及分号
decls -> decls decl | ε
decl -> type id;
//类型可以是数组和基础数据类型
type -> type [ num] | basic
stmts -> stmts stmt | ε
//stmt 可以是赋值和if嵌套以及do-while嵌套。
stmt -> loc = bool;
| if ( bool ) stmt //bool表示bool值
| if ( bool ) stmt else stmt
| while ( bool ) stmt
| do stmt while ( bool );
| break;
| block
//stmt的左值是数组的某个元素的引用或id
loc -> loc [ bool ] | id
//划分了|| 和 &&的优先级(&&的优先级高于||)
bool -> bool || join | join
join -> join && equality | equality
equality -> equality == rel | equality != rel | rel
rel -> expr < expr | expr <= expr | expr >=expr | expr > expr | expr
expr -> expr + term | expr - term | term
term -> term * unary | term / unary | unary
unary -> !unary | -unary | factory
factory -> ( bool ) | loc | num | real | true | false
词法分析器
类图
词法分析案例
本章的词法分析器和案例十分类似,只需要重写scan中匹配双目运算符的部分,并把关键字加入words即可。
package Project.lexer;
/**
 * Integer tags that identify the kinds of token the lexer can recognize.
 *
 * <p>INDEX, MINUS and TEMP are not lexical tokens; they are used by the abstract syntax tree.
 * EQ stands for {@code ==}, LE for {@code <=}, GE for {@code >=} and NE for {@code !=}.
 */
public class Tag {
    // Every token kind is identified by a distinct integer.
    // NOTE(review): values start at 256, presumably so they cannot collide with the
    // single-character tokens that use their own character code as tag — confirm.
    public static final int AND = 256;
    public static final int BASIC = 257;
    public static final int BREAK = 258;
    public static final int DO = 259;
    public static final int ELSE = 260;
    public static final int EQ = 261;
    public static final int FALSE = 262;
    public static final int GE = 263;
    public static final int ID = 264;
    public static final int IF = 265;
    public static final int INDEX = 266;
    public static final int LE = 267;
    public static final int MINUS = 268;
    public static final int NE = 269;
    public static final int NUM = 270;
    public static final int OR = 271;
    public static final int REAl = 272;
    public static final int TEMP = 273;
    public static final int TRUE = 274;
    public static final int WHILE = 275;
}
package Project.lexer;
import Project.lexer.TokenImpl.Num;
import Project.lexer.TokenImpl.Real;
import Project.Symbols.Type;
import Project.lexer.TokenImpl.Word;
import java.io.IOException;
import java.util.*;
/**
 * Lexical analyzer that reads characters from {@code System.in} and groups them into tokens.
 *
 * <p>Reserved words and every identifier seen so far are kept in {@link #words}, so the same
 * lexeme always yields the same {@code Word} instance.
 */
public class Lexer {
    // Maps a lexeme to its token; holds the reserved words and the identifiers seen so far.
    public Map<String, Token> words = new HashMap<>();
    // Most recently read character that has not been consumed yet (a blank when none is pending).
    public char peek = ' ';
    // Current input line. Starts at 1 so that a diagnostic raised on the first line
    // is reported as "line 1" rather than "line 0".
    public static int line = 1;

    /** Creates a lexer with all keywords, boolean literals and basic type names reserved. */
    public Lexer() {
        reserve(new Word("if", Tag.IF));
        reserve(new Word("else", Tag.ELSE));
        reserve(new Word("while", Tag.WHILE));
        reserve(new Word("do", Tag.DO));
        reserve(new Word("break", Tag.BREAK));
        reserve(Word.True);
        reserve(Word.False);
        reserve(Type.Char);
        reserve(Type.Int);
        reserve(Type.Bool);
        reserve(Type.Float);
    }

    /**
     * Reads the next character from standard input into {@link #peek}.
     *
     * <p>At end of input {@code System.in.read()} returns -1, which the cast stores as
     * {@code '\uffff'}.
     *
     * @throws IOException if reading from standard input fails
     */
    void readch() throws IOException {
        peek = (char) System.in.read();
    }

    /**
     * Reads the next character and tests it against an expected one.
     *
     * @param c the character expected next
     * @return {@code true} if the character read equals {@code c}; in that case it is consumed
     *         (peek is reset to a blank). Otherwise {@code false}, and the character stays in peek.
     * @throws IOException if reading from standard input fails
     */
    public boolean readch(char c) throws IOException {
        readch();
        if (peek != c) {
            return false;
        }
        peek = ' ';
        return true;
    }

    /**
     * Adds a reserved word to {@link #words}, keyed by its lexeme.
     *
     * @param w the reserved word
     */
    private void reserve(Word w) {
        words.put(w.lexeme, w);
    }

    /**
     * Returns the next token from the input.
     *
     * <p>Only identifiers are added to {@link #words}; any character that is not part of an
     * operator, a number or a word is returned as a single-character token.
     *
     * @return the next token
     * @throws IOException if reading from standard input fails
     */
    public Token scan() throws IOException {
        // Skip blanks, tabs and line breaks.
        clearTab();
        // Operators that may consist of two characters.
        Token cur = analysisSymbol();
        if (cur != null) {
            return cur;
        }
        // Integer and floating point literals.
        cur = analysisNum();
        if (cur != null) {
            return cur;
        }
        // Keywords and identifiers.
        cur = analysisWord();
        if (cur != null) {
            return cur;
        }
        // Anything else in peek is returned as a token of its own.
        Token t = new Token(peek);
        peek = ' ';
        return t;
    }

    /**
     * Recognizes the operators {@code && || == != <= >=} and their one-character prefixes.
     *
     * <p>When the second character does not complete a two-character operator it is left in
     * {@link #peek} for the next call to {@link #scan()}.
     *
     * @return the operator token, or {@code null} if peek does not start an operator
     * @throws IOException if reading from standard input fails
     */
    public Token analysisSymbol() throws IOException {
        switch (peek) {
            case '&':
                if (readch('&')) {
                    return Word.and;
                } else {
                    return new Token('&');
                }
            case '|':
                if (readch('|')) {
                    return Word.or;
                } else {
                    return new Token('|');
                }
            case '=':
                if (readch('=')) {
                    return Word.eq;
                } else {
                    return new Token('=');
                }
            case '!':
                if (readch('=')) {
                    return Word.ne;
                } else {
                    return new Token('!');
                }
            case '<':
                if (readch('=')) {
                    return Word.le;
                } else {
                    return new Token('<');
                }
            case '>':
                if (readch('=')) {
                    return Word.ge;
                } else {
                    return new Token('>');
                }
            default:
                return null;
        }
    }

    /**
     * Recognizes a keyword or an identifier (a letter followed by letters or digits).
     *
     * @return the reserved word or identifier token, or {@code null} if peek is not a letter
     * @throws IOException if reading from standard input fails
     */
    public Token analysisWord() throws IOException {
        if (Character.isLetter(peek)) {
            StringBuilder sb = new StringBuilder();
            do {
                sb.append(peek);
                readch();
            } while (Character.isLetterOrDigit(peek));
            String str = sb.toString();
            Word w = (Word) words.get(str);
            if (w != null) {
                // Already known: either a reserved word or an identifier seen before.
                return w;
            }
            w = new Word(str, Tag.ID);
            words.put(str, w);
            return w;
        }
        return null;
    }

    /**
     * Recognizes an integer literal or, when a '.' follows the digits, a floating point literal.
     *
     * @return a {@code Num} or {@code Real} token, or {@code null} if peek is not a digit
     * @throws IOException if reading from standard input fails
     */
    public Token analysisNum() throws IOException {
        if (Character.isDigit(peek)) {
            int value = 0;
            do {
                value = 10 * value + Character.digit(peek, 10);
                readch();
            } while (Character.isDigit(peek));
            if (peek != '.') {
                return new Num(value);
            }
            // Fractional part: each further digit contributes digit / 10^k.
            float x = value;
            float d = 10;
            for (;;) {
                readch();
                if (!Character.isDigit(peek)) {
                    break;
                }
                x = x + Character.digit(peek, 10) / d;
                d = d * 10;
            }
            return new Real(x);
        }
        return null;
    }

    /**
     * Skips blanks, tabs, carriage returns and line feeds, counting lines on the way.
     *
     * @throws IOException if reading from standard input fails
     */
    public void clearTab() throws IOException {
        for (;; readch()) {
            // '\r' is skipped as well so that CRLF line endings do not produce a stray token.
            if (peek == ' ' || peek == '\t' || peek == '\r') {
                continue;
            } else if (peek == '\n') {
                line++;
            } else {
                break;
            }
        }
    }
}
package Project.lexer;
/**
* 记录词素的类型,所有关键字,保留字,标识符皆继承此类
*/
public class Token {
//记录变量类型
public final int tag;
public Token(int t){
tag=t;
}
@Override
public String toString() {
return ""+(char)tag;
}
}
package Project.lexer.TokenImpl;
import Project.lexer.Token;
import Project.lexer.Tag;
/** Token for an integer literal. */
public class Num extends Token {
    // The integer value of the literal.
    public final int value;

    /**
     * @param v the integer value carried by this token
     */
    public Num(int v) {
        super(Tag.NUM);
        this.value = v;
    }

    /** Returns the decimal representation of the value. */
    @Override
    public String toString() {
        return Integer.toString(value);
    }
}
package Project.lexer.TokenImpl;
import Project.lexer.Tag;
import Project.lexer.Token;
/**
 * Token for a floating point literal.
 */
public class Real extends Token {
    // The floating point value of the literal.
    public final float value;

    /**
     * @param v the floating point value carried by this token
     */
    public Real(float v) {
        super(Tag.REAl);
        this.value = v;
    }

    /** Returns the string representation of the value. */
    @Override
    public String toString() {
        return Float.toString(value);
    }
}
package Project.lexer.TokenImpl;
import Project.lexer.Tag;
import Project.lexer.Token;
/**
 * Token that carries a lexeme: reserved words, identifiers and composite tokens such as
 * {@code &&}.
 */
public class Word extends Token {
    // Shared instances for the composite operators and a few special words.
    public static final Word and = new Word("&&", Tag.AND);
    public static final Word or = new Word("||", Tag.OR);
    public static final Word eq = new Word("==", Tag.EQ);
    public static final Word ne = new Word("!=", Tag.NE);
    public static final Word le = new Word("<=", Tag.LE);
    public static final Word ge = new Word(">=", Tag.GE);
    public static final Word minus = new Word("minus", Tag.MINUS);
    public static final Word True = new Word("true", Tag.TRUE);
    public static final Word False = new Word("false", Tag.FALSE);
    public static final Word temp = new Word("t", Tag.TEMP);

    // The lexeme, e.g. the name of a variable.
    public String lexeme = "";

    /**
     * @param str the lexeme
     * @param t   the tag of the token
     */
    public Word(String str, int t) {
        super(t);
        this.lexeme = str;
    }

    /** Returns the lexeme. */
    @Override
    public String toString() {
        return this.lexeme;
    }
}
package Project.Symbols;
import Project.lexer.Tag;
import Project.lexer.TokenImpl.Word;
/**
 * A word that names a type and additionally records the type's width.
 */
public class Type extends Word {
    // The basic types known to the language.
    public static final Type Int = new Type("int", Tag.BASIC, 4);
    public static final Type Float = new Type("float", Tag.BASIC, 8);
    public static final Type Char = new Type("char", Tag.BASIC, 1);
    public static final Type Bool = new Type("bool", Tag.BASIC, 1);

    // Width of the type, used for storage allocation.
    public int width = 0;

    /**
     * @param s   the lexeme of the type
     * @param tag the tag of the token
     * @param w   the width of the type
     */
    public Type(String s, int tag, int w) {
        super(s, tag);
        this.width = w;
    }

    /**
     * Tells whether a type is numeric, i.e. one of Char, Int or Float.
     *
     * @param p the type to examine
     * @return {@code true} if {@code p} is Char, Int or Float, otherwise {@code false}
     */
    public static boolean numeric(Type p) {
        return p == Type.Char || p == Type.Int || p == Type.Float;
    }

    /**
     * Determines the wider of two numeric types; can be used when deciding type conversions.
     *
     * @param p1 the first type
     * @param p2 the second type
     * @return Float if either is Float, else Int if either is Int, else Char;
     *         {@code null} if one of the types is not numeric
     */
    public static Type max(Type p1, Type p2) {
        boolean bothNumeric = numeric(p1) && numeric(p2);
        if (!bothNumeric) {
            return null;
        }
        if (p1 == Type.Float || p2 == Type.Float) {
            return Type.Float;
        }
        if (p1 == Type.Int || p2 == Type.Int) {
            return Type.Int;
        }
        return Type.Char;
    }
}
package Project.Symbols;
import Project.lexer.Tag;
/** Array type: a number of elements that all have the same element type. */
public class Array extends Type {
    // Type of the array elements.
    public Type of;
    // Number of elements.
    public int size = 1;

    /**
     * @param sz the number of elements
     * @param p  the element type; the array's width is {@code sz * p.width}
     */
    public Array(int sz, Type p) {
        super("[]", Tag.INDEX, sz * p.width);
        this.size = sz;
        this.of = p;
    }

    /** Returns the size in brackets followed by the element type, e.g. {@code [3]int}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append('[').append(size).append(']');
        text.append(of.toString());
        return text.toString();
    }
}
符号表
package Project.Symbols;
import Project.inter.Id;
import java.util.HashMap;
import java.util.Map;
/** Symbol table of one scope, chained to the symbol table of the enclosing scope. */
public class Env {
    // Maps the string form of an identifier to its Id object.
    public Map<String, Id> table;
    // Symbol table of the enclosing scope, or null for the outermost one.
    protected Env prev;

    /**
     * @param p the symbol table of the enclosing scope, may be {@code null}
     */
    public Env(Env p) {
        this.table = new HashMap<>();
        this.prev = p;
    }

    /**
     * Records a symbol in this scope.
     *
     * @param s   the name of the symbol
     * @param sym the Id stored under that name
     */
    public void put(String s, Id sym) {
        this.table.put(s, sym);
    }

    /**
     * Looks a name up in this scope and then in the enclosing scopes, innermost first.
     *
     * @param s the name to look up
     * @return the Id found first, or {@code null} if no scope in the chain contains the name
     */
    public Id get(String s) {
        Env scope = this;
        while (scope != null) {
            Id hit = scope.table.get(s);
            if (hit != null) {
                return hit;
            }
            scope = scope.prev;
        }
        return null;
    }
}
/** Expression node for an identifier; it additionally records the identifier's address. */
public class Id extends Expr {
    // Relative address of the identifier.
    public int offset;

    /**
     * @param id the word of the identifier, passed on to the Expr constructor
     * @param p  the type of the identifier, passed on to the Expr constructor
     * @param b  the relative address
     */
    public Id(Word id, Type p, int b) {
        super(id, p);
        this.offset = b;
    }
}
语法分析
语法分析是根据文法实现的对输入的匹配,参考文法和函数很容易理解。
语法分析的过程中,在stmt的不断调用与返回过程中已经构建了一棵语法分析树
package Project.parser;
import Project.Symbols.*;
import Project.inter.*;
import Project.lexer.*;
import Project.lexer.TokenImpl.*;
import java.io.IOException;
/**
* 编译器
*/
public class Parser {
//词法分析的分析器
private Lexer lexer;
//向前看词法单元
private Token look;
//当前或顶层的符号表
Env top = null;
//用于变量声明的存储位置
int used = 0;
/**
* 构建Parser
* @param l 词法分析器
* @throws IOException
*/
public Parser(Lexer l )throws IOException{
lexer = l;
move();
}
/**
* read next look char
* @throws IOException
*/
public void move()throws IOException{
look = lexer.scan();
}
/**
* 异常处理
* @param s 异常信息
*/
private void error(String s){
throw new Error("near line "+lexer.line+": "+s);
}
/**
* 类型匹配
* @param t 希望看见的类型
* @throws IOException 如果出现不匹配,则是语法错误
* 匹配到#则退出程序
*/
private void match(int t) throws IOException{
if(look.tag == t)
move();
else {
error("syntax error");
}
}
/**
* program -> block
* @throws IOException
*/
public void program() throws IOException{
//语法分析
Stmt s = block();
//用于生成中间代码
int begin = s.newLabel();
int after = s.newLabel();
System.out.println();
s.emitLabel(begin);
s.gen(begin,after);
s.emitLabel(after);
}
/**
* block -> { decls stmts}
* @return Stmt
* @throws IOException
*/
private Stmt block() throws IOException{
match('{');
Env savedEnv = top;
//当前块的符号表
top = new Env(top);
decls();
Stmt s = stmts();
match('}');
top = savedEnv;
return s;
}
/**
* D -> type ID ;
* @throws IOException
*/
private void decls()throws IOException{
while (look.tag == Tag.BASIC){
Type p = type();
Token tok = look;
match(Tag.ID);
match(';');
Id id = new Id((Word) tok,p,used);
//存在问题,已解决。
top.put(id.toString(),id);
used = used+p.width;
}
}
/**
* type -> type [num] | basic
* 识别定义变量类型的关键字
* @return type[num] | basic
* @throws IOException
*/
private Type type() throws IOException{
Type p = (Type) look;
match(Tag.BASIC);//期望look.tag == Tag.BASIC
if(look.tag!='[')//T -> basic
return p;
else
return dims(p);//返回数组类型
}
/**
* type -> type [num] 识别数组
* @param p 定义变量类型 的关键字
* @return 数组
* @throws IOException
*/
private Type dims(Type p) throws IOException{
match('[');
Token tok = look;
match(Tag.NUM);
match(']');
if(look.tag=='[')//匹配多维数组
p=dims(p);
return new Array(((Num)tok).value,p);
}
/**
* stmts -> stmts stmt|ε
* @return stmt
* @throws IOException
*/
private Stmt stmts() throws IOException{
if(look.tag =='}')
return Stmt.Null;
else
return new Seq(stmt(),stmts());
}
private Stmt stmt() throws IOException{
Expr x;
Stmt s1,s2;
//用于为break语句保存外层的循环语句
Stmt savedStmt;
switch (look.tag){
case ';':
move();
return Stmt.Null;
case Tag.IF:
match(Tag.IF);
match('(');
x=bool();
match(')');
s1=stmt();
if(look.tag!=Tag.ELSE)
return new If(x,s1);
match(Tag.ELSE);
s2 =stmt();
return new Else(x,s1,s2);
case Tag.WHILE:
While whileNode = new While();
savedStmt =Stmt.Enclosing;//保存外层循环
Stmt.Enclosing = whileNode;//保存当前循环
match(Tag.WHILE);
match('(');
x = bool();
match(')');
s1 = stmt();
whileNode.init(x,s1);
Stmt.Enclosing = savedStmt;//重置Stmt.Enclosing
return whileNode;
case Tag.DO:
Do doNode = new Do();
savedStmt = Stmt.Enclosing;
Stmt.Enclosing = doNode;
match(Tag.DO);
s1 = stmt();
match(Tag.WHILE);
match('(');
x= bool();
match(')');
match(';');
doNode.init(s1,x);
Stmt.Enclosing = savedStmt;//重置Stmt.Enclosing
return doNode;
case Tag.BREAK:
match(Tag.BREAK);
match(';');
return new Break();
case '{':
return block();
default:
return assign();
}
}
/**
* loc = bool;
* 赋值语句
* @return
* @throws IOException
*/
private Stmt assign() throws IOException{
Stmt stmt;
Token t = look;
//assign的左部必须是ID类型
match(Tag.ID);
Id id=top.get(t.toString());
if(id==null){
error(t.toString()+" undeclared");
}
if(look.tag=='='){//s -> id = E;
move();
Expr t2 = bool();
stmt = new Set(id,t2);
}else {// s -> L = E;
Access x = offset(id);
match('=');
stmt = new SetElem(x,bool());
}
match(';');
return stmt;
}
/**
* bool -> bool ||join |join
* @return
* @throws IOException
*/
private Expr bool()throws IOException{
Expr x = join();
while (look.tag == Tag.OR){
Token tok = look;
move();
x=new Or(tok,x,join());
}
return x;
}
/**
* join -> join &&equality | equality
* @return
* @throws IOException
*/
private Expr join() throws IOException{
Expr x = equality();
while (look.tag==Tag.AND){
Token tok = look;
move();
x = new And(tok,x,equality());
}
return x;
}
/**
* equality -> equality == rel |equality !=rel|rel
* @return
* @throws IOException
*/
private Expr equality() throws IOException{
Expr x = rel();
while (look.tag == Tag.EQ||look.tag == Tag.NE){
Token tok = look;
move();
x = new Rel(tok,x,rel());
}
return x;
}
/**
* rel -> expr < expr | expr <= expr | expr>=expr
* |expr>expr |expr
*
* @return
* @throws IOException
*/
private Expr rel() throws IOException{
Expr x = expr();
switch (look.tag){
case '<':
case Tag.LE:
case Tag.GE:
case '>':
Token tok = look;
move();
return new Rel(tok,x,expr());
default:
return x;
}
}
/**
* expr -> expr +expr|expr - term |term
* @return
* @throws IOException
*/
private Expr expr()throws IOException{
Expr x = term();
while (look.tag=='+'||look.tag=='-'){
Token tok = look;
move();
x=new Arith(tok,x,term());
}
return x;
}
/**
* term -> term * unary |term / unary |unary
* @return
* @throws IOException
*/
private Expr term() throws IOException{
Expr x = unary();
while (look.tag=='*'||look.tag=='/'){
Token tok = look;
move();
x=new Arith(tok,x,unary());
}
return x;
}
/**
* unary -> !unary | -unary |factory
* @return
* @throws IOException
*/
private Expr unary() throws IOException{
if(look.tag=='-'){
move();
return new Unary(Word.minus,unary());
}
else if(look.tag=='!'){
Token tok = look;
move();
return new Not(tok,unary());
}
else
return factory();
}
/**
* factory -> (bool) |loc|num|real|true|false
* @return
* @throws IOException
*/
private Expr factory()throws IOException{
Expr x = null;
switch (look.tag){
case '(':
move();
x=bool();
match(')');
return x;
case Tag.NUM:
x= new Constant(look,Type.Int);
move();
return x;
case Tag.REAl:
x= new Constant(look,Type.Float);
move();
return x;
case Tag.TRUE:
x= Constant.True;
move();
return x;
case Tag.FALSE:
x= Constant.False;
move();
return x;
case Tag.ID:
String s = look.toString();
Id id = top.get(s);
if(id==null){
error(s+ " undeclared");
}
move();
if(look.tag!='[')
return id;
else
return offset(id);
default:
error("syntax error");
return x;
}
}
private Access offset(Id a) throws IOException{//I ->[E]|[E] I
Expr i;
Expr w;
Expr t1,t2;
Expr loc;
Type type = a.type;
match('[');
i=bool();//第一个下标I -> [E]
match(']');
type = ((Array)type).of;
w = new Constant(type.width);
t1 = new Arith(new Token('*'),i,w);
loc=t1;
while (look.tag=='['){//多维下标,I -> [E]I
match('[');
i = bool();
match(']');
type = ((Array)type).of;
w = new Constant(type.width);
t1 = new Arith(new Token('*'),i,w);
t2 = new Arith(new Token('+'),loc,t1);
loc=t2;
}
return new Access(a,loc,type);
}
}