考虑该怎么样设计Yacc类。
显然,Yacc面临的第1个问题就是分析rule_file的内容。Yacc类本身不应该实现这个功能,因为还有一个功能是生成DFA,这是两个没有多大关系的功能,按照SRP(单一职责原则),不应该在一个类里实现。
按照这个设计原则,很容易做出决定:需要一个类Vocab来识别rule_file中定义的所有符号(TERMINAL、NONTERMINAL、EOF、START_SYMBOL);另外还需要一个类Rule来识别每一条Rule定义。
这两个类的功能很单一,接口也不会太复杂。
# Unit tests pinning the public behaviour of Vocab and Rule.
class TestCompiler < Test::Unit::TestCase
  # Vocab must classify the reserved symbols, declared terminals and
  # registered nonterminals, and report everything else as UNKNOWN.
  def test_vocab
    vocab = Vocab.new
    assert_equal(Vocab::NULL, vocab.identify("nil"))
    assert_equal(Vocab::EOF, vocab.identify("$"))
    assert_equal(Vocab::UNKNOWN, vocab.identify("function"))

    vocab.add_terminal("%token )")
    assert_equal(Vocab::TERMINAL, vocab.identify(")"))

    vocab.add_terminal("%token function id")
    assert_equal(Vocab::TERMINAL, vocab.identify("function"))
    assert_equal(Vocab::TERMINAL, vocab.identify("id"))
    # A name that merely starts with a declared terminal is still unknown.
    assert_equal(Vocab::UNKNOWN, vocab.identify("ids"))

    vocab.add_nonterminal("proc")
    assert_equal(Vocab::NONTERMINAL, vocab.identify("proc"))

    vocab.add_nonterminals(%w{kick sanf})
    assert_equal(Vocab::NONTERMINAL, vocab.identify("kick"))
    assert_equal(Vocab::NONTERMINAL, vocab.identify("sanf"))
  end

  # Rule.parse must split "lhs := rhs... : action" into its three parts.
  def test_rule
    # The rule text is a single logical line; adjacent literals joined with a
    # trailing backslash keep it readable without embedding a newline.
    rule = Rule.parse("function_decl := " \
                      "function function_name ( argument_list ) ; : decl")
    assert(rule, "parse rule failed")
    assert(rule.vocabs.include?("function_decl"))
    assert(rule.vocabs.include?("function"))
    assert(rule.vocabs.include?("function_name"))
    assert(rule.vocabs.include?("argument_list"))
    assert_equal("function_decl", rule.lt)
    assert_equal(%w{function function_name ( argument_list ) ;}, rule.rt)
    assert_equal("decl", rule.action)
  end
end
同样,实现它们也很简单。
###### File : algo.rb #############
##############################
# Vocab
# Stores every symbol that appears in a syntax definition:
# terminals, nonterminals, nil (the empty symbol) and $ (end of input).
##############################
class Vocab
  ### Symbol type codes returned by #identify
  TERMINAL    = 1
  NONTERMINAL = 2
  NULL        = 3
  EOF         = 4
  UNKNOWN     = 5

  ### Reserved spellings of the empty symbol and the end-of-input symbol
  @@nulls = ["nil"]
  @@eofs  = ["$"]

  ### Matches a "%token a b c" declaration; capture 1 is the token list.
  @@terminal_match = /^%token\s+(.*)$/

  # @terminals    - names declared as terminals
  # @nonterminals - names registered as nonterminals
  def initialize
    @terminals = []
    @nonterminals = []
  end

  # Classifies a symbol name.
  #
  # name - the symbol spelling to look up.
  #
  # Returns one of TERMINAL, NULL, EOF, NONTERMINAL or UNKNOWN. The checks run
  # in exactly that order, so a declared terminal wins over every other kind.
  def identify(name)
    return TERMINAL if @terminals.include?(name)
    return NULL if @@nulls.include?(name)
    return EOF if @@eofs.include?(name)
    return NONTERMINAL if @nonterminals.include?(name)

    UNKNOWN
  end

  # Maps a type code back to the name of its constant.
  #
  # type - one of the type codes defined on this class.
  #
  # Returns the constant name as a String (e.g. "TERMINAL"), or "error type"
  # when no constant has that value.
  def Vocab.type_name(type)
    # Module#constants yields Symbols on Ruby >= 1.9, which Kernel#eval
    # rejects; const_get accepts both Symbols and Strings.
    Vocab.constants.each do |const_name|
      return const_name.to_s if Vocab.const_get(const_name) == type
    end
    "error type"
  end

  # Returns the list of spellings treated as the empty symbol.
  def Vocab.nulls
    @@nulls
  end

  # Returns the list of spellings treated as the end-of-input symbol.
  def Vocab.eofs
    @@eofs
  end

  # Parses one token declaration and records every terminal it declares.
  #
  # term_def_text - a line of the form "%token term1 term2 term3 ...".
  #
  # Returns the updated terminal list, or nil when the line is not a
  # well-formed declaration.
  def add_terminal(term_def_text)
    matches = @@terminal_match.match(term_def_text.strip)
    return nil unless matches

    # matches[1] is the whitespace-separated token list "term1 term2 ...".
    @terminals.concat(matches[1].strip.split(/\s+/))
    @terminals.uniq!
    @terminals
  end

  # Registers a nonterminal unless the name is already classified
  # (as a terminal, a reserved symbol, or an existing nonterminal).
  #
  # Returns the nonterminal list.
  def add_nonterminal(name)
    # UNKNOWN already implies the name is not in @nonterminals, so no extra
    # membership test or de-duplication pass is needed.
    @nonterminals.push(name) if identify(name) == UNKNOWN
    @nonterminals
  end

  # Registers each name in +tokens+ via #add_nonterminal.
  def add_nonterminals(tokens)
    tokens.each { |name| add_nonterminal(name) }
  end

  # Returns a new array holding all terminals, then all nonterminals, then
  # the reserved empty and end-of-input symbols.
  def tokens
    @terminals + @nonterminals + @@nulls + @@eofs
  end

  ## Traversal helpers

  # Yields each terminal name.
  def each_terminal(&block)
    @terminals.each(&block)
  end

  # Yields each nonterminal name.
  def each_nonterminal(&block)
    @nonterminals.each(&block)
  end

  # Yields every symbol returned by #tokens.
  def each_token(&block)
    tokens.each(&block)
  end
end # end Vocab
将"%token id , ( )"这一行内容识别为四个TERMINAL是由函数add_terminal完成的,它使用了正则表达式。容易推测,Rule也使用了这种方法:
###### File : algo.rb #############
##################################
# A Rule object represents one grammar rule (production).
##################################
class Rule
  # lt     : left-hand side symbol name
  # rt     : right-hand side, an array of symbol names
  # action : name of the action attached to the rule
  attr_reader :lt, :rt, :action

  # Matches "lhs := sym sym ... : action". The right-hand side capture is
  # greedy, so the LAST colon in the text separates it from the action.
  @@match_rule = /(\w+)\s*:=\s*(.*):(.*)/

  # lt     - left-hand side symbol name.
  # rt     - array of right-hand side symbol names.
  # action - action name.
  def initialize(lt, rt, action)
    @lt, @rt, @action = lt, rt, action
  end

  # Builds a Rule from its textual form.
  #
  # rule_plain_text - text of the form "lhs := sym1 sym2 ... : action".
  #
  # Returns a Rule, or nil when the text does not match that form.
  def Rule.parse(rule_plain_text)
    matches = @@match_rule.match(rule_plain_text)
    return nil unless matches

    # All three groups are mandatory in the pattern, so they are always
    # Strings here and the strip/split calls below cannot fail.
    right_side = matches[2].strip.split(/\s+/)
    Rule.new(matches[1], right_side, matches[3].strip)
  end

  # Returns the distinct symbol names used by the rule, left-hand side first.
  def vocabs
    ([@lt] + @rt).uniq
  end

  # Returns a one-line textual form of the rule.
  def to_s
    "#{@lt} = #{@rt.join(" ")} : #{@action}"
  end

  # Two rules are equal when both sides match; the action is ignored.
  # Anything that is not a Rule compares unequal instead of raising.
  def eql?(other)
    other.is_a?(Rule) && @lt.eql?(other.lt) && @rt.eql?(other.rt)
  end
  alias_method :==, :eql?

  # Kept consistent with #eql? so rules behave correctly as Hash keys and
  # under Array#uniq.
  def hash
    [@lt, @rt].hash
  end
end
显然,Yacc面临的第1个问题就是分析rule_file的内容。Yacc类本身不应该实现这个功能,因为还有一个功能是生成DFA,这是两个没有多大关系的功能,按照SRP(单一职责原则),不应该在一个类里实现。
按照这个设计原则,很容易做出决定:需要一个类Vocab来识别rule_file中定义的所有符号(TERMINAL、NONTERMINAL、EOF、START_SYMBOL);另外还需要一个类Rule来识别每一条Rule定义。
这两个类的功能很单一,接口也不会太复杂。
# Unit tests pinning the public behaviour of Vocab and Rule.
class TestCompiler < Test::Unit::TestCase
  # Vocab must classify the reserved symbols, declared terminals and
  # registered nonterminals, and report everything else as UNKNOWN.
  def test_vocab
    vocab = Vocab.new
    assert_equal(Vocab::NULL, vocab.identify("nil"))
    assert_equal(Vocab::EOF, vocab.identify("$"))
    assert_equal(Vocab::UNKNOWN, vocab.identify("function"))

    vocab.add_terminal("%token )")
    assert_equal(Vocab::TERMINAL, vocab.identify(")"))

    vocab.add_terminal("%token function id")
    assert_equal(Vocab::TERMINAL, vocab.identify("function"))
    assert_equal(Vocab::TERMINAL, vocab.identify("id"))
    # A name that merely starts with a declared terminal is still unknown.
    assert_equal(Vocab::UNKNOWN, vocab.identify("ids"))

    vocab.add_nonterminal("proc")
    assert_equal(Vocab::NONTERMINAL, vocab.identify("proc"))

    vocab.add_nonterminals(%w{kick sanf})
    assert_equal(Vocab::NONTERMINAL, vocab.identify("kick"))
    assert_equal(Vocab::NONTERMINAL, vocab.identify("sanf"))
  end

  # Rule.parse must split "lhs := rhs... : action" into its three parts.
  def test_rule
    # The rule text is a single logical line; adjacent literals joined with a
    # trailing backslash keep it readable without embedding a newline.
    rule = Rule.parse("function_decl := " \
                      "function function_name ( argument_list ) ; : decl")
    assert(rule, "parse rule failed")
    assert(rule.vocabs.include?("function_decl"))
    assert(rule.vocabs.include?("function"))
    assert(rule.vocabs.include?("function_name"))
    assert(rule.vocabs.include?("argument_list"))
    assert_equal("function_decl", rule.lt)
    assert_equal(%w{function function_name ( argument_list ) ;}, rule.rt)
    assert_equal("decl", rule.action)
  end
end
同样,实现它们也很简单。
###### File : algo.rb #############
##############################
# Vocab
# Stores every symbol that appears in a syntax definition:
# terminals, nonterminals, nil (the empty symbol) and $ (end of input).
##############################
class Vocab
  ### Symbol type codes returned by #identify
  TERMINAL    = 1
  NONTERMINAL = 2
  NULL        = 3
  EOF         = 4
  UNKNOWN     = 5

  ### Reserved spellings of the empty symbol and the end-of-input symbol
  @@nulls = ["nil"]
  @@eofs  = ["$"]

  ### Matches a "%token a b c" declaration; capture 1 is the token list.
  @@terminal_match = /^%token\s+(.*)$/

  # @terminals    - names declared as terminals
  # @nonterminals - names registered as nonterminals
  def initialize
    @terminals = []
    @nonterminals = []
  end

  # Classifies a symbol name.
  #
  # name - the symbol spelling to look up.
  #
  # Returns one of TERMINAL, NULL, EOF, NONTERMINAL or UNKNOWN. The checks run
  # in exactly that order, so a declared terminal wins over every other kind.
  def identify(name)
    return TERMINAL if @terminals.include?(name)
    return NULL if @@nulls.include?(name)
    return EOF if @@eofs.include?(name)
    return NONTERMINAL if @nonterminals.include?(name)

    UNKNOWN
  end

  # Maps a type code back to the name of its constant.
  #
  # type - one of the type codes defined on this class.
  #
  # Returns the constant name as a String (e.g. "TERMINAL"), or "error type"
  # when no constant has that value.
  def Vocab.type_name(type)
    # Module#constants yields Symbols on Ruby >= 1.9, which Kernel#eval
    # rejects; const_get accepts both Symbols and Strings.
    Vocab.constants.each do |const_name|
      return const_name.to_s if Vocab.const_get(const_name) == type
    end
    "error type"
  end

  # Returns the list of spellings treated as the empty symbol.
  def Vocab.nulls
    @@nulls
  end

  # Returns the list of spellings treated as the end-of-input symbol.
  def Vocab.eofs
    @@eofs
  end

  # Parses one token declaration and records every terminal it declares.
  #
  # term_def_text - a line of the form "%token term1 term2 term3 ...".
  #
  # Returns the updated terminal list, or nil when the line is not a
  # well-formed declaration.
  def add_terminal(term_def_text)
    matches = @@terminal_match.match(term_def_text.strip)
    return nil unless matches

    # matches[1] is the whitespace-separated token list "term1 term2 ...".
    @terminals.concat(matches[1].strip.split(/\s+/))
    @terminals.uniq!
    @terminals
  end

  # Registers a nonterminal unless the name is already classified
  # (as a terminal, a reserved symbol, or an existing nonterminal).
  #
  # Returns the nonterminal list.
  def add_nonterminal(name)
    # UNKNOWN already implies the name is not in @nonterminals, so no extra
    # membership test or de-duplication pass is needed.
    @nonterminals.push(name) if identify(name) == UNKNOWN
    @nonterminals
  end

  # Registers each name in +tokens+ via #add_nonterminal.
  def add_nonterminals(tokens)
    tokens.each { |name| add_nonterminal(name) }
  end

  # Returns a new array holding all terminals, then all nonterminals, then
  # the reserved empty and end-of-input symbols.
  def tokens
    @terminals + @nonterminals + @@nulls + @@eofs
  end

  ## Traversal helpers

  # Yields each terminal name.
  def each_terminal(&block)
    @terminals.each(&block)
  end

  # Yields each nonterminal name.
  def each_nonterminal(&block)
    @nonterminals.each(&block)
  end

  # Yields every symbol returned by #tokens.
  def each_token(&block)
    tokens.each(&block)
  end
end # end Vocab
将"%token id , ( )"这一行内容识别为四个TERMINAL是由函数add_terminal完成的,它使用了正则表达式。容易推测,Rule也使用了这种方法:
###### File : algo.rb #############
##################################
# A Rule object represents one grammar rule (production).
##################################
class Rule
  # lt     : left-hand side symbol name
  # rt     : right-hand side, an array of symbol names
  # action : name of the action attached to the rule
  attr_reader :lt, :rt, :action

  # Matches "lhs := sym sym ... : action". The right-hand side capture is
  # greedy, so the LAST colon in the text separates it from the action.
  @@match_rule = /(\w+)\s*:=\s*(.*):(.*)/

  # lt     - left-hand side symbol name.
  # rt     - array of right-hand side symbol names.
  # action - action name.
  def initialize(lt, rt, action)
    @lt, @rt, @action = lt, rt, action
  end

  # Builds a Rule from its textual form.
  #
  # rule_plain_text - text of the form "lhs := sym1 sym2 ... : action".
  #
  # Returns a Rule, or nil when the text does not match that form.
  def Rule.parse(rule_plain_text)
    matches = @@match_rule.match(rule_plain_text)
    return nil unless matches

    # All three groups are mandatory in the pattern, so they are always
    # Strings here and the strip/split calls below cannot fail.
    right_side = matches[2].strip.split(/\s+/)
    Rule.new(matches[1], right_side, matches[3].strip)
  end

  # Returns the distinct symbol names used by the rule, left-hand side first.
  def vocabs
    ([@lt] + @rt).uniq
  end

  # Returns a one-line textual form of the rule.
  def to_s
    "#{@lt} = #{@rt.join(" ")} : #{@action}"
  end

  # Two rules are equal when both sides match; the action is ignored.
  # Anything that is not a Rule compares unequal instead of raising.
  def eql?(other)
    other.is_a?(Rule) && @lt.eql?(other.lt) && @rt.eql?(other.rt)
  end
  alias_method :==, :eql?

  # Kept consistent with #eql? so rules behave correctly as Hash keys and
  # under Array#uniq.
  def hash
    [@lt, @rt].hash
  end
end