用Ruby写个NFA

    今天有点空闲,想着用Ruby写个NFA试试。从正则表达式构造NFA采用经典的Thompson算法:正则表达式 -> 后缀表达式 -> 构造NFA。构造出NFA之后,再用它来匹配字符串。一句话,就是写了个玩具级的正则表达式引擎,支持连接(concatenation)、选择(alternation)以及 *、?、+ 量词,不支持反向引用和转义符。测试了一下与Ruby自带的正则表达式引擎的性能对比,慢了3倍。构造NFA的部分没什么问题,主要是匹配执行的部分写得烂,有空再改改。

 

nfa.rb

module NFA
  # Runs a Thompson-style NFA against an input string, byte by byte.
  #
  # The automaton is accessed by duck typing only: a state responds to +out1+
  # and +out2+ (its outgoing transitions, either may be nil) and to +type+;
  # a transition responds to +c+ (a byte value, or EPSILON) and to +end+ (the
  # target state, or nil when the transition was never linked).
  class NFA
    # Transition#c value marking an epsilon transition (consumes no input).
    EPSILON = -1
    # State#type value marking the accepting state.
    MATCH = 0
    # Transition-shaped marker meaning "the simulation is standing on +end+".
    # It lets a set of states travel through the transition-list interface.
    Arrival = Struct.new(:c, :end)

    # state:: start state of the automaton.
    def initialize(state)
      @state = state
    end

    # Advances the simulation by one input byte.
    #
    # clist:: list of transitions (nil entries are ignored) describing the
    #         current position of the simulation.
    # c::     the input byte, as an Integer.
    #
    # Returns a list of transition-like objects, one per distinct state
    # reached by consuming +c+; the list is empty when no path can consume it.
    def step(clist, c)
      labelled, = closure(clist)
      targets = {}
      labelled.each do |t|
        # Keyed by object_id so each target state is kept once, by identity.
        targets[t.end.object_id] = t.end if t.c == c && t.end
      end
      targets.values.map { |state| Arrival.new(EPSILON, state) }
    end

    # Returns true when the whole string +s+ is accepted by the automaton.
    def test?(s)
      match(@state, s)
    end

    # Simulates the automaton starting at +state+ over every byte of +s+.
    # Returns true only if the input is fully consumed and an accepting state
    # is reachable afterwards.
    def match(state, s)
      clist = [Arrival.new(EPSILON, state)]
      s.each_byte do |c|
        clist = step(clist, c)
        # No live path left: the remaining input cannot rescue the match.
        return false if clist.empty?
      end
      is_match?(clist)
    end

    # Returns true when an accepting state is reachable from +clist+ through
    # epsilon transitions alone (any number of hops).
    def is_match?(clist)
      _labelled, states = closure(clist)
      states.any? { |state| state.type == MATCH }
    end

    private

    # Follows epsilon transitions from +clist+ without consuming input.
    #
    # Returns a pair: the byte-labelled transitions that can be taken next,
    # and the states visited on the way. Every state is expanded at most once,
    # so epsilon cycles (e.g. from nested quantifiers) terminate.
    def closure(clist)
      labelled = []
      visited = {}
      pending = clist.compact
      until pending.empty?
        t = pending.pop
        if t.c != EPSILON
          labelled << t
        elsif t.end && !visited.key?(t.end.object_id)
          state = t.end
          visited[state.object_id] = state
          pending << state.out1 if state.out1
          pending << state.out2 if state.out2
        end
      end
      [labelled, visited.values]
    end
  end
  # Counters saved by re2post when it enters a parenthesised group, so they
  # can be restored once the group closes.
  class Paren
    # Number of pending alternations outside the group.
    attr_accessor :n_alt
    # Number of pending atoms outside the group.
    attr_accessor :n_atom
  end
  # A node of the automaton with up to two outgoing transitions.
  # +type+ is 1 for an ordinary state and 0 for the accepting state.
  class State
    attr_accessor :out1
    attr_accessor :out2
    attr_accessor :type

    # out1, out2:: the outgoing transitions; a new state is always ordinary.
    def initialize(out1, out2)
      @type = 1
      @out1 = out1
      @out2 = out2
    end

    # True when this is the accepting state.
    def is_matched?
      @type == 0
    end
  end
  # An edge of the automaton.
  # +c+ is the label (a byte value, or -1 for an epsilon edge) and +end+ is
  # the target state, which stays nil until the edge is linked.
  class Transition
    attr_accessor :c
    attr_accessor :end

    # c:: the label of the new, still unlinked, edge.
    def initialize(c)
      @c = c
    end
  end
  # A partially built automaton fragment kept on the post2nfa stack:
  # +start+ is its entry state and +outs+ the transitions still to be linked.
  class Frame
    attr_accessor :start
    attr_accessor :outs

    def initialize(start, outs)
      @outs = outs
      @start = start
    end
  end
  # Helpers for the lists of dangling transitions carried by fragments.
  class ListUitls
    class << self
      # Points every transition in +list+ at +state+; returns +list+.
      def link(list, state)
        list.each do |transition|
          transition.end = state
        end
      end

      # Returns a new list holding the items of +list1+ followed by +list2+.
      def append(list1, list2)
        list1 + list2
      end

      # Returns a new one-element list containing +out+.
      def new_list(out)
        [out]
      end
    end
  end
  # Compiles the pattern +re+ into a matcher object.
  #
  # Raises ArgumentError when re2post returns nil for the pattern, and
  # RuntimeError when post2nfa returns nil for the resulting postfix form.
  def self.compile(re)
    postfix = re2post(re)
    if postfix.nil?
      raise ArgumentError, "bad regexp!"
    end

    start_state = post2nfa(postfix)
    if start_state.nil?
      raise RuntimeError, "construct nfa from postfix fail!"
    end

    NFA.new(start_state)
  end
  # Builds an automaton from a postfix pattern using a stack of fragments.
  #
  # postfix:: string in which '.' is concatenation, '|' is alternation,
  #           '?', '*' and '+' are quantifiers and every other byte is a
  #           literal.
  #
  # Returns the start State, or nil when +postfix+ is nil, empty or not a
  # well-formed postfix expression (an operator lacks operands, or more than
  # one fragment is left over).
  def self.post2nfa(postfix)
    return nil if postfix.nil?

    stack = []
    postfix.each_byte do |byte|
      case byte.chr
      when '.'
        # Concatenation: the outs of the first fragment enter the second.
        return nil if stack.size < 2
        e2 = stack.pop
        e1 = stack.pop
        ListUitls.link(e1.outs, e2.start)
        stack.push(Frame.new(e1.start, e2.outs))
      when '|'
        # Alternation: a new split state enters either fragment.
        return nil if stack.size < 2
        e2 = stack.pop
        e1 = stack.pop
        t1 = Transition.new(-1)
        t2 = Transition.new(-1)
        t1.end = e1.start
        t2.end = e2.start
        s = State.new(t1, t2)
        stack.push(Frame.new(s, ListUitls.append(e1.outs, e2.outs)))
      when '?'
        # Zero or one: the split state enters the fragment or skips it (t2).
        return nil if stack.empty?
        e = stack.pop
        t1 = Transition.new(-1)
        t2 = Transition.new(-1)
        t1.end = e.start
        s = State.new(t1, t2)
        stack.push(Frame.new(s, ListUitls.append(e.outs, ListUitls.new_list(t2))))
      when '*'
        # Zero or more: the fragment loops back to the split state, which is
        # also the entry point; t2 is the way out.
        return nil if stack.empty?
        e = stack.pop
        t1 = Transition.new(-1)
        t2 = Transition.new(-1)
        t1.end = e.start
        s = State.new(t1, t2)
        ListUitls.link(e.outs, s)
        stack.push(Frame.new(s, ListUitls.new_list(s.out2)))
      when '+'
        # One or more: like '*', but entry is through the fragment itself.
        return nil if stack.empty?
        e = stack.pop
        t1 = Transition.new(-1)
        t2 = Transition.new(-1)
        t1.end = e.start
        s = State.new(t1, t2)
        ListUitls.link(e.outs, s)
        stack.push(Frame.new(e.start, ListUitls.new_list(t2)))
      else
        # Literal byte: out1 carries the label, out2 is an unlinked epsilon.
        t = Transition.new(byte)
        s = State.new(t, Transition.new(-1))
        stack.push(Frame.new(s, ListUitls.new_list(s.out1)))
      end
    end

    e = stack.pop
    # Exactly one fragment must remain for a well-formed expression.
    return nil if e.nil? || !stack.empty?

    end_state = State.new(nil, nil)
    end_state.type = 0
    e.outs.each do |tran|
      if tran.c != -1
        # A labelled transition reaches the accepting state through an
        # intermediate state, so acceptance is always entered by an epsilon.
        t1 = Transition.new(-1)
        t2 = Transition.new(-1)
        s = State.new(t1, t2)
        tran.end = s
        s.out1.end = end_state
        s.out2.end = end_state
      else
        tran.end = end_state
      end
    end
    e.start
  end
  # Converts an infix pattern to postfix form, inserting '.' as an explicit
  # concatenation operator.
  #
  # re:: pattern made of literal bytes, '(' ')' grouping, '|' alternation and
  #      the '*', '+', '?' quantifiers. There is no escape syntax.
  #
  # Returns the postfix string (built as a byte string, so bytes >= 128 are
  # copied through unchanged), or nil when the pattern is malformed:
  # nil or empty input, unbalanced parentheses, an operator with nothing to
  # apply to, an empty alternative, or a literal '.' (which cannot be told
  # apart from the concatenation operator in the postfix form).
  def self.re2post(re)
    return nil if re.nil?

    n_alt = 0
    n_atom = 0
    # String.new yields a binary buffer on Ruby >= 1.9, so appending an
    # Integer appends that exact byte rather than a multi-byte code point.
    result = String.new
    paren = []
    re.each_byte do |c|
      case c.chr
      when '.'
        return nil
      when '('
        if n_atom > 1
          n_atom -= 1
          result << "."
        end
        saved = Paren.new
        saved.n_alt = n_alt
        saved.n_atom = n_atom
        paren.push(saved)
        n_alt = n_atom = 0
      when '|'
        return nil if n_atom == 0
        # Flush pending concatenations; leaves n_atom at 0.
        while (n_atom -= 1) > 0
          result << "."
        end
        n_alt += 1
      when ')'
        return nil if paren.empty? || n_atom == 0
        while (n_atom -= 1) > 0
          result << "."
        end
        while n_alt > 0
          result << "|"
          n_alt -= 1
        end
        saved = paren.pop
        n_alt = saved.n_alt
        # The closed group counts as one atom of the enclosing level.
        n_atom = saved.n_atom + 1
      when '*', '+', '?'
        return nil if n_atom == 0
        result << c
      else
        if n_atom > 1
          n_atom -= 1
          result << "."
        end
        result << c
        n_atom += 1
      end
    end
    return nil unless paren.empty?
    # Nothing to emit for the last alternative ("" or a trailing '|').
    return nil if n_atom == 0

    while (n_atom -= 1) > 0
      result << "."
    end
    while n_alt > 0
      n_alt -= 1
      result << "|"
    end
    result
  end
end

 

使用的话:

 

 nfa = NFA::compile("a(bb)+a(cdf)*")
 # Each pair is [input, whether the pattern must accept the whole input].
 [
   ["abba", true],
   ["abbbba", true],
   ["a", false],
   ["aa", false],
   ["abbacdf", true],
   ["abbbbacdfcdf", true],
   ["bbbbacdfcdf", false],
   ["abbbacdfcdf", false],
   ["abbbbacdfdf", false],
   ["abbbbacdfdfg", false]
 ].each do |input, accepted|
   assert nfa.test?(input) == accepted
 end
 

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值