#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstring>
#include <deque>
#include <iostream>
#include <queue>
#include <stack>
#include <utility>
#include <vector>
template <size_t _MaxStat>
class _state_extractor {
public:
typedef unsigned state_type;
class state_overflow {};
_state_extractor():_next_m(0) {}
state_type get() {
if(!_reclaimer_m.empty()) {
state_type _x=_reclaimer_m.front();
_reclaimer_m.pop();
return _x;
}
if(_next_m==_MaxStat) throw state_overflow();
return _next_m++;
}
void get_back(state_type _x) {
_reclaimer_m.push(_x);
}
private:
std::queue<state_type> _reclaimer_m;
unsigned _next_m;
};
template <size_t _MaxStat>
class regular_expression {
public:
typedef unsigned state_type;
typedef int trans_type;
typedef std::vector<state_type> nfa[_MaxStat][256];
typedef _state_extractor<_MaxStat> state_extractor;
enum {empty};
virtual ~regular_expression() {}
state_type get_begstat() const {
return _begin_m;
}
state_type get_endstat() const {
return _end_m;
}
std::vector<std::vector<state_type> *> &get_endptr_set() {
return _endptr_set_m;
}
void set_begstat(nfa _nfa, state_type _stat) {
for(int _x=0; _x<256; ++_x)
if(!_nfa[_begin_m][_x].empty()) {
_nfa[_stat][_x]=_nfa[_begin_m][_x];
_nfa[_begin_m][_x].clear();
}
_begin_m=_stat;
}
void set_endstat(nfa _nfa, state_type _stat) {
for(std::vector<std::vector<state_type> *>::const_iterator _i=_endptr_set_m.begin(); _i!=_endptr_set_m.end(); ++_i) {
(*_i)->erase(std::find((*_i)->begin(), (*_i)->end(), _end_m));
(*_i)->push_back(_stat);
}
_end_m=_stat;
}
protected:
regular_expression(state_type _begstat, state_type _endstat)
:_begin_m(_begstat)
,_end_m(_endstat) {}
state_type _begin_m, _end_m;
std::vector<std::vector<state_type> *> _endptr_set_m;
private:
regular_expression(const regular_expression &);
};
template <size_t _MaxStat>
class alpha:public regular_expression<_MaxStat> {
typedef regular_expression<_MaxStat> _base_type;
public:
alpha(typename _base_type::nfa _nfa, typename _base_type::state_extractor &_se, typename _base_type::trans_type _trans)
:_base_type(_se.get(), _se.get()) {
_nfa[this->_begin_m][_trans].push_back(this->_end_m);
this->_endptr_set_m.push_back(&_nfa[this->_begin_m][_trans]);
}
};
template <size_t _MaxStat>
class _and:public regular_expression<_MaxStat> {
typedef regular_expression<_MaxStat> _base_type;
public:
_and(typename _base_type::nfa _nfa, typename _base_type::state_extractor &_se, _base_type *_left, _base_type *_right)
:_base_type(_left->get_begstat(), _right->get_endstat()) {
_se.get_back(_left->get_endstat());
_left->set_endstat(_nfa, _right->get_begstat());
this->_endptr_set_m=_right->get_endptr_set();
}
~_and() {
delete _left_m;
delete _right_m;
}
private:
_base_type *_left_m, *_right_m;
};
template <size_t _MaxStat>
class _or:public regular_expression<_MaxStat> {
typedef regular_expression<_MaxStat> _base_type;
public:
_or(typename _base_type::nfa _nfa, typename _base_type::state_extractor &_se, _base_type *_left, _base_type *_right)
:_base_type(_se.get(), _se.get()), _left_m(_left), _right_m(_right) {
_nfa[this->_begin_m][_base_type::empty].push_back(_left_m->get_begstat());
_nfa[this->_begin_m][_base_type::empty].push_back(_right_m->get_begstat());
_nfa[_left_m->get_endstat()][_base_type::empty].push_back(this->_end_m);
_nfa[_right_m->get_endstat()][_base_type::empty].push_back(this->_end_m);
this->_endptr_set_m.push_back(&_nfa[_left_m->get_endstat()][_base_type::empty]);
this->_endptr_set_m.push_back(&_nfa[_right_m->get_endstat()][_base_type::empty]);
}
~_or() {
delete _left_m;
delete _right_m;
}
private:
_base_type *_left_m, *_right_m;
};
template <size_t _MaxStat>
class closure:public regular_expression<_MaxStat> {
typedef regular_expression<_MaxStat> _base_type;
public:
closure(typename _base_type::nfa _nfa, typename _base_type::state_extractor &_se, _base_type *_original)
:_base_type(_se.get(), _se.get()), _original_m(_original) {
_nfa[this->_begin_m][_base_type::empty].push_back(_original_m->get_begstat());
_nfa[this->_begin_m][_base_type::empty].push_back(this->_end_m);
_nfa[_original_m->get_endstat()][_base_type::empty].push_back(_original_m->get_begstat());
_nfa[_original_m->get_endstat()][_base_type::empty].push_back(this->_end_m);
this->_endptr_set_m.push_back(&_nfa[_original_m->get_endstat()][_base_type::empty]);
}
~closure() {
delete _original_m;
}
private:
_base_type *_original_m;
};
class lexer {
public:
class bad_token {};
lexer(const std::string &_src):_src_m(_src), _iter_m(_src_m.begin()) {}
bool is_already_over() {
leave_out_space();
return _iter_m==_src_m.end();
}
lexer &operator>>(char &_c) {
leave_out_space();
if(!isalnum(*_iter_m)&&*_iter_m!='|'&&*_iter_m!='('&&*_iter_m!=')'&&*_iter_m!='*') throw bad_token();
_c=*_iter_m++;
return *this;
}
private:
void leave_out_space() {
std::string::const_iterator _enditer;
while(_iter_m!=_enditer&&isspace(*_iter_m)) ++_iter_m;
}
std::string _src_m;
std::string::const_iterator _iter_m;
};
class post_regular_expression_factor {
public:
std::vector<char> generic(const std::string &_src) const {
std::vector<char> _result;
std::stack<char> _operator_stack;
lexer _lxr(_src);
char _c;
while(!_lxr.is_already_over()) {
_lxr>>_c;
switch(_c) {
case '(':
_operator_stack.push('(');
break;
case ')':
while(_operator_stack.top()!='(') {
_result.push_back(_operator_stack.top());
_operator_stack.pop();
}
_operator_stack.pop();
break;
case '*':
_result.push_back('*');
break;
case '|':
while(!_operator_stack.empty()&&_operator_stack.top()!='(') {
_result.push_back(_operator_stack.top());
_operator_stack.pop();
}
_operator_stack.push('|');
break;
default:
_result.push_back(_c);
}
}
while(!_operator_stack.empty()) {
_result.push_back(_operator_stack.top());
_operator_stack.pop();
}
return _result;
}
};
class regular_expression_nfa_factor {
public:
template <size_t _MaxStat>
std::pair<
typename regular_expression<_MaxStat>::state_type,
typename regular_expression<_MaxStat>::state_type> generic(typename regular_expression<_MaxStat>::nfa _nfa, const std::vector<char> &_vec) const {
typename regular_expression<_MaxStat>::state_extractor _se;
std::deque<regular_expression<_MaxStat> *> _stack;
for(std::vector<char>::const_iterator _i=_vec.begin(); _i!=_vec.end(); ++_i) {
if(isalnum(*_i))
_stack.push_back(new alpha<_MaxStat>(_nfa, _se, *_i));
else if(*_i=='*')
_stack.back()=new closure<_MaxStat>(_nfa, _se, _stack.back());
else if(*_i=='|') {
regular_expression<_MaxStat> *_second=_stack.back();
_stack.pop_back();
regular_expression<_MaxStat> *_first=_stack.back();
_stack.pop_back();
_stack.push_back(new _or<_MaxStat>(_nfa, _se, _first, _second));
}
}
while(_stack.size()>=2) {
regular_expression<_MaxStat> *_first=_stack.front();
_stack.pop_front();
regular_expression<_MaxStat> *_second=_stack.front();
_stack.pop_front();
_stack.push_front(new _and<_MaxStat>(_nfa, _se, _first, _second));
}
std::pair<typename regular_expression<_MaxStat>::state_type,
typename regular_expression<_MaxStat>::state_type> _result(_stack.back()->get_begstat(),
_stack.back()->get_endstat());
delete _stack.back();
return _result;
}
};
class regular_expression_matcher {
public:
template <size_t _MaxStat>
bool match(const char *_srcstr, const char *_patstr) {
post_regular_expression_factor _pref;
std::vector<char> _vec=_pref.generic(_srcstr);
typename regular_expression<_MaxStat>::nfa _nfa;
regular_expression_nfa_factor _renf;
std::pair<typename regular_expression<_MaxStat>::state_type,
typename regular_expression<_MaxStat>::state_type> _r=_renf.generic<_MaxStat>(_nfa, _vec);
return match_impl<_MaxStat>(_nfa, _r.first, _r.second, _patstr);
}
private:
template <size_t _MaxStat>
bool match_impl(typename regular_expression<_MaxStat>::nfa _nfa,
typename regular_expression<_MaxStat>::state_type _beg,
typename regular_expression<_MaxStat>::state_type _end,
const char *_patstr) {
static bool _state_map[_MaxStat];
memset(_state_map, 0, sizeof(bool)*_MaxStat);
std::vector<typename regular_expression<_MaxStat>::state_type> _old_states;
std::vector<typename regular_expression<_MaxStat>::state_type> _new_states;
match_impl_aux<_MaxStat>(_nfa, _beg, _old_states, _state_map);
for(typename std::vector<typename regular_expression<_MaxStat>::state_type>::const_iterator _i=_old_states.begin(); _i!=_old_states.end(); ++_i)
_state_map[*_i]=false;
for(; *_patstr; ++_patstr) {
for(typename std::vector<typename regular_expression<_MaxStat>::state_type>::iterator _i=_old_states.begin(); _i!=_old_states.end(); ) {
for(typename std::vector<typename regular_expression<_MaxStat>::state_type>::const_iterator _j=_nfa[*_i][*_patstr].begin(); _j!=_nfa[*_i][*_patstr].end(); ++_j)
match_impl_aux<_MaxStat>(_nfa, *_j, _new_states, _state_map);
_i=_old_states.erase(_i);
}
for(typename std::vector<typename regular_expression<_MaxStat>::state_type>::iterator _i=_new_states.begin(); _i!=_new_states.end(); ) {
_old_states.push_back(*_i);
_state_map[*_i]=false;
_i=_new_states.erase(_i);
}
}
return std::find(_old_states.begin(), _old_states.end(), _end)!=_old_states.end();
}
template <size_t _MaxStat>
void match_impl_aux(typename regular_expression<_MaxStat>::nfa _nfa,
typename regular_expression<_MaxStat>::state_type _cur,
std::vector<typename regular_expression<_MaxStat>::state_type> &_states,
bool _state_map[_MaxStat]) {
_states.push_back(_cur);
_state_map[_cur]=true;
for(typename std::vector<typename regular_expression<_MaxStat>::state_type>::const_iterator _i=_nfa[_cur][regular_expression<_MaxStat>::empty].begin(); _i!=_nfa[_cur][regular_expression<_MaxStat>::empty].end(); ++_i)
if(!_state_map[*_i])
match_impl_aux<_MaxStat>(_nfa, *_i, _states, _state_map);
}
};
不确定有穷自动机正则表达式求值
最新推荐文章于 2024-09-18 19:26:48 发布