过滤词(敏感词屏蔽)是开发游戏服务器时必须面对的一个问题,选择一种高效的实现方法尤为重要。下面给出一种用C++实现的、基于字典树(Trie)的过滤词算法,过滤效率很高。
算法输入为过滤词表(每行一个过滤词):
abcd
acb
acd
bcd
ac
ab
根据上述输入构建出的树结构如图所示,红色字母表示一个过滤词的终点。
测试字符串:输出结果
abc : **c
acd : ***
acdef : ***ef
efacd : ef***
efacdefacd : ef***ef***
ac : **
acef : **ef
efac : ef**
efacefac : ef**ef**
头文件 wfilter_mgr.h
/**
 * Word filter built on a character trie.
 *
 * Filter words are inserted with add_word() / load(); has_filter() reports
 * whether a text contains any of them and parse() overwrites every match
 * with '*' characters.
 */
class wfilter_mgr
{
private:
    /**
     * A single trie node.
     *
     * Children are keyed by the next character. A child pointer may be null:
     * the matching routines treat a null child as "a word ends on this edge".
     * A node whose flag is set marks that a word ends at that node.
     */
    class wnode
    {
    public:
        typedef std::map<wchar_t, wnode*> wnext;
        typedef wnext::iterator iterator;
        typedef wnext::const_iterator const_iterator;

    public:
        wnode();
        ~wnode();

        // Heap allocation / deallocation helpers for nodes.
        static wnode* construct();
        static void release(wnode* p);

        // Terminal-word marker of this node.
        void set_flag() { m_flag = true; }
        bool get_flag() const { return m_flag; }

        // Inserts child `p` under key `c`; returns the address of the stored
        // child pointer, or NULL when the key is already present.
        wnode** add_node(wchar_t c, wnode* p);

        // Walks the null-terminated text `c` starting at this node and stores
        // in `last_index` the end position of the longest word found.
        void longest_match(const wchar_t* c, int index, int& last_index);

        // Walks the null-terminated text `c` starting at this node and
        // returns true as soon as any word is found.
        bool shortest_match(const wchar_t* c, int index);

    public:
        // Thin forwarding accessors to the child map.
        iterator begin() { return m_next.begin(); }
        iterator find(wchar_t c) { return m_next.find(c); }
        iterator end() { return m_next.end(); }
        bool empty() { return m_next.empty(); }

        const_iterator begin() const { return m_next.begin(); }
        const_iterator find(wchar_t c) const { return m_next.find(c); }
        const_iterator end() const { return m_next.end(); }
        bool empty() const { return m_next.empty(); }

    private:
        wnext m_next; // child pointers keyed by the next character
        bool m_flag;  // true when a word ends at this node
    };

public:
    wfilter_mgr();
    ~wfilter_mgr();

    // Releases the trie (forwards to unload()).
    void destroy();

    // True when no trie root exists.
    bool empty() const;

    // Inserts the word given by the iterator range [_first, _last).
    void add_word(std::wstring::const_iterator _first, std::wstring::const_iterator _last);

    // Inserts every '\r' / '\n' separated word contained in `wstr`.
    void load(const std::wstring& wstr);

    // Deletes the trie and resets the root pointer.
    void unload();

    // True when `wstr` contains at least one filter word.
    bool has_filter(const std::wstring& wstr) const;

    // Replaces every matched filter word in `wstr` with '*' characters.
    void parse(std::wstring& wstr);

private:
    wnode* m_node; // trie root; NULL while nothing has been added
};
cpp文件 wfilter_mgr.cpp
// Creates a node without children whose terminal flag is cleared.
wfilter_mgr::wnode::wnode()
    : m_next()
    , m_flag(false)
{
}
// Deletes every child node; each child's destructor in turn deletes its own
// children, so the whole subtree below this node is freed.
wfilter_mgr::wnode::~wnode()
{
    wnext::iterator child = m_next.begin();
    while (child != m_next.end())
    {
        // Some child pointers are null; deleting a null pointer is a no-op.
        delete child->second;
        ++child;
    }
}
// Registers `p` as the child reached through character `c`.
// Returns the address of the child pointer stored inside the map so the
// caller can fill it in later, or NULL when `c` already has an entry (the
// existing entry is left untouched in that case).
wfilter_mgr::wnode** wfilter_mgr::wnode::add_node(wchar_t c, wnode* p)
{
    const std::pair<wnext::iterator, bool> inserted =
        m_next.insert(wnext::value_type(c, p));
    if (!inserted.second)
    {
        return NULL;
    }
    return &(inserted.first->second);
}
void wfilter_mgr::wnode::longest_match(const wchar_t* c, int index, int& last_index)
{
for (wnode* node = this;;++c, ++index)
{
if (node->get_flag())
{
last_index = index;
}
if (L'\0' == *c)
{
return;
}
const_iterator it = node->find(*c);
if (node->end() != it)
{
if (!it->second)
{
last_index = index + 1;
return;
}
node = it->second;
continue;
}
break;
}
}
bool wfilter_mgr::wnode::shortest_match(const wchar_t* c, int index)
{
for (wnode* node = this;;++c, ++index)
{
if (node->get_flag())
{
return true;
}
if (L'\0' == *c)
{
return false;
}
const_iterator it = node->find(*c);
if (node->end() != it)
{
if (!it->second)
{
return true;
}
node = it->second;
continue;
}
break;
}
return false;
}
// Allocates a fresh, empty node on the heap; pair with release().
wfilter_mgr::wnode* wfilter_mgr::wnode::construct()
{
    wnode* created = new wnode();
    return created;
}
// Deletes the node `p` (and, through its destructor, its children).
// Passing NULL is allowed and does nothing.
void wfilter_mgr::wnode::release(wnode* p)
{
    if (p != NULL)
    {
        delete p;
    }
}
// Starts without a trie: the root is created by the first add_word() call.
wfilter_mgr::wfilter_mgr()
    : m_node(NULL)
{
}
// Frees the trie. Without this the nodes leaked whenever the owner forgot to
// call destroy()/unload() first. unload() resets the root to NULL, so an
// earlier explicit destroy() is still harmless.
// NOTE(review): the class declares no copy constructor/assignment; copying a
// manager by value would make two objects own the same trie — confirm no
// caller does that.
wfilter_mgr::~wfilter_mgr()
{
    unload();
}
void wfilter_mgr::destroy()
{
unload();
}
// Reports whether the manager currently has no trie root.
bool wfilter_mgr::empty() const
{
    return !m_node;
}
void wfilter_mgr::add_word(std::wstring::const_iterator _first, std::wstring::const_iterator _last)
{
wnode** node = &m_node;
int flag = 0;
for (std::wstring::const_iterator itr = _first; itr != _last; ++itr)
{
if (!(*node))
{
*node = wnode::construct();
}
wnode::iterator it = (*node)->find(*itr);
if ((*node)->end() == it)
{
if (flag == 1)
{
(*node)->set_flag();
flag = 2;
}
node = (*node)->add_node(*itr, NULL);
}
else
{
node = &(it->second);
if (flag == 0)
{
flag = 1;
}
}
}
if (*node)
{
(*node)->set_flag();
}
}
// Adds every word contained in `wstr` to the trie. Words are separated by
// '\r' and/or '\n'; empty lines are skipped. Scanning stops at the first
// embedded NUL character. Words already in the trie are kept (nothing is
// unloaded first).
//
// Fix: when scanning stopped at an embedded NUL, the pending word used to be
// added up to wstr.end(), so it swallowed the NUL and everything after it.
// The pending word now ends where scanning stopped.
void wfilter_mgr::load(const std::wstring& wstr)
{
    const std::wstring::const_iterator text_end = wstr.end();
    std::wstring::const_iterator word_begin = text_end;
    bool in_word = false;

    std::wstring::const_iterator cit = wstr.begin();
    for (; cit != text_end; ++cit)
    {
        const wchar_t c = *cit;
        if (c == L'\0')
        {
            break;
        }
        if (c == L'\r' || c == L'\n')
        {
            if (in_word)
            {
                add_word(word_begin, cit);
                in_word = false;
            }
        }
        else if (!in_word)
        {
            word_begin = cit;
            in_word = true;
        }
    }

    // `cit` is either the end of the text or the position of the NUL.
    if (in_word)
    {
        add_word(word_begin, cit);
    }
}
void wfilter_mgr::unload()
{
wnode::release(m_node);
m_node = NULL;
}
// Returns true when at least one filter word occurs anywhere in `wstr`.
// Returns false when no trie is loaded or the text is empty. The text is
// scanned up to its first NUL character.
//
// Fix: the scan relies on a terminating NUL, which std::wstring::data() only
// guarantees since C++11; c_str() guarantees it in every language version.
bool wfilter_mgr::has_filter(const std::wstring& wstr) const
{
    if (!m_node || wstr.empty())
    {
        return false;
    }

    const wchar_t* text = wstr.c_str();
    for (int i = 0; L'\0' != text[i]; ++i)
    {
        // Try to match a word starting at every position in turn.
        if (m_node->shortest_match(text + i, i))
        {
            return true;
        }
    }
    return false;
}
// Replaces, in place, every occurrence of a filter word in `str` with '*'
// characters (one per replaced character). At each position the longest
// matching word wins and scanning resumes right after it. Does nothing when
// no trie is loaded or the text is empty. The text is processed up to its
// first NUL character.
//
// Fixes:
//   * The scan relies on a terminating NUL, which std::wstring::data() only
//     guarantees since C++11; c_str() is used instead.
//   * The buffer pointer is re-fetched on every pass, because the writes go
//     through the non-const operator[], which under pre-C++11 rules may
//     invalidate pointers obtained earlier.
void wfilter_mgr::parse(std::wstring& str)
{
    if (!m_node || str.empty())
    {
        return;
    }

    int i = 0;
    for (;;)
    {
        const wchar_t* text = str.c_str();
        if (L'\0' == text[i])
        {
            break;
        }

        int last_index = 0;
        m_node->longest_match(text + i, i, last_index);
        if (last_index > i)
        {
            // Characters [i, last_index) form the longest word starting at i.
            for (int j = i; j < last_index; ++j)
            {
                str[j] = L'*';
            }
            i = last_index;
        }
        else
        {
            ++i;
        }
    }
}