时间高效过滤词算法

过滤词是做游戏服务器必须面对的一个问题,选择一个高效的方法尤其重要。下面给出一种用C++实现的过滤词算法(基于字典树),过滤效率很高。


算法输入为:

abcd
acb
acd
bcd
ac
ab


根据以上输入构建出的树结构如图所示,红色字母表示该字符是某个过滤词的终点。



测试字符串:输出结果

abc : **c
acd : ***
acdef : ***ef
efacd : ef***
efacdefacd : ef***ef***
ac : **
acef : **ef
efac : ef**
efacefac : ef**ef**



头文件 wfilter_mgr.h

/**
 * Word filter backed by a character trie.
 *
 * Words are inserted with add_word()/load(); has_filter() reports whether a
 * text contains any stored word and parse() overwrites every match with '*'.
 * The trie is reached through m_node, which stays NULL until the first word
 * is added.
 */
class wfilter_mgr
{
private:
	/**
	 * A single trie node: maps the next character to the child node.
	 *
	 * A NULL child pointer marks a leaf, i.e. a stored word ends on that
	 * character and nothing longer continues through it. m_flag marks a
	 * node at which a stored word ends even though longer words continue.
	 */
	class wnode
	{
	public:
		typedef std::map<wchar_t, wnode*> wnext;
		typedef wnext::iterator iterator;
		typedef wnext::const_iterator const_iterator;

		wnode();
		~wnode();

		// Heap allocation / deallocation helpers.
		static wnode* construct();
		static void release(wnode* p);

		// Word-end marker for the path leading to this node.
		bool get_flag() const { return m_flag; }
		void set_flag() { m_flag = true; }

		// Inserts child `p` under character `c`; returns the address of the
		// stored child pointer, or NULL when `c` was already present.
		wnode** add_node(wchar_t c, wnode* p);

		// Match the NUL-terminated text `c` against the subtree rooted here.
		// `index` is the offset of `c[0]` in the caller's string.
		bool shortest_match(const wchar_t* c, int index);
		void longest_match(const wchar_t* c, int index, int& last_index);

		// Mutable access to the child map.
		iterator begin() { return m_next.begin(); }
		iterator end() { return m_next.end(); }
		iterator find(wchar_t c) { return m_next.find(c); }
		bool empty() { return m_next.empty(); }

		// Read-only access to the child map.
		const_iterator begin() const { return m_next.begin(); }
		const_iterator end() const { return m_next.end(); }
		const_iterator find(wchar_t c) const { return m_next.find(c); }
		bool empty() const { return m_next.empty(); }

	private:
		wnext		m_next;		// children, keyed by the next character
		bool		m_flag;		// true when a stored word ends at this node
	};

public:
	wfilter_mgr();
	~wfilter_mgr();

	// Releases the trie (forwards to unload()).
	void destroy();

	// True while no trie has been built.
	bool empty() const;

	// Inserts the word given by the iterator range [_first, _last).
	void add_word(std::wstring::const_iterator _first, std::wstring::const_iterator _last);

	// Inserts every '\r' / '\n' separated word found in wstr.
	void load(const std::wstring& wstr);

	// Frees the trie and resets the manager to the empty state.
	void unload();

	// True when wstr contains at least one stored word.
	bool has_filter(const std::wstring& wstr) const;

	// Replaces every stored word found in wstr with '*' characters, in place.
	void parse(std::wstring& wstr);

private:
	wnode*			m_node;		// trie root, NULL when nothing is loaded
};



cpp文件 wfilter_mgr.cpp

// Creates a node with no children that is not marked as a word end.
wfilter_mgr::wnode::wnode()
		: m_next()
		, m_flag(false)
	{
	}

	// Destroys every child node stored in this node's map; each child's own
	// destructor then does the same for its children.
	wfilter_mgr::wnode::~wnode()
	{
		wnext::iterator child = begin();
		while (child != end())
		{
			// Leaf entries hold NULL; deleting a null pointer is a no-op.
			delete child->second;
			++child;
		}
	}

	// Registers child `p` under character `c`.
	// Returns the address of the stored child pointer so the caller can fill
	// it in later, or NULL when `c` already had an entry (nothing is changed).
	wfilter_mgr::wnode** wfilter_mgr::wnode::add_node(wchar_t c, wnode* p)
	{
		const std::pair<wnext::iterator, bool> inserted = m_next.insert(wnext::value_type(c, p));
		if (!inserted.second)
		{
			return NULL;
		}
		return &(inserted.first->second);
	}
	
	void wfilter_mgr::wnode::longest_match(const wchar_t* c, int index, int& last_index)
	{
		for (wnode* node = this;;++c, ++index)
		{
			if (node->get_flag())
			{
				last_index = index;
			}

			if (L'\0' == *c)
			{
				return;
			}

			const_iterator it = node->find(*c);
			if (node->end() != it)
			{
				if (!it->second)
				{
					last_index = index + 1;
					return;
				}
				node = it->second;
				continue;
			}
			break;
		}
	}

	bool wfilter_mgr::wnode::shortest_match(const wchar_t* c, int index)
	{
		for (wnode* node = this;;++c, ++index)
		{
			if (node->get_flag())
			{
				return true;
			}

			if (L'\0' == *c)
			{
				return false;
			}

			const_iterator it = node->find(*c);
			if (node->end() != it)
			{
				if (!it->second)
				{
					return true;
				}
				node = it->second;
				continue;
			}
			break;
		}

		return false;
	}

	// Allocates a fresh, childless node on the heap; pair with release().
	wfilter_mgr::wnode* wfilter_mgr::wnode::construct()
	{
		wnode* const created = new wnode();
		return created;
	}
	// Destroys a node obtained from construct(); passing NULL is allowed.
	void wfilter_mgr::wnode::release(wnode* p)
	{
		// delete on a null pointer does nothing, so no explicit check is needed.
		delete p;
	}

	// Starts with no trie; the root is created when the first word is added.
	wfilter_mgr::wfilter_mgr()
		: m_node(NULL)
	{
	}

	// Releases the trie so that it is not leaked when the caller never invoked
	// destroy()/unload(). unload() resets m_node to NULL, so an earlier
	// explicit destroy() followed by destruction is safe.
	// NOTE(review): the class has no user-defined copy operations, so a copied
	// manager shares m_node with its source; do not copy instances.
	wfilter_mgr::~wfilter_mgr()
	{
		unload();
	}

	void wfilter_mgr::destroy()
	{
		unload();
	}

	// Reports whether no trie exists (nothing added yet, or unloaded).
	bool wfilter_mgr::empty() const
	{
		return !m_node;
	}

	void wfilter_mgr::add_word(std::wstring::const_iterator _first, std::wstring::const_iterator _last)
	{
		wnode** node = &m_node;
		int flag = 0;
		for (std::wstring::const_iterator itr = _first; itr != _last; ++itr)
		{
			if (!(*node))
			{
				*node = wnode::construct();
			}
			wnode::iterator it = (*node)->find(*itr);
			if ((*node)->end() == it)
			{
				if (flag == 1)
				{
					(*node)->set_flag();
					flag = 2;
				}
				node = (*node)->add_node(*itr, NULL);
			}
			else
			{
				node = &(it->second);
				if (flag == 0)
				{
					flag = 1;
				}
			}
		}
		if (*node)
		{
			(*node)->set_flag();
		}
	}

	// Splits wstr into words on '\r' / '\n' and adds each non-empty word to
	// the trie. Scanning stops at the first NUL character; a word in progress
	// at that point ends at the NUL (it does not include the NUL or anything
	// after it).
	void wfilter_mgr::load(const std::wstring& wstr)
	{
		std::wstring::const_iterator word_begin = wstr.begin();
		bool in_word = false;

		std::wstring::const_iterator cit = wstr.begin();
		for (; cit != wstr.end() && *cit != L'\0'; ++cit)
		{
			const bool is_separator = (*cit == L'\r' || *cit == L'\n');
			if (is_separator)
			{
				if (in_word)
				{
					add_word(word_begin, cit);
					in_word = false;
				}
			}
			else if (!in_word)
			{
				word_begin = cit;
				in_word = true;
			}
		}

		// Flush the last word; cit is either wstr.end() or the NUL that
		// stopped the scan, so the word never extends past that point.
		if (in_word)
		{
			add_word(word_begin, cit);
		}
	}

	void wfilter_mgr::unload()
	{
		wnode::release(m_node);

		m_node = NULL;
	}

	// Returns true when any stored word occurs somewhere in wstr.
	// Scanning stops at the first NUL character in wstr.
	bool wfilter_mgr::has_filter(const std::wstring& wstr) const
	{
		if (!m_node || wstr.empty())
		{
			return false;
		}

		// c_str() is guaranteed to be NUL-terminated under every language
		// standard; data() only gained that guarantee in C++11.
		const wchar_t* text = wstr.c_str();
		for (int i = 0; L'\0' != text[i]; ++i)
		{
			if (m_node->shortest_match(text + i, i))
			{
				return true;
			}
		}
		return false;
	}

	// Replaces every stored word found in str with '*' characters, in place.
	// At each position the longest stored word starting there is masked and
	// scanning resumes right after it. Scanning stops at the first NUL.
	void wfilter_mgr::parse(std::wstring& str)
	{
		if (!m_node || str.empty())
		{
			return;
		}

		for (int i = 0;;)
		{
			// Re-fetch the buffer on every pass: the non-const str[j] writes
			// below may invalidate a previously obtained pointer (e.g. with
			// copy-on-write strings), and c_str() is always NUL-terminated.
			const wchar_t* text = str.c_str();
			if (L'\0' == text[i])
			{
				break;
			}

			int last_index = 0;
			m_node->longest_match(text + i, i, last_index);
			if (last_index > i)
			{
				for (int j = i; j < last_index; ++j)
				{
					str[j] = L'*';
				}
				i = last_index;
			}
			else
			{
				++i;
			}
		}
	}


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值