C++/Qt 敏感词过滤（支持多级匹配）

C++/Qt 敏感词过滤（支持多级匹配）

敏感词过滤

第一步

  1. 创建一个字典树
    1. 节点内容需要包括：子节点的存储结构、添加/获取子节点、是否为敏感词的最后一个字符（同时记录敏感词等级）、是否存在子节点等。代码如下：
    	class TireNode {
    	public:
    	  unsigned short end = 0; //是不是关键词的结尾/关键词等级
    
    	  QMap<QString, TireNode*> subNodes; //当前节点下所有的子节点
    
    	    void addSubNode(QString key, TireNode* node) { //添加节点
       		 subNodes.insert(key, node);
    	 }
    
    	 TireNode* getSubNode(QString key) { //得到节点
       		 return subNodes.value(key);
    	  }
    
    	 unsigned short isKeywordEnd() { //是否指向敏感词的最后一个字符
       		 return end;
    	  }
    
    		bool hasChildren() {	//是否存在子节点
      		  return subNodes.size();
    	 }
    
    	 void setkeywordEnd(unsigned short end) { //设置新添加的敏感词最后一个字符为end
        	this->end = end;
    	 }
    };
    
  2. 定义一个操作类
	.h文件
/// Sensitive-word filter backed by a TireNode trie.
class Filter
{
public:
    /// Builds a filter whose trie consists of a single root node.
    Filter();

    /// Drops the current keyword trie while holding the mutex.
    void UpdateFilterWord();

    /// Inserts the keyword lineTxt into the trie, tagging its last
    /// character with the level err, and records it in errLevelMap.
    void addWord(QString lineTxt, unsigned short err = 0);

    /// Rewrites lineTxt in place with "***" substituted for matched keywords
    /// and reports the highest keyword level encountered through err.
    void filter(QString& lineTxt, unsigned short& err);

private:
    TireNode* rootNode;                         // root of the keyword trie
    QMap<QString, unsigned short> errLevelMap;  // keyword -> level, written by addWord
    QMutex mutex;                               // locked by filter and UpdateFilterWord
};
.cpp文件
#include "filter.h"
#include <QAction>
/// Starts the filter with an empty trie: a freshly allocated root node
/// that has no children.
Filter::Filter()
  : rootNode(new TireNode)
{
}

void Filter::UpdateFilterWord()
{
  QMutexLocker lock(&mutex);
  rootNode = nullptr;
}

/// Inserts one keyword into the trie.
///
/// @param lineTxt  The keyword; it is stored one UTF-16 code unit per trie
///                 level (QString::mid(i, 1)). An empty keyword is ignored.
/// @param err      Level stored on the node of the keyword's last character.
///                 A level of 0 leaves that node unmarked, because
///                 TireNode::isKeywordEnd() returns this value directly.
///
/// The trie is modified under the same mutex that filter() and
/// UpdateFilterWord() take, so a concurrent filter() never walks a
/// half-inserted keyword. The mutex is not recursive: do not call this
/// while already holding it.
void Filter::addWord(QString lineTxt, unsigned short err)
{
  if (lineTxt.isEmpty()) {
      return;
  }
  QMutexLocker lock(&mutex);
  if (rootNode == nullptr) {
      // Recover from a filter that currently has no root.
      rootNode = new TireNode;
  }
  TireNode *tempNode = rootNode;
  for (int i = 0; i < lineTxt.length(); ++i) {
      const QString c = lineTxt.mid(i, 1);
      TireNode *node = tempNode->getSubNode(c);
      if (node == nullptr) {
          node = new TireNode();
          tempNode->addSubNode(c, node);
      }
      tempNode = node;
  }
  // tempNode now points at the node of the last character.
  tempNode->setkeywordEnd(err);
  errLevelMap[lineTxt] = err;
}

/// Masks sensitive words in lineTxt and reports the highest level seen.
///
/// For every start position the trie is walked as far as the text allows.
/// The longest keyword with a level greater than 1 that begins at that
/// position is replaced by "***" and scanning resumes right after it.
/// When no such keyword begins there, the character is copied unchanged
/// and scanning resumes at the next character.
///
/// Keywords of level 1 are never masked; they only contribute to err.
///
/// @param lineTxt  In/out: the text to filter, overwritten with the result.
/// @param err      Out: reset to 0, then raised to the largest keyword level
///                 met on any trie walk (0 when nothing matched, including
///                 for empty input).
void Filter::filter(QString& lineTxt,unsigned short& err)
{
  QMutexLocker lock(&mutex);
  err = 0;
  if (lineTxt.isEmpty() || rootNode == nullptr) {
      return;
  }
  const QString replacement = "***"; // mask written in place of a keyword
  const int length = lineTxt.length();
  QString result;
  int begin = 0; // start of the current match attempt
  while (begin < length) {
      TireNode *node = rootNode;
      int matchEnd = -1; // index of the last char of the longest maskable keyword
      for (int pos = begin; pos < length; ++pos) {
          node = node->getSubNode(lineTxt.mid(pos, 1));
          if (node == nullptr) {
              break; // no keyword continues with this character
          }
          const unsigned short level = node->isKeywordEnd();
          if (level > err) {
              err = level;
          }
          if (level > 1) {
              matchEnd = pos;
              if (!node->hasChildren()) {
                  break; // nothing longer can match from this start
              }
              // A longer keyword may share this prefix: keep walking.
          }
      }
      if (matchEnd >= 0) {
          result.append(replacement);
          begin = matchEnd + 1;
      } else {
          result.append(lineTxt.mid(begin, 1));
          ++begin;
      }
  }
  lineTxt = result;
}


源项目链接: link.

效果:
添加以及匹配
添加以及匹配
匹配后的字符串结果以及敏感词等级（一级敏感词不屏蔽）
匹配字符串以及敏感词等级

已标记关键词 清除标记
©️2020 CSDN 皮肤主题: 大白 设计师:CSDN官方博客 返回首页