DFA算法过滤敏感词,替换为*

import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.Properties;

/**
 * Sensitive-word filter built on a byte-level keyword tree (DFA).
 *
 * <p>Keywords are encoded with the configured charset and inserted into the
 * tree one byte at a time. Scanning walks the encoded text through the tree
 * and overwrites every byte of a matched keyword with {@code '*'}.
 *
 * <p>Masking is done per encoded byte, not per character: a keyword character
 * that encodes to three bytes is replaced by three {@code '*'} characters.
 * NOTE(review): writing the single byte {@code '*'} only yields readable
 * output for ASCII-compatible charsets (UTF-8, GBK, ...) - confirm before
 * using a charset such as UTF-16.
 */
public class test {
    /** Byte written over every byte of a matched keyword (ASCII '*', value 42). */
    private static final byte MASK = '*';

    /** Root node of the keyword tree. */
    private TreeNode rootNode = new TreeNode();

    /** Name of the charset used to encode keywords/text and to decode the result. */
    private String charset = "utf-8";

    /**
     * Adds every keyword of the list to the keyword tree.
     *
     * <p>{@code null} entries and entries that are empty after trimming are
     * skipped. A {@code null} list is treated as an empty list.
     *
     * @param keywordList keywords to register; may be {@code null}
     * @throws UnsupportedEncodingException if the configured charset is not supported
     */
    public void createKeywordTree(List<String> keywordList) throws UnsupportedEncodingException {
        if (keywordList == null) {
            return;
        }
        for (String keyword : keywordList) {
            if (keyword == null) {
                continue;
            }
            byte[] bytes = keyword.trim().getBytes(charset);
            if (bytes.length == 0) {
                // Nothing to insert; never mark the root itself as a keyword end.
                continue;
            }
            TreeNode current = rootNode;
            for (byte b : bytes) {
                // Bytes are signed in Java; mask to get a child index in 0..255.
                int index = b & 0xff;
                TreeNode next = current.getSubNode(index);
                if (next == null) {
                    next = new TreeNode();
                    current.setSubNode(index, next);
                }
                current = next;
            }
            // The node reached by the last byte terminates this keyword.
            current.setKeywordEnd(true);
        }
    }

    /**
     * Masks every registered keyword found in the text.
     *
     * @param text text to filter; {@code null} yields an empty string, matching
     *             the behaviour of {@link #searchKeyword(byte[])} for {@code null}
     * @return the text with each byte of every matched keyword replaced by {@code '*'}
     * @throws UnsupportedEncodingException if the configured charset is not supported
     */
    public String searchKeyword(String text) throws UnsupportedEncodingException {
        if (text == null) {
            return "";
        }
        return searchKeyword(text.getBytes(charset));
    }

    /**
     * Masks every registered keyword found in the encoded text.
     *
     * <p>The input array is not modified; masking is applied to a copy. The
     * result is decoded with the configured charset, i.e. the same charset
     * that {@link #searchKeyword(String)} uses for encoding.
     *
     * @param bytes text encoded with the configured charset; may be {@code null}
     * @return the masked text, or an empty string if {@code bytes} is
     *         {@code null} or empty
     * @throws IllegalStateException if the configured charset is not supported
     */
    public String searchKeyword(byte[] bytes) {
        if (bytes == null || bytes.length == 0) {
            return "";
        }
        // Work on a copy so the caller's array is left untouched.
        byte[] masked = bytes.clone();
        TreeNode tempNode = rootNode;
        // Number of bytes before the current one that belong to the match attempt in progress.
        int rollback = 0;
        int position = 0;
        while (position < bytes.length) {
            int index = bytes[position] & 0xFF;
            tempNode = tempNode.getSubNode(index);
            if (tempNode == null) {
                // Mismatch: go back to where this attempt started; the increment
                // below then restarts matching one byte after that start.
                position -= rollback;
                rollback = 0;
                tempNode = rootNode;
            } else if (tempNode.isKeywordEnd()) {
                // Keyword complete: mask the current byte and the pending bytes before it.
                for (int i = 0; i <= rollback; i++) {
                    masked[position - i] = MASK;
                }
                // Stay on this node so a longer keyword sharing the prefix can still
                // match; if the next byte fails, scanning resumes right after this byte.
                rollback = 1;
            } else {
                rollback++;
            }
            position++;
        }
        try {
            return new String(masked, charset);
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException("Unsupported charset: " + charset, e);
        }
    }

    /**
     * Sets the charset name used by subsequent encode/decode operations.
     * Keywords already inserted into the tree are not re-encoded.
     *
     * @param charset charset name, e.g. {@code "utf-8"}
     */
    public void setCharset(String charset) {
        this.charset = charset;
    }
}

 

import java.util.ArrayList;
import java.util.List;

/**
 * A node of the keyword tree: a fixed table of child slots plus a flag
 * telling whether a keyword ends at this node.
 */
public class TreeNode {
    /** Number of child slots held by every node. */
    private static final int NODE_LEN = 256;

    /** {@code true} if a keyword terminates at this node; {@code false} otherwise. */
    private boolean end = false;

    /** Child slots addressed by index; a {@code null} entry means no child is stored there. */
    private List<TreeNode> subNodes = new ArrayList<TreeNode>(NODE_LEN);

    /**
     * Creates a node whose {@code NODE_LEN} child slots are all {@code null}.
     */
    public TreeNode() {
        // Pre-fill the list so set/get work for every index below NODE_LEN.
        while (subNodes.size() < NODE_LEN) {
            subNodes.add(null);
        }
    }

    /**
     * Stores a child node in the given slot, replacing whatever was there.
     *
     * @param index slot index, {@code 0 <= index < 256}
     * @param node  child node to store
     */
    public void setSubNode(int index, TreeNode node) {
        subNodes.set(index, node);
    }

    /**
     * Returns the child stored in the given slot.
     *
     * @param index slot index, {@code 0 <= index < 256}
     * @return the child node, or {@code null} if the slot is empty
     */
    public TreeNode getSubNode(int index) {
        return subNodes.get(index);
    }

    /**
     * @return {@code true} if a keyword ends at this node
     */
    public boolean isKeywordEnd() {
        return end;
    }

    /**
     * Marks or unmarks this node as the end of a keyword.
     *
     * @param end new value of the keyword-end flag
     */
    public void setKeywordEnd(boolean end) {
        this.end = end;
    }
}
 
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值