java实现敏感字过滤工具类

实现敏感字的过滤思路:

1. 读取敏感词库;

2. 确认字符串中是否有敏感词库中的敏感词

以下提供两个静态方法: 判断字符串中是否含有敏感词的 boolean checkSenstiveWord(String str), 以及将敏感词替换为 * 字符的 String filterInfoAfter(String str).

工具类:



import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

/**
 * Sensitive-word filter: finds every occurrence of a listed word in a string and
 * overwrites it with a mask of the same length.
 *
 * <p>The word list is read from the classpath resource {@code CensorWords.txt}
 * (one word per line) by {@link #InitializationWork()}, or can be supplied directly
 * through {@link #setArrayList(List)}.
 *
 * <p>Instances are not safe for concurrent use: every {@link #filterInfo(String)} call
 * overwrites {@link #sensitiveWordSet} and {@link #sensitiveWordList}.
 *
 * <p>Based on blog.csdn.net/hubiao_0618/article/details/45076871
 */
public class SensitiveWord {
	/** Classpath resource that holds the sensitive words, one per line. */
	private static final String FILE_NAME = "CensorWords.txt";

	/** Pre-built run of replacement characters; masks are cut from its head. */
	private StringBuilder replaceAll;
	/** Charset name used to decode the word-list resource. */
	private String encoding = "UTF-8";
	/** Text repeated to build the mask. */
	private String replceStr = "*";
	/** Number of times {@link #replceStr} is repeated when the mask is pre-built. */
	private int replceSize = 500;
	/** The loaded sensitive words. */
	private List<String> arrayList;
	/** Distinct sensitive words found by the most recent {@link #filterInfo(String)} call. */
	public Set<String> sensitiveWordSet;
	/** Every sensitive-word hit of the most recent {@link #filterInfo(String)} call, in text order. */
	public List<String> sensitiveWordList;

	/**
	 * Creates a filter with a custom mask.
	 *
	 * @param replceStr  text repeated to build the mask
	 * @param replceSize how many repetitions to pre-build
	 */
	public SensitiveWord(String replceStr, int replceSize) {
		// The original assigned the word-list file name here by mistake.
		this.replceStr = replceStr;
		this.replceSize = replceSize;
	}

	/** Creates a filter that masks with {@code "*"}. */
	public SensitiveWord() {
	}

	public StringBuilder getReplaceAll() {
		return replaceAll;
	}

	public void setReplaceAll(StringBuilder replaceAll) {
		this.replaceAll = replaceAll;
	}

	public String getReplceStr() {
		return replceStr;
	}

	public void setReplceStr(String replceStr) {
		this.replceStr = replceStr;
	}

	public int getReplceSize() {
		return replceSize;
	}

	public void setReplceSize(int replceSize) {
		this.replceSize = replceSize;
	}

	public List<String> getArrayList() {
		return arrayList;
	}

	public void setArrayList(List<String> arrayList) {
		this.arrayList = arrayList;
	}

	public String getEncoding() {
		return encoding;
	}

	public void setEncoding(String encoding) {
		this.encoding = encoding;
	}

	/**
	 * Replaces every sensitive word in {@code str} with a mask of equal length and records
	 * the words that were hit in {@link #sensitiveWordSet} / {@link #sensitiveWordList}.
	 *
	 * <p>If no word list has been loaded or set yet, {@link #InitializationWork()} is run first.
	 *
	 * @param str text to filter; {@code null} or empty text is returned unchanged
	 * @return the text with all sensitive words masked
	 */
	public String filterInfo(String str) {
		sensitiveWordSet = new HashSet<String>();
		sensitiveWordList = new ArrayList<String>();
		if (str == null || str.isEmpty()) {
			return str;
		}
		if (arrayList == null) {
			InitializationWork();
		}

		StringBuilder buffer = new StringBuilder(str);
		for (Map.Entry<Integer, Integer> span : findSpans(str).entrySet()) {
			int start = span.getKey();
			int end = span.getValue();
			// Read the hit from the untouched input: the buffer may already be partly
			// masked by an overlapping span.
			String sensitive = str.substring(start, end);
			sensitiveWordSet.add(sensitive);
			sensitiveWordList.add(sensitive);
			// Same-length replacement keeps all remaining span offsets valid.
			buffer.replace(start, end, mask(end - start));
		}
		return buffer.toString();
	}

	/**
	 * Locates all occurrences of all listed words in {@code text}.
	 *
	 * @return map from start offset to exclusive end offset, ordered by start; when several
	 *         words begin at the same offset the longest one is kept
	 */
	private Map<Integer, Integer> findSpans(String text) {
		Map<Integer, Integer> spans = new TreeMap<Integer, Integer>();
		for (String word : arrayList) {
			// indexOf("", from) always succeeds at 'from', so an empty word would never
			// advance the search position.
			if (word == null || word.isEmpty()) {
				continue;
			}
			int from = 0;
			int start;
			while ((start = text.indexOf(word, from)) > -1) {
				int end = start + word.length();
				Integer known = spans.get(start);
				if (known == null || end > known) {
					spans.put(start, end);
				}
				from = end;
			}
		}
		return spans;
	}

	/**
	 * Returns the first {@code length} characters of the mask, growing the shared
	 * {@link #replaceAll} builder when it is missing or too short.
	 */
	private String mask(int length) {
		if (replaceAll == null) {
			replaceAll = new StringBuilder(Math.max(length, 16));
		}
		// An empty unit could never fill the mask; fall back to the default character.
		String unit = (replceStr == null || replceStr.isEmpty()) ? "*" : replceStr;
		while (replaceAll.length() < length) {
			replaceAll.append(unit);
		}
		return replaceAll.substring(0, length);
	}

	/**
	 * Builds the mask and loads the sensitive-word list from the classpath resource
	 * {@code CensorWords.txt}. Lines are trimmed; blank and duplicate lines are skipped.
	 *
	 * <p>An I/O or charset error while reading is reported on stderr and leaves the list
	 * with whatever was read so far.
	 *
	 * @throws IllegalStateException if the resource is not on the classpath
	 */
	public void InitializationWork() {
		int repeats = Math.max(replceSize, 0);
		replaceAll = new StringBuilder(repeats);
		for (int x = 0; x < repeats; x++) {
			replaceAll.append(replceStr);
		}

		arrayList = new ArrayList<String>();
		InputStream resource = SensitiveWord.class.getClassLoader().getResourceAsStream(FILE_NAME);
		if (resource == null) {
			throw new IllegalStateException("Sensitive word list not found on classpath: " + FILE_NAME);
		}
		// Tracks words already added so de-duplication does not rescan the list.
		Set<String> seen = new HashSet<String>();
		try (InputStream in = resource;
				BufferedReader reader = new BufferedReader(new InputStreamReader(in, encoding))) {
			String line;
			while ((line = reader.readLine()) != null) {
				String word = line.trim();
				if (!word.isEmpty() && seen.add(word)) {
					arrayList.add(word);
				}
			}
		} catch (IOException e) {
			// Best effort, as before: keep the words read so far.
			e.printStackTrace();
		}
	}

	/**
	 * Tells whether {@code str} contains at least one sensitive word. The word list is
	 * reloaded from the classpath on every call.
	 *
	 * @param str text to inspect
	 * @return {@code true} if any listed word occurs in {@code str}
	 */
	public static boolean checkSenstiveWord(String str) {
		SensitiveWord sw = new SensitiveWord();
		sw.InitializationWork();
		sw.filterInfo(str);
		// Decide from the recorded hits, not from '*' in the output, which the input
		// itself may already contain.
		return !sw.sensitiveWordList.isEmpty();
	}

	/**
	 * Returns {@code str} with every sensitive word masked by {@code '*'}. The word list is
	 * reloaded from the classpath on every call.
	 *
	 * @param str text to filter
	 * @return the masked text
	 */
	public static String filterInfoAfter(String str) {
		SensitiveWord sw = new SensitiveWord();
		sw.InitializationWork();
		return sw.filterInfo(str);
	}

}

测试类



/**
 * Console demo for {@code SensitiveWord}: checks a sample sentence for sensitive words,
 * filters it, and prints the length, the check result, the elapsed time and the filtered text.
 */
public class WordTest {
	public static void main(String[] args) {
		final long begin = System.currentTimeMillis();
		final String sample = "玉蒲团哦检票口还是看黑白电视不娱乐透视";

		// Detection runs first; its result is printed after the filtering step below.
		final boolean hasSensitive = SensitiveWord.checkSenstiveWord(sample);
		System.out.println("字符串的长度为:" + sample.length());

		final String filtered = SensitiveWord.filterInfoAfter(sample);
		System.out.println("含有敏感词汇:" + hasSensitive);

		// The timing covers both the check and the filter call.
		final long elapsed = System.currentTimeMillis() - begin;
		System.out.println("消耗时间为" + elapsed + "ms");
		System.out.println("转换后的字符串为:\n" + filtered);
	}

}

敏感词库文件 CensorWords.txt 需要放在 src 根目录下(即 classpath 根目录), 工具类通过类加载器读取该文件.

敏感词汇可以去这个位置下载链接:

https://pan.baidu.com/s/1fbmRFhiIdFsMOz5eOJa7eA 密码:qi2b

  • 7
    点赞
  • 30
    收藏
    觉得还不错? 一键收藏
  • 3
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值