Java 敏感词过滤,支持由空格、逗号等分隔符分隔的敏感词

import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Masks sensitive words in text using a character trie.
 *
 * <p>A set of "separator" characters can be configured. Separators are ignored
 * while matching, so with the keyword {@code "ab"} and the separators space and
 * comma, the inputs {@code "a b"} and {@code "a,,b"} are both matched and masked
 * (separators included). Matching is longest-first: with the keywords
 * {@code "ab"} and {@code "abc"}, the text {@code "abc"} is masked entirely.
 *
 * <p>State is held in static fields without synchronization; call
 * {@link #init} before using {@link #replaceSensitiveWord}.
 */
public class SensitiveWordsUtil {
	/** Character written over every position of a matched sensitive word. */
	private static final char MASK = '*';

	/** Root of the keyword trie; {@code null} until a non-empty word list has been loaded. */
	private static Node root = null;

	/** Characters ignored while matching; {@code null} when none were configured. */
	private static Set<Character> separators = null;

	/** One trie node: outgoing edges keyed by character, plus an end-of-keyword flag. */
	private static final class Node {
		private final Map<Character, Node> children = new HashMap<Character, Node>();
		private boolean end;
	}

	/**
	 * Loads the separator set and, if {@code list} is non-empty, rebuilds the keyword trie.
	 *
	 * <p>The separator set is always replaced. When {@code list} is {@code null} or
	 * empty, the previously loaded keywords (if any) are left in place. {@code null}
	 * and empty keywords are ignored. Separator characters occurring inside a keyword
	 * are dropped before insertion, because the matcher skips separators in the text
	 * and such a keyword could otherwise never match.
	 *
	 * @param list      sensitive words; may be {@code null} or empty
	 * @param separator characters to ignore while matching; may be {@code null}
	 */
	public static void init(List<String> list, List<Character> separator) {
		// Copy into a set: constant-time lookups and no aliasing of the caller's list.
		separators = (separator == null) ? null : new HashSet<Character>(separator);

		if (list == null || list.isEmpty()) {
			return;
		}

		// Build into a local root and publish only when complete, so the static
		// field never refers to a half-built trie.
		Node newRoot = new Node();
		for (String key : list) {
			if (key == null) {
				continue;
			}
			Node node = newRoot;
			for (int i = 0; i < key.length(); i++) {
				char c = key.charAt(i);
				if (isSeparator(c)) {
					continue;
				}
				Node next = node.children.get(c);
				if (next == null) {
					next = new Node();
					node.children.put(c, next);
				}
				node = next;
			}
			// An empty (or all-separator) keyword never leaves the root; do not mark it.
			if (node != newRoot) {
				node.end = true;
			}
		}
		root = newRoot;
	}

	/**
	 * Returns {@code txt} with every sensitive word replaced by {@code '*'} characters.
	 *
	 * <p>The result has the same length as the input: each character of a match,
	 * including separators between its characters, becomes one {@code '*'}.
	 *
	 * @param txt text to filter; may be {@code null}
	 * @return the filtered text, or {@code txt} itself when it is {@code null}, empty,
	 *         or no keywords have been loaded
	 */
	public static String replaceSensitiveWord(String txt) {
		if (txt == null || txt.isEmpty() || root == null) {
			return txt;
		}

		StringBuilder sb = new StringBuilder(txt);
		int i = 0;
		while (i < txt.length()) {
			int length = checkSensitiveWord(txt, i);
			if (length > 0) {
				int end = i + length;
				for (int j = i; j < end; j++) {
					sb.setCharAt(j, MASK);
				}
				// Resume right after the match; masked characters need no re-scan.
				i = end;
			} else {
				i++;
			}
		}
		return sb.toString();
	}

	/**
	 * Returns the length of the longest sensitive word starting exactly at {@code begin}.
	 *
	 * <p>Separators between matched characters count towards the length. Separators
	 * before the first matched character or after the last one do not, so a match
	 * never starts or ends on a separator.
	 *
	 * @param txt   text being scanned
	 * @param begin index of the first character to examine
	 * @return number of characters covered by the match, or 0 if there is none
	 */
	private static int checkSensitiveWord(String txt, int begin) {
		// scanned: characters consumed so far; matched: length at the last end-of-keyword hit.
		int scanned = 0;
		int matched = 0;
		Node node = root;
		for (int i = begin; i < txt.length(); i++) {
			char c = txt.charAt(i);
			if (isSeparator(c)) {
				if (scanned == 0) {
					// A match must start on a keyword character, not on a separator.
					return 0;
				}
				scanned++;
				continue;
			}
			node = node.children.get(c);
			if (node == null) {
				break;
			}
			scanned++;
			if (node.end) {
				matched = scanned;
			}
		}
		return matched;
	}

	/** Tells whether {@code c} is one of the configured separator characters. */
	private static boolean isSeparator(char c) {
		return separators != null && separators.contains(c);
	}

}

已标记关键词 清除标记
相关推荐
©️2020 CSDN 皮肤主题: 大白 设计师:CSDN官方博客 返回首页