首先在项目中引入 hutool 的依赖(下面代码用到的 WordTree、HtmlUtil、Validator 均来自 hutool)。
然后需要写一个NLPUtil工具类。
工具类代码:
/**
 * Sensitive / prohibited word detection backed by a hutool {@code WordTree}.
 *
 * <p>The dictionary is the classpath resource {@code badwords.dic}, one word per line. It is
 * loaded lazily on the first call to {@link #checkBadwords(String)}, or eagerly by calling
 * {@link #loadBadwords()}.
 */
public class NLPUtil {
    // Classpath location of the dictionary file.
    private static final String BADWORDS_PATH = "badwords.dic";

    // Sensitive / prohibited word tree.
    private static final WordTree BADWORDS_TREE = new WordTree();

    // Flipped to true only after a complete, successful load, so that callers never
    // run a match against a tree that another thread is still filling.
    private static volatile boolean loaded = false;

    /**
     * Loads the sensitive / prohibited words from {@code badwords.dic} into the word tree.
     *
     * <p>Each line is trimmed; blank lines are skipped. The file is read as UTF-8
     * (assumes the dictionary is stored as UTF-8 -- confirm for your project).
     *
     * @throws IOException if the resource cannot be opened or read
     */
    public static synchronized void loadBadwords() throws IOException {
        ClassPathResource resource = new ClassPathResource(BADWORDS_PATH);
        // try-with-resources closes the reader and stream even when reading fails midway.
        try (InputStream inputStream = resource.getInputStream();
             BufferedReader br = new BufferedReader(
                     new InputStreamReader(inputStream, java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            while ((line = br.readLine()) != null) {
                String word = line.trim();
                if (!word.isEmpty()) {
                    BADWORDS_TREE.addWord(word);
                }
            }
        }
        loaded = true;
    }

    /**
     * Detects sensitive / prohibited words in the given text.
     *
     * <p>HTML tags are stripped before matching. A match made up only of word characters
     * (per {@code Validator.isWord}) is kept only if it appears next to a space in the stripped
     * text; this is a heuristic to drop hits that sit inside a longer word.
     *
     * @param content the text to scan; may contain HTML. {@code null} or empty yields an empty list
     * @return the matched words, or an empty list if nothing matched or the dictionary could not
     *         be loaded
     */
    public static List<String> checkBadwords(String content) {
        if (content == null || content.isEmpty()) {
            return new ArrayList<>();
        }
        try {
            ensureLoaded();
        } catch (IOException e) {
            // Best-effort: a missing/unreadable dictionary is reported but does not fail the
            // caller. The load is retried on the next call because the flag stays false.
            e.printStackTrace();
            return new ArrayList<>();
        }
        // Match the longest keyword and skip over text that has already been matched.
        String sentence = HtmlUtil.cleanHtmlTag(content);
        List<String> words = BADWORDS_TREE.matchAll(sentence, -1, false, true);
        List<String> newWords = new ArrayList<>();
        // Filter out hits that are embedded inside another word. The check runs against the
        // tag-stripped text, i.e. the same text the matches were found in.
        // NOTE(review): a word that is the entire text, or is delimited only by punctuation,
        // has no adjacent space and is therefore still dropped by this heuristic.
        for (String word : words) {
            boolean besideSpace = sentence.contains(" " + word) || sentence.contains(word + " ");
            if (Validator.isWord(word) && !besideSpace) {
                continue;
            }
            newWords.add(word);
        }
        return newWords;
    }

    /** Loads the dictionary once; concurrent first callers wait instead of loading twice. */
    private static void ensureLoaded() throws IOException {
        if (loaded) {
            return;
        }
        synchronized (NLPUtil.class) {
            if (!loaded) {
                loadBadwords();
            }
        }
    }
}
使用时,在接口方法中直接调用 NLPUtil.checkBadwords 即可:
/**
 * Checks a comment's text for sensitive / prohibited words.
 *
 * @param ksComment request body whose content is the text to scan
 * @return an error result if {@code StringUtils.isEmpty} reports the content as empty;
 *         otherwise a success result carrying the list returned by
 *         {@code NLPUtil.checkBadwords}
 */
public AjaxResult checkBadwords(@RequestBody KsComment ksComment) {
    final String text = ksComment.getContent();
    if (!StringUtils.isEmpty(text)) {
        List<String> hits = NLPUtil.checkBadwords(text);
        return AjaxResult.success(hits);
    }
    return AjaxResult.error("文本内容不能为空");
}