package cn.yzh.ws.utils;
import java.io.*;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;
import org.apache.commons.lang.StringUtils;
/**
Authorl:Mr.王
DateTime:2018/3/23 0023上午 11:19
Function: */ public class BadWord { private final static File wordfilter = new File(PropertiesUtils.get("txt").toString());//词汇库txt地址,自行更改
private static long lastModified = 0L; private static List words = new ArrayList();
private static void checkReload() { if (wordfilter.lastModified() > lastModified) { synchronized (BadWord.class) { try { lastModified = wordfilter.lastModified(); LineIterator lines = FileUtils.lineIterator(wordfilter, "utf-8"); while (lines.hasNext()) { String line = lines.nextLine(); if (StringUtils.isNotBlank(line)) words.add(StringUtils.trim(line).toLowerCase()); } } catch (IOException e) { e.printStackTrace(); } } } }
/**
检查敏感字内容
@param contents */ public static String check(String... contents) { if (!wordfilter.exists()) return null; checkReload(); for (String word : words) { for (String content : contents) if (content != null && content.indexOf(word) >= 0) return word; } return null; }
/**
检查字符串是否包含敏感词
@param content
@return */ public static boolean isContain(String content) { if (!wordfilter.exists()) return false; checkReload(); for (String word : words) { if (content != null && content.indexOf(word) >= 0) return true; } return false; }
/**
替换掉字符串中的敏感词
@param str 等待替换的字符串
@param replaceChar 替换字符
@return */ public static String replace(String str, String replaceChar) { checkReload(); for (String word : words) { if (str.indexOf(word) >= 0) { String reChar = ""; for (int i = 0; i < word.length(); i++) { reChar += replaceChar; } str = str.replaceAll(word, reChar); } } return str; }
public static List lists() { checkReload(); return words; }
/**
添加敏感词
@param word
@throws IOException */ public static void add(String word) throws IOException { word = word.toLowerCase(); if (!words.contains(word)) { words.add(word); FileWriter fw = null; try { //如果文件存在,则追加内容;如果文件不存在,则创建文件 File f = new File(PropertiesUtils.get("txt").toString()); fw = new FileWriter(f, true); } catch (IOException e) { e.printStackTrace(); } PrintWriter pw = new PrintWriter(fw); pw.println(word); pw.flush(); try { fw.flush(); pw.close(); fw.close(); } catch (IOException e) { e.printStackTrace(); } lastModified = wordfilter.lastModified(); } }
/**
删除敏感词
@param word
@throws IOException */ public static void delete(String word) throws IOException { word = word.toLowerCase(); words.remove(word); FileUtils.writeLines(wordfilter, "UTF-8", words); lastModified = wordfilter.lastModified(); }
/**
功能:Java读取txt文件的内容 步骤:
1:先获得文件句柄
2:获得文件句柄当做是输入一个字节码流,需要对这个输入流进行读取
3:读取到输入流后,需要读取生成字节流
4:一行一行的输出。readline()。 备注:需要考虑的是异常情况
Function:去除Txx文档中的重复内容 */ public static void readTxtFile() { try { String encoding = "UTF-8"; File file = new File(PropertiesUtils.get("txt").toString()); if (file.isFile() && file.exists()) { // 判断文件是否存在 InputStreamReader read = new InputStreamReader( new FileInputStream(file), encoding);// 考虑到编码格式 BufferedReader bufferedReader = new BufferedReader(read); String lineTxt = null; ArrayList ayyarsListText = new ArrayList(); // 得到文件中的数据 while ((lineTxt = bufferedReader.readLine()) != null) { ayyarsListText.add(lineTxt); } read.close(); ArrayList resultList = new ArrayList();//结果集将被保存在这个集合中 // 去除文件中的重复数据 for (String item : ayyarsListText) { if (!resultList.contains(item)) { resultList.add(item); } } for (Object resultLists : resultList) { BadWord.addHeavy(String.valueOf(resultLists)); } } else { System.err.println("找不到指定的文件"); } } catch (Exception e) { System.err.println("读取文件内容出错"); e.printStackTrace(); } }
/**
Authorl:Mr.王
DateTime:2018/3/23 0023 下午 18:02
Function:去重成功以后,调用该方法重新写入,请勿自主调用该方法.
Parameter: */ public static void addHeavy(String word) throws IOException { word = word.toLowerCase(); if (!words.contains(word)) { words.add(word); FileUtils.writeLines(wordfilter, "UTF-8", words); lastModified = wordfilter.lastModified(); } }
public static void main(String[] args) throws Exception { // System.out.println(BadWord.replace("钓鱼岛是中国的","*")); // BadWord.add("钓鱼岛"); // BadWord.readTxtFile(); } }