中文分词之过滤候选词以及去重
一、过滤单字词和特殊符号
代码实现:
/**
* 过滤特殊词(单字动词,特殊符号)
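* @param inputFileStr path of the input file; it must already exist
* @param outFileStr path of the output file; it is created if it does not exist
* @throws Exception if the input file does not exist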
*/
public static void filterWordFile2File(String inputFileStr,String outFileStr) throws Exception{
File inputFile = new File(inputFileStr);
File outFile = new File(outFileStr);
if(!inputFile.exists()){
throw new Exception("can not read the file");
}
if(!outFile.exists()){
outFile.createNewFile();
}
FileWriter fw=null;
BufferedReader br=null;
try {
br= new BufferedReader(new FileReader(inputFile));
String temp ="";
StringBuilder sb = new StringBuilder();
while((temp = br.readLine()) !=null){
String[] temps &