/**
* Created by liuwei on 2017/8/24.
*/
object AnsjTest {

  /** Segments a fixed sample sentence with no caller-supplied stop lists and prints the tokens. */
  def main(args: Array[String]): Unit = {
    val sample = "test环境服务器启动方式更新为supervisor启动"
    println(tfidfAnsj(sample, List.empty[String], List.empty[String]))
  }

  /**
   * Word-segmentation helper: parses `content` with `ToAnalysis`, filters the result through
   * `FilterModifWord`, and returns the names of the remaining terms.
   *
   * @param content        text to segment; when `StringUtils.isEmpty` reports it empty,
   *                       an empty list is returned and nothing is registered or parsed
   * @param stopWordList   caller-supplied stop words, registered after the built-in ones
   * @param stopNatureList caller-supplied stop natures, registered after the built-in ones
   * @return the name of each term returned by `FilterModifWord.modifResult`, in order
   */
  def tfidfAnsj(content: String, stopWordList: List[String], stopNatureList: List[String]): List[String] =
    if (StringUtils.isEmpty(content)) List.empty[String]
    else {
      // Built-in entries first, caller-supplied entries after them.
      // The leading null is deliberately carried over from the original lists.
      // NOTE(review): it may be meaningful to the filter (e.g. matching terms whose
      // nature is "null") -- confirm against the Ansj version in use before removing it.
      val stopWordAll = List[String](null, "了", "的") ::: stopWordList
      val stopNatureAll = List[String](null, "w", "m") ::: stopNatureList

      // Register stop words, then stop natures.
      // NOTE(review): these are calls on the FilterModifWord object itself, so the
      // registrations presumably persist and accumulate across calls -- TODO confirm.
      FilterModifWord.insertStopWords(stopWordAll.asJava)
      FilterModifWord.insertStopNatures(stopNatureAll: _*)

      // Segment, apply the stop filters, then keep only each term's name.
      val parsed = ToAnalysis.parse(content)
      val filtered = FilterModifWord.modifResult(parsed)
      filtered.toArray[Term](new Array[Term](0)).map(_.getName).toList
    }
}
// Ansj分词
// 最新推荐文章于 2020-10-29 14:24:49 发布