1、IKAnalyzer 中文分词器基于 org.apache.lucene 的 Analyzer 接口实现(IKAnalyzer 本身是独立的第三方库,需单独引入依赖)
2、java编写
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
 * Demo: tokenize a Chinese sentence with IKAnalyzer and print one term per line.
 *
 * Lucene TokenStream contract: reset() -> incrementToken() loop -> end() -> close().
 * The original skipped end(), never closed the Analyzer, and only closed the
 * TokenStream on the happy path; try-with-resources now releases both even if
 * iteration throws.
 *
 * @param args unused command-line arguments
 * @throws IOException propagated from reset()/incrementToken()/end()
 */
public static void main(String[] args) throws IOException {
    // IKAnalyzer is a third-party Chinese segmenter built on Lucene's Analyzer API.
    try (Analyzer analyzer = new IKAnalyzer();
         TokenStream tokenStream = analyzer.tokenStream("", "通往事故核心现场的道路于22日被紧急修通。持续降雨下,路面湿滑,记者发现,较大的泥坑处没过脚背有二三十厘米。为防滑,一块块竹架板铺在地上。")) {
        // The attribute object is reused; read it inside the loop after each advance.
        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            System.out.println(charTermAttribute.toString());
        }
        tokenStream.end(); // required by the TokenStream workflow before close()
    }
}
输出结果:分词后的词项将逐行打印(具体结果此处从略)
3、scala编写
// Demo: tokenize a Chinese sentence with IKAnalyzer and print one term per line.
// Lucene TokenStream contract: reset() -> incrementToken() loop -> end() -> close().
// The original fused `ts.reset()` and the while-loop onto one line (a syntax
// error from a bad paste), skipped end(), and never closed the resources on
// failure; the try/finally below guarantees cleanup.
val analyzer = new IKAnalyzer()
val ts: TokenStream = analyzer.tokenStream("", "通往事故核心现场的道路于22日被紧急修通。持续降雨下,路面湿滑,记者发现,较大的泥坑处没过脚背有二三十厘米。为防滑,一块块竹架板铺在地上。")
// The attribute object is reused; read it inside the loop after each advance.
val ter: CharTermAttribute = ts.addAttribute(classOf[CharTermAttribute])
try {
  ts.reset()
  while (ts.incrementToken()) { // side-effecting 0-arity method: keep the ()
    println(ter.toString)
  }
  ts.end() // required by the TokenStream workflow before close()
} finally {
  ts.close()
  analyzer.close()
}