所需jar包：lucene-core-3.6.0.jar、mmseg4j-all-1.8.5-with-dic.jar
package com.test.com; import java.io.IOException; import java.io.StringReader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import com.chenlb.mmseg4j.analysis.MaxWordAnalyzer; import com.chenlb.mmseg4j.analysis.TokenUtils; public class AnalyzerTest { public static void main(String[] args) { Analyzer analyzer = new MaxWordAnalyzer(); try { String txt = "天气晴朗,马路上的行人缓缓行走."; // ComplexSeg.setShowChunk(true); System.out.println("---------" + txt.length() + "\n" + txt); TokenStream ts = analyzer.tokenStream("text", new StringReader(txt)); for (Token t = new Token(); (t = TokenUtils.nextToken(ts, t)) != null;) { System.out.println(t); } } catch (IOException e) { e.printStackTrace(); } } }
结果:
Java分词
最新推荐文章于 2024-07-06 11:24:09 发布