mmseg4j 1.9.1 基于 lucene 4.3 构建,配合 lucene 4.3 使用时没有问题。
package com.artbulb.search.utils;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import com.chenlb.mmseg4j.analysis.MaxWordAnalyzer;
/**
 * Demo: tokenize a string with mmseg4j's {@code MaxWordAnalyzer}
 * (mmseg4j 1.9.1, built against Lucene 4.3) and print each token.
 */
public class Test {
    public static void main(String[] args) throws Exception {
        // Alternative analyzers tried during experimentation:
        //Analyzer analyzer = new ComplexAnalyzer();
        //Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
        //Analyzer analyzer = new SimpleAnalyzer(Version.LUCENE_47);
        Analyzer analyzer = new MaxWordAnalyzer();
        String text = "goods_zone_bate";
        TokenStream ts = analyzer.tokenStream("field", new StringReader(text));
        try {
            // Attribute view onto the current token's text.
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset(); // required by the TokenStream contract before incrementToken()
            while (ts.incrementToken()) {
                System.out.println(term.toString());
            }
            ts.end(); // signal end-of-stream (sets final offset state)
        } finally {
            // Close even if reset()/incrementToken() throws, so the stream
            // is always released back to the analyzer's reuse strategy.
            ts.close();
        }
    }
}
如果使用 lucene 4.7,则需要把 ts.reset(); 这一行注释掉,如下所示:
package com.artbulb.search.utils;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import com.chenlb.mmseg4j.analysis.MaxWordAnalyzer;
// Variant for Lucene 4.7: identical to the 4.3 version except that
// ts.reset() is commented out (per the author's note, required when
// running mmseg4j 1.9.1 against Lucene 4.7 — TODO confirm against
// the mmseg4j/Lucene versions actually on the classpath).
public class Test {
public static void main(String[] args)throws Exception {
// Alternative analyzers tried during experimentation:
//Analyzer analyzer = new ComplexAnalyzer();
//Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
// Analyzer analyzer = new SimpleAnalyzer(Version.LUCENE_47);
Analyzer analyzer = new MaxWordAnalyzer();
String text="goods_zone_bate";
TokenStream ts=analyzer.tokenStream("field", new StringReader(text));
CharTermAttribute term=ts.addAttribute(CharTermAttribute.class);
// ts.reset();// intentionally disabled for Lucene 4.7 (see note above the class)
while(ts.incrementToken()){
System.out.println(term.toString());
}
ts.end();// signal end-of-stream
ts.close();// close the token stream
}
}