package com.zsj.test.jcseg;
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import com.webssky.jcseg.core.JcsegTaskConfig;
import com.webssky.jcseg.lucene.JcsegAnalyzer4X;
/**
 * Simple smoke test for Chinese word segmentation with the Jcseg Lucene analyzer.
 *
 * <p>Tokenizes a fixed mixed Chinese/English sentence and prints every term
 * produced by the analyzer to standard output, one term per line.
 *
 * @author hadoop
 */
public class Jcseg {

    /**
     * Runs the segmentation demo.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // Declared with the concrete type so the task config can be reached without a downcast.
        JcsegAnalyzer4X analyzer = new JcsegAnalyzer4X(JcsegTaskConfig.COMPLEX_MODE);

        // NOTE(review): judging by the setter names these options append pinyin and
        // synonym tokens for CJK words -- confirm against the Jcseg documentation.
        JcsegTaskConfig taskConfig = analyzer.getTaskConfig();
        taskConfig.setAppendCJKPinyin(true);
        taskConfig.setAppendCJKSyn(true);

        try {
            // The field name is left null, as in the original demo.
            TokenStream tokenStream = analyzer.tokenStream(null,
                    new StringReader("中华人民共和国成立了 welcome to china"));
            try {
                CharTermAttribute term = tokenStream.addAttribute(CharTermAttribute.class);

                // Lucene consumer workflow: reset() -> incrementToken()* -> end() -> close().
                tokenStream.reset();
                while (tokenStream.incrementToken()) {
                    System.out.println(term.toString());
                }
                tokenStream.end();
            } finally {
                // Always release the stream, even if tokenization fails part-way.
                tokenStream.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release the analyzer once the demo is finished with it.
            analyzer.close();
        }
    }
}