/**
 * Tokenizes {@code text} with the given analyzer and prints each resulting
 * term on its own line to standard output.
 *
 * @param analyzer the analyzer used to split the text into tokens
 * @param text     the text to analyze
 * @throws Exception if the token stream fails while being read or closed
 */
private void testAnalyzer(Analyzer analyzer, String text) throws Exception {
    TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(text));
    try {
        // addAttribute returns the stream's TermAttribute instance, so it is
        // obtained once here rather than looked up again for every token.
        TermAttribute termAttribute = tokenStream.addAttribute(TermAttribute.class);
        // Consumer workflow documented for TokenStream:
        // reset() -> incrementToken()* -> end() -> close().
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            System.out.println(termAttribute.term());
        }
        tokenStream.end();
    } finally {
        // Release the stream (and its underlying reader) even if analysis fails.
        tokenStream.close();
    }
}
/**
 * Prints the tokens that several Lucene analyzers produce for sample English
 * and Chinese text, so that their segmentation results can be compared.
 *
 * @author Administrator
 */
public class AnalyzerTest {

    /**
     * Analyzes an English sentence with {@link StandardAnalyzer}.
     * The steps of interest for English are:
     * <ol>
     *   <li>splitting the text into keywords,</li>
     *   <li>removing stop words,</li>
     *   <li>converting upper case to lower case.</li>
     * </ol>
     */
    @Test
    public void testEn() throws Exception {
        String text = "Creates a searcher searching the index in the named directory";
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
        this.testAnalyzer(analyzer, text);
    }

    /**
     * Analyzes Chinese text with {@link ChineseAnalyzer}
     * (single-character segmentation).
     */
    @Test
    public void testZH() throws Exception {
        Analyzer analyzer = new ChineseAnalyzer();
        String text = "传智播客的黎活明是UFO";
        this.testAnalyzer(analyzer, text);
    }

    /**
     * Analyzes Chinese text with {@link CJKAnalyzer}. Unlike
     * {@link ChineseAnalyzer}, this analyzer emits overlapping two-character
     * tokens rather than single characters.
     */
    @Test
    public void testZH2() throws Exception {
        Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_30);
        String text = "传智播客的黎活明是UFO";
        this.testAnalyzer(analyzer, text);
    }

    /**
     * Analyzes a short Chinese phrase with {@link IKAnalyzer}.
     */
    @Test
    public void testZH3() throws Exception {
        Analyzer analyzer = new IKAnalyzer();
        String text = "北京美女";
        this.testAnalyzer(analyzer, text);
    }

    /**
     * Tokenizes {@code text} with the given analyzer and prints each
     * resulting term on its own line to standard output.
     *
     * @param analyzer the analyzer used to split the text into tokens
     * @param text     the text to analyze
     * @throws Exception if the token stream fails while being read or closed
     */
    private void testAnalyzer(Analyzer analyzer, String text) throws Exception {
        TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(text));
        try {
            // addAttribute returns the stream's TermAttribute instance, so it
            // is obtained once here rather than looked up for every token.
            TermAttribute termAttribute = tokenStream.addAttribute(TermAttribute.class);
            // Consumer workflow documented for TokenStream:
            // reset() -> incrementToken()* -> end() -> close().
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                System.out.println(termAttribute.term());
            }
            tokenStream.end();
        } finally {
            // Release the stream (and its underlying reader) even on failure.
            tokenStream.close();
        }
    }
}