package com.zsj.test;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;
/**
 * Minimal Lucene demo: tokenizes an English sentence with the built-in
 * {@link org.apache.lucene.analysis.standard.StandardAnalyzer} and prints
 * each resulting term to standard output.
 *
 * @author hadoop
 */
public class FirstLucene {

    /**
     * Tokenizes a fixed English sentence with {@link StandardAnalyzer}
     * and prints each term on its own line.
     *
     * @param args unused
     * @throws IOException if the token stream fails during consumption
     */
    public static void main(String[] args) throws IOException {
        /*
         * The StandardAnalyzer is Lucene's built-in analyzer: it lower-cases
         * tokens and strips stop words and punctuation. It is clearly not
         * suitable for Chinese text.
         */
        // try-with-resources guarantees the stream (and the analyzer itself,
        // which the original code never closed) are released even if
        // reset()/incrementToken() throws.
        try (Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
                TokenStream tokenStream = analyzer.tokenStream("",
                        "this is my first lucene")) {
            // Attribute view onto the stream's current term text.
            CharTermAttribute charTermAttribute = tokenStream
                    .addAttribute(CharTermAttribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                System.out.println(charTermAttribute.toString());
            }
            // end() must be called after the last incrementToken(), before
            // close(), per the TokenStream workflow contract.
            tokenStream.end();
        }
    }
}