本人最喜欢使用 IK 分词器,这可能也是习惯问题。mmseg4j 分词器也还不错,选哪个看个人喜好。下面简单分享一下使用 IK 分词器建立 Lucene 索引的代码:
package buildindex;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
import utils.ConstantUtil;
import java.io.*;
/**
 * Builds a Lucene index over the files found directly inside
 * {@code ConstantUtil.INDEX_FILE_PATH}, storing the index under
 * {@code ConstantUtil.INDEX_STORE_PATH} and tokenizing file content with
 * {@link IKAnalyzer}.
 *
 * <p>Typical use: construct an instance, call {@link #buildIndex()}, then
 * {@link #close()}. I/O failures are reported with
 * {@code printStackTrace()} instead of being thrown, so none of the public
 * methods declares a checked exception.
 *
 * @author wxshi (created 15-2-5)
 */
public class MYIndex {
    /** Charset used to decode the content of the files being indexed. */
    private static final String CONTENT_CHARSET = "GBK";

    /** Index storage; kept so that it can be released in {@link #close()}. */
    private Directory indexDir = null;
    /** Writer for the index; stays {@code null} if the constructor failed to open it. */
    private IndexWriter indexWriter = null;
    /** Analyzer handed to the index writer configuration. */
    Analyzer analyzer = null;

    /**
     * Opens (or creates) the index at {@code ConstantUtil.INDEX_STORE_PATH}
     * in {@code CREATE_OR_APPEND} mode using the IK analyzer.
     *
     * <p>If the index cannot be opened the error is printed and the instance
     * is left without a writer; {@link #buildIndex()} and {@link #close()}
     * then report/skip instead of failing with a NullPointerException.
     */
    public MYIndex() {
        try {
            indexDir = FSDirectory.open(new File(ConstantUtil.INDEX_STORE_PATH));
            analyzer = new IKAnalyzer();
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);
            iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            indexWriter = new IndexWriter(indexDir, iwc);
        } catch (IOException e) {
            e.printStackTrace();
            // Do not keep a half-initialised directory handle around.
            closeAndReport(indexDir);
            indexDir = null;
        }
    }

    /**
     * Deletes every document currently in the index and re-indexes each
     * regular file located directly inside
     * {@code ConstantUtil.INDEX_FILE_PATH}, committing once at the end.
     *
     * <p>Sub-directories and other non-regular entries are skipped. If the
     * source path is not a directory, or cannot be listed, nothing is added
     * and nothing is committed. An {@link IOException} from the index writer
     * aborts the run and is printed.
     */
    public void buildIndex() {
        if (indexWriter == null) {
            System.err.println("MYIndex: index writer is not available, nothing was indexed");
            return;
        }
        try {
            indexWriter.deleteAll();
            File sourceDir = new File(ConstantUtil.INDEX_FILE_PATH);
            // listFiles() yields null both for a non-directory and for a listing error.
            File[] entries = sourceDir.listFiles();
            if (entries == null) {
                return;
            }
            for (File entry : entries) {
                if (entry.isFile()) {
                    addFile(entry);
                }
            }
            indexWriter.commit();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Adds one file to the index and releases the file handle afterwards.
     * A file that cannot be opened is reported and skipped.
     *
     * @param file regular file to index
     * @throws IOException if reading the file or writing to the index fails
     */
    private void addFile(File file) throws IOException {
        // The content field is built from a Reader, so the stream has to stay
        // open until addDocument() has returned; only then is it closed.
        try (InputStream in = new FileInputStream(file);
             Reader reader = new BufferedReader(new InputStreamReader(in, CONTENT_CHARSET))) {
            Document doc = getDocument(file, reader);
            System.out.println("正在建立索引 : " + file.getName());
            indexWriter.addDocument(doc);
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates the Lucene document describing one file.
     *
     * @param file    file being indexed; supplies the path, title and size fields
     * @param content reader over the file content; used for the "content" field,
     *                which is built from the reader without a store option
     * @return document with the fields "path", "title", "content" and "size"
     */
    private Document getDocument(File file, Reader content) {
        Document doc = new Document();
        doc.add(new StringField("path", file.getAbsolutePath(), Field.Store.YES));
        doc.add(new StringField("title", file.getName(), Field.Store.YES));
        doc.add(new TextField("content", content));
        doc.add(new LongField("size", file.length(), Field.Store.YES));
        return doc;
    }

    /**
     * Closes the index writer and then the underlying directory. Safe to call
     * when the constructor failed; close errors are printed, not thrown.
     */
    public void close() {
        closeAndReport(indexWriter);
        closeAndReport(indexDir);
    }

    /**
     * Closes {@code resource} if it is non-null, printing any
     * {@link IOException} raised while closing.
     */
    private static void closeAndReport(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}