1.创建索引实例代码
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import javax.swing.filechooser.FileFilter;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Example 1: builds a Lucene (4.3) index over the .txt files found under a data
 * directory, using the IK Chinese analyzer.
 *
 * NOTE(review): {@link #getDocument(File)} indexes the literal string
 * "我爱你中国" as the "content" field instead of the file's actual contents —
 * presumably a tutorial placeholder. Left unchanged to preserve the example's
 * behavior; a real indexer would read the file here.
 */
public class Indexer {
    private IndexWriter writer;
    private Analyzer analyzer;
    // Accumulates every document seen by getDocuments(); it is never cleared,
    // so calling index() twice on the same Indexer re-adds earlier documents.
    List<Document> documents = new ArrayList<Document>();

    public static void main(String[] args) {
        String dataDir = "E:/lucene/data";
        String indexDir = "E:/lucene/index";
        Indexer indexer = null;
        try {
            indexer = new Indexer(indexDir);
            indexer.index(dataDir, new TextFilesFilter());
            indexer.writer.commit();
            System.out.println(indexer.writer.numDocs());
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close the writer even when indexing/commit fails; the original
            // only closed it on the success path and leaked it otherwise.
            if (indexer != null && indexer.writer != null) {
                try {
                    indexer.writer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Opens (or creates) an index in {@code indexDir} and configures the writer.
     *
     * @param indexDir filesystem path of the index directory
     * @throws IOException if the directory cannot be opened or the writer created
     */
    public Indexer(String indexDir) throws IOException {
        Directory dir = FSDirectory.open(new File(indexDir));
        analyzer = new IKAnalyzer();
        LogMergePolicy mergePolicy = new LogDocMergePolicy();
        // Merge frequency when documents are added to a segment:
        // smaller value -> slower indexing; larger value (>10) -> faster,
        // suited to batch index builds.
        mergePolicy.setMergeFactor(30);
        // Maximum number of documents merged into one segment:
        // smaller favors incremental (append) indexing speed; larger favors
        // batch builds and faster searching.
        mergePolicy.setMaxMergeDocs(5000);
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer);
        indexWriterConfig.setMaxBufferedDocs(10000);
        indexWriterConfig.setMergePolicy(mergePolicy);
        indexWriterConfig.setRAMBufferSizeMB(64);
        // Open mode: create the index if absent, otherwise append to it.
        indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        writer = new IndexWriter(dir, indexWriterConfig);
    }

    /**
     * Converts one file's metadata into a Lucene {@link Document}.
     * Name/path/mtime are indexed verbatim (StringField); "content" is
     * analyzed (TextField) but is a hard-coded placeholder string — see class note.
     */
    private Document getDocument(File f) throws IOException {
        Document document = new Document();
        document.add(new StringField("name", f.getName(), Store.YES));
        document.add(new TextField("content", "我爱你中国", Store.YES));
        document.add(new StringField("fullpath", f.getCanonicalPath(), Store.YES));
        document.add(new StringField("updateTime", String.valueOf(f.lastModified()), Store.YES));
        return document;
    }

    /**
     * Recursively walks {@code files}, appending a Document for every readable,
     * non-hidden file accepted by {@code filesFilter} to the shared
     * {@link #documents} list, which is also returned.
     */
    private List<Document> getDocuments(File[] files, FileFilter filesFilter) throws IOException {
        if (files == null) {
            // listFiles() returns null for a missing/unreadable directory.
            return documents;
        }
        for (File f : files) {
            if (f.isDirectory()) {
                getDocuments(f.listFiles(), filesFilter);
            } else if (!f.isHidden() && f.canRead()
                    && filesFilter != null && filesFilter.accept(f)) {
                documents.add(getDocument(f));
            }
        }
        return documents;
    }

    /** Collects matching documents under {@code files} and adds them to the index. */
    private void indexFile(File[] files, FileFilter filesFilter) throws IOException {
        List<Document> docs = getDocuments(files, filesFilter);
        writer.addDocuments(docs);
    }

    /**
     * Indexes every file under {@code dataDir} accepted by {@code filesFilter}.
     * Fix: the original ignored the {@code filesFilter} argument and always
     * constructed a fresh TextFilesFilter.
     */
    private void index(String dataDir, TextFilesFilter filesFilter) {
        File[] files = new File(dataDir).listFiles();
        try {
            indexFile(files, filesFilter);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Filter that accepts only .txt files (case-insensitive extension check).
     */
    private static class TextFilesFilter extends FileFilter {
        @Override
        public boolean accept(File f) {
            return f.getName().toLowerCase().endsWith(".txt");
        }

        @Override
        public String getDescription() {
            // Not used by this example; the Swing FileFilter API requires it.
            return null;
        }
    }
}
2.检索索引实例代码
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Example 2: searches the index built by the Indexer example, printing the
 * stored "content" field of the top hits for a query string.
 */
public class Searcher {
    /**
     * Runs {@code key} against the "content" field of the index at
     * {@code indexDir} and prints up to 10 matching documents' content.
     *
     * Fix: the original never released the acquired IndexSearcher (required by
     * the SearcherManager contract) and never closed the SearcherManager or
     * Directory, leaking file handles.
     *
     * @param indexDir filesystem path of the index directory
     * @param key      user query, parsed with the IK analyzer
     * @throws IOException    on index access failure
     * @throws ParseException if {@code key} cannot be parsed as a query
     */
    public static void search(String indexDir, String key) throws IOException, ParseException {
        Directory directory = FSDirectory.open(new File(indexDir));
        try {
            SearcherManager sm = new SearcherManager(directory, new SearcherFactory());
            try {
                IndexSearcher searcher = sm.acquire();
                try {
                    Analyzer analyzer = new IKAnalyzer();
                    QueryParser parser = new QueryParser(Version.LUCENE_43, "content", analyzer);
                    Query query = parser.parse(key);
                    TopDocs hits = searcher.search(query, 10);
                    for (ScoreDoc doc : hits.scoreDocs) {
                        Document document = searcher.doc(doc.doc);
                        System.out.println(document.get("content"));
                    }
                } finally {
                    // Every acquire() must be paired with release().
                    sm.release(searcher);
                }
            } finally {
                sm.close();
            }
        } finally {
            directory.close();
        }
    }

    public static void main(String[] args) {
        String indexDir = "E:/lucene/index";
        String key = "中国";
        try {
            search(indexDir, key);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }
}