源文件结构:
CreateIndex.java:
package com.sxt.lucene;
import java.io.File;
import java.io.IOException;
import java.nio.file.FileSystems;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Builds a Lucene index from the files located directly inside {@link #dataDir}
 * and writes it to {@link #indexDir}.
 *
 * <p>Every regular file becomes one {@link Document} with three stored fields:
 * {@code filename}, {@code content} and {@code lastModify}.
 */
public class CreateIndex {
    /** Directory that receives the Lucene index files. */
    public static final String indexDir="E:/study/Eclipse_Project/lucene/index";
    /** Directory whose files are read and indexed. */
    public static final String dataDir="E:/study/Eclipse_Project/lucene/data";

    /**
     * Indexes every regular file found directly inside {@link #dataDir}.
     *
     * <p>The index is opened with {@code OpenMode.CREATE_OR_APPEND}: a new index is
     * created when none exists, otherwise documents are appended to the existing one.
     * Documents are added unconditionally, so running this method again on the same
     * data appends another copy of each file to the index.
     *
     * @throws IOException if {@link #dataDir} cannot be listed, a file cannot be read,
     *                     or the index cannot be written
     */
    public void createIndex() throws IOException {
        // Enumerate the source files first; listFiles() returns null when the path
        // does not exist, is not a directory, or cannot be read.
        File sourceDir = new File(dataDir);
        File[] files = sourceDir.listFiles();
        if (files == null) {
            throw new IOException("Cannot list data directory: " + dataDir);
        }

        // Analyzer used to tokenize the analyzed ("content") field.
        Analyzer analyzer = new StandardAnalyzer();
        // Writer configuration: create the index if missing, append otherwise.
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);

        // try-with-resources guarantees that the writer (and with it the index write
        // lock) and the directory are released even when reading a file fails midway.
        try (Directory directory = FSDirectory.open(FileSystems.getDefault().getPath(indexDir));
             IndexWriter writer = new IndexWriter(directory, config)) {
            for (File f : files) {
                // Sub-directories and other non-regular entries cannot be read as text.
                if (!f.isFile()) {
                    continue;
                }
                writer.addDocument(toDocument(f));
            }
        }
    }

    /**
     * Converts one source file into a Lucene document.
     *
     * <p>A document plays the role of a row and each field the role of a column.
     * {@code filename} and {@code lastModify} are indexed as single, untokenized
     * terms; {@code content} is tokenized by the analyzer. All three values are
     * stored, so the index keeps a full copy of the file text.
     *
     * @param f the regular file to convert
     * @return the document describing {@code f}
     * @throws IOException if the file content cannot be read
     */
    private static Document toDocument(File f) throws IOException {
        Document doc = new Document();
        doc.add(new StringField("filename", f.getName(), Field.Store.YES));
        // NOTE(review): this overload decodes with the platform default charset;
        // confirm the data files use that encoding.
        doc.add(new TextField("content", FileUtils.readFileToString(f), Field.Store.YES));
        doc.add(new StringField("lastModify", f.lastModified()+"", Field.Store.YES));
        return doc;
    }

    /**
     * Command-line entry point: builds the index using the hard-coded directories.
     *
     * @param args ignored
     * @throws IOException if indexing fails
     */
    public static void main(String[] args) throws IOException {
        CreateIndex test=new CreateIndex();
        test.createIndex();
    }
}
SearchIndex.java:
package com.sxt.lucene;
import java.io.IOException;
import java.nio.file.FileSystems;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Runs a fixed keyword query against the Lucene index stored in {@link #indexDir}
 * and prints the {@code filename} field of each hit to standard output.
 */
public class SearchIndex {
    /** Directory that holds the Lucene index files to search. */
    public static final String indexDir="E:/study/Eclipse_Project/lucene/index";
    /** Directory of the original data files; not read by this class. */
    public static final String dataDir="E:/study/Eclipse_Project/lucene/data";

    /**
     * Searches the {@code content} field for the term {@code java} and prints the
     * stored {@code filename} of up to 10 matching documents.
     *
     * @throws Exception if the index cannot be opened or read, or the query string
     *                   cannot be parsed
     */
    public void search() throws Exception{
        // try-with-resources closes the reader and the directory even when parsing
        // or searching throws; the original closed the reader on success only.
        try (Directory directory = FSDirectory.open(FileSystems.getDefault().getPath(indexDir));
             IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);

            // First argument: default field to search; second: analyzer for the query text.
            QueryParser qp = new QueryParser("content", new StandardAnalyzer());
            // The text to search for.
            Query query = qp.parse("java");

            // 10 is the maximum number of hits returned (useful as a page size).
            TopDocs docs = searcher.search(query, 10);

            for (ScoreDoc sc : docs.scoreDocs) {
                // Resolve the hit's internal document id to its stored fields.
                Document document = searcher.doc(sc.doc);
                System.out.println(document.get("filename"));
            }
        }
    }

    /**
     * Command-line entry point: runs the fixed search.
     *
     * @param args ignored
     * @throws Exception if the search fails
     */
    public static void main(String[] args) throws Exception {
        SearchIndex test=new SearchIndex();
        test.search();
    }
}