Lucene 是一个高性能的开源全文索引与搜索库,是搜索引擎领域的重要组成部分。
以下是一个使用 Lucene 实现简单文档索引的 demo 示例,基于 Lucene 4.x 版本,代码如下:
/**
 * Minimal Lucene 4.x demo that indexes the regular files found directly
 * inside one directory (no recursion into sub-directories).
 *
 * <p>Each indexed file becomes one {@link Document} with three fields: the
 * file contents, the file name and the canonical path.
 */
public class Indexer {

    /** Writer used for every document added by this instance. */
    private final IndexWriter indexWriter;

    /**
     * Indexes the files under a hard-coded data directory into a hard-coded
     * index directory and prints the document count reported by the writer.
     *
     * @param args ignored
     * @throws IOException if the index cannot be opened or closed
     */
    public static void main(String[] args) throws IOException {
        String indexDir = "/home/drainli/file/index";
        String dataDir = "/home/drainli/file";

        Indexer indexer = new Indexer(indexDir);
        try {
            int numIndexed = indexer.index(dataDir, new TextFilter());
            // The count was previously computed and then dropped; report it.
            System.out.println("indexed documents : " + numIndexed);
        } catch (Exception e) {
            System.out.println("exception:" + e.getMessage());
            e.printStackTrace();
        } finally {
            // Always release the writer, even when indexing failed.
            indexer.close();
        }
    }

    /**
     * Opens an {@link IndexWriter} on the given index directory using a
     * {@link StandardAnalyzer}.
     *
     * @param indexDir file-system path of the index directory
     * @throws IOException if the directory or the writer cannot be opened
     */
    public Indexer(String indexDir) throws IOException {
        Directory dir = FSDirectory.open(new File(indexDir));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_0, analyzer);
        indexWriter = new IndexWriter(dir, config);
    }

    /**
     * Builds the Lucene document for one file.
     *
     * @param file file to describe
     * @return a document holding the contents reader, file name and canonical path
     * @throws IOException if the file cannot be opened or its path resolved
     */
    private Document getDocument(File file) throws IOException {
        Document document = new Document();
        // NOTE(review): FileReader decodes with the platform default charset and
        // reads the raw bytes; confirm this is suitable for the accepted file types.
        document.add(new TextField("文档", new FileReader(file)));
        document.add(new TextField("文件名", file.getName(), Field.Store.YES));
        document.add(new TextField("路径名", file.getCanonicalPath(), Field.Store.YES));
        return document;
    }

    /**
     * Indexes every readable, non-hidden regular file directly inside
     * {@code dataDir} that the filter accepts.
     *
     * @param dataDir    directory whose direct children are indexed
     * @param fileFilter filter deciding which files to index; {@code null} accepts all
     * @return the value of {@code indexWriter.numDocs()} after indexing
     *         (presumably includes documents already present in the index — verify)
     * @throws IOException if {@code dataDir} cannot be listed or a file cannot be indexed
     */
    private int index(String dataDir, FileFilter fileFilter) throws IOException {
        File dataFile = new File(dataDir);
        File[] listFiles = dataFile.listFiles();
        if (listFiles == null) {
            // listFiles() yields null for a non-directory or on an I/O error;
            // fail with a clear message instead of a NullPointerException.
            throw new IOException("cannot list data directory: " + dataDir);
        }
        for (File file : listFiles) {
            if (!file.isDirectory()
                    && file.canRead()
                    && !file.isHidden()
                    && file.exists()
                    && (fileFilter == null || fileFilter.accept(file))) {
                indexFile(file);
            }
        }
        return indexWriter.numDocs();
    }

    /**
     * Logs the file's canonical path and adds its document to the index.
     *
     * @param file file to index
     * @throws IOException if the document cannot be built or added
     */
    private void indexFile(File file) throws IOException {
        System.out.println("indexing file : " + file.getCanonicalPath());
        Document document = getDocument(file);
        indexWriter.addDocument(document);
    }

    /**
     * Closes the underlying index writer.
     *
     * @throws IOException if closing the writer fails
     */
    private void close() throws IOException {
        indexWriter.close();
    }
}
/**
 * File filter that accepts files whose name ends with the {@code .doc} or
 * {@code .docx} extension (case-sensitive).
 */
class TextFilter implements FileFilter {

    /**
     * Tests the file name's extension.
     *
     * @param pathname file to test
     * @return {@code true} if the name ends with {@code ".doc"} or {@code ".docx"}
     */
    @Override
    public boolean accept(File pathname) {
        String fileName = pathname.getName();
        // The leading dot matters: without it, names such as "mydocx" would match.
        return fileName.endsWith(".doc") || fileName.endsWith(".docx");
    }
}
程序运行截图: