一、建立索引部分
XMLFilesIndexer.java
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
/**
 * Builds a Lucene index from the XML files found under a source directory.
 */
public class XMLFilesIndexer {

    /**
     * Creates (overwriting any existing) index in {@code indexFileDir} from the
     * XML files found recursively under {@code sourceDir}.
     *
     * <p>Failures are reported via {@code printStackTrace()} and are not rethrown.
     *
     * @param indexFileDir directory the index is written to
     * @param sourceDir    root directory that is scanned for XML files
     */
    public void createIndex(String indexFileDir, String sourceDir) {
        IndexWriter writer = null;
        try {
            // "true" asks the writer to create a new index rather than append.
            writer = new IndexWriter(indexFileDir, new StandardAnalyzer(), true);
            System.out.println("Indexing to directory '" + indexFileDir
                    + "'...");
            indexDocs(writer, new File(sourceDir));
            System.out.println("Optimizing...");
            writer.optimize();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close the writer even when indexing or optimizing failed, so it
            // is not left open after an error.
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Recursively walks {@code file}; every readable regular file whose name
     * ends with {@code ".xml"} is converted to a Lucene document and added to
     * {@code writer}.
     *
     * <p>A failure on a single file is printed and does not stop the walk.
     *
     * @param writer index writer the documents are added to
     * @param file   file or directory to process
     * @throws IOException declared for callers; per-file errors are caught here
     */
    void indexDocs(IndexWriter writer, File file) throws IOException {
        if (!file.canRead()) {
            return;
        }
        if (file.isDirectory()) {
            String[] files = file.list();
            // list() returns null when the directory cannot be listed.
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
            return;
        }
        // Match the ".xml" extension, not merely a name that ends in "xml".
        if (file.getName().endsWith(".xml")) {
            System.out.println("adding " + file);
            try {
                // documentByFile is the factory method XMLDocument defines.
                writer.addDocument(XMLDocument.documentByFile(file));
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}
二、检索部分
XMLFilesSearcher.java
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.*;
/**
 * Runs keyword queries against an existing Lucene index and prints the
 * stored path of each matching document.
 */
public class XMLFilesSearcher {

    /**
     * Parses {@code keyword} as a query on the {@code "contents"} field,
     * searches the index in {@code indexDir}, and prints one numbered line per
     * hit with the value of its stored {@code "path"} field.
     *
     * @param keyword  query text handed to the query parser
     * @param indexDir directory containing the index
     * @throws Exception if the index cannot be opened, the query cannot be
     *                   parsed, or the search fails
     */
    public void search(String keyword, String indexDir)
            throws Exception {
        String field = "contents";
        IndexReader reader = IndexReader.open(indexDir);
        try {
            Searcher searcher = new IndexSearcher(reader);
            Analyzer analyzer = new StandardAnalyzer();
            QueryParser parser = new QueryParser(field, analyzer);
            Query query = parser.parse(keyword);
            Hits hits = searcher.search(query);
            for (int i = 0; i < hits.length(); i++) {
                Document doc = hits.doc(i);
                String path = doc.get("path");
                if (path != null) {
                    System.out.println((i + 1) + ". " + path);
                } else {
                    System.out.println((i + 1) + ". " + "No path for this document");
                }
            }
        } finally {
            // Close the reader even when parsing or searching throws.
            reader.close();
        }
    }
}
三、应用部分
/**
 * Command-line entry point that runs a sample search against the index.
 */
public class Main {

    /**
     * Searches the index stored in {@code Data//index} with a fixed sample
     * query and prints the stack trace if the search fails.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Index-building step, currently disabled:
        // XMLFilesIndexer xmlFilesIndexer = new XMLFilesIndexer();
        // xmlFilesIndexer.createIndex("Data//index", "Data//data");
        final String sampleQuery = "钱 OR 陈";
        final String indexLocation = "Data//index";
        XMLFilesSearcher fileSearcher = new XMLFilesSearcher();
        try {
            fileSearcher.search(sampleQuery, indexLocation);
        } catch (Exception failure) {
            failure.printStackTrace();
        }
    }
}
四、XML辅助类部分
XMLDocument.java
import java.io.CharArrayReader;
import java.io.File;
import java.io.FileReader;
import nu.xom.Builder;
import nu.xom.Nodes;
import nu.xom.Document;
import nux.xom.pool.XQueryFactory;
import nux.xom.xquery.XQuery;
import org.apache.lucene.document.Field;
/**
 * Helper that turns a file on disk into a Lucene document.
 */
public class XMLDocument {

    /**
     * Builds a Lucene document for {@code file} with two fields: {@code "path"}
     * (the file's path, stored and indexed un-tokenized) and {@code "contents"}
     * (fed from a reader over the file).
     *
     * <p>Any exception is printed and swallowed; the document built so far is
     * still returned, so it may be missing one or both fields.
     *
     * @param file the file to wrap
     * @return the (possibly partially populated) Lucene document, never null
     */
    public static org.apache.lucene.document.Document documentByFile(File file){
        // Fully qualified because nu.xom.Document is also imported in this file.
        final org.apache.lucene.document.Document luceneDoc =
                new org.apache.lucene.document.Document();
        try {
            Field pathField = new Field("path", file.getPath(),
                    Field.Store.YES, Field.Index.UN_TOKENIZED);
            luceneDoc.add(pathField);

            // NOTE(review): FileReader decodes with the platform default
            // charset; confirm that matches the encoding of the indexed files.
            Field contentsField = new Field("contents", new FileReader(file));
            luceneDoc.add(contentsField);
        } catch (Exception problem) {
            problem.printStackTrace();
        }
        return luceneDoc;
    }
}