《Lucene in Action》第一章的例子，初体验，哈哈
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import java.io.*;
/**
 * A Lucene learning example: indexes the plain files of one directory,
 * creating one document per file.
 *
 * <p>Each document gets a {@code content} field read from the file as UTF-8
 * text and a stored, non-analyzed {@code fileName} field holding the file's
 * simple name.
 *
 * User: zhangyong
 * Date: 12-7-12
 * Time: 9:35 PM
 */
public class Indexer {
    private final IndexWriter writer;

    /**
     * Opens an index writer on the given directory.
     *
     * <p>The writer is constructed with the {@code create} flag set to
     * {@code true} and an unlimited field length, using a
     * {@link StandardAnalyzer} for {@code Version.LUCENE_36}.
     *
     * @param indexDir file-system path of the index directory
     * @throws IOException if the directory or the writer cannot be opened
     */
    public Indexer(String indexDir) throws IOException {
        Directory dir = FSDirectory.open(new File(indexDir));
        writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_36), true, IndexWriter.MaxFieldLength.UNLIMITED);
    }

    /**
     * Closes the underlying index writer.
     *
     * @throws IOException if closing the writer fails
     */
    public void close() throws IOException {
        writer.close();
    }

    /**
     * Indexes the files directly inside {@code dataDir} (no recursion).
     *
     * <p>A file is indexed when it is not a directory, not hidden, exists, is
     * readable and is accepted by {@code filter}.
     *
     * @param dataDir directory whose files are indexed
     * @param filter  selects the files to index; {@code null} accepts every file
     * @return the document count reported by the writer after indexing
     * @throws IOException if {@code dataDir} cannot be listed (for example it
     *                     does not exist or is not a directory)
     * @throws Exception   if reading or indexing a file fails
     */
    public int index(String dataDir, FileFilter filter) throws Exception {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() signals "not a directory" or an I/O error with null;
            // fail with a descriptive message instead of a NullPointerException.
            throw new IOException("cannot list files in directory: " + dataDir);
        }
        for (File f : files) {
            boolean indexable = !f.isDirectory() && !f.isHidden() && f.exists() && f.canRead();
            if (indexable && (filter == null || filter.accept(f))) {
                indexFile(f);
            }
        }
        return writer.numDocs();
    }

    /** Accepts only files whose name ends with {@code .txt}, ignoring case. */
    public static class TextFilesFilter implements FileFilter {
        public boolean accept(File path) {
            return path.getName().toLowerCase().endsWith(".txt");
        }
    }

    /**
     * Builds the Lucene document for one file.
     *
     * @param f the file to turn into a document
     * @return a document with the {@code content} and {@code fileName} fields
     * @throws Exception if the file cannot be opened
     */
    protected Document getDocument(File f) throws Exception {
        Document doc = new Document();
        // Decode explicitly as UTF-8: a plain FileReader would use the
        // platform default charset.
        doc.add(new Field("content", new InputStreamReader(new FileInputStream(f), "utf-8")));
        doc.add(new Field("fileName", f.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        return doc;
    }

    /**
     * Logs the file's canonical path and adds its document to the index.
     *
     * @param f the file to index
     * @throws Exception if building or adding the document fails
     */
    public void indexFile(File f) throws Exception {
        System.out.println("indexing " + f.getCanonicalPath());
        Document doc = getDocument(f);
        writer.addDocument(doc);
    }

    /**
     * Indexes the {@code .txt} files of a fixed data directory into a fixed
     * index directory and prints the elapsed time in milliseconds.
     */
    public static void main(String[] args) throws Exception {
        String dir = "E:\\lucene";
        String dataDir = "E:\\lucene\\data";
        long start = System.currentTimeMillis();
        Indexer indexer = new Indexer(dir);
        try {
            indexer.index(dataDir, new TextFilesFilter());
        } finally {
            // Always release the writer, even when indexing fails.
            indexer.close();
        }
        long end = System.currentTimeMillis();
        System.out.println("cost time==" + (end - start));
    }
}
package com.diyicai.share.search.test;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.IOException;
/**
 * Example 1.2: runs a query against the {@code content} field of an existing
 * Lucene index and prints the {@code fileName} of each hit.
 *
 * User: zhangyong
 * Date: 12-7-14
 * Time: 7:37 PM
 */
public class Searcher {
    /** Searches a fixed index directory for a fixed query string. */
    public static void main(String[] args) throws IOException, ParseException {
        String indexDir = "E:\\lucene";
        String q = "start";
        search(indexDir, q);
    }

    /**
     * Parses {@code q} against the {@code content} field, prints the total hit
     * count and then the stored {@code fileName} of up to ten top hits.
     *
     * @param indexDir file-system path of the index directory
     * @param q        query string in Lucene query-parser syntax
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if {@code q} is not a valid query
     */
    public static void search(String indexDir, String q) throws IOException, ParseException {
        Directory dir = FSDirectory.open(new File(indexDir));
        try {
            IndexSearcher is = new IndexSearcher(dir);
            try {
                QueryParser parser = new QueryParser(Version.LUCENE_36, "content", new StandardAnalyzer(Version.LUCENE_36));
                Query query = parser.parse(q);
                TopDocs hits = is.search(query, 10);
                System.out.println("find " + hits.totalHits);
                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    Document doc = is.doc(scoreDoc.doc);
                    System.out.println(doc.get("fileName"));
                }
            } finally {
                // Release the searcher even if parsing or searching fails.
                is.close();
            }
        } finally {
            dir.close();
        }
    }
}