开始学习 Lucene

《Lucene in Action》第一章的例子,初体验,哈哈。


import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import java.io.*;

/**
 * Minimal Lucene indexing example, adapted from chapter 1 of "Lucene in Action".
 *
 * <p>Opens an {@link IndexWriter} on a file-system directory and adds one
 * document per regular, visible, readable file found directly inside a data
 * directory (sub-directories are not descended into).
 *
 * User: zhangyong
 * Date: 12-7-12
 */
public class Indexer {

    /** Writer used for every document added by this indexer. */
    private IndexWriter writer;

    /**
     * Opens an index writer on the given directory.
     *
     * @param indexDir path of the directory that holds the index
     * @throws IOException if the directory or the writer cannot be opened
     */
    public Indexer(String indexDir) throws IOException {
        Directory dir = FSDirectory.open(new File(indexDir));
        // The boolean argument is IndexWriter's "create" flag in the Lucene 3.x constructor.
        writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_36), true, IndexWriter.MaxFieldLength.UNLIMITED);
    }

    /**
     * Closes the underlying index writer.
     *
     * @throws IOException if closing the writer fails
     */
    public void close() throws IOException {
        writer.close();
    }

    /**
     * Indexes the files located directly inside {@code dataDir}.
     *
     * <p>A file is indexed when it is not a directory, not hidden, exists, is
     * readable and is accepted by {@code filter}.
     *
     * @param dataDir directory whose files are to be indexed
     * @param filter  decides which files are indexed; {@code null} accepts every file
     * @return the document count reported by the writer after indexing
     * @throws IOException if {@code dataDir} cannot be listed
     * @throws Exception   if reading or indexing a file fails
     */
    public int index(String dataDir, FileFilter filter) throws Exception {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or an I/O error occurs.
            throw new IOException("Cannot list files in data directory: " + dataDir);
        }
        for (File f : files) {
            boolean indexable = !f.isDirectory() && !f.isHidden() && f.exists() && f.canRead();
            if (indexable && (filter == null || filter.accept(f))) {
                indexFile(f);
            }
        }
        return writer.numDocs();
    }

    /** Accepts files whose name ends with {@code .txt}, ignoring case. */
    public static class TextFilesFilter implements FileFilter {
        public boolean accept(File path) {
            return path.getName().toLowerCase().endsWith(".txt");
        }
    }

    /**
     * Builds the Lucene document for a single file.
     *
     * <p>The document has a {@code content} field fed from the file decoded as
     * UTF-8 and a stored, non-analyzed {@code fileName} field.
     *
     * @param f file to turn into a document
     * @return the document describing {@code f}
     * @throws Exception if the file cannot be opened
     */
    protected Document getDocument(File f) throws Exception {
        Document doc = new Document();
        // Explicit charset instead of FileReader, which would use the platform default encoding.
        doc.add(new Field("content", new InputStreamReader(new FileInputStream(f.getCanonicalPath()), "utf-8")));
        doc.add(new Field("fileName", f.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        return doc;
    }

    /**
     * Logs the file's canonical path and adds its document to the index.
     *
     * @param f file to index
     * @throws Exception if the document cannot be built or added
     */
    public void indexFile(File f) throws Exception {
        System.out.println("indexing " + f.getCanonicalPath());
        Document doc = getDocument(f);
        writer.addDocument(doc);
    }

    /**
     * Indexes the {@code .txt} files of a hard-coded data directory and prints
     * the elapsed time in milliseconds.
     */
    public static void main(String[] args) throws Exception {
        String dir = "E:\\lucene";
        String dataDir = "E:\\lucene\\data";
        long start = System.currentTimeMillis();
        Indexer indexer = new Indexer(dir);

        try {
            indexer.index(dataDir, new TextFilesFilter());
        } finally {
            // Always release the writer, even when indexing fails.
            indexer.close();
        }
        long end = System.currentTimeMillis();
        System.out.println("cost time==" + (end - start));
    }

}





package com.diyicai.share.search.test;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import java.io.File;
import java.io.IOException;

/**
 * Minimal Lucene search example (listing 1.2 of "Lucene in Action").
 *
 * <p>Parses a query string against the {@code content} field of an existing
 * index and prints the {@code fileName} of each of the top hits.
 *
 * User: zhangyong
 * Date: 12-7-14
 */
public class Searcher {

    /** Runs a sample query against a hard-coded index directory. */
    public static void main(String[] args) throws IOException, ParseException {
        String indexDir = "E:\\lucene";
        String q = "start";
        search(indexDir, q);
    }

    /**
     * Searches the index in {@code indexDir} and prints the results.
     *
     * <p>Prints the total hit count, then the stored {@code fileName} of at
     * most 10 matching documents.
     *
     * @param indexDir path of the directory that holds the index
     * @param q        query string, parsed with the {@code content} field as default
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if {@code q} is not a valid query
     */
    public static void search(String indexDir, String q) throws IOException, ParseException {
        Directory dir = FSDirectory.open(new File(indexDir));
        try {
            IndexSearcher is = new IndexSearcher(dir);
            try {
                QueryParser parser = new QueryParser(Version.LUCENE_36, "content", new StandardAnalyzer(Version.LUCENE_36));
                Query query = parser.parse(q);

                TopDocs hits = is.search(query, 10);
                System.out.println("find " + hits.totalHits);

                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    Document doc = is.doc(scoreDoc.doc);
                    System.out.println(doc.get("fileName"));
                }
            } finally {
                // Release the searcher even when parsing or searching fails.
                is.close();
            }
        } finally {
            dir.close();
        }
    }

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值