lucene入门代码一

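[b]本代码涉及到的关键类有:
[color=black][size=large]IndexWriter
Directory(FSDirectory)
Analyzer(StandardAnalyzer)
Document
Field
IndexSearcher
Term
Query
TermQuery
QueryParser
TopDocs
ScoreDoc[/size][/color][/b]

(注:下面的示例通过 QueryParser 构造查询,并没有直接使用 Term 和 TermQuery。)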

代码示例:

package com.yale.lucene;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStreamReader;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds a Lucene index from the text files found directly inside one
 * directory (sub-directories are not visited).
 *
 * <p>Every accepted file becomes one document with three stored fields:
 * "contents" (analyzed), "filename" and "fullpath" (both indexed verbatim).
 * The index is always created from scratch, replacing any index that already
 * exists at the target location.
 */
public class Indexer
{
    // Kept so that close() can release the directory after the writer.
    private final Directory directory;

    private final IndexWriter writer;

    /**
     * Indexes every ".txt" file of the hard-coded data directory into the
     * hard-coded index directory and prints how long that took.
     */
    public static void main(String[] args) throws Exception
    {
        // Directory the index is written to.
        String indexDir = "F://新建文件夹//luceneTest//indexFile";
        // Directory holding the files to be indexed.
        String dataDir = "F://新建文件夹//luceneTest//dataSource";

        long start = System.currentTimeMillis();
        Indexer indexer = new Indexer(indexDir);
        int numIndexed;
        try
        {
            numIndexed = indexer.index(dataDir, new TextFilesFilter());
        }
        finally
        {
            // Always release the index, even when indexing failed half-way.
            indexer.close();
        }
        long end = System.currentTimeMillis();

        System.out.println("Indexing " + numIndexed + " files took "
                + (end - start) + " milliseconds");
    }

    /**
     * Opens (and overwrites) the index stored in {@code indexDir}.
     *
     * @param indexDir file-system path of the index directory
     * @throws Exception if the directory or the index writer cannot be opened
     */
    public Indexer(String indexDir) throws Exception
    {
        Directory dir = FSDirectory.open(new File(indexDir));
        IndexWriter w;
        try
        {
            // 'true' = create a new index, discarding an existing one.
            w = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30),
                    true, IndexWriter.MaxFieldLength.LIMITED);
        }
        catch (Exception e)
        {
            // Do not leak the directory when the writer could not be created.
            dir.close();
            throw e;
        }
        directory = dir;
        writer = w;
    }

    /**
     * Closes the index writer and then the underlying directory.
     *
     * @throws Exception if either resource fails to close
     */
    public void close() throws Exception
    {
        try
        {
            writer.close();
        }
        finally
        {
            directory.close();
        }
    }

    /**
     * Indexes every regular, visible, readable file directly inside
     * {@code dir} that the filter accepts.
     *
     * @param dir    path of the directory whose files are indexed
     * @param filter decides which files are indexed; {@code null} accepts all
     * @return the number of documents in the index afterwards
     * @throws FileNotFoundException if {@code dir} is not a directory or its
     *                               contents cannot be listed
     * @throws Exception             if reading a file or writing the index fails
     */
    public int index(String dir, FileFilter filter) throws Exception
    {
        File[] files = new File(dir).listFiles();
        if (files == null)
        {
            // listFiles() yields null (not an empty array) for a missing path,
            // a plain file, or an I/O error; fail with a clear message instead
            // of a NullPointerException in the loop below.
            throw new FileNotFoundException(
                    "Not a readable directory: " + dir);
        }

        for (File f : files)
        {
            if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead()
                    && (filter == null || filter.accept(f)))
            {
                indexFile(f);
            }
        }
        return writer.numDocs();
    }

    /** Logs the file's canonical path and adds its document to the index. */
    private void indexFile(File f) throws Exception
    {
        System.out.println("Indexing " + f.getCanonicalPath());
        Document doc = getDocument(f);
        writer.addDocument(doc);
    }

    /**
     * Builds the Lucene document for one file.
     *
     * <p>"contents" is stored as well as analyzed, so the full text can be
     * read back from a search hit.
     */
    private Document getDocument(File f) throws Exception
    {
        Document doc = new Document();
        doc.add(new Field("contents", File2Reader(f), Field.Store.YES,
                Field.Index.ANALYZED));
        doc.add(new Field("filename", f.getName(), Field.Store.YES,
                Field.Index.NOT_ANALYZED));
        doc.add(new Field("fullpath", f.getCanonicalPath(), Field.Store.YES,
                Field.Index.NOT_ANALYZED));
        return doc;
    }

    /**
     * Reads a whole text file into a string.
     *
     * <p>The file is decoded with the platform default charset. Every line is
     * terminated with "\n" in the result, whatever line separator the file
     * uses, so a non-empty result always ends with "\n".
     *
     * @param f the file to read
     * @return the file's text, or an empty string for an empty file
     * @throws Exception if the file cannot be opened or read
     */
    public static String File2Reader(File f) throws Exception
    {
        BufferedReader reader = new BufferedReader(new InputStreamReader(
                new FileInputStream(f)));
        try
        {
            StringBuilder text = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null)
            {
                text.append(line).append("\n");
            }
            return text.toString();
        }
        finally
        {
            // Closing the outer reader also closes the wrapped file stream;
            // without this each indexed file would leak a file handle.
            reader.close();
        }
    }

    /** Accepts files whose name ends in ".txt", ignoring case. */
    private static class TextFilesFilter implements FileFilter
    {
        @Override
        public boolean accept(File pathname)
        {
            return pathname.getName().toLowerCase().endsWith(".txt");
        }
    }
}



package com.yale.lucene;

import java.io.File;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Runs a query against the "contents" field of an existing Lucene index and
 * prints the stored fields of the best matches.
 */
public class Searcher
{
    /** Searches the hard-coded index directory for a hard-coded query. */
    public static void main(String[] args) throws Exception
    {
        // Directory the index is stored in.
        String indexDir = "F://新建文件夹//luceneTest//indexFile";
        // Text to search for.
        String queryString = "Apache";
        search(indexDir, queryString);
    }

    /**
     * Parses {@code queryString} with a {@link QueryParser} over the
     * "contents" field, fetches at most 10 hits, and prints for each hit its
     * "fullpath", "filename" and "contents" fields to standard output. The
     * hit count and the time spent searching go to standard error.
     *
     * @param indexDir    file-system path of the index directory
     * @param queryString query in QueryParser syntax
     * @throws Exception if the index cannot be opened, the query cannot be
     *                   parsed, or the search fails
     */
    public static void search(String indexDir, String queryString)
            throws Exception
    {
        Directory dir = FSDirectory.open(new File(indexDir));
        try
        {
            IndexSearcher is = new IndexSearcher(dir);
            try
            {
                QueryParser parser = new QueryParser(Version.LUCENE_30,
                        "contents", new StandardAnalyzer(Version.LUCENE_30));
                Query query = parser.parse(queryString);

                // Time only the search itself, not opening or parsing.
                long start = System.currentTimeMillis();
                TopDocs hits = is.search(query, 10);
                long end = System.currentTimeMillis();
                System.err.println("找到 " + hits.totalHits + "个文件 在"
                        + (end - start) + "毫秒匹配 要查询的字符串 '"
                        + queryString + "'");

                for (ScoreDoc scoreDoc : hits.scoreDocs)
                {
                    Document doc = is.doc(scoreDoc.doc);

                    System.out.println(doc.get("fullpath"));

                    System.out.println(doc.get("filename"));

                    System.out.println(doc.get("contents"));
                }
            }
            finally
            {
                // Release the searcher even when parsing or searching throws.
                is.close();
            }
        }
        finally
        {
            dir.close();
        }
    }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值