java lucene 3.0.3,Lucene 3.0.3 入门小例

视觉志

于 2021-03-13 14:07:00 发布

阅读量124

点赞数

文章标签： java lucene 3.0.3

常量接口

/**
 * Shared filesystem locations used by the indexing and searching examples.
 *
 * <p>Fields declared in an interface are implicitly {@code public static final},
 * so the explicit modifiers are omitted.
 */
public interface Constants {

    /** Directory in which the Lucene index is stored. */
    String indexDir = "D:\\lucene\\index";

    /** Directory containing the files to be indexed. */
    String dataDir = "D:\\lucene\\test";

}

1. 建立索引

import java.io.File;

import java.io.FileFilter;

import java.io.FileReader;

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.index.IndexWriter;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.FSDirectory;

import org.apache.lucene.util.Version;

public class Indexer {

private IndexWriter writer;

public Indexer(String indexDir) throws IOException {

//创建IndexWriter(写索引)

Directory dir = FSDirectory.open(new File(indexDir));

writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30),

true, IndexWriter.MaxFieldLength.UNLIMITED);

}

public void close() throws IOException {

//关闭IndexWriter

writer.close();

}

public int index(String dataDir, FileFilter filter) throws IOException {

File[] files = new File(dataDir).listFiles();

for (File f : files) {

if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead()

&& (filter == null || filter.accept(f))) {

indexFile(f);

}

return writer.numDocs();

}

public void indexFile(File f) throws IOException {

System.out.println("Indexing " + f.getCanonicalPath());

Document doc = getDocument(f);

writer.addDocument(doc);

}

public Document getDocument(File f) throws IOException {

Document doc = new Document();

//索引文件内容

doc.add(new Field("contents", new FileReader(f)));

//索引文件名

doc.add(new Field("filename", f.getName(), Field.Store.YES,

Field.Index.NOT_ANALYZED));

//索引文件完整路径

doc.add(new Field("fullpath", f.getCanonicalPath(), Field.Store.YES,

Field.Index.NOT_ANALYZED));

return doc;

}

public static class TextFilesFilter implements FileFilter {

public boolean accept(File path) {

return path.getName().toLowerCase().endsWith(".txt");

}

public static void main(String[] args) throws IOException {

long start = System.currentTimeMillis();

Indexer indexer = new Indexer(Constants.indexDir);

int numIndexed;

numIndexed = indexer.index(Constants.dataDir, new TextFilesFilter());

indexer.close();

long end = System.currentTimeMillis();

System.out.println("Indexing " + numIndexed + " files took "

+ (end - start) + " milliseconds");

}

2. 搜索索引

import java.io.File;

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.queryParser.ParseException;

import org.apache.lucene.queryParser.QueryParser;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.search.ScoreDoc;

import org.apache.lucene.search.TopDocs;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.FSDirectory;

import org.apache.lucene.util.Version;

public class Searcher {

public static void search(String indexDir, String q) throws IOException,

ParseException {

//打开索引文件

Directory dir = FSDirectory.open(new File(Constants.indexDir));

IndexSearcher is = new IndexSearcher(dir);

//解析查询字符串

QueryParser parser = new QueryParser(Version.LUCENE_30, "contents",

new StandardAnalyzer(Version.LUCENE_30));

Query query = parser.parse(q);

//搜索索引

long start = System.currentTimeMillis();

TopDocs hits = is.search(query, 10);

long end = System.currentTimeMillis();

//显示查询结果

System.out.println("Found " + hits.totalHits + " document(s) (in "

+ (end - start) + " milliseconds) that matched query'" + q

+ "':");

for(ScoreDoc scoreDoc : hits.scoreDocs){

Document doc = is.doc(scoreDoc.doc);

System.out.println(doc.get("fullpath"));

}

//关闭IndexSearcher

is.close();

}

public static void main(String[] args) throws IOException, ParseException {

String indexDir = Constants.indexDir;

String q = "中华";

search(indexDir, q);

}

视觉志

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫