初识Lucene6.6.0

最新推荐文章于 2020-11-25 16:49:31 发布

Rj08zhou

最新推荐文章于 2020-11-25 16:49:31 发布

阅读量1.3k

点赞数

分类专栏： Java-Mind&View 文章标签： lucene 索引

本文链接：https://blog.csdn.net/rj08zhou/article/details/74906502

版权

Java-Mind&View 专栏收录该内容

17 篇文章 0 订阅

订阅专栏

Lucene是一个简单而功能强大的基于Java的搜索库，可以为任何应用程序提供搜索功能。它是一个可扩展的高性能库，可用于索引和搜索几乎任何类型的文本。

项目中使用Lucene做业务菜单的搜索功能。客户可以通过输入业务菜单的部分文字，通过Lucene检索，查询到相符合的菜单目录进行业务操作。闲话不说，本人根据项目中Lucene的使用情况结合新版的（6.6）Lucene使用情况写了个DEMO用于学习。

首先是DEMO中Lucene使用的公共常量类。

/**
 * Shared constants for the Lucene demo: index field names and the search limit.
 *
 * @author zhouyi
 */
public final class LuceneConstants {

	/** Name of the index field that holds the file contents. */
	public static final String CONTENTS = "contents";
	/** Name of the index field that holds the file name. */
	public static final String FILE_NAME = "filename";
	/** Name of the index field that holds the file path. */
	public static final String FILE_PATH = "filepath";
	/** Maximum number of hits requested per search (10). */
	public static final int MAX_SEARCH = 10;

	/** Not instantiable: this class only holds constants. */
	private LuceneConstants() {
	}

}

然后对需要索引的文件做类别区分，这里暂时只对TXT文件进行索引。

import java.io.File;
import java.io.FileFilter;


public class TextFileFilter implements FileFilter {

	/** Lower-case suffix a file name must end with to be accepted. */
	private static final String TXT_SUFFIX = ".txt";

	/**
	 * Accepts a path whose name ends with ".txt", ignoring case.
	 * Only the name is inspected; the file system is not consulted.
	 *
	 * @param pathname the candidate file
	 * @return {@code true} if the lower-cased name ends with ".txt"
	 */
	@Override
	public boolean accept(File pathname) {
		String lowerCasedName = pathname.getName().toLowerCase();
		return lowerCasedName.endsWith(TXT_SUFFIX);
	}

}

下面开始对需要检索的文件建立索引。注意：新版的Lucene改用了NIO2（java.nio.file）中的Path、Files等API，摈弃了java.io.File等传统IO的方式。

import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import cn.zhouyi.demo.lucene.LuceneConstants;

/**
 * Builds a Lucene index from the text files found under a directory.
 * Call {@link #close()} when done so the writer and the index directory are released.
 */
public class Indexer {
	// Directory holding the index files; kept so that close() can release it.
	private final Directory indexDirectory;
	// Writer that adds documents to the index.
	private final IndexWriter writer;

	/**
	 * Opens an index writer on the given index directory.
	 *
	 * @param indexDirectoryPath file-system path of the directory that stores the index
	 * @throws IOException if the directory or the writer cannot be opened
	 */
	public Indexer(String indexDirectoryPath) throws IOException {
		// FSDirectory.open takes an NIO2 Path.
		indexDirectory = FSDirectory.open(Paths.get(indexDirectoryPath));
		// The analyzer must be the same one the searcher uses.
		Analyzer analyzer = new StandardAnalyzer();
		try {
			// IndexWriter is configured through an IndexWriterConfig.
			writer = new IndexWriter(indexDirectory, new IndexWriterConfig(analyzer));
		} catch (IOException | RuntimeException ex) {
			// Do not leak the directory when the writer cannot be created.
			try {
				indexDirectory.close();
			} catch (IOException closeEx) {
				ex.addSuppressed(closeEx);
			}
			throw ex;
		}
	}

	/**
	 * Closes the index writer and then the index directory.
	 *
	 * @throws CorruptIndexException if the index is corrupt
	 * @throws IOException if closing fails
	 */
	public void close() throws CorruptIndexException, IOException {
		try {
			writer.close();
		} finally {
			indexDirectory.close();
		}
	}

	/**
	 * Indexes one file: its contents (read as UTF-8), its name and its path.
	 *
	 * @param path the file to index
	 * @throws IOException if the file cannot be read or the index cannot be written
	 */
	private void indexFile(Path path) throws IOException {
		// The stream is consumed by the writer, so the document must be written
		// before try-with-resources closes the stream.
		try (InputStream stream = Files.newInputStream(path)) {
			String filePath = path.toString();
			Document document = new Document();
			document.add(new TextField(LuceneConstants.CONTENTS,
					new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));
			document.add(new StringField(LuceneConstants.FILE_NAME, path.getFileName().toString(), Field.Store.YES));
			document.add(new StringField(LuceneConstants.FILE_PATH, filePath, Field.Store.YES));
			System.out.println("Indexing " + filePath);
			// Replace any document previously indexed for the same path, so that
			// re-running the indexer on an existing index does not create duplicates.
			writer.updateDocument(new Term(LuceneConstants.FILE_PATH, filePath), document);
		}
	}

	/**
	 * Indexes the file at {@code docPath}, or every file below it when it is a
	 * directory, restricted to the files accepted by {@code filter}.
	 * A file that fails to index inside a directory walk is reported on stderr
	 * and skipped; the walk continues.
	 *
	 * @param docPath path of a file or directory to index
	 * @param filter  decides which files are indexed
	 * @return the number of files indexed by this call
	 * @throws IOException if {@code docPath} is not readable, or if walking or indexing fails
	 */
	public int createIndex(String docPath, FileFilter filter) throws IOException {
		Path path = Paths.get(docPath);
		if (!Files.isReadable(path)) {
			// Report the problem to the caller instead of terminating the JVM.
			throw new IOException("Document directory '" + path.toAbsolutePath()
					+ "' does not exist or is not readable");
		}
		// Single-element array so the anonymous visitor can update the count.
		final int[] indexedCount = {0};
		if (Files.isDirectory(path)) {
			Files.walkFileTree(path, new SimpleFileVisitor<Path>() {
				@Override
				public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) {
					try {
						if (filter.accept(file.toFile())) {
							indexFile(file);
							indexedCount[0]++;
						}
					} catch (IOException ex) {
						// Best effort: one unreadable file must not abort the whole walk.
						ex.printStackTrace();
					}
					return FileVisitResult.CONTINUE;
				}
			});
		} else if (filter.accept(path.toFile())) {
			indexFile(path);
			indexedCount[0]++;
		}
		return indexedCount[0];
	}

}

上面给对应目录的文件创建好了分词索引后，下面开始读取索引进行搜索。

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import cn.zhouyi.demo.lucene.LuceneConstants;

/**
 * Runs queries against an existing Lucene index.
 * Call {@link #close()} when done so the reader and the index directory are released.
 */
public class Searcher {
	// Directory the index is read from; kept so that close() can release it.
	private final Directory directory;
	// Reader over the stored index.
	private final IndexReader reader;
	// Executes queries against the reader.
	private final IndexSearcher indexSearcher;
	// Turns the user's search text into a Lucene Query on the contents field.
	private final QueryParser queryParser;

	/**
	 * Opens the index stored in the given directory for searching.
	 *
	 * @param indexDirectoryPath file-system path of the directory that stores the index
	 * @throws IOException if the directory or the index cannot be opened
	 */
	public Searcher(String indexDirectoryPath) throws IOException {
		directory = FSDirectory.open(Paths.get(indexDirectoryPath));
		try {
			reader = DirectoryReader.open(directory);
		} catch (IOException | RuntimeException ex) {
			// Do not leak the directory when the index cannot be opened.
			try {
				directory.close();
			} catch (IOException closeEx) {
				ex.addSuppressed(closeEx);
			}
			throw ex;
		}
		indexSearcher = new IndexSearcher(reader);
		// The analyzer must be the same one the indexer used.
		Analyzer analyzer = new StandardAnalyzer();
		queryParser = new QueryParser(LuceneConstants.CONTENTS, analyzer);
	}

	/**
	 * Parses the search text and returns at most
	 * {@link LuceneConstants#MAX_SEARCH} matching documents.
	 *
	 * @param searchQuery the user's search text, in query-parser syntax
	 * @return the top hits for the query
	 * @throws ParseException if the search text cannot be parsed
	 * @throws IOException if reading the index fails
	 */
	public TopDocs search(String searchQuery) throws ParseException, IOException {
		Query query = queryParser.parse(searchQuery);
		return indexSearcher.search(query, LuceneConstants.MAX_SEARCH);
	}

	/**
	 * Loads the stored document for a hit.
	 *
	 * @param scoreDoc one element of {@code TopDocs.scoreDocs}
	 * @return the document identified by {@code scoreDoc.doc}
	 * @throws IOException if reading the index fails
	 */
	public Document getDocument(ScoreDoc scoreDoc) throws IOException {
		return indexSearcher.doc(scoreDoc.doc);
	}

	/**
	 * Closes the index reader and then the index directory.
	 *
	 * @throws IOException if closing fails
	 */
	public void close() throws IOException {
		try {
			reader.close();
		} finally {
			directory.close();
		}
	}

}

以上简单的索引类和搜索类已经写好了，下面写一个测试类来测试一下lucene的功能。

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import cn.zhouyi.demo.lucene.LuceneConstants;

/**
 * Small driver for the demo: indexes the .txt files under {@code dataDir}
 * into {@code indexDir}, then searches the index for a fixed term.
 */
public class LuceneTester {

	// Directory where the index is stored.
	String indexDir = "E:\\code\\lucence\\index" ;
	// Directory containing the documents to index.
	String dataDir = "E:\\code\\lucence\\docs" ;

	Indexer indexer ;
	Searcher searcher ;

	/** Builds the index and runs one sample search; any failure is printed. */
	public static void main(String[] args) {
		try {
			LuceneTester tester = new LuceneTester();
			tester.createIndex();
			tester.search("you");
		} catch (Exception ex) {
			ex.printStackTrace();
		}
	}

	/**
	 * Indexes the .txt files under {@code dataDir} and prints the count and elapsed time.
	 *
	 * @throws IOException if indexing or closing the indexer fails
	 */
	private void createIndex() throws IOException {
		int numIndexed;
		long startTime;
		long endTime;
		indexer = new Indexer(indexDir);
		try {
			startTime = System.currentTimeMillis();
			// A lambda as the FileFilter; it applies the same test as TextFileFilter.
			numIndexed = indexer.createIndex(dataDir,
					pathname -> pathname.getName().toLowerCase().endsWith(".txt"));
			endTime = System.currentTimeMillis();
		} finally {
			// Always release the indexer, even when indexing fails.
			indexer.close();
		}
		System.out.println(numIndexed+" File indexed, time taken: "+(endTime-startTime)+" ms");
	}

	/**
	 * Searches the index and prints the hit count, the elapsed time and the
	 * path of every returned document.
	 *
	 * @param searchQuery the search text
	 * @throws IOException if reading the index fails
	 * @throws ParseException if the search text cannot be parsed
	 */
	private void search(String searchQuery) throws IOException, ParseException {
		searcher = new Searcher(indexDir);
		try {
			long startTime = System.currentTimeMillis();
			TopDocs hits = searcher.search(searchQuery);
			long endTime = System.currentTimeMillis();

			System.out.println(hits.totalHits+" documents found. Time :" + (endTime - startTime));
			for (ScoreDoc scoreDoc : hits.scoreDocs) {
				Document doc = searcher.getDocument(scoreDoc);
				System.out.println("File: " + doc.get(LuceneConstants.FILE_PATH));
			}
		} finally {
			// Always release the searcher, even when the search fails.
			searcher.close();
		}
	}
}

执行上面测试类得到的结果：

Indexing E:\code\lucence\docs\doc1.txt
1 File indexed, time taken: 105 ms
1 documents found. Time :24
File: E:\code\lucence\docs\doc1.txt

收工完毕。

Rj08zhou

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫

专栏目录