Lucene简单文件夹索引和查询案例

最新推荐文章于 2019-09-26 19:30:52 发布

雨夜星辰03

最新推荐文章于 2019-09-26 19:30:52 发布

阅读量574

点赞数

分类专栏： javaweb javaEE 文章标签： apache lucene

本文链接：https://blog.csdn.net/QWERTY1994/article/details/78654099

版权

javaweb 同时被 2 个专栏收录

21 篇文章 0 订阅

订阅专栏

javaEE

19 篇文章 0 订阅

订阅专栏

本文使用的环境：Apache Lucene 7.0，JDK 8。

1.遍历文件夹建立索引

package com.kyd.demo.standardAnaly;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Builds an on-disk Lucene index from the text files found under a directory.
 *
 * <p>Each indexed file becomes one document with three fields:
 * {@code lastModifiedTime} (a {@link LongPoint}), {@code path} (a stored
 * {@link StringField}) and {@code contents} (a {@link TextField} fed from the
 * file decoded as UTF-8).
 *
 * <p>Usage: {@code FileIndexDemo [scanDir]}. When no argument is given the
 * built-in default directory is scanned.
 */
public class FileIndexDemo {
	/** Location of the index, relative to the working directory. */
	private static final String INDEX_DIR = "file_index";

	/** Directory scanned when no command-line argument is supplied. */
	private static final String DEFAULT_SCAN_DIR = "D:\\360安全浏览器下载";

	/** Lower-case extensions (including the leading dot) that are treated as text. */
	private static final Set<String> TEXT_EXTENSIONS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
			".txt", ".java", ".html", ".htm", ".css", ".js", ".xml", ".properties", ".tld")));

	/**
	 * Creates (or overwrites) the index and fills it from the scan directory.
	 *
	 * @param args optional; {@code args[0]} overrides the default scan directory
	 */
	public static void main(String[] args) {
		String scanDir = (args != null && args.length > 0) ? args[0] : DEFAULT_SCAN_DIR;

		// Resources are closed in reverse order: writer first (releasing the
		// index write lock), then the analyzer, then the directory.
		try (Directory directory = FSDirectory.open(Paths.get(INDEX_DIR));
				Analyzer analyzer = new StandardAnalyzer();
				IndexWriter indexWriter = new IndexWriter(directory,
						new IndexWriterConfig(analyzer).setOpenMode(OpenMode.CREATE))) {
			indexDocs(indexWriter, scanDir);
			// commit() already flushes buffered documents, so no separate flush() is needed.
			indexWriter.commit();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Indexes {@code scanDir}: recursively if it is a directory, otherwise as a
	 * single file.
	 *
	 * @param indexWriter writer that receives the documents
	 * @param scanDir     path of the directory or file to index
	 * @throws IOException if reading a file or writing to the index fails
	 */
	private static void indexDocs(IndexWriter indexWriter, String scanDir) throws IOException {
		Path root = Paths.get(scanDir);
		if (!Files.isDirectory(root)) {
			indexDoc(indexWriter, root);
			return;
		}

		Files.walkFileTree(root, new SimpleFileVisitor<Path>() {

			@Override
			public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
				System.out.println(file.toString());
				if (Files.exists(file)) {
					indexDoc(indexWriter, file);
				}
				return FileVisitResult.CONTINUE;
			}

			@Override
			public FileVisitResult visitFileFailed(Path file, IOException exc) {
				// The inherited implementation rethrows, which would abort the whole
				// walk on the first unreadable file or directory; skip it instead.
				System.err.println("Skipping unreadable path " + file + ": " + exc);
				return FileVisitResult.CONTINUE;
			}

		});
	}

	/**
	 * Adds one document for {@code filsPath} if it looks like a text file;
	 * otherwise does nothing.
	 *
	 * @param indexWriter writer that receives the document
	 * @param filsPath    file to index
	 * @throws IOException if the file cannot be read or the index cannot be written
	 */
	private static void indexDoc(IndexWriter indexWriter, Path filsPath) throws IOException {
		if (!isTextFile(filsPath)) {
			return;
		}
		// The stream must stay open until addDocument() returns, because the
		// contents field is consumed from the reader while the document is added.
		try (InputStream stream = Files.newInputStream(filsPath)) {
			Document document = new Document();
			document.add(new LongPoint("lastModifiedTime", Files.getLastModifiedTime(filsPath).toMillis()));
			document.add(new StringField("path", filsPath.toString(), Store.YES));
			document.add(new TextField("contents",
					new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));
			indexWriter.addDocument(document);
		}
	}

	/**
	 * Decides from the file name whether a file should be indexed as text.
	 *
	 * <p>Only the last path element is inspected, so dots in parent directory
	 * names do not matter. A name without any dot is treated as text; otherwise
	 * the extension is compared case-insensitively against the known text
	 * extensions.
	 *
	 * @param scanDir path of the file to test
	 * @return {@code true} if the file should be indexed
	 */
	private static boolean isTextFile(Path scanDir) {
		Path fileName = scanDir.getFileName();
		if (fileName == null) {
			// A path with no name element (e.g. a filesystem root) is not a file.
			return false;
		}
		String name = fileName.toString();
		int dot = name.lastIndexOf('.');
		if (dot == -1) {
			return true;
		}
		return TEXT_EXTENSIONS.contains(name.substring(dot).toLowerCase(Locale.ROOT));
	}
}

2.查询案例

package com.kyd.demo.standardAnaly;

import java.io.IOException;
import java.nio.file.Paths;


import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;

/**
 * Searches the {@code contents} field of the index stored in the
 * {@code file_index} directory and prints the {@code path} of each hit.
 *
 * <p>Usage: {@code SearchDemo [query]}. When no argument is given the query
 * {@code Lucene} is used.
 */
public class SearchDemo {
	/**
	 * Opens the index, runs one query and prints up to 100 matching paths.
	 *
	 * @param args optional; {@code args[0]} overrides the default query string
	 */
	public static void main(String[] args) {
		String queryText = (args != null && args.length > 0) ? args[0] : "Lucene";

		// try-with-resources closes the analyzer, the reader and the directory
		// (in that order) even when parsing or searching fails.
		try (FSDirectory directory = FSDirectory.open(Paths.get("file_index"));
				IndexReader indexReader = DirectoryReader.open(directory);
				Analyzer analyzer = new StandardAnalyzer()) {
			IndexSearcher indexSearcher = new IndexSearcher(indexReader);

			// Parse the query against the "contents" field with the same analyzer
			// type that was used at index time.
			QueryParser queryParser = new QueryParser("contents", analyzer);
			Query query = queryParser.parse(queryText);

			// Fetch at most the top 100 hits.
			TopDocs topDocs = indexSearcher.search(query, 100);
			ScoreDoc[] scoreDocs = topDocs.scoreDocs;
			System.out.println("一共匹配到:"+topDocs.totalHits+"条记录");
			for (int i = 0; i < scoreDocs.length; i++) {
				Document document = indexSearcher.doc(scoreDocs[i].doc);
				System.out.println(i+":"+document.get("path"));
			}
		} catch (IOException | ParseException e) {
			e.printStackTrace();
		}
	}
}