Lucene 7 入门案例

最新推荐文章于 2023-06-07 16:58:39 发布

qq_36470908

最新推荐文章于 2023-06-07 16:58:39 发布

阅读量1.6k

点赞数 1

分类专栏： Lucene

本文链接：https://blog.csdn.net/qq_36470908/article/details/79628033

版权

Lucene 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

1 开发准备：jdk 1.8

所需jar包：

commons-io-2.6.jar，

lucene-analyzers-common-7.2.1.jar，

lucene-core-7.2.1.jar，

lucene-queryparser-7.2.1.jar

2 流程：

2.1 创建索引

2.2 指定索引查询

3.代码：

3.1工具类：

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;

/**
 * Helper methods shared by the indexing and search demos: converts the files
 * of a directory into Lucene {@link Document}s and prints the stored fields
 * of a document.
 */
public class IndexUtils {
	// Directory containing the source files to be indexed
	public static String searchSource = "F:\\lucene\\searchsource";
	// Directory the index is written to and read from
	public static String indexFolder = "F:\\lucene\\indexdata";

	/**
	 * Builds one {@link Document} for every regular file located directly
	 * inside {@code folderPath}, reading file contents as UTF-8.
	 *
	 * @param folderPath path of the directory to scan (sub-directories are skipped)
	 * @return the created documents; an empty list (never {@code null}) when
	 *         {@code folderPath} is not a directory or cannot be listed
	 * @throws IOException if a file cannot be read
	 */
	public static List<Document> file2Document(String folderPath) throws IOException {
		// An explicit charset keeps the indexed text identical on every machine;
		// the one-argument readFileToString used the platform default charset.
		return file2Document(folderPath, "UTF-8");
	}

	/**
	 * Same as {@link #file2Document(String)} but lets the caller choose the
	 * charset used to decode the files (for example {@code "GBK"}).
	 *
	 * @param folderPath  path of the directory to scan (sub-directories are skipped)
	 * @param charsetName name of the charset used to decode each file
	 * @return the created documents; an empty list when {@code folderPath} is
	 *         not a directory or cannot be listed
	 * @throws IOException if a file cannot be read
	 */
	public static List<Document> file2Document(String folderPath, String charsetName) throws IOException {
		List<Document> list = new ArrayList<Document>();
		// listFiles() yields null both for a non-directory and on an I/O error,
		// so a single null check covers the cases that previously returned
		// null or failed with a NullPointerException.
		File[] files = new File(folderPath).listFiles();
		if (files == null) {
			return list;
		}
		for (File file : files) {
			if (!file.isFile()) {
				continue;
			}
			String fileName = file.getName();
			String fileContent = FileUtils.readFileToString(file, charsetName);
			String filePath = file.getAbsolutePath();
			long fileSize = FileUtils.sizeOf(file);

			Document doc = new Document();
			// File name: indexed as a single untokenized term and stored
			doc.add(new StringField("fileName", fileName, Store.YES));
			// File content: analyzed (tokenized) and stored
			doc.add(new TextField("fileContent", fileContent, Store.YES));
			// File size in bytes, kept as text
			doc.add(new TextField("fileSize", String.valueOf(fileSize), Store.YES));
			// Absolute path: stored only, not searchable
			doc.add(new StoredField("filePath", filePath));
			// Constant marker field; a query on auto:auto matches every document
			doc.add(new TextField("auto", "auto", Store.YES));
			list.add(doc);
		}
		return list;
	}

	/**
	 * Prints the stored file name, size and content of {@code doc} to standard output.
	 *
	 * @param doc a document carrying the fields written by {@code file2Document}
	 */
	public static void printDocumentOfFile(Document doc) {
		System.out.println("文件名称 =" + doc.get("fileName"));
		System.out.println("文件大小 =" + doc.get("fileSize"));
		System.out.println("文件内容 =" + doc.get("fileContent"));
	}

}

3.2 创建索引类：

import java.io.IOException;
import java.nio.file.FileSystems;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Demo entry point that indexes every file found in
 * {@code IndexUtils.searchSource} into {@code IndexUtils.indexFolder}.
 */
public class IndexTest {

	/**
	 * Reads the source directory, converts each file into a document and
	 * writes all documents to the index. I/O failures are printed to standard
	 * error; the success message is printed only when indexing completed.
	 */
	public static void testCreateIndex() {
		try {
			List<Document> docs = IndexUtils.file2Document(IndexUtils.searchSource);
			// Guard against a missing source directory instead of failing
			// with a NullPointerException in the indexing loop.
			if (docs == null) {
				System.err.println("索引源目录不存在或不是目录：" + IndexUtils.searchSource);
				return;
			}
			writeIndex(docs);
			System.out.println("索引创建完成");
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Adds {@code docs} to the index stored in {@code IndexUtils.indexFolder}.
	 *
	 * @param docs documents to add
	 * @throws IOException if the index cannot be opened or written
	 */
	private static void writeIndex(List<Document> docs) throws IOException {
		// try-with-resources closes the writer (releasing the index write
		// lock), the directory and the analyzer even when addDocument throws.
		// NOTE: the default IndexWriterConfig appends to an existing index,
		// so running this twice indexes every file twice.
		try (Analyzer standardAnalyzer = new StandardAnalyzer();
				Directory directory = FSDirectory.open(FileSystems.getDefault().getPath(IndexUtils.indexFolder));
				IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(standardAnalyzer))) {
			for (Document document : docs) {
				indexWriter.addDocument(document);
			}
		}
	}

	public static void main(String[] args) {
		testCreateIndex();
	}

}

3.3 查询类：

import java.io.File;
import java.io.IOException;
import java.nio.file.FileSystems;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Demo entry point that runs a term query against the index stored in
 * {@code IndexUtils.indexFolder} and prints the matching documents.
 */
public class SearchTest {

	/**
	 * Searches for the term {@code auto:auto} and prints up to 100 hits.
	 *
	 * @throws IOException if the index cannot be opened or read
	 */
	public static void testTermQuery() throws IOException {
		// To look up a single file by its exact name instead, query the
		// "fileName" field, e.g. new Term("fileName", "aa.txt").
		Query query = new TermQuery(new Term("auto", "auto"));
		// The reader and the directory hold open file handles, so both are
		// closed via try-with-resources once the results have been printed.
		try (Directory directory = FSDirectory.open(FileSystems.getDefault().getPath(IndexUtils.indexFolder));
				IndexReader reader = DirectoryReader.open(directory)) {
			IndexSearcher indexSearcher = new IndexSearcher(reader);
			// Fetch at most 100 top-scoring hits
			TopDocs topDocs = indexSearcher.search(query, 100);
			System.out.println("共搜索到总记录数：" + topDocs.totalHits);
			for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
				// scoreDoc.doc is the internal document id used to load the stored fields
				Document doc = indexSearcher.doc(scoreDoc.doc);
				IndexUtils.printDocumentOfFile(doc);
			}
		}
	}

	public static void main(String[] args) throws IOException {
		testTermQuery();
	}
}

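4. 更新、删除、提交方法（仅供参考；使用时需放入某个类中，并导入 org.apache.lucene.index.IndexWriter、org.apache.lucene.index.Term、org.apache.lucene.document.Document 和 java.io.IOException）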

/**
	 * Updates the index by replacing every document that matches {@code term}
	 * with {@code doc}. Lucene has no in-place update: the writer deletes the
	 * matching documents and then adds the new one.
	 *
	 * @param indexWriter writer of the index to modify
	 * @param doc         replacement document
	 * @param term        term identifying the documents to replace
	 * @throws java.io.UncheckedIOException if the writer fails, so that a
	 *         failed update is not silently ignored by the caller
	 */
	public static void updateIndex(IndexWriter indexWriter,Document doc, Term term) {
		try {
			indexWriter.updateDocument(term, doc);
		} catch (IOException e) {
			throw new java.io.UncheckedIOException("Failed to update documents matching " + term, e);
		}
	}

	/**
	 * Deletes every document that matches {@code term}.
	 * <p>
	 * The deletion is only buffered by the writer: a reader opened before the
	 * change is committed still returns the deleted documents. Commit (or
	 * close) the writer and open a new reader to see the result. Lucene 7 has
	 * no "recycle bin" from which deleted documents can be restored.
	 *
	 * @param indexWriter writer of the index to modify
	 * @param term        term identifying the documents to delete
	 * @throws java.io.UncheckedIOException if the writer fails, so that a
	 *         failed delete is not silently ignored by the caller
	 */
	public void deleteIndex(IndexWriter indexWriter,Term term) {
		try {
			indexWriter.deleteDocuments(term);
		} catch (IOException e) {
			throw new java.io.UncheckedIOException("Failed to delete documents matching " + term, e);
		}
	}

	/**
	 * Commits all pending changes (added, updated and deleted documents) of
	 * {@code indexWriter} to the index.
	 *
	 * @param indexWriter writer whose pending changes are committed
	 * @throws java.io.UncheckedIOException if the commit fails; the caller
	 *         must not assume the changes were persisted in that case
	 */
	public void commitIndex(IndexWriter indexWriter) {
		try {
			indexWriter.commit();
		} catch (IOException e) {
			throw new java.io.UncheckedIOException("Failed to commit index changes", e);
		}
	}

5. 执行流程：先运行 IndexTest 的 main 方法（调用 testCreateIndex 创建索引），然后运行 SearchTest 的 main 方法（调用 testTermQuery 查询索引）

其他参考：

http://blog.csdn.net/starjuly/article/details/74625748

Lucene查询：http://blog.csdn.net/napoay/article/details/51227794

文档案例：https://www.yiibai.com/lucene/lucene_first_application.html