Lucene教程（四）- 基本索引操作

最新推荐文章于 2022-09-28 01:01:26 发布

橘猫吃不胖胖

最新推荐文章于 2022-09-28 01:01:26 发布

阅读量1.4k

点赞数

分类专栏： Lucene 文章标签： Lucene 索引搜索

本文链接：https://blog.csdn.net/yuguiyang1990/article/details/14110831

版权

Lucene 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

要实现搜索功能的话，首先要做的是，进行索引（Indexing），在这里分享一下索引过程的相关知识。

1. 索引过程

Lucene索引过程主要分为3个操作阶段：将数据转换成文本，分析文本，并将分析过的文本保存到索引中。

2. 基本索引

之前的索引，是从文件遍历，添加到索引，在这里，我们手动的创建一个索引。

package org.ygy.lucene.index;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds a small demo Lucene index from ten generated {@link BookEntity}
 * records and writes it to the directory named by {@link #INDEX_BOOK}.
 *
 * <p>Field mapping used for every book:
 * <ul>
 *   <li>{@code id} - {@code IntField}, stored</li>
 *   <li>{@code title}, {@code author} - {@code StringField}, stored</li>
 *   <li>{@code price} - {@code DoubleField}, not stored</li>
 *   <li>{@code publishDate} - {@code StringField} holding {@code Date.toString()}, not stored</li>
 * </ul>
 */
public class BookIndex {
	
	/** In-memory data source; refilled by {@link #initial()} before each indexing run. */
	private final List<BookEntity> books = new ArrayList<BookEntity>();
	
	/** File-system location of the index; also read by the search side. */
	public static final String INDEX_BOOK = "/home/yuguiyang/Documents/lucene_temp";
	
	/**
	 * Fills {@link #books} with ten sample books (ids 100-109).
	 *
	 * <p>The list is cleared first so that calling {@link #indexing()} more
	 * than once on the same instance does not index duplicate records.
	 */
	private void initial() {
		books.clear();
		for (int i = 0; i < 10; i++) {
			BookEntity book = new BookEntity();
			book.setId(100 + i);
			book.setTitle("title_" + i);
			book.setAuthor("author_" + i);
			book.setPrice(20.00 + i);
			book.setPublishDate(new Date());
			
			books.add(book);
		}
	}
	
	/**
	 * Creates the book index under {@link #INDEX_BOOK}.
	 *
	 * <p>I/O failures are reported on standard error via
	 * {@code printStackTrace()} and are not propagated to the caller. The
	 * directory handle is always closed, whether or not indexing succeeded.
	 */
	public void indexing() {
		Directory dir = null;
		try {
			dir = FSDirectory.open(new File(BookIndex.INDEX_BOOK));
			
			addDocuments(dir);
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			if (dir != null) {
				try {
					dir.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}
	
	/**
	 * Writes one Lucene document per book into {@code dir}.
	 *
	 * @param dir the directory that receives the index
	 * @throws IOException if the writer cannot be opened or a document cannot be written
	 */
	private void addDocuments(Directory dir) throws IOException {
		// Analyzer used by the writer for analysed fields.
		Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);
		// Writer configuration; the open mode is CREATE rather than an appending mode.
		IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_45 , analyzer);
		iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
		
		IndexWriter writer = new IndexWriter(dir , iwc);
		boolean success = false;
		try {
			// Prepare the data source.
			initial();
			// Walk the data source and add every book to the index.
			for (int i = 0; i < books.size(); i++) {
				BookEntity book = books.get(i);
				System.out.println(i + "->" + book);
				
				Document doc = new Document();
				doc.add(new IntField("id" , book.getId(), Store.YES));
				doc.add(new StringField("title" , book.getTitle() , Store.YES));
				doc.add(new StringField("author" , book.getAuthor() , Store.YES));
				doc.add(new DoubleField("price" , book.getPrice(), Store.NO));
				doc.add(new StringField("publishDate" , book.getPublishDate().toString() , Store.NO));
				
				writer.addDocument(doc);
			}
			success = true;
		} finally {
			// Always release the writer (and its write lock). On failure,
			// roll back instead of committing a half-written index.
			if (success) {
				writer.close();
			} else {
				writer.rollback();
			}
		}
	}
	
	/**
	 * Command-line entry point: builds the index once.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		BookIndex bookIndex = new BookIndex();
		bookIndex.indexing();
	}
	
}

基本的代码都是一样的，只是，在这里，我们构造了一个BookEntity类，并初始化了一些book，并将书籍的信息写入索引中。

package org.ygy.lucene.index;

import java.util.Date;

/**
 * Plain data holder describing one book.
 *
 * <p>Two instances are considered equal when their {@code id} values are
 * equal (or both {@code null}); the other properties do not take part in
 * {@link #equals(Object)} or {@link #hashCode()}.
 */
public class BookEntity {

	private Integer id; // book id
	private String title; // book title
	private String author; // author name
	private Date publishDate; // publication date
	private Double price; // unit price

	/** @return the book id, possibly {@code null} */
	public Integer getId() {
		return id;
	}

	/** @param id the book id */
	public void setId(Integer id) {
		this.id = id;
	}

	/** @return the title, possibly {@code null} */
	public String getTitle() {
		return title;
	}

	/** @param title the title */
	public void setTitle(String title) {
		this.title = title;
	}

	/** @return the author, possibly {@code null} */
	public String getAuthor() {
		return author;
	}

	/** @param author the author */
	public void setAuthor(String author) {
		this.author = author;
	}

	/**
	 * Returns a copy of the publication date, so callers cannot change this
	 * object's state by mutating the returned {@link Date}.
	 *
	 * @return a copy of the publication date, or {@code null} if none is set
	 */
	public Date getPublishDate() {
		return copyOf(publishDate);
	}

	/**
	 * Stores a copy of the given date, so later changes to the argument do
	 * not leak into this object.
	 *
	 * @param publishDate the publication date, may be {@code null}
	 */
	public void setPublishDate(Date publishDate) {
		this.publishDate = copyOf(publishDate);
	}

	/** @return the unit price, possibly {@code null} */
	public Double getPrice() {
		return price;
	}

	/** @param price the unit price */
	public void setPrice(Double price) {
		this.price = price;
	}

	/** Null-safe defensive copy of a mutable {@link Date}. */
	private static Date copyOf(Date source) {
		return source == null ? null : new Date(source.getTime());
	}

	/** Hash code derived from {@code id} only, consistent with {@link #equals(Object)}. */
	@Override
	public int hashCode() {
		final int prime = 31;
		int idHash = (id == null) ? 0 : id.hashCode();
		return prime + idHash;
	}

	/** Equality is based on the exact class and on {@code id} only. */
	@Override
	public boolean equals(Object obj) {
		if (this == obj) {
			return true;
		}
		if (obj == null || getClass() != obj.getClass()) {
			return false;
		}
		BookEntity other = (BookEntity) obj;
		return (id == null) ? other.id == null : id.equals(other.id);
	}

	@Override
	public String toString() {
		return "BookEntity [id=" + id + ", title=" + title + ", author=" + author + ", publishDate=" + publishDate
				+ ", price=" + price + "]";
	}
	
}

然后，我们把之前的搜索的代码简单改一下，让它更加适合我们的索引：

package org.ygy.lucene.index;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Runs simple single-field queries against the index located at
 * {@code BookIndex.INDEX_BOOK} and prints the matching documents.
 */
public class BookSearch {

	/** Upper bound on the number of hits fetched and printed per query. */
	private static final int MAX_HITS = 100;

	/**
	 * Parses {@code queryString} against {@code field} and prints every hit.
	 *
	 * <p>Parse and I/O failures are reported via {@code printStackTrace()}
	 * and not propagated. The reader and directory are always closed.
	 *
	 * <p>NOTE(review): the query text goes through {@code QueryParser} with a
	 * {@code StandardAnalyzer}. Per the Lucene documentation this produces
	 * analysed text-term queries, so fields indexed as {@code IntField} or
	 * {@code DoubleField}, or as a {@code StringField} containing upper-case
	 * characters, are not expected to match - confirm against the indexing side.
	 *
	 * @param field       default field to search
	 * @param queryString query in classic query-parser syntax
	 */
	public static void searching(String field, String queryString) {
		FSDirectory directory = null;
		IndexReader reader = null;
		try {
			// Open the index; a failure here now skips the search
			// rather than carrying on with a null reader.
			directory = FSDirectory.open(new File(BookIndex.INDEX_BOOK));
			reader = DirectoryReader.open(directory);

			// Analyzer and parser used to turn the query string into a Query.
			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);
			QueryParser parser = new QueryParser(Version.LUCENE_45, field, analyzer);

			Query query = parser.parse(queryString);
			System.out.println("Searching for:" + field + "->" + queryString);

			doSearch(new IndexSearcher(reader), query);
		} catch (ParseException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeResource(reader);
			closeResource(directory);
		}
	}

	/**
	 * Executes {@code query} and prints the total hit count followed by the
	 * stored fields of up to {@link #MAX_HITS} documents.
	 *
	 * @param searcher searcher bound to an open reader
	 * @param query    the query to run
	 * @throws IOException if the index cannot be read
	 */
	private static void doSearch(IndexSearcher searcher, Query query) throws IOException {
		TopDocs results = searcher.search(query, MAX_HITS);
		ScoreDoc[] hits = results.scoreDocs;

		int numTotalHits = results.totalHits;
		System.out.println("总条数:" + numTotalHits);

		// Iterate over the hits actually returned, not a bound derived
		// from totalHits, so the loop can never index past the array.
		for (ScoreDoc hit : hits) {
			Document doc = searcher.doc(hit.doc);

			// Show the book details. Document.get returns null for any
			// field that has no stored value in the index.
			System.out.println("id->" + doc.get("id"));
			System.out.println("title->" + doc.get("title"));
			System.out.println("author->" + doc.get("author"));
			System.out.println("price->" + doc.get("price"));
			System.out.println("publishDate->" + doc.get("publishDate"));
		}
	}

	/** Closes {@code resource} if it is non-null, reporting (not throwing) any failure. */
	private static void closeResource(java.io.Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}

好了，下面，我们运行一下，索引，然后查询一下：

package org.ygy.lucene.index;


public class BookClient {
	public static void main(String[] args) {
		//1.indexing
		
		//2.search
		
		BookSearch.searching("id", "100");
		BookSearch.searching("title", "title_0");
		BookSearch.searching("author", "author_0");
		BookSearch.searching("price" , "20.0");
		
	}
}

结果：

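这里，就发现了一个问题：我们使用id、price搜索时，没有找到数据；但是使用title和author搜索时，就可以查询到数据。暂时无法理解，有待学习。（注：id和price是用IntField、DoubleField按数值编码建立索引的，而QueryParser生成的是针对文本词项的查询，两者无法匹配；数值域一般需要使用NumericRangeQuery来查询。）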

在测试时，还发现一个问题，就是有关大小写的问题：

在这里，我们把原来的"author_"改为大写的"Author_"，重新运行一遍索引程序，再重新查询。

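好吧，悲剧了，不管是查询“author_0”，还是“Author_0”，都查不到数据了，这里的大小写，不知道是什么问题。（注：StringField不经过分析器，索引中保存的是原样的“Author_0”；而QueryParser会用StandardAnalyzer把查询词转成小写的“author_0”，所以两种写法都匹配不上。）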

这两个问题先记着，在学习中解决。

3. 可追加的域

有的时候，应用程序可以为一个给定的词语产生一系列同义词，我们当然希望，在搜索同义词的时候，也能把该条记录查出来，这在Lucene中可以很简单地实现。

在BookIndex中修改：

我们在第一条记录的title字段中，追加"op"和"ygy"，然后再查询：

我们，看一下，能否查询到结果：

好了，这样就实现了。

4. 删除索引中的文档

5. 恢复被删除的文档

6. 更新索引中的文档

橘猫吃不胖胖

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Lucene教程（四）- 基本索引操作

要实现搜索功能的话，首先要做的是，进行索引（Indexing），在这里分享一下索引过程的相关知识。1. 索引过程Lucene索引过程主要分为3个操作阶段：将数据转换成文本，分析文本，并将分析过的文本保存到数据库。2. 基本索引之前的索引，是从文件遍历，添加到索引，在这里，我们手动的创建一个索引。package org.ygy.lucene.index;import
复制链接

扫一扫