要实现搜索功能,首先要做的就是建立索引(Indexing)。在这里分享一下索引过程的相关知识。
1. 索引过程
Lucene索引过程主要分为3个操作阶段:将数据转换成文本,分析文本,并将分析过的文本保存到索引中。
2. 基本索引
之前的索引,是通过遍历文件,把文件内容添加到索引中;在这里,我们手动地创建一个索引。
package org.ygy.lucene.index;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Builds a small Lucene index from ten in-memory sample books.
 *
 * <p>The index is written to {@link #INDEX_BOOK} and is re-created from
 * scratch on every run ({@code OpenMode.CREATE}).
 */
public class BookIndex {
    /** Sample data that gets written to the index. */
    private List<BookEntity> books = new ArrayList<BookEntity>();

    /** File-system directory that holds the index. */
    public static final String INDEX_BOOK = "/home/yuguiyang/Documents/lucene_temp";

    /**
     * Fills {@link #books} with ten sample books (ids 100..109).
     *
     * <p>The list is cleared first so that calling {@link #indexing()} more
     * than once on the same instance does not index duplicates.
     */
    private void initial() {
        books.clear();
        for (int i = 0; i < 10; i++) {
            BookEntity book = new BookEntity();
            book.setId(100 + i);
            book.setTitle("title_" + i);
            book.setAuthor("author_" + i);
            book.setPrice(20.00 + i);
            book.setPublishDate(new Date());
            books.add(book);
        }
    }

    /**
     * Creates the book index under {@link #INDEX_BOOK}.
     *
     * <p>I/O failures are reported on stderr and not propagated. The
     * directory is always closed, even when indexing fails.
     */
    public void indexing() {
        Directory dir = null;
        try {
            dir = FSDirectory.open(new File(BookIndex.INDEX_BOOK));
            addDocuments(dir);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (dir != null) {
                try {
                    dir.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Writes one Lucene document per sample book into {@code dir}.
     *
     * @param dir the directory the index is written to
     * @throws IOException if the writer cannot be opened or a document cannot be written
     */
    private void addDocuments(Directory dir) throws IOException {
        // Analyzer used by the writer for analyzed fields.
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);

        // Writer configuration: always start with a fresh index.
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_45, analyzer);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        IndexWriter writer = new IndexWriter(dir, iwc);
        try {
            // Prepare the data source.
            initial();

            // Walk the data source and add every book to the index.
            for (int i = 0; i < books.size(); i++) {
                BookEntity book = books.get(i);
                System.out.println(i + "->" + book);

                Document doc = new Document();
                doc.add(new IntField("id", book.getId(), Store.YES));
                doc.add(new StringField("title", book.getTitle(), Store.YES));
                doc.add(new StringField("author", book.getAuthor(), Store.YES));
                // price and publishDate are indexed but not stored.
                doc.add(new DoubleField("price", book.getPrice(), Store.NO));
                doc.add(new StringField("publishDate", book.getPublishDate().toString(), Store.NO));
                writer.addDocument(doc);
            }
        } finally {
            // Close the writer even when adding a document fails; otherwise
            // the index write lock stays held.
            writer.close();
        }
    }

    public static void main(String[] args) {
        BookIndex bookIndex = new BookIndex();
        bookIndex.indexing();
    }
}
基本的代码都是一样的,只是,在这里,我们构造了一个BookEntity类,并初始化了一些book,并将书籍的信息写入索引中。
package org.ygy.lucene.index;
import java.util.Date;
public class BookEntity {
private Integer id; // 书籍ID
private String title; // 书籍名称
private String author; // 作者
private Date publishDate; // 出版日期
private Double price; // 单价
public Integer getId() {
return id;
}
public void setId(Integer id) {
this.id = id;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getAuthor() {
return author;
}
public void setAuthor(String author) {
this.author = author;
}
public Date getPublishDate() {
return publishDate;
}
public void setPublishDate(Date publishDate) {
this.publishDate = publishDate;
}
public Double getPrice() {
return price;
}
public void setPrice(Double price) {
this.price = price;
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + ((id == null) ? 0 : id.hashCode());
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
BookEntity other = (BookEntity) obj;
if (id == null) {
if (other.id != null)
return false;
} else if (!id.equals(other.id))
return false;
return true;
}
@Override
public String toString() {
return "BookEntity [id=" + id + ", title=" + title + ", author=" + author + ", publishDate=" + publishDate
+ ", price=" + price + "]";
}
}
然后,我们把之前的搜索代码简单改一下,让它更加适合我们的索引:
package org.ygy.lucene.index;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Runs simple single-field queries against the index written by
 * {@code BookIndex} and prints the matching books to stdout.
 */
public class BookSearch {
    /** Maximum number of hits requested from the searcher. */
    private static final int MAX_HITS = 100;

    /**
     * Parses {@code queryString} against {@code field} and prints every hit.
     *
     * <p>Failures to open the index, parse the query or run the search are
     * reported on stderr and not propagated. The reader and the directory
     * are always closed. If the index cannot be opened the search is
     * skipped instead of continuing with a null reader.
     *
     * @param field       default field the query parser searches in
     * @param queryString query text in Lucene query-parser syntax
     */
    public static void searching(String field, String queryString) {
        FSDirectory dir = null;
        IndexReader reader = null;
        try {
            // Open the index.
            dir = FSDirectory.open(new File(BookIndex.INDEX_BOOK));
            reader = DirectoryReader.open(dir);

            // Analyzer and query parser.
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);
            QueryParser parser = new QueryParser(Version.LUCENE_45, field, analyzer);

            // Build the query from the keyword.
            Query query = parser.parse(queryString);
            System.out.println("Searching for:" + field + "->" + queryString);

            // Search the index.
            IndexSearcher searcher = new IndexSearcher(reader);
            doSearch(searcher, query);
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(reader);
            closeQuietly(dir);
        }
    }

    /**
     * Executes {@code query} and prints the total hit count followed by the
     * stored fields of each returned document.
     *
     * @param searcher searcher bound to the open index reader
     * @param query    query to execute
     * @throws IOException if the index cannot be read
     */
    private static void doSearch(IndexSearcher searcher, Query query) throws IOException {
        TopDocs results = searcher.search(query, MAX_HITS);
        ScoreDoc[] hits = results.scoreDocs;
        System.out.println("总条数:" + results.totalHits);

        // Iterate over the hits that were actually returned; totalHits may
        // be larger than the number of ScoreDocs in the array.
        for (int i = 0; i < hits.length; i++) {
            Document doc = searcher.doc(hits[i].doc);
            // Show the book detail.
            System.out.println("id->" + doc.get("id"));
            System.out.println("title->" + doc.get("title"));
            System.out.println("author->" + doc.get("author"));
            System.out.println("price->" + doc.get("price"));
            System.out.println("publishDate->" + doc.get("publishDate"));
        }
    }

    /** Closes {@code resource} if it is non-null, reporting any failure on stderr. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
好了,下面我们先运行一下索引程序,然后查询一下:
package org.ygy.lucene.index;
public class BookClient {
public static void main(String[] args) {
//1.indexing
//2.search
BookSearch.searching("id", "100");
BookSearch.searching("title", "title_0");
BookSearch.searching("author", "author_0");
BookSearch.searching("price" , "20.0");
}
}
结果:
这里,就发现了一个问题,我们使用id,price 搜索时,没有找到数据;但是使用title和author搜索时,就可以查询到数据。暂时,无法理解,有待学习。
在测试时,还发现一个问题,就是有关大小写的问题:
在这里,我们把原来的"author_"改为首字母大写的"Author_",重新运行一遍索引程序,再重新查询。
好吧,悲剧了,不管是查询“author_0”,还是“Author_0”,都查不到数据了,这里的大小写,不知道是什么问题。
这两个问题先记着,在学习中解决。
3. 可追加的域
有的时候,应用程序可以为一个给定的词语产生一系列同义词,我们当然希望,在搜索同义词的时候,也能把该条记录查出来,这在Lucene中可以很简单地实现。
在BookIndex中修改:
我们在第一条记录的title字段中,追加"op"和"ygy",然后再查询:
我们看一下,能否查询到结果:
好了,这样就实现了。
4. 删除索引中的文档
5. 恢复被删除的文档
6. 更新索引中的文档