Lucene是一个软件库,一个开发工具包,而不是一个具有完整特征的搜索应用程序。
它采用的是一种称为反向索引(inverted index,又称倒排索引)的机制。反向索引简单理解就是维护一个词/短语表,对于这个表中的每个词/短语,都有一个相关信息描述了有哪些文档包含了这个词/短语。这样在用户输入查询条件的时候,就能非常快地得到搜索结果。Lucene本身只关注文本的索引和搜索,它使你可以为你的应用程序添加索引和搜索能力。通过学习Lucene,我们就可以为自己的项目增加全文检索的功能。
接下来我们开始Lucene环境开发案例:
1,创建一个Java工程并导入所需的jar包(下载地址:http://pan.baidu.com/s/1slROGd3),新建实体类Goods,下面是我的工程目录:
2,Goods的代码如下 :
package cn.wxz.entity;
import java.io.Serializable;
/**
 * Serializable bean describing one item of goods: an id, a name, a price and
 * a free-text remark. All properties are nullable wrapper/reference types and
 * are exposed through plain getters and setters.
 */
public class Goods implements Serializable {

    private static final long serialVersionUID = 6341267507850856097L;

    /** Identifier of the goods. */
    private Integer goodsId;

    /** Name of the goods. */
    private String goodsName;

    /** Price of the goods. */
    private Double goodsPrice;

    /** Remark (free-text note) about the goods. */
    private String goodsRemark;

    /** @return the goods id, or {@code null} if it has not been set */
    public Integer getGoodsId() {
        return this.goodsId;
    }

    /** @param goodsId the goods id to store */
    public void setGoodsId(Integer goodsId) {
        this.goodsId = goodsId;
    }

    /** @return the goods name, or {@code null} if it has not been set */
    public String getGoodsName() {
        return this.goodsName;
    }

    /** @param goodsName the goods name to store */
    public void setGoodsName(String goodsName) {
        this.goodsName = goodsName;
    }

    /** @return the goods price, or {@code null} if it has not been set */
    public Double getGoodsPrice() {
        return this.goodsPrice;
    }

    /** @param goodsPrice the goods price to store */
    public void setGoodsPrice(Double goodsPrice) {
        this.goodsPrice = goodsPrice;
    }

    /** @return the goods remark, or {@code null} if it has not been set */
    public String getGoodsRemark() {
        return this.goodsRemark;
    }

    /** @param goodsRemark the goods remark to store */
    public void setGoodsRemark(String goodsRemark) {
        this.goodsRemark = goodsRemark;
    }

    /**
     * Renders every property as {@code Goods [goodsId=…, goodsName=…,
     * goodsPrice=…, goodsRemark=…]}; unset properties appear as {@code null}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Goods [");
        text.append("goodsId=").append(goodsId);
        text.append(", goodsName=").append(goodsName);
        text.append(", goodsPrice=").append(goodsPrice);
        text.append(", goodsRemark=").append(goodsRemark);
        text.append("]");
        return text.toString();
    }
}
LuceneDao代码如下:
package cn.wxz.entity;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Small data-access object that stores {@link Goods} in a file-system Lucene
 * index and searches them by goods name.
 *
 * <p>Both operations are best-effort: any exception is printed to standard
 * error and not propagated, so callers never see a checked exception.
 */
public class LuceneDao {

    /** Location of the index on disk; shared by indexing and searching. */
    private static final String INDEX_DIR = "d:\\wxz\\luceneDir";

    /** Lucene compatibility version; the writer and the query parser must agree on it. */
    private static final Version LUCENE_VERSION = Version.LUCENE_30;

    /** Field the query parser searches when the query names no field. */
    private static final String DEFAULT_SEARCH_FIELD = "goodsName";

    /** Maximum number of hits fetched by {@link #selectGoods(String)}. */
    private static final int MAX_HITS = 20;

    /**
     * Adds one goods record to the index.
     *
     * <p>Steps: build the document, open the index directory, create the
     * analyzer, create the {@link IndexWriter}, add the document and commit,
     * then release the writer and the directory.
     *
     * @param goods the record to index; a {@code null} record or a record with
     *              a {@code null} property makes the operation fail, which is
     *              reported on standard error like any other failure
     */
    public void saveGoods(Goods goods) {
        Directory directory = null;
        IndexWriter indexWriter = null;
        try {
            // Build the document first so that invalid input fails before the
            // index is opened (and write-locked).
            Document document = toDocument(goods);
            directory = FSDirectory.open(new File(INDEX_DIR));
            Analyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
            indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);
            indexWriter.addDocument(document);
            indexWriter.commit();
        } catch (Exception e) {
            // Best-effort contract: report and continue.
            e.printStackTrace();
        } finally {
            closeWriter(indexWriter);
            // The directory was opened by this method, so it is released here too.
            closeDirectory(directory);
        }
    }

    /**
     * Searches the {@code goodsName} field and converts the hits to beans.
     *
     * <p>Steps: open the index directory, create an analyzer with the same
     * version used for indexing, parse the keyword into a {@link Query}, run
     * it through an {@link IndexSearcher} asking for at most {@value #MAX_HITS}
     * hits, load each hit's stored document and map it to {@link Goods}, then
     * release the searcher and the directory.
     *
     * @param keyWord query text in query-parser syntax; {@code null} or blank
     *                yields an empty result without touching the index
     * @return the matching goods in the order returned by the searcher; never
     *         {@code null}. If a failure occurs the goods collected so far
     *         (possibly none) are returned and the error is printed
     */
    public List<Goods> selectGoods(String keyWord) {
        List<Goods> list = new ArrayList<Goods>();
        if (keyWord == null || keyWord.trim().length() == 0) {
            // The query parser rejects empty input; skip the stack trace and
            // return the same empty result directly.
            return list;
        }
        Directory directory = null;
        IndexSearcher indexSearcher = null;
        try {
            directory = FSDirectory.open(new File(INDEX_DIR));
            Analyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
            QueryParser parser = new QueryParser(LUCENE_VERSION, DEFAULT_SEARCH_FIELD, analyzer);
            Query query = parser.parse(keyWord);
            indexSearcher = new IndexSearcher(directory);
            // TopDocs carries the total hit count plus the ids of the top documents.
            TopDocs topDocs = indexSearcher.search(query, MAX_HITS);
            System.out.println("总记录数:" + topDocs.totalHits);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                System.out.println("文档编号:" + scoreDoc.doc);
                // Resolve the document id to the stored document, then to a bean.
                list.add(toGoods(indexSearcher.doc(scoreDoc.doc)));
            }
        } catch (Exception e) {
            // Best-effort contract: report and return what was collected.
            e.printStackTrace();
        } finally {
            closeSearcher(indexSearcher);
            closeDirectory(directory);
        }
        return list;
    }

    /**
     * Maps a bean to a Lucene document. Every field is stored
     * ({@code Store.YES}) so it can be read back from search hits.
     * {@code Index.ANALYZED} tokenizes the value with the analyzer;
     * {@code Index.NOT_ANALYZED} indexes the whole value as a single term;
     * {@code Index.NO} would not index it at all.
     */
    private static Document toDocument(Goods goods) {
        Document document = new Document();
        // The id is an atomic key, so it is indexed as one exact term.
        document.add(new Field("goodsId", goods.getGoodsId().toString(), Store.YES, Index.NOT_ANALYZED));
        document.add(new Field("goodsName", goods.getGoodsName(), Store.YES, Index.ANALYZED));
        document.add(new Field("goodsPrice", goods.getGoodsPrice().toString(), Store.YES, Index.ANALYZED));
        document.add(new Field("goodsRemark", goods.getGoodsRemark(), Store.YES, Index.ANALYZED));
        return document;
    }

    /** Maps the stored fields of a search hit back to a bean. */
    private static Goods toGoods(Document document) {
        Goods goods = new Goods();
        goods.setGoodsId(Integer.parseInt(document.get("goodsId")));
        goods.setGoodsName(document.get("goodsName"));
        goods.setGoodsPrice(Double.parseDouble(document.get("goodsPrice")));
        goods.setGoodsRemark(document.get("goodsRemark"));
        return goods;
    }

    /** Closes the writer if it was opened, printing (not propagating) any failure. */
    private static void closeWriter(IndexWriter indexWriter) {
        if (indexWriter == null) {
            return;
        }
        try {
            indexWriter.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes the searcher if it was opened, printing (not propagating) any failure. */
    private static void closeSearcher(IndexSearcher indexSearcher) {
        if (indexSearcher == null) {
            return;
        }
        try {
            indexSearcher.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes the directory if it was opened, printing (not propagating) any failure. */
    private static void closeDirectory(Directory directory) {
        if (directory == null) {
            return;
        }
        try {
            directory.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
接下来进行测试,测试代码如下:
package cn.wxz.entity;
import static org.junit.Assert.*;
import java.util.List;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
/**
 * JUnit 4 smoke tests for {@link LuceneDao}: one test indexes three sample
 * goods, the other searches for "milk" and prints the hits.
 *
 * <p>NOTE(review): {@code testSelectGoods} only finds the sample data if
 * {@code testSaveGoods} has been run beforehand; the two are meant to be run
 * in that order.
 */
public class TestLucene {

    /** DAO under test, shared by all test methods. */
    private static LuceneDao luceneDao;

    /** Creates the shared DAO once before any test runs. */
    @BeforeClass
    public static void setUpBeforeClass() throws Exception {
        luceneDao = new LuceneDao();
    }

    /** Drops the shared DAO after the last test. */
    @AfterClass
    public static void tearDownAfterClass() throws Exception {
        luceneDao = null;
    }

    /**
     * Indexes three sample goods. Each record gets its own id; two of them
     * are named "milk" so that a search for "milk" has more than one hit.
     */
    @Test
    public void testSaveGoods() {
        luceneDao.saveGoods(newGoods(1, "milk", 20.4, "the milk is good"));
        luceneDao.saveGoods(newGoods(2, "beef", 50.3, "beef is good"));
        luceneDao.saveGoods(newGoods(3, "milk", 20.4, "the milk is good"));
    }

    /** Searches for "milk" and prints every hit. */
    @Test
    public void testSelectGoods() {
        List<Goods> list = luceneDao.selectGoods("milk");
        // selectGoods always returns the list it created, even on failure.
        assertNotNull(list);
        for (Goods goods : list) {
            System.out.println("商品信息:" + goods);
        }
    }

    /** Builds a fully populated sample bean. */
    private static Goods newGoods(int id, String name, double price, String remark) {
        Goods goods = new Goods();
        goods.setGoodsId(id);
        goods.setGoodsName(name);
        goods.setGoodsPrice(price);
        goods.setGoodsRemark(remark);
        return goods;
    }
}
依次运行testSaveGoods方法和testSelectGoods方法,在控制台可以看到如下信息: