最近在写一个项目的时候用到了lucene全文检索,为避免以后会忘记,现在将配置方案记录如下
1.导入lucene所使用的jar包,如下图
解释下每个包的作用:IKAnalyzer2012_u6.jar,这是一个中文用分词包,具有对中文进行分词的功能
lucene-analyzers-3.6.0.jar,这是lucene用于分词的包,
lucene-core-3.6.0.jar,这是lucene的核心包,具有lucene核心功能,
lucene-highlighter-3.6.0.jar,这是lucene用于高亮显示所搜索的关键词的包,
lucene-memory-3.6.0.jar,这是lucene的内存索引包,高亮显示功能依赖它
2.导入配置文件,在src目录下导入,在这里我导入到我所建立的source folder中,
即config,该目录和src是同级的
解释下上面的三个配置文件:
1.ext.dic:用于存储扩展分词的词典,比如用户根据需要定义的一些词,比如人名
2.stopword.dic:用于存储停止词的词典,比如"的","和","了","都",这些连词,感叹词,不能作为搜索的关键词
3.IKAnalyzer.cfg.xml:用于加载上面两个词典的配置文件
下面三张图,介绍了这三个配置文件的内容
这是ext.dic的配置,配置了三个分词,比如程序员,牛X,搞基
这是stopword.dic的配置,你可以加入中文的停用词,比如"都",“了”,“和”等
这是IKAnalyzer.xml配置,这里加载了ext.dic,stopword.dic配置文件
3.接下来准备三个工具类,这三个工具类需要自己来写
1.Configuration.java,用于打开和索引库的连接,和取得分词器,下面贴出代码,可以直接使用
package com.itheima.elec.util;
import java.io.File;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Holds the shared Lucene index directory and analyzer used by every
 * index operation in this project. Both are created once in a static
 * initializer and exposed through static getters.
 */
public class Configuration {

    /** Filesystem location of the Lucene index (D:/indexDir/). */
    private static Directory indexDirectory;

    /** IK analyzer providing dictionary-based Chinese word segmentation. */
    private static Analyzer ikAnalyzer;

    static {
        try {
            // The index library lives under D:/indexDir/ on the local disk.
            indexDirectory = FSDirectory.open(new File("D:/indexDir/"));
            // Dictionary-driven Chinese analyzer (IKAnalyzer).
            ikAnalyzer = new IKAnalyzer();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** @return the shared index directory. */
    public static Directory getDirectory() {
        return indexDirectory;
    }

    /** @return the shared analyzer used for add/update/delete/search. */
    public static Analyzer getAnalyzer() {
        return ikAnalyzer;
    }
}
2.FileUploadDocument.java,因为lucene是使用Document对象来存储的,所以需要将javaBean对象转换成Document(lucene中的)才能存储到索引库中
这里使用ElecFileUpload这个对象举例,请根据你的具体对象来写这个工具类,这里提供两个方法,javaBean对象转成Document对象,Document对象
转换成javaBean对象
package com.itheima.elec.util;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.util.NumericUtils;
import com.itheima.elec.domain.ElecFileUpload;
/**
 * Converts between the ElecFileUpload entity and the Lucene Document
 * stored in the index library. The primary key (seqId) is prefix-coded
 * via NumericUtils so it can be matched exactly as an un-analyzed term.
 */
public class FileUploadDocument {

    /** Turns an ElecFileUpload entity into a Lucene Document. */
    public static Document FileUploadToDocument(ElecFileUpload elecFileUpload) {
        Document doc = new Document();
        // Primary key: prefix-coded so delete/lookup can use an exact Term.
        String encodedId = NumericUtils.intToPrefixCoded(elecFileUpload.getSeqID());
        doc.add(new Field("seqId", encodedId, Store.YES, Index.NOT_ANALYZED));
        // File name and description are analyzed so they are keyword-searchable.
        doc.add(new Field("fileName", elecFileUpload.getFileName(), Store.YES, Index.ANALYZED));
        doc.add(new Field("comment", elecFileUpload.getComment(), Store.YES, Index.ANALYZED));
        // Owning unit and drawing category are filtered by exact match only.
        doc.add(new Field("projId", elecFileUpload.getProjID(), Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field("belongTo", elecFileUpload.getBelongTo(), Store.YES, Index.NOT_ANALYZED));
        return doc;
    }

    /** Turns a stored Lucene Document back into an ElecFileUpload entity. */
    public static ElecFileUpload documentToFileUpload(Document document) {
        ElecFileUpload result = new ElecFileUpload();
        // Decode the prefix-coded primary key back to an int.
        result.setSeqID(NumericUtils.prefixCodedToInt(document.get("seqId")));
        result.setFileName(document.get("fileName"));
        result.setComment(document.get("comment"));
        result.setProjID(document.get("projId"));
        result.setBelongTo(document.get("belongTo"));
        return result;
    }
}
3.LuceneUtils.java,这个类是核心工具类,支持对Lucene索引库的CRUD操作。
package com.itheima.elec.util;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.Version;
import com.itheima.elec.domain.ElecFileUpload;
/**
 * Core utility class for CRUD operations against the Lucene index library.
 * Supports adding documents, condition-based search with highlighted
 * fileName/comment fragments, and deletion by primary key.
 */
public class LuceneUtils {

    /** Maximum number of hits returned per search (top N by relevance). */
    private static final int MAX_RESULTS = 100;

    /** Size in characters of the highlight fragment / fallback substring. */
    private static final int FRAGMENT_SIZE = 20;

    /**
     * Adds one record to the index library.
     *
     * @param elecFileUpload entity to index; converted to a Lucene Document
     */
    public static void addIndex(ElecFileUpload elecFileUpload) {
        // Convert the entity to the Document representation Lucene stores.
        Document document = FileUploadDocument.FileUploadToDocument(elecFileUpload);
        try {
            // Add/update/delete/search all share the same analyzer.
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(
                    Version.LUCENE_36, Configuration.getAnalyzer());
            IndexWriter indexWriter = new IndexWriter(
                    Configuration.getDirectory(), indexWriterConfig);
            try {
                indexWriter.addDocument(document);
            } finally {
                // Always release the index write lock, even on failure.
                indexWriter.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Searches the index library. projID and belongTo are exact-match
     * filters (Occur.MUST, i.e. SQL AND); queryString is parsed against the
     * analyzed fileName and comment fields. The fileName/comment values of
     * the returned entities carry HTML highlight markup
     * (&lt;font color='red'&gt;&lt;b&gt;...&lt;/b&gt;&lt;/font&gt;) around matched keywords.
     *
     * @param projID      owning unit, exact match; skipped when blank
     * @param belongTo    drawing category, exact match; skipped when blank
     * @param queryString keywords for fileName/comment; skipped when blank
     * @return matching entities ordered by relevance score, at most 100
     */
    public static List<ElecFileUpload> searchIndexByCondition(String projID,
            String belongTo, String queryString) {
        List<ElecFileUpload> list = new ArrayList<ElecFileUpload>();
        IndexSearcher indexSearcher = null;
        try {
            // Open a read-only view of the index library.
            indexSearcher = new IndexSearcher(IndexReader.open(Configuration.getDirectory()));
            // BooleanQuery combines the individual conditions with AND.
            BooleanQuery booleanQuery = new BooleanQuery();
            // Condition 1: owning unit (exact term match).
            if (StringUtils.isNotBlank(projID)) {
                booleanQuery.add(new TermQuery(new Term("projId", projID)), Occur.MUST);
            }
            // Condition 2: drawing category (exact term match).
            if (StringUtils.isNotBlank(belongTo)) {
                booleanQuery.add(new TermQuery(new Term("belongTo", belongTo)), Occur.MUST);
            }
            // Condition 3: keyword search across fileName and comment.
            if (StringUtils.isNotBlank(queryString)) {
                QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_36,
                        new String[]{"fileName", "comment"}, Configuration.getAnalyzer());
                Query keywordQuery = queryParser.parse(queryString);
                booleanQuery.add(keywordQuery, Occur.MUST);
            }
            // Run the combined query; keep the top MAX_RESULTS hits.
            TopDocs topDocs = indexSearcher.search(booleanQuery, MAX_RESULTS);
            System.out.println("查询的总记录数:" + topDocs.totalHits);
            ScoreDoc[] scorers = topDocs.scoreDocs;

            // Highlighter wraps matched keywords in red/bold HTML markup
            // (default would be plain <b></b>).
            Formatter formatter = new SimpleHTMLFormatter("<font color='red'><b>", "</b></font>");
            // Score against the same query so only matched terms are highlighted.
            Scorer scorer = new QueryScorer(booleanQuery);
            Highlighter highlighter = new Highlighter(formatter, scorer);
            // Limit the highlighted excerpt to FRAGMENT_SIZE characters.
            highlighter.setTextFragmenter(new SimpleFragmenter(FRAGMENT_SIZE));

            if (scorers != null && scorers.length > 0) {
                for (ScoreDoc scoreDoc : scorers) {
                    System.out.println("相关都得分:" + scoreDoc.score); // higher scores come first
                    // The hit only carries the internal doc number; fetch the
                    // stored Document through it.
                    Document document = indexSearcher.doc(scoreDoc.doc);
                    // Replace fileName/comment with their highlighted (or
                    // truncated fallback) versions before converting back.
                    document.getField("fileName").setValue(
                            highlightOrTruncate(highlighter, document, "fileName"));
                    document.getField("comment").setValue(
                            highlightOrTruncate(highlighter, document, "comment"));
                    list.add(FileUploadDocument.documentToFileUpload(document));
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the reader; the original code leaked it.
            if (indexSearcher != null) {
                try {
                    indexSearcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return list;
    }

    /**
     * Returns the highlighted fragment for one field, or — when the field
     * does not match the query (getBestFragment returns null) — the stored
     * value truncated to FRAGMENT_SIZE characters.
     *
     * Fixes the original copy-paste bug where the comment branch tested
     * fileName's length before calling comment.substring (possible NPE and
     * comments were never truncated).
     */
    private static String highlightOrTruncate(Highlighter highlighter,
            Document document, String fieldName) throws Exception {
        // Only one field can be highlighted per call; null means no match.
        String value = highlighter.getBestFragment(
                Configuration.getAnalyzer(), fieldName, document.get(fieldName));
        if (StringUtils.isBlank(value)) {
            value = document.get(fieldName);
            if (value != null && value.length() > FRAGMENT_SIZE) {
                value = value.substring(0, FRAGMENT_SIZE);
            }
        }
        return value;
    }

    /**
     * Deletes the indexed document whose seqId matches the given primary key.
     *
     * @param seqID primary key of the record to remove from the index
     */
    public static void deleteIndex(int seqID) {
        // Encode the key the same way FileUploadDocument stored it.
        String seqId = NumericUtils.intToPrefixCoded(seqID);
        Term term = new Term("seqId", seqId);
        // Add/update/delete/search all share the same analyzer.
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(
                Version.LUCENE_36, Configuration.getAnalyzer());
        try {
            IndexWriter indexWriter = new IndexWriter(
                    Configuration.getDirectory(), indexWriterConfig);
            try {
                indexWriter.deleteDocuments(term);
            } finally {
                // Always release the index write lock, even on failure.
                indexWriter.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
这里写了向索引库添加数据的方法和查询索引库(同时给查询的关键词加上高亮,所以代码较多),以及删除索引库的方法
注意两个类IndexWriter和Store,Store:表示是否将数据存储到索引库的数据区域
IndexWriter:表示是否将数据更新到索引库的目录区域
4.在需要应用Lucene的地方来应用Lucene,举例
同步数据库数据到索引库中
/**
 * Queries the Lucene index library with the page's search conditions, then
 * reloads each hit from the database by primary key and copies the
 * highlighted fileName/comment fragments onto the database entity.
 *
 * @param elecFileUpload carries the page's search conditions
 * @return database entities for every index hit, with highlight markup applied
 */
public List<ElecFileUpload> findElecFileUploadByLuceneCondition(
        ElecFileUpload elecFileUpload) {
    // Result set returned to the page.
    List<ElecFileUpload> list = new ArrayList<ElecFileUpload>();
    // Search conditions posted from the page.
    String projID = elecFileUpload.getProjID();           // owning unit
    String belongTo = elecFileUpload.getBelongTo();       // drawing category
    String queryString = elecFileUpload.getQueryString(); // keyword(s)
    // Query the Lucene index first; each hit already carries highlighted
    // fileName/comment values.
    List<ElecFileUpload> elecFileUploadList =
            LuceneUtils.searchIndexByCondition(projID, belongTo, queryString);
    // For each hit, load the authoritative row from the database by seqID.
    if (elecFileUploadList != null && !elecFileUploadList.isEmpty()) {
        for (ElecFileUpload fileUpload : elecFileUploadList) {
            int seqID = fileUpload.getSeqID();
            String condition = " and o.seqID=?";
            Object[] param = {seqID};
            List<ElecFileUpload> uploads =
                    elecFileUploadDao.findElecFileUploadByCondtionalWithSql(condition, param, null);
            // The index may be stale: skip hits whose database row is gone.
            // (The original called uploads.get(0) unguarded and could throw
            // IndexOutOfBoundsException here.)
            if (uploads == null || uploads.isEmpty()) {
                continue;
            }
            // Overlay the highlighted fragments onto the database entity.
            ElecFileUpload upload = uploads.get(0);
            upload.setFileName(fileUpload.getFileName());
            upload.setComment(fileUpload.getComment());
            list.addAll(uploads);
        }
    }
    return list;
}
使用Lucene查询数据